1//===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This contains code to emit OpenMP nodes as LLVM code.
10//
11//===----------------------------------------------------------------------===//
12
13#include "CGCleanup.h"
14#include "CGDebugInfo.h"
15#include "CGOpenMPRuntime.h"
16#include "CodeGenFunction.h"
17#include "CodeGenModule.h"
18#include "CodeGenPGO.h"
19#include "TargetInfo.h"
20#include "clang/AST/ASTContext.h"
21#include "clang/AST/Attr.h"
22#include "clang/AST/DeclOpenMP.h"
23#include "clang/AST/OpenMPClause.h"
24#include "clang/AST/Stmt.h"
25#include "clang/AST/StmtOpenMP.h"
26#include "clang/AST/StmtVisitor.h"
27#include "clang/Basic/DiagnosticFrontend.h"
28#include "clang/Basic/OpenMPKinds.h"
29#include "clang/Basic/PrettyStackTrace.h"
30#include "clang/Basic/SourceManager.h"
31#include "llvm/ADT/SmallSet.h"
32#include "llvm/BinaryFormat/Dwarf.h"
33#include "llvm/Frontend/OpenMP/OMPConstants.h"
34#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
35#include "llvm/IR/Constants.h"
36#include "llvm/IR/DebugInfoMetadata.h"
37#include "llvm/IR/Instructions.h"
38#include "llvm/IR/IntrinsicInst.h"
39#include "llvm/IR/Metadata.h"
40#include "llvm/Support/AtomicOrdering.h"
41#include "llvm/Support/Debug.h"
42#include <optional>
43using namespace clang;
44using namespace CodeGen;
45using namespace llvm::omp;
46
47#define TTL_CODEGEN_TYPE "target-teams-loop-codegen"
48
49static const VarDecl *getBaseDecl(const Expr *Ref);
50static OpenMPDirectiveKind
51getEffectiveDirectiveKind(const OMPExecutableDirective &S);
52
53namespace {
54/// Lexical scope for OpenMP executable constructs, that handles correct codegen
55/// for captured expressions.
class OMPLexicalScope : public CodeGenFunction::LexicalScope {
  // Emit the pre-init declarations attached to the directive's clauses so
  // captured expressions are materialized before the associated region.
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
    for (const auto *C : S.clauses()) {
      if (const auto *CPI = OMPClauseWithPreInit::get(C)) {
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(Val: CPI->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(D: cast<VarDecl>(Val: *I));
            } else {
              // Marked as not requiring initialization: allocate the variable
              // and register its cleanups, but skip the initializer.
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(var: cast<VarDecl>(Val: *I));
              CGF.EmitAutoVarCleanups(emission: Emission);
            }
          }
        }
      }
    }
  }
  // Privatization scope for variables shared into the inlined region.
  CodeGenFunction::OMPPrivateScope InlinedShareds;

  // True if \p VD is captured by the enclosing lambda, captured statement,
  // or block literal.
  static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
    return CGF.LambdaCaptureFields.lookup(Val: VD) ||
           (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
           (isa_and_nonnull<BlockDecl>(Val: CGF.CurCodeDecl) &&
            cast<BlockDecl>(Val: CGF.CurCodeDecl)->capturesVariable(var: VD));
  }

public:
  /// \param CapturedRegion If set, re-emit addresses for the variables
  /// captured by that region of \p S and privatize them so the inlined body
  /// refers to the shared originals. If not set, only pre-inits are emitted.
  /// \param EmitPreInitStmt Whether clause pre-init declarations are emitted.
  OMPLexicalScope(
      CodeGenFunction &CGF, const OMPExecutableDirective &S,
      const std::optional<OpenMPDirectiveKind> CapturedRegion = std::nullopt,
      const bool EmitPreInitStmt = true)
      : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
        InlinedShareds(CGF) {
    if (EmitPreInitStmt)
      emitPreInitStmt(CGF, S);
    if (!CapturedRegion)
      return;
    assert(S.hasAssociatedStmt() &&
           "Expected associated statement for inlined directive.");
    const CapturedStmt *CS = S.getCapturedStmt(RegionKind: *CapturedRegion);
    for (const auto &C : CS->captures()) {
      if (C.capturesVariable() || C.capturesVariableByCopy()) {
        auto *VD = C.getCapturedVar();
        assert(VD == VD->getCanonicalDecl() &&
               "Canonical decl must be captured.");
        // Build a reference to the captured variable; RefersToEnclosing is
        // true when the variable lives outside the current function body.
        DeclRefExpr DRE(
            CGF.getContext(), const_cast<VarDecl *>(VD),
            isCapturedVar(CGF, VD) || (CGF.CapturedStmtInfo &&
                                       InlinedShareds.isGlobalVarCaptured(VD)),
            VD->getType().getNonReferenceType(), VK_LValue, C.getLocation());
        InlinedShareds.addPrivate(LocalVD: VD, Addr: CGF.EmitLValue(E: &DRE).getAddress());
      }
    }
    (void)InlinedShareds.Privatize();
  }
};
114
115/// Lexical scope for OpenMP parallel construct, that handles correct codegen
116/// for captured expressions.
117class OMPParallelScope final : public OMPLexicalScope {
118 bool EmitPreInitStmt(const OMPExecutableDirective &S) {
119 OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S);
120 return !(isOpenMPTargetExecutionDirective(DKind: EKind) ||
121 isOpenMPLoopBoundSharingDirective(Kind: EKind)) &&
122 isOpenMPParallelDirective(DKind: EKind);
123 }
124
125public:
126 OMPParallelScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
127 : OMPLexicalScope(CGF, S, /*CapturedRegion=*/std::nullopt,
128 EmitPreInitStmt(S)) {}
129};
130
131/// Lexical scope for OpenMP teams construct, that handles correct codegen
132/// for captured expressions.
133class OMPTeamsScope final : public OMPLexicalScope {
134 bool EmitPreInitStmt(const OMPExecutableDirective &S) {
135 OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S);
136 return !isOpenMPTargetExecutionDirective(DKind: EKind) &&
137 isOpenMPTeamsDirective(DKind: EKind);
138 }
139
140public:
141 OMPTeamsScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
142 : OMPLexicalScope(CGF, S, /*CapturedRegion=*/std::nullopt,
143 EmitPreInitStmt(S)) {}
144};
145
146/// Private scope for OpenMP loop-based directives, that supports capturing
147/// of used expression from loop statement.
class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
  // Emit pre-init statements for a loop-based directive: range-for helper
  // variables, temporary storage for loop counters, undef placeholders for
  // private variables, and the directive's own pre-init list.
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopBasedDirective &S) {
    const Stmt *PreInits;
    CodeGenFunction::OMPMapVars PreCondVars;
    if (auto *LD = dyn_cast<OMPLoopDirective>(Val: &S)) {
      // Emit init, __range, __begin and __end variables for C++ range loops.
      (void)OMPLoopBasedDirective::doForAllLoops(
          CurStmt: LD->getInnermostCapturedStmt()->getCapturedStmt(),
          /*TryImperfectlyNestedLoops=*/true, NumLoops: LD->getLoopsNumber(),
          Callback: [&CGF](unsigned Cnt, const Stmt *CurStmt) {
            if (const auto *CXXFor = dyn_cast<CXXForRangeStmt>(Val: CurStmt)) {
              if (const Stmt *Init = CXXFor->getInit())
                CGF.EmitStmt(S: Init);
              CGF.EmitStmt(S: CXXFor->getRangeStmt());
              CGF.EmitStmt(S: CXXFor->getBeginStmt());
              CGF.EmitStmt(S: CXXFor->getEndStmt());
            }
            return false;
          });
      // Give each loop counter a temporary address so the loop precondition
      // can be evaluated before the counters are formally privatized.
      llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
      for (const auto *E : LD->counters()) {
        const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl());
        EmittedAsPrivate.insert(V: VD->getCanonicalDecl());
        (void)PreCondVars.setVarAddr(
            CGF, LocalVD: VD, TempAddr: CGF.CreateMemTemp(T: VD->getType().getNonReferenceType()));
      }
      // Mark private vars as undefs.
      for (const auto *C : LD->getClausesOfKind<OMPPrivateClause>()) {
        for (const Expr *IRef : C->varlist()) {
          const auto *OrigVD =
              cast<VarDecl>(Val: cast<DeclRefExpr>(Val: IRef)->getDecl());
          if (EmittedAsPrivate.insert(V: OrigVD->getCanonicalDecl()).second) {
            QualType OrigVDTy = OrigVD->getType().getNonReferenceType();
            (void)PreCondVars.setVarAddr(
                CGF, LocalVD: OrigVD,
                TempAddr: Address(llvm::UndefValue::get(T: CGF.ConvertTypeForMem(
                            T: CGF.getContext().getPointerType(T: OrigVDTy))),
                        CGF.ConvertTypeForMem(T: OrigVDTy),
                        CGF.getContext().getDeclAlign(D: OrigVD)));
          }
        }
      }
      (void)PreCondVars.apply(CGF);
      PreInits = LD->getPreInits();
    } else if (const auto *Tile = dyn_cast<OMPTileDirective>(Val: &S)) {
      PreInits = Tile->getPreInits();
    } else if (const auto *Stripe = dyn_cast<OMPStripeDirective>(Val: &S)) {
      PreInits = Stripe->getPreInits();
    } else if (const auto *Unroll = dyn_cast<OMPUnrollDirective>(Val: &S)) {
      PreInits = Unroll->getPreInits();
    } else if (const auto *Reverse = dyn_cast<OMPReverseDirective>(Val: &S)) {
      PreInits = Reverse->getPreInits();
    } else if (const auto *Interchange =
                   dyn_cast<OMPInterchangeDirective>(Val: &S)) {
      PreInits = Interchange->getPreInits();
    } else {
      llvm_unreachable("Unknown loop-based directive kind.");
    }
    doEmitPreinits(PreInits);
    PreCondVars.restore(CGF);
  }

  // Emit pre-init statements for a canonical loop sequence transformation
  // directive (currently only 'fuse').
  void
  emitPreInitStmt(CodeGenFunction &CGF,
                  const OMPCanonicalLoopSequenceTransformationDirective &S) {
    const Stmt *PreInits;
    if (const auto *Fuse = dyn_cast<OMPFuseDirective>(Val: &S)) {
      PreInits = Fuse->getPreInits();
    } else {
      llvm_unreachable(
          "Unknown canonical loop sequence transform directive kind.");
    }
    doEmitPreinits(PreInits);
  }

  // Emit a (possibly compound) pre-init statement list, unpacking DeclStmts
  // so OMPCapturedExprDecls are emitted too.
  void doEmitPreinits(const Stmt *PreInits) {
    if (PreInits) {
      // CompoundStmts and DeclStmts are used as lists of PreInit statements
      // and declarations. Since declarations must be visible in the following
      // statements that use them, unpack the CompoundStmt they are nested in.
      SmallVector<const Stmt *> PreInitStmts;
      if (auto *PreInitCompound = dyn_cast<CompoundStmt>(Val: PreInits))
        llvm::append_range(C&: PreInitStmts, R: PreInitCompound->body());
      else
        PreInitStmts.push_back(Elt: PreInits);

      for (const Stmt *S : PreInitStmts) {
        // EmitStmt skips any OMPCapturedExprDecls, but needs to be emitted
        // here.
        if (auto *PreInitDecl = dyn_cast<DeclStmt>(Val: S)) {
          for (Decl *I : PreInitDecl->decls())
            CGF.EmitVarDecl(D: cast<VarDecl>(Val&: *I));
          continue;
        }
        CGF.EmitStmt(S);
      }
    }
  }

public:
  OMPLoopScope(CodeGenFunction &CGF, const OMPLoopBasedDirective &S)
      : CodeGenFunction::RunCleanupsScope(CGF) {
    emitPreInitStmt(CGF, S);
  }
  OMPLoopScope(CodeGenFunction &CGF,
               const OMPCanonicalLoopSequenceTransformationDirective &S)
      : CodeGenFunction::RunCleanupsScope(CGF) {
    emitPreInitStmt(CGF, S);
  }
};
258
// Lexical scope used when a directive is emitted in simd-only mode: emits
// clause pre-inits, use_device_ptr/use_device_addr captured decls, private
// copies, and privatizes all captured variables of the associated statement.
class OMPSimdLexicalScope : public CodeGenFunction::LexicalScope {
  CodeGenFunction::OMPPrivateScope InlinedShareds;

  // True if \p VD is captured by the enclosing lambda, captured statement,
  // or block literal.
  static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
    return CGF.LambdaCaptureFields.lookup(Val: VD) ||
           (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
           (isa_and_nonnull<BlockDecl>(Val: CGF.CurCodeDecl) &&
            cast<BlockDecl>(Val: CGF.CurCodeDecl)->capturesVariable(var: VD));
  }

public:
  OMPSimdLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
        InlinedShareds(CGF) {
    for (const auto *C : S.clauses()) {
      if (const auto *CPI = OMPClauseWithPreInit::get(C)) {
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(Val: CPI->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(D: cast<VarDecl>(Val: *I));
            } else {
              // Marked as not requiring initialization: allocate the variable
              // and register its cleanups, but skip the initializer.
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(var: cast<VarDecl>(Val: *I));
              CGF.EmitAutoVarCleanups(emission: Emission);
            }
          }
        }
      } else if (const auto *UDP = dyn_cast<OMPUseDevicePtrClause>(Val: C)) {
        for (const Expr *E : UDP->varlist()) {
          const Decl *D = cast<DeclRefExpr>(Val: E)->getDecl();
          if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(Val: D))
            CGF.EmitVarDecl(D: *OED);
        }
      } else if (const auto *UDP = dyn_cast<OMPUseDeviceAddrClause>(Val: C)) {
        for (const Expr *E : UDP->varlist()) {
          const Decl *D = getBaseDecl(Ref: E);
          if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(Val: D))
            CGF.EmitVarDecl(D: *OED);
        }
      }
    }
    if (!isOpenMPSimdDirective(DKind: getEffectiveDirectiveKind(S)))
      CGF.EmitOMPPrivateClause(D: S, PrivateScope&: InlinedShareds);
    if (const auto *TG = dyn_cast<OMPTaskgroupDirective>(Val: &S)) {
      if (const Expr *E = TG->getReductionRef())
        CGF.EmitVarDecl(D: *cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl()));
    }
    // Temp copy arrays for inscan reductions should not be emitted as they are
    // not used in simd only mode.
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> CopyArrayTemps;
    for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
      if (C->getModifier() != OMPC_REDUCTION_inscan)
        continue;
      for (const Expr *E : C->copy_array_temps())
        CopyArrayTemps.insert(V: cast<DeclRefExpr>(Val: E)->getDecl());
    }
    // Walk all (possibly nested) captured statements and privatize every
    // captured variable except the inscan copy-array temporaries.
    const auto *CS = cast_or_null<CapturedStmt>(Val: S.getAssociatedStmt());
    while (CS) {
      for (auto &C : CS->captures()) {
        if (C.capturesVariable() || C.capturesVariableByCopy()) {
          auto *VD = C.getCapturedVar();
          if (CopyArrayTemps.contains(V: VD))
            continue;
          assert(VD == VD->getCanonicalDecl() &&
                 "Canonical decl must be captured.");
          DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                          isCapturedVar(CGF, VD) ||
                              (CGF.CapturedStmtInfo &&
                               InlinedShareds.isGlobalVarCaptured(VD)),
                          VD->getType().getNonReferenceType(), VK_LValue,
                          C.getLocation());
          InlinedShareds.addPrivate(LocalVD: VD, Addr: CGF.EmitLValue(E: &DRE).getAddress());
        }
      }
      CS = dyn_cast<CapturedStmt>(Val: CS->getCapturedStmt());
    }
    (void)InlinedShareds.Privatize();
  }
};
339
340} // namespace
341
342// The loop directive with a bind clause will be mapped to a different
343// directive with corresponding semantics.
344static OpenMPDirectiveKind
345getEffectiveDirectiveKind(const OMPExecutableDirective &S) {
346 OpenMPDirectiveKind Kind = S.getDirectiveKind();
347 if (Kind != OMPD_loop)
348 return Kind;
349
350 OpenMPBindClauseKind BindKind = OMPC_BIND_unknown;
351 if (const auto *C = S.getSingleClause<OMPBindClause>())
352 BindKind = C->getBindKind();
353
354 switch (BindKind) {
355 case OMPC_BIND_parallel:
356 return OMPD_for;
357 case OMPC_BIND_teams:
358 return OMPD_distribute;
359 case OMPC_BIND_thread:
360 return OMPD_simd;
361 default:
362 return OMPD_loop;
363 }
364}
365
366static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
367 const OMPExecutableDirective &S,
368 const RegionCodeGenTy &CodeGen);
369
370LValue CodeGenFunction::EmitOMPSharedLValue(const Expr *E) {
371 if (const auto *OrigDRE = dyn_cast<DeclRefExpr>(Val: E)) {
372 if (const auto *OrigVD = dyn_cast<VarDecl>(Val: OrigDRE->getDecl())) {
373 OrigVD = OrigVD->getCanonicalDecl();
374 bool IsCaptured =
375 LambdaCaptureFields.lookup(Val: OrigVD) ||
376 (CapturedStmtInfo && CapturedStmtInfo->lookup(VD: OrigVD)) ||
377 (isa_and_nonnull<BlockDecl>(Val: CurCodeDecl));
378 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD), IsCaptured,
379 OrigDRE->getType(), VK_LValue, OrigDRE->getExprLoc());
380 return EmitLValue(E: &DRE);
381 }
382 }
383 return EmitLValue(E);
384}
385
386llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) {
387 ASTContext &C = getContext();
388 llvm::Value *Size = nullptr;
389 auto SizeInChars = C.getTypeSizeInChars(T: Ty);
390 if (SizeInChars.isZero()) {
391 // getTypeSizeInChars() returns 0 for a VLA.
392 while (const VariableArrayType *VAT = C.getAsVariableArrayType(T: Ty)) {
393 VlaSizePair VlaSize = getVLASize(vla: VAT);
394 Ty = VlaSize.Type;
395 Size =
396 Size ? Builder.CreateNUWMul(LHS: Size, RHS: VlaSize.NumElts) : VlaSize.NumElts;
397 }
398 SizeInChars = C.getTypeSizeInChars(T: Ty);
399 if (SizeInChars.isZero())
400 return llvm::ConstantInt::get(Ty: SizeTy, /*V=*/0);
401 return Builder.CreateNUWMul(LHS: Size, RHS: CGM.getSize(numChars: SizeInChars));
402 }
403 return CGM.getSize(numChars: SizeInChars);
404}
405
// Collect the values to pass to an outlined OpenMP region: VLA sizes, 'this',
// by-copy captures (cast through uintptr when not pointers), and addresses of
// by-reference captures. Walks record fields, captures, and capture inits in
// lockstep.
void CodeGenFunction::GenerateOpenMPCapturedVars(
    const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) {
  const RecordDecl *RD = S.getCapturedRecordDecl();
  auto CurField = RD->field_begin();
  auto CurCap = S.captures().begin();
  for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(),
                                                 E = S.capture_init_end();
       I != E; ++I, ++CurField, ++CurCap) {
    if (CurField->hasCapturedVLAType()) {
      // Pass the previously-computed size of this VLA dimension.
      const VariableArrayType *VAT = CurField->getCapturedVLAType();
      llvm::Value *Val = VLASizeMap[VAT->getSizeExpr()];
      CapturedVars.push_back(Elt: Val);
    } else if (CurCap->capturesThis()) {
      CapturedVars.push_back(Elt: CXXThisValue);
    } else if (CurCap->capturesVariableByCopy()) {
      llvm::Value *CV = EmitLoadOfScalar(lvalue: EmitLValue(E: *I), Loc: CurCap->getLocation());

      // If the field is not a pointer, we need to save the actual value
      // and load it as a void pointer.
      if (!CurField->getType()->isAnyPointerType()) {
        ASTContext &Ctx = getContext();
        Address DstAddr = CreateMemTemp(
            T: Ctx.getUIntPtrType(),
            Name: Twine(CurCap->getCapturedVar()->getName(), ".casted"));
        LValue DstLV = MakeAddrLValue(Addr: DstAddr, T: Ctx.getUIntPtrType());

        // View the uintptr temporary through a pointer to the field's real
        // type so the store below uses the source representation.
        llvm::Value *SrcAddrVal = EmitScalarConversion(
            Src: DstAddr.emitRawPointer(CGF&: *this),
            SrcTy: Ctx.getPointerType(T: Ctx.getUIntPtrType()),
            DstTy: Ctx.getPointerType(T: CurField->getType()), Loc: CurCap->getLocation());
        LValue SrcLV =
            MakeNaturalAlignAddrLValue(V: SrcAddrVal, T: CurField->getType());

        // Store the value using the source type pointer.
        EmitStoreThroughLValue(Src: RValue::get(V: CV), Dst: SrcLV);

        // Load the value using the destination type pointer.
        CV = EmitLoadOfScalar(lvalue: DstLV, Loc: CurCap->getLocation());
      }
      CapturedVars.push_back(Elt: CV);
    } else {
      assert(CurCap->capturesVariable() && "Expected capture by reference.");
      CapturedVars.push_back(Elt: EmitLValue(E: *I).getAddress().emitRawPointer(CGF&: *this));
    }
  }
}
452
453static Address castValueFromUintptr(CodeGenFunction &CGF, SourceLocation Loc,
454 QualType DstType, StringRef Name,
455 LValue AddrLV) {
456 ASTContext &Ctx = CGF.getContext();
457
458 llvm::Value *CastedPtr = CGF.EmitScalarConversion(
459 Src: AddrLV.getAddress().emitRawPointer(CGF), SrcTy: Ctx.getUIntPtrType(),
460 DstTy: Ctx.getPointerType(T: DstType), Loc);
461 // FIXME: should the pointee type (DstType) be passed?
462 Address TmpAddr =
463 CGF.MakeNaturalAlignAddrLValue(V: CastedPtr, T: DstType).getAddress();
464 return TmpAddr;
465}
466
467static QualType getCanonicalParamType(ASTContext &C, QualType T) {
468 if (T->isLValueReferenceType())
469 return C.getLValueReferenceType(
470 T: getCanonicalParamType(C, T: T.getNonReferenceType()),
471 /*SpelledAsLValue=*/false);
472 if (T->isPointerType())
473 return C.getPointerType(T: getCanonicalParamType(C, T: T->getPointeeType()));
474 if (const ArrayType *A = T->getAsArrayTypeUnsafe()) {
475 if (const auto *VLA = dyn_cast<VariableArrayType>(Val: A))
476 return getCanonicalParamType(C, T: VLA->getElementType());
477 if (!A->isVariablyModifiedType())
478 return C.getCanonicalType(T);
479 }
480 return C.getCanonicalParamType(T);
481}
482
483namespace {
484/// Contains required data for proper outlined function codegen.
struct FunctionOptions {
  /// Captured statement for which the function is generated.
  const CapturedStmt *S = nullptr;
  /// true if cast to/from UIntPtr is required for variables captured by
  /// value.
  const bool UIntPtrCastRequired = true;
  /// true if only casted arguments must be registered as local args or VLA
  /// sizes.
  const bool RegisterCastedArgsOnly = false;
  /// Name of the generated function.
  const StringRef FunctionName;
  /// Location of the non-debug version of the outlined function.
  SourceLocation Loc;
  /// true if the outlined function is emitted as a device (GPU) kernel.
  const bool IsDeviceKernel = false;
  // Note: RegisterCastedArgsOnly is forced to false when no uintptr casting
  // is performed, since there are no casted arguments in that case.
  explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired,
                           bool RegisterCastedArgsOnly, StringRef FunctionName,
                           SourceLocation Loc, bool IsDeviceKernel)
      : S(S), UIntPtrCastRequired(UIntPtrCastRequired),
        RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly),
        FunctionName(FunctionName), Loc(Loc), IsDeviceKernel(IsDeviceKernel) {}
};
506} // namespace
507
/// Build the declaration and prologue of the outlined function for a captured
/// statement: construct the argument list from the capture record, create the
/// llvm::Function, start its body, and map every argument back to the local
/// address / VLA size it represents.
///
/// \param Args [out] Full argument list (context params + one per capture).
/// \param LocalAddrs [out] Maps each argument to the captured VarDecl (or
///        nullptr for 'this') and its local address.
/// \param VLASizes [out] Maps VLA-size arguments to (size expr, size value).
/// \param CXXThisValue [out] Loaded 'this' value if captured, else nullptr.
/// \returns the newly created (started but not finished) function.
static llvm::Function *emitOutlinedFunctionPrologue(
    CodeGenFunction &CGF, FunctionArgList &Args,
    llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>>
        &LocalAddrs,
    llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>>
        &VLASizes,
    llvm::Value *&CXXThisValue, const FunctionOptions &FO) {
  const CapturedDecl *CD = FO.S->getCapturedDecl();
  const RecordDecl *RD = FO.S->getCapturedRecordDecl();
  assert(CD->hasBody() && "missing CapturedDecl body");

  CXXThisValue = nullptr;
  // Build the argument list: first the CapturedDecl params that precede the
  // context parameter, then one argument per captured field, then the rest.
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGM.getContext();
  FunctionArgList TargetArgs;
  Args.append(in_start: CD->param_begin(),
              in_end: std::next(x: CD->param_begin(), n: CD->getContextParamPosition()));
  TargetArgs.append(
      in_start: CD->param_begin(),
      in_end: std::next(x: CD->param_begin(), n: CD->getContextParamPosition()));
  auto I = FO.S->captures().begin();
  // When emitting with original (non-uintptr) types, create a dummy function
  // decl so the capture arguments can be real ParmVarDecls for debug info.
  FunctionDecl *DebugFunctionDecl = nullptr;
  if (!FO.UIntPtrCastRequired) {
    FunctionProtoType::ExtProtoInfo EPI;
    QualType FunctionTy = Ctx.getFunctionType(ResultTy: Ctx.VoidTy, Args: {}, EPI);
    DebugFunctionDecl = FunctionDecl::Create(
        C&: Ctx, DC: Ctx.getTranslationUnitDecl(), StartLoc: FO.S->getBeginLoc(),
        NLoc: SourceLocation(), N: DeclarationName(), T: FunctionTy,
        TInfo: Ctx.getTrivialTypeSourceInfo(T: FunctionTy), SC: SC_Static,
        /*UsesFPIntrin=*/false, /*isInlineSpecified=*/false,
        /*hasWrittenPrototype=*/false);
  }
  for (const FieldDecl *FD : RD->fields()) {
    QualType ArgType = FD->getType();
    IdentifierInfo *II = nullptr;
    VarDecl *CapVar = nullptr;

    // If this is a capture by copy and the type is not a pointer, the outlined
    // function argument type should be uintptr and the value properly casted to
    // uintptr. This is necessary given that the runtime library is only able to
    // deal with pointers. We can pass in the same way the VLA type sizes to the
    // outlined function.
    if (FO.UIntPtrCastRequired &&
        ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) ||
         I->capturesVariableArrayType()))
      ArgType = Ctx.getUIntPtrType();

    if (I->capturesVariable() || I->capturesVariableByCopy()) {
      CapVar = I->getCapturedVar();
      II = CapVar->getIdentifier();
    } else if (I->capturesThis()) {
      II = &Ctx.Idents.get(Name: "this");
    } else {
      assert(I->capturesVariableArrayType());
      II = &Ctx.Idents.get(Name: "vla");
    }
    if (ArgType->isVariablyModifiedType())
      ArgType = getCanonicalParamType(C&: Ctx, T: ArgType);
    VarDecl *Arg;
    if (CapVar && (CapVar->getTLSKind() != clang::VarDecl::TLS_None)) {
      Arg = ImplicitParamDecl::Create(C&: Ctx, /*DC=*/nullptr, IdLoc: FD->getLocation(),
                                      Id: II, T: ArgType,
                                      ParamKind: ImplicitParamKind::ThreadPrivateVar);
    } else if (DebugFunctionDecl && (CapVar || I->capturesThis())) {
      Arg = ParmVarDecl::Create(
          C&: Ctx, DC: DebugFunctionDecl,
          StartLoc: CapVar ? CapVar->getBeginLoc() : FD->getBeginLoc(),
          IdLoc: CapVar ? CapVar->getLocation() : FD->getLocation(), Id: II, T: ArgType,
          /*TInfo=*/nullptr, S: SC_None, /*DefArg=*/nullptr);
    } else {
      Arg = ImplicitParamDecl::Create(C&: Ctx, /*DC=*/nullptr, IdLoc: FD->getLocation(),
                                      Id: II, T: ArgType, ParamKind: ImplicitParamKind::Other);
    }
    Args.emplace_back(Args&: Arg);
    // Do not cast arguments if we emit function with non-original types.
    TargetArgs.emplace_back(
        Args: FO.UIntPtrCastRequired
            ? Arg
            : CGM.getOpenMPRuntime().translateParameter(FD, NativeParam: Arg));
    ++I;
  }
  Args.append(in_start: std::next(x: CD->param_begin(), n: CD->getContextParamPosition() + 1),
              in_end: CD->param_end());
  TargetArgs.append(
      in_start: std::next(x: CD->param_begin(), n: CD->getContextParamPosition() + 1),
      in_end: CD->param_end());

  // Create the function declaration.
  const CGFunctionInfo &FuncInfo =
      FO.IsDeviceKernel
          ? CGM.getTypes().arrangeDeviceKernelCallerDeclaration(resultType: Ctx.VoidTy,
                                                                args: TargetArgs)
          : CGM.getTypes().arrangeBuiltinFunctionDeclaration(resultType: Ctx.VoidTy,
                                                             args: TargetArgs);
  llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(Info: FuncInfo);

  auto *F =
      llvm::Function::Create(Ty: FuncLLVMTy, Linkage: llvm::GlobalValue::InternalLinkage,
                             N: FO.FunctionName, M: &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GD: CD, F, FI: FuncInfo);
  if (CD->isNothrow())
    F->setDoesNotThrow();
  F->setDoesNotRecurse();

  // Always inline the outlined function if optimizations are enabled.
  if (CGM.getCodeGenOpts().OptimizationLevel != 0) {
    F->removeFnAttr(Kind: llvm::Attribute::NoInline);
    F->addFnAttr(Kind: llvm::Attribute::AlwaysInline);
  }
  if (!CGM.getCodeGenOpts().SampleProfileFile.empty())
    F->addFnAttr(Kind: "sample-profile-suffix-elision-policy", Val: "selected");

  // Generate the function.
  CGF.StartFunction(GD: CD, RetTy: Ctx.VoidTy, Fn: F, FnInfo: FuncInfo, Args: TargetArgs,
                    Loc: FO.UIntPtrCastRequired ? FO.Loc : FO.S->getBeginLoc(),
                    StartLoc: FO.UIntPtrCastRequired ? FO.Loc
                                             : CD->getBody()->getBeginLoc());
  // Second pass over the captures: record local addresses and VLA sizes for
  // each emitted argument.
  unsigned Cnt = CD->getContextParamPosition();
  I = FO.S->captures().begin();
  for (const FieldDecl *FD : RD->fields()) {
    // Do not map arguments if we emit function with non-original types.
    Address LocalAddr(Address::invalid());
    if (!FO.UIntPtrCastRequired && Args[Cnt] != TargetArgs[Cnt]) {
      LocalAddr = CGM.getOpenMPRuntime().getParameterAddress(CGF, NativeParam: Args[Cnt],
                                                             TargetParam: TargetArgs[Cnt]);
    } else {
      LocalAddr = CGF.GetAddrOfLocalVar(VD: Args[Cnt]);
    }
    // If we are capturing a pointer by copy we don't need to do anything, just
    // use the value that we get from the arguments.
    if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
      const VarDecl *CurVD = I->getCapturedVar();
      if (!FO.RegisterCastedArgsOnly)
        LocalAddrs.insert(KV: {Args[Cnt], {CurVD, LocalAddr}});
      ++Cnt;
      ++I;
      continue;
    }

    LValue ArgLVal = CGF.MakeAddrLValue(Addr: LocalAddr, T: Args[Cnt]->getType(),
                                        Source: AlignmentSource::Decl);
    if (FD->hasCapturedVLAType()) {
      // A VLA size was passed (possibly as uintptr): load it and remember it
      // keyed by the size expression.
      if (FO.UIntPtrCastRequired) {
        ArgLVal = CGF.MakeAddrLValue(
            Addr: castValueFromUintptr(CGF, Loc: I->getLocation(), DstType: FD->getType(),
                                 Name: Args[Cnt]->getName(), AddrLV: ArgLVal),
            T: FD->getType(), Source: AlignmentSource::Decl);
      }
      llvm::Value *ExprArg = CGF.EmitLoadOfScalar(lvalue: ArgLVal, Loc: I->getLocation());
      const VariableArrayType *VAT = FD->getCapturedVLAType();
      VLASizes.try_emplace(Key: Args[Cnt], Args: VAT->getSizeExpr(), Args&: ExprArg);
    } else if (I->capturesVariable()) {
      // Capture by reference: dereference to obtain the variable's address.
      const VarDecl *Var = I->getCapturedVar();
      QualType VarTy = Var->getType();
      Address ArgAddr = ArgLVal.getAddress();
      if (ArgLVal.getType()->isLValueReferenceType()) {
        ArgAddr = CGF.EmitLoadOfReference(RefLVal: ArgLVal);
      } else if (!VarTy->isVariablyModifiedType() || !VarTy->isPointerType()) {
        assert(ArgLVal.getType()->isPointerType());
        ArgAddr = CGF.EmitLoadOfPointer(
            Ptr: ArgAddr, PtrTy: ArgLVal.getType()->castAs<PointerType>());
      }
      if (!FO.RegisterCastedArgsOnly) {
        LocalAddrs.insert(
            KV: {Args[Cnt], {Var, ArgAddr.withAlignment(NewAlignment: Ctx.getDeclAlign(D: Var))}});
      }
    } else if (I->capturesVariableByCopy()) {
      assert(!FD->getType()->isAnyPointerType() &&
             "Not expecting a captured pointer.");
      const VarDecl *Var = I->getCapturedVar();
      LocalAddrs.insert(KV: {Args[Cnt],
                       {Var, FO.UIntPtrCastRequired
                                 ? castValueFromUintptr(
                                       CGF, Loc: I->getLocation(), DstType: FD->getType(),
                                       Name: Args[Cnt]->getName(), AddrLV: ArgLVal)
                                 : ArgLVal.getAddress()}});
    } else {
      // If 'this' is captured, load it into CXXThisValue.
      assert(I->capturesThis());
      CXXThisValue = CGF.EmitLoadOfScalar(lvalue: ArgLVal, Loc: I->getLocation());
      LocalAddrs.insert(KV: {Args[Cnt], {nullptr, ArgLVal.getAddress()}});
    }
    ++Cnt;
    ++I;
  }

  return F;
}
697
/// Generate the outlined function for a captured OpenMP statement. With debug
/// info enabled, a "_debug__" variant with original parameter types is
/// emitted and wrapped by a uintptr-typed function that forwards to it;
/// without debug info a single function is emitted directly.
llvm::Function *CodeGenFunction::GenerateOpenMPCapturedStmtFunction(
    const CapturedStmt &S, const OMPExecutableDirective &D) {
  SourceLocation Loc = D.getBeginLoc();
  assert(
      CapturedStmtInfo &&
      "CapturedStmtInfo should be set when generating the captured function");
  const CapturedDecl *CD = S.getCapturedDecl();
  // Build the argument list.
  bool NeedWrapperFunction =
      getDebugInfo() && CGM.getCodeGenOpts().hasReducedDebugInfo();
  FunctionArgList Args, WrapperArgs;
  llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs,
      WrapperLocalAddrs;
  llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes,
      WrapperVLASizes;
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << CapturedStmtInfo->getHelperName();
  OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S: D);
  // The outermost captured statement of a target directive on the GPU is
  // emitted as a device kernel.
  bool IsDeviceKernel = CGM.getOpenMPRuntime().isGPU() &&
                        isOpenMPTargetExecutionDirective(DKind: EKind) &&
                        D.getCapturedStmt(RegionKind: OMPD_target) == &S;
  CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true);
  llvm::Function *WrapperF = nullptr;
  if (NeedWrapperFunction) {
    // Emit the final kernel early to allow attributes to be added by the
    // OpenMP-IR-Builder.
    FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true,
                              /*RegisterCastedArgsOnly=*/true,
                              CapturedStmtInfo->getHelperName(), Loc,
                              IsDeviceKernel);
    WrapperCGF.CapturedStmtInfo = CapturedStmtInfo;
    WrapperF =
        emitOutlinedFunctionPrologue(CGF&: WrapperCGF, Args, LocalAddrs, VLASizes,
                                     CXXThisValue&: WrapperCGF.CXXThisValue, FO: WrapperFO);
    // The debug variant gets a suffixed name; the wrapper keeps the original.
    Out << "_debug__";
  }
  FunctionOptions FO(&S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false,
                     Out.str(), Loc, !NeedWrapperFunction && IsDeviceKernel);
  llvm::Function *F = emitOutlinedFunctionPrologue(
      CGF&: *this, Args&: WrapperArgs, LocalAddrs&: WrapperLocalAddrs, VLASizes&: WrapperVLASizes, CXXThisValue, FO);
  // Privatize the captured variables and record VLA sizes before emitting
  // the captured body.
  CodeGenFunction::OMPPrivateScope LocalScope(*this);
  for (const auto &LocalAddrPair : WrapperLocalAddrs) {
    if (LocalAddrPair.second.first) {
      LocalScope.addPrivate(LocalVD: LocalAddrPair.second.first,
                            Addr: LocalAddrPair.second.second);
    }
  }
  (void)LocalScope.Privatize();
  for (const auto &VLASizePair : WrapperVLASizes)
    VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second;
  PGO->assignRegionCounters(GD: GlobalDecl(CD), Fn: F);
  CapturedStmtInfo->EmitBody(CGF&: *this, S: CD->getBody());
  LocalScope.ForceCleanup();
  FinishFunction(EndLoc: CD->getBodyRBrace());
  if (!NeedWrapperFunction)
    return F;

  // Reverse the order: place the wrapper right after the debug function in
  // the module's function list.
  WrapperF->removeFromParent();
  F->getParent()->getFunctionList().insertAfter(where: F->getIterator(), New: WrapperF);

  // Emit the wrapper body: load each argument (local address, VLA size, or
  // plain parameter) and forward it to the debug function.
  llvm::SmallVector<llvm::Value *, 4> CallArgs;
  auto *PI = F->arg_begin();
  for (const auto *Arg : Args) {
    llvm::Value *CallArg;
    auto I = LocalAddrs.find(Key: Arg);
    if (I != LocalAddrs.end()) {
      LValue LV = WrapperCGF.MakeAddrLValue(
          Addr: I->second.second,
          T: I->second.first ? I->second.first->getType() : Arg->getType(),
          Source: AlignmentSource::Decl);
      if (LV.getType()->isAnyComplexType())
        LV.setAddress(LV.getAddress().withElementType(ElemTy: PI->getType()));
      CallArg = WrapperCGF.EmitLoadOfScalar(lvalue: LV, Loc: S.getBeginLoc());
    } else {
      auto EI = VLASizes.find(Val: Arg);
      if (EI != VLASizes.end()) {
        CallArg = EI->second.second;
      } else {
        LValue LV =
            WrapperCGF.MakeAddrLValue(Addr: WrapperCGF.GetAddrOfLocalVar(VD: Arg),
                                      T: Arg->getType(), Source: AlignmentSource::Decl);
        CallArg = WrapperCGF.EmitLoadOfScalar(lvalue: LV, Loc: S.getBeginLoc());
      }
    }
    CallArgs.emplace_back(Args: WrapperCGF.EmitFromMemory(Value: CallArg, Ty: Arg->getType()));
    ++PI;
  }
  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF&: WrapperCGF, Loc, OutlinedFn: F, Args: CallArgs);
  WrapperCGF.FinishFunction();
  return WrapperF;
}
791
792//===----------------------------------------------------------------------===//
793// OpenMP Directive Emission
794//===----------------------------------------------------------------------===//
/// Emit an element-by-element copy loop between two arrays of the same
/// (possibly variably-modified) array type.
///
/// \param DestAddr     Address of the destination array.
/// \param SrcAddr      Address of the source array.
/// \param OriginalType The array type being copied; it is drilled down to its
///        base element type to obtain the element count and size.
/// \param CopyGen      Callback that emits the copy of one element, given the
///        (destination element address, source element address) pair.
void CodeGenFunction::EmitOMPAggregateAssign(
    Address DestAddr, Address SrcAddr, QualType OriginalType,
    const llvm::function_ref<void(Address, Address)> CopyGen) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = OriginalType->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = emitArrayLength(arrayType: ArrayTy, baseType&: ElementTy, addr&: DestAddr);
  SrcAddr = SrcAddr.withElementType(ElemTy: DestAddr.getElementType());

  llvm::Value *SrcBegin = SrcAddr.emitRawPointer(CGF&: *this);
  llvm::Value *DestBegin = DestAddr.emitRawPointer(CGF&: *this);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd = Builder.CreateInBoundsGEP(Ty: DestAddr.getElementType(),
                                                   Ptr: DestBegin, IdxList: NumElements);

  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = createBasicBlock(name: "omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = createBasicBlock(name: "omp.arraycpy.done");
  llvm::Value *IsEmpty =
      Builder.CreateICmpEQ(LHS: DestBegin, RHS: DestEnd, Name: "omp.arraycpy.isempty");
  // Skip the loop entirely for a zero-length array.
  Builder.CreateCondBr(Cond: IsEmpty, True: DoneBB, False: BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = Builder.GetInsertBlock();
  EmitBlock(BB: BodyBB);

  CharUnits ElementSize = getContext().getTypeSizeInChars(T: ElementTy);

  // PHI nodes track the current source/destination element; they start at the
  // array beginnings and are advanced at the bottom of the loop.
  llvm::PHINode *SrcElementPHI =
      Builder.CreatePHI(Ty: SrcBegin->getType(), NumReservedValues: 2, Name: "omp.arraycpy.srcElementPast");
  SrcElementPHI->addIncoming(V: SrcBegin, BB: EntryBB);
  Address SrcElementCurrent =
      Address(SrcElementPHI, SrcAddr.getElementType(),
              SrcAddr.getAlignment().alignmentOfArrayElement(elementSize: ElementSize));

  llvm::PHINode *DestElementPHI = Builder.CreatePHI(
      Ty: DestBegin->getType(), NumReservedValues: 2, Name: "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(V: DestBegin, BB: EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI, DestAddr.getElementType(),
              DestAddr.getAlignment().alignmentOfArrayElement(elementSize: ElementSize));

  // Emit copy.
  CopyGen(DestElementCurrent, SrcElementCurrent);

  // Shift the address forward by one element.
  llvm::Value *DestElementNext =
      Builder.CreateConstGEP1_32(Ty: DestAddr.getElementType(), Ptr: DestElementPHI,
                                 /*Idx0=*/1, Name: "omp.arraycpy.dest.element");
  llvm::Value *SrcElementNext =
      Builder.CreateConstGEP1_32(Ty: SrcAddr.getElementType(), Ptr: SrcElementPHI,
                                 /*Idx0=*/1, Name: "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      Builder.CreateICmpEQ(LHS: DestElementNext, RHS: DestEnd, Name: "omp.arraycpy.done");
  Builder.CreateCondBr(Cond: Done, True: DoneBB, False: BodyBB);
  // The back-edge PHI inputs come from whatever block CopyGen left current,
  // which need not be BodyBB itself if the copy emitted control flow.
  DestElementPHI->addIncoming(V: DestElementNext, BB: Builder.GetInsertBlock());
  SrcElementPHI->addIncoming(V: SrcElementNext, BB: Builder.GetInsertBlock());

  // Done.
  EmitBlock(BB: DoneBB, /*IsFinished=*/true);
}
859
860void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr,
861 Address SrcAddr, const VarDecl *DestVD,
862 const VarDecl *SrcVD, const Expr *Copy) {
863 if (OriginalType->isArrayType()) {
864 const auto *BO = dyn_cast<BinaryOperator>(Val: Copy);
865 if (BO && BO->getOpcode() == BO_Assign) {
866 // Perform simple memcpy for simple copying.
867 LValue Dest = MakeAddrLValue(Addr: DestAddr, T: OriginalType);
868 LValue Src = MakeAddrLValue(Addr: SrcAddr, T: OriginalType);
869 EmitAggregateAssign(Dest, Src, EltTy: OriginalType);
870 } else {
871 // For arrays with complex element types perform element by element
872 // copying.
873 EmitOMPAggregateAssign(
874 DestAddr, SrcAddr, OriginalType,
875 CopyGen: [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) {
876 // Working with the single array element, so have to remap
877 // destination and source variables to corresponding array
878 // elements.
879 CodeGenFunction::OMPPrivateScope Remap(*this);
880 Remap.addPrivate(LocalVD: DestVD, Addr: DestElement);
881 Remap.addPrivate(LocalVD: SrcVD, Addr: SrcElement);
882 (void)Remap.Privatize();
883 EmitIgnoredExpr(E: Copy);
884 });
885 }
886 } else {
887 // Remap pseudo source variable to private copy.
888 CodeGenFunction::OMPPrivateScope Remap(*this);
889 Remap.addPrivate(LocalVD: SrcVD, Addr: SrcAddr);
890 Remap.addPrivate(LocalVD: DestVD, Addr: DestAddr);
891 (void)Remap.Privatize();
892 // Emit copying of the whole variable.
893 EmitIgnoredExpr(E: Copy);
894 }
895}
896
/// Emit the private copies for all 'firstprivate' clauses of directive \p D
/// and register them in \p PrivateScope.
///
/// \returns true when at least one firstprivate variable is also lastprivate
/// and a copy was actually emitted, so the caller must arrange for the final
/// lastprivate write-back.
bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
                                                OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S: D);
  bool DeviceConstTarget = getLangOpts().OpenMPIsTargetDevice &&
                           isOpenMPTargetExecutionDirective(DKind: EKind);
  bool FirstprivateIsLastprivate = false;
  // Map each lastprivate variable to its modifier so variables that are both
  // firstprivate and lastprivate can be recognized below.
  llvm::DenseMap<const VarDecl *, OpenMPLastprivateModifier> Lastprivates;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    for (const auto *D : C->varlist())
      Lastprivates.try_emplace(
          Key: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: D)->getDecl())->getCanonicalDecl(),
          Args: C->getKind());
  }
  llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
  llvm::SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
  getOpenMPCaptureRegions(CaptureRegions, DKind: EKind);
  // Force emission of the firstprivate copy if the directive does not emit
  // outlined function, like omp for, omp simd, omp distribute etc.
  bool MustEmitFirstprivateCopy =
      CaptureRegions.size() == 1 && CaptureRegions.back() == OMPD_unknown;
  for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
    // IRef/InitsRef advance in lockstep with the private_copies() iteration.
    const auto *IRef = C->varlist_begin();
    const auto *InitsRef = C->inits().begin();
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IRef)->getDecl());
      bool ThisFirstprivateIsLastprivate =
          Lastprivates.count(Val: OrigVD->getCanonicalDecl()) > 0;
      const FieldDecl *FD = CapturedStmtInfo->lookup(VD: OrigVD);
      const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: IInit)->getDecl());
      // Captured-by-value variables in an outlined region are already private
      // copies; no extra copy is needed unless forced (see above) or the
      // variable is also lastprivate / specially allocated.
      if (!MustEmitFirstprivateCopy && !ThisFirstprivateIsLastprivate && FD &&
          !FD->getType()->isReferenceType() &&
          (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) {
        EmittedAsFirstprivate.insert(V: OrigVD->getCanonicalDecl());
        ++IRef;
        ++InitsRef;
        continue;
      }
      // Do not emit copy for firstprivate constant variables in target regions,
      // captured by reference.
      if (DeviceConstTarget && OrigVD->getType().isConstant(Ctx: getContext()) &&
          FD && FD->getType()->isReferenceType() &&
          (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) {
        EmittedAsFirstprivate.insert(V: OrigVD->getCanonicalDecl());
        ++IRef;
        ++InitsRef;
        continue;
      }
      FirstprivateIsLastprivate =
          FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate;
      // Emit each variable only once even if it appears in several clauses.
      if (EmittedAsFirstprivate.insert(V: OrigVD->getCanonicalDecl()).second) {
        const auto *VDInit =
            cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *InitsRef)->getDecl());
        bool IsRegistered;
        DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
                        /*RefersToEnclosingVariableOrCapture=*/FD != nullptr,
                        (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
        LValue OriginalLVal;
        if (!FD) {
          // Check if the firstprivate variable is just a constant value.
          ConstantEmission CE = tryEmitAsConstant(RefExpr: &DRE);
          if (CE && !CE.isReference()) {
            // Constant value, no need to create a copy.
            ++IRef;
            ++InitsRef;
            continue;
          }
          if (CE && CE.isReference()) {
            OriginalLVal = CE.getReferenceLValue(CGF&: *this, RefExpr: &DRE);
          } else {
            assert(!CE && "Expected non-constant firstprivate.");
            OriginalLVal = EmitLValue(E: &DRE);
          }
        } else {
          OriginalLVal = EmitLValue(E: &DRE);
        }
        QualType Type = VD->getType();
        if (Type->isArrayType()) {
          // Emit VarDecl with copy init for arrays.
          // Get the address of the original variable captured in current
          // captured region.
          AutoVarEmission Emission = EmitAutoVarAlloca(var: *VD);
          const Expr *Init = VD->getInit();
          if (!isa<CXXConstructExpr>(Val: Init) || isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            LValue Dest = MakeAddrLValue(Addr: Emission.getAllocatedAddress(), T: Type);
            EmitAggregateAssign(Dest, Src: OriginalLVal, EltTy: Type);
          } else {
            EmitOMPAggregateAssign(
                DestAddr: Emission.getAllocatedAddress(), SrcAddr: OriginalLVal.getAddress(), OriginalType: Type,
                CopyGen: [this, VDInit, Init](Address DestElement, Address SrcElement) {
                  // Clean up any temporaries needed by the
                  // initialization.
                  RunCleanupsScope InitScope(*this);
                  // Emit initialization for single element.
                  // Temporarily bind VDInit to the current source element so
                  // the init expression reads from it, then remove the
                  // binding again.
                  setAddrOfLocalVar(VD: VDInit, Addr: SrcElement);
                  EmitAnyExprToMem(E: Init, Location: DestElement,
                                   Quals: Init->getType().getQualifiers(),
                                   /*IsInitializer*/ false);
                  LocalDeclMap.erase(Val: VDInit);
                });
          }
          EmitAutoVarCleanups(emission: Emission);
          IsRegistered =
              PrivateScope.addPrivate(LocalVD: OrigVD, Addr: Emission.getAllocatedAddress());
        } else {
          Address OriginalAddr = OriginalLVal.getAddress();
          // Emit private VarDecl with copy init.
          // Remap temp VDInit variable to the address of the original
          // variable (for proper handling of captured global variables).
          setAddrOfLocalVar(VD: VDInit, Addr: OriginalAddr);
          EmitDecl(D: *VD);
          LocalDeclMap.erase(Val: VDInit);
          Address VDAddr = GetAddrOfLocalVar(VD);
          if (ThisFirstprivateIsLastprivate &&
              Lastprivates[OrigVD->getCanonicalDecl()] ==
                  OMPC_LASTPRIVATE_conditional) {
            // Create/init special variable for lastprivate conditionals.
            // The just-initialized value is reloaded and stored into the
            // runtime-managed conditional-lastprivate location, which then
            // replaces the plain private copy in the local decl map.
            llvm::Value *V =
                EmitLoadOfScalar(lvalue: MakeAddrLValue(Addr: VDAddr, T: (*IRef)->getType(),
                                                 Source: AlignmentSource::Decl),
                                 Loc: (*IRef)->getExprLoc());
            VDAddr = CGM.getOpenMPRuntime().emitLastprivateConditionalInit(
                CGF&: *this, VD: OrigVD);
            EmitStoreOfScalar(value: V, lvalue: MakeAddrLValue(Addr: VDAddr, T: (*IRef)->getType(),
                                                     Source: AlignmentSource::Decl));
            LocalDeclMap.erase(Val: VD);
            setAddrOfLocalVar(VD, Addr: VDAddr);
          }
          IsRegistered = PrivateScope.addPrivate(LocalVD: OrigVD, Addr: VDAddr);
        }
        assert(IsRegistered &&
               "firstprivate var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
      ++InitsRef;
    }
  }
  return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty();
}
1040
1041void CodeGenFunction::EmitOMPPrivateClause(
1042 const OMPExecutableDirective &D,
1043 CodeGenFunction::OMPPrivateScope &PrivateScope) {
1044 if (!HaveInsertPoint())
1045 return;
1046 llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
1047 for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) {
1048 auto IRef = C->varlist_begin();
1049 for (const Expr *IInit : C->private_copies()) {
1050 const auto *OrigVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IRef)->getDecl());
1051 if (EmittedAsPrivate.insert(V: OrigVD->getCanonicalDecl()).second) {
1052 const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: IInit)->getDecl());
1053 EmitDecl(D: *VD);
1054 // Emit private VarDecl with copy init.
1055 bool IsRegistered =
1056 PrivateScope.addPrivate(LocalVD: OrigVD, Addr: GetAddrOfLocalVar(VD));
1057 assert(IsRegistered && "private var already registered as private");
1058 // Silence the warning about unused variable.
1059 (void)IsRegistered;
1060 }
1061 ++IRef;
1062 }
1063 }
1064}
1065
/// Emit the copy of threadprivate master values into the per-thread copies
/// for all 'copyin' clauses of directive \p D.
///
/// \returns true if any copyin code was emitted (so the caller must emit the
/// trailing barrier), false otherwise.
bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
  if (!HaveInsertPoint())
    return false;
  // threadprivate_var1 = master_threadprivate_var1;
  // operator=(threadprivate_var2, master_threadprivate_var2);
  // ...
  // __kmpc_barrier(&loc, global_tid);
  llvm::DenseSet<const VarDecl *> CopiedVars;
  llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
  for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) {
    // IRef/ISrcRef/IDestRef advance in lockstep with assignment_ops().
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (const Expr *AssignOp : C->assignment_ops()) {
      const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IRef)->getDecl());
      QualType Type = VD->getType();
      if (CopiedVars.insert(V: VD->getCanonicalDecl()).second) {
        // Get the address of the master variable. If we are emitting code with
        // TLS support, the address is passed from the master as field in the
        // captured declaration.
        Address MasterAddr = Address::invalid();
        if (getLangOpts().OpenMPUseTLS &&
            getContext().getTargetInfo().isTLSSupported()) {
          assert(CapturedStmtInfo->lookup(VD) &&
                 "Copyin threadprivates should have been captured!");
          DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD), true,
                          (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
          MasterAddr = EmitLValue(E: &DRE).getAddress();
          // Remove the temporary mapping created by EmitLValue above.
          LocalDeclMap.erase(Val: VD);
        } else {
          MasterAddr =
              Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(D: VD)
                                          : CGM.GetAddrOfGlobal(GD: VD),
                      CGM.getTypes().ConvertTypeForMem(T: VD->getType()),
                      getContext().getDeclAlign(D: VD));
        }
        // Get the address of the threadprivate variable.
        Address PrivateAddr = EmitLValue(E: *IRef).getAddress();
        if (CopiedVars.size() == 1) {
          // At first check if current thread is a master thread. If it is, no
          // need to copy data.
          CopyBegin = createBasicBlock(name: "copyin.not.master");
          CopyEnd = createBasicBlock(name: "copyin.not.master.end");
          // TODO: Avoid ptrtoint conversion.
          // On the master thread the threadprivate address equals the master
          // address, so a pointer comparison guards the whole copy region.
          auto *MasterAddrInt = Builder.CreatePtrToInt(
              V: MasterAddr.emitRawPointer(CGF&: *this), DestTy: CGM.IntPtrTy);
          auto *PrivateAddrInt = Builder.CreatePtrToInt(
              V: PrivateAddr.emitRawPointer(CGF&: *this), DestTy: CGM.IntPtrTy);
          Builder.CreateCondBr(
              Cond: Builder.CreateICmpNE(LHS: MasterAddrInt, RHS: PrivateAddrInt), True: CopyBegin,
              False: CopyEnd);
          EmitBlock(BB: CopyBegin);
        }
        const auto *SrcVD =
            cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *ISrcRef)->getDecl());
        const auto *DestVD =
            cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IDestRef)->getDecl());
        EmitOMPCopy(OriginalType: Type, DestAddr: PrivateAddr, SrcAddr: MasterAddr, DestVD, SrcVD, Copy: AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
  }
  if (CopyEnd) {
    // Exit out of copying procedure for non-master thread.
    EmitBlock(BB: CopyEnd, /*IsFinished=*/true);
    return true;
  }
  return false;
}
1137
/// Emit initialization for the private copies of 'lastprivate' variables of
/// directive \p D and register both the private copies and the original
/// destinations in \p PrivateScope.
///
/// \returns true if the directive has at least one lastprivate clause, so the
/// caller must emit the final write-back.
bool CodeGenFunction::EmitOMPLastprivateClauseInit(
    const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool HasAtLeastOneLastprivate = false;
  OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S: D);
  // For simd directives, collect the loop control variables: they are
  // privatized by the simd codegen itself and need no copy here.
  llvm::DenseSet<const VarDecl *> SIMDLCVs;
  if (isOpenMPSimdDirective(DKind: EKind)) {
    const auto *LoopDirective = cast<OMPLoopDirective>(Val: &D);
    for (const Expr *C : LoopDirective->counters()) {
      SIMDLCVs.insert(
          V: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: C)->getDecl())->getCanonicalDecl());
    }
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    HasAtLeastOneLastprivate = true;
    if (isOpenMPTaskLoopDirective(DKind: EKind) && !getLangOpts().OpenMPSimd)
      break;
    // IRef/IDestRef advance in lockstep with private_copies().
    const auto *IRef = C->varlist_begin();
    const auto *IDestRef = C->destination_exprs().begin();
    for (const Expr *IInit : C->private_copies()) {
      // Keep the address of the original variable for future update at the end
      // of the loop.
      const auto *OrigVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IRef)->getDecl());
      // Taskloops do not require additional initialization, it is done in
      // runtime support library.
      if (AlreadyEmittedVars.insert(V: OrigVD->getCanonicalDecl()).second) {
        const auto *DestVD =
            cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IDestRef)->getDecl());
        DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
                        /*RefersToEnclosingVariableOrCapture=*/
                        CapturedStmtInfo->lookup(VD: OrigVD) != nullptr,
                        (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
        // Bind the pseudo destination variable to the original's address so
        // the final copy-back (EmitOMPLastprivateClauseFinal) can find it.
        PrivateScope.addPrivate(LocalVD: DestVD, Addr: EmitLValue(E: &DRE).getAddress());
        // Check if the variable is also a firstprivate: in this case IInit is
        // not generated. Initialization of this variable will happen in codegen
        // for 'firstprivate' clause.
        if (IInit && !SIMDLCVs.count(V: OrigVD->getCanonicalDecl())) {
          const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: IInit)->getDecl());
          Address VDAddr = Address::invalid();
          if (C->getKind() == OMPC_LASTPRIVATE_conditional) {
            // Conditional lastprivates live in runtime-managed storage.
            VDAddr = CGM.getOpenMPRuntime().emitLastprivateConditionalInit(
                CGF&: *this, VD: OrigVD);
            setAddrOfLocalVar(VD, Addr: VDAddr);
          } else {
            // Emit private VarDecl with copy init.
            EmitDecl(D: *VD);
            VDAddr = GetAddrOfLocalVar(VD);
          }
          bool IsRegistered = PrivateScope.addPrivate(LocalVD: OrigVD, Addr: VDAddr);
          assert(IsRegistered &&
                 "lastprivate var already registered as private");
          // Silence the warning about unused variable.
          (void)IsRegistered;
        }
      }
      ++IRef;
      ++IDestRef;
    }
  }
  return HasAtLeastOneLastprivate;
}
1200
/// Emit the final copy-back of lastprivate values into the original
/// variables, guarded by \p IsLastIterCond (when non-null) so it only runs on
/// the thread that executed the last iteration.
///
/// \param NoFinals If true, loop counters are not updated with their final
///        expressions before the copy-back (they are only marked as emitted).
void CodeGenFunction::EmitOMPLastprivateClauseFinal(
    const OMPExecutableDirective &D, bool NoFinals,
    llvm::Value *IsLastIterCond) {
  if (!HaveInsertPoint())
    return;
  // Emit following code:
  // if (<IsLastIterCond>) {
  //   orig_var1 = private_orig_var1;
  //   ...
  //   orig_varn = private_orig_varn;
  // }
  llvm::BasicBlock *ThenBB = nullptr;
  llvm::BasicBlock *DoneBB = nullptr;
  if (IsLastIterCond) {
    // Emit implicit barrier if at least one lastprivate conditional is found
    // and this is not a simd mode.
    if (!getLangOpts().OpenMPSimd &&
        llvm::any_of(Range: D.getClausesOfKind<OMPLastprivateClause>(),
                     P: [](const OMPLastprivateClause *C) {
                       return C->getKind() == OMPC_LASTPRIVATE_conditional;
                     })) {
      CGM.getOpenMPRuntime().emitBarrierCall(CGF&: *this, Loc: D.getBeginLoc(),
                                             Kind: OMPD_unknown,
                                             /*EmitChecks=*/false,
                                             /*ForceSimpleCall=*/true);
    }
    ThenBB = createBasicBlock(name: ".omp.lastprivate.then");
    DoneBB = createBasicBlock(name: ".omp.lastprivate.done");
    Builder.CreateCondBr(Cond: IsLastIterCond, True: ThenBB, False: DoneBB);
    EmitBlock(BB: ThenBB);
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  // For loop-based directives, map each loop counter to its 'final'
  // expression so the counter's value can be updated before copy-back.
  llvm::DenseMap<const VarDecl *, const Expr *> LoopCountersAndUpdates;
  if (const auto *LoopDirective = dyn_cast<OMPLoopDirective>(Val: &D)) {
    auto IC = LoopDirective->counters().begin();
    for (const Expr *F : LoopDirective->finals()) {
      const auto *D =
          cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IC)->getDecl())->getCanonicalDecl();
      if (NoFinals)
        AlreadyEmittedVars.insert(V: D);
      else
        LoopCountersAndUpdates[D] = F;
      ++IC;
    }
  }
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    // IRef/ISrcRef/IDestRef advance in lockstep with assignment_ops().
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (const Expr *AssignOp : C->assignment_ops()) {
      const auto *PrivateVD =
          cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IRef)->getDecl());
      QualType Type = PrivateVD->getType();
      const auto *CanonicalVD = PrivateVD->getCanonicalDecl();
      if (AlreadyEmittedVars.insert(V: CanonicalVD).second) {
        // If lastprivate variable is a loop control variable for loop-based
        // directive, update its value before copyin back to original
        // variable.
        if (const Expr *FinalExpr = LoopCountersAndUpdates.lookup(Val: CanonicalVD))
          EmitIgnoredExpr(E: FinalExpr);
        const auto *SrcVD =
            cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *ISrcRef)->getDecl());
        const auto *DestVD =
            cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IDestRef)->getDecl());
        // Get the address of the private variable.
        Address PrivateAddr = GetAddrOfLocalVar(VD: PrivateVD);
        // Reference-typed privates hold a pointer; load through it to reach
        // the referenced storage.
        if (const auto *RefTy = PrivateVD->getType()->getAs<ReferenceType>())
          PrivateAddr = Address(
              Builder.CreateLoad(Addr: PrivateAddr),
              CGM.getTypes().ConvertTypeForMem(T: RefTy->getPointeeType()),
              CGM.getNaturalTypeAlignment(T: RefTy->getPointeeType()));
        // Store the last value to the private copy in the last iteration.
        if (C->getKind() == OMPC_LASTPRIVATE_conditional)
          CGM.getOpenMPRuntime().emitLastprivateConditionalFinalUpdate(
              CGF&: *this, PrivLVal: MakeAddrLValue(Addr: PrivateAddr, T: (*IRef)->getType()), VD: PrivateVD,
              Loc: (*IRef)->getExprLoc());
        // Get the address of the original variable.
        Address OriginalAddr = GetAddrOfLocalVar(VD: DestVD);
        EmitOMPCopy(OriginalType: Type, DestAddr: OriginalAddr, SrcAddr: PrivateAddr, DestVD, SrcVD, Copy: AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
    if (const Expr *PostUpdate = C->getPostUpdateExpr())
      EmitIgnoredExpr(E: PostUpdate);
  }
  if (IsLastIterCond)
    EmitBlock(BB: DoneBB, /*IsFinished=*/true);
}
1291
/// Emit initialization of the private copies for the 'reduction' clauses of
/// directive \p D and register them in \p PrivateScope.
///
/// \param ForInscan Selects which clauses to process: when true only inscan
///        reductions are handled, otherwise only non-inscan ones.
void CodeGenFunction::EmitOMPReductionClauseInit(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope, bool ForInscan) {
  if (!HaveInsertPoint())
    return;
  SmallVector<const Expr *, 4> Shareds;
  SmallVector<const Expr *, 4> Privates;
  SmallVector<const Expr *, 4> ReductionOps;
  SmallVector<const Expr *, 4> LHSs;
  SmallVector<const Expr *, 4> RHSs;
  // Data for task-modified reductions (reduction(task, ...)), emitted via the
  // task reduction runtime entry points below.
  OMPTaskDataTy Data;
  SmallVector<const Expr *, 4> TaskLHSs;
  SmallVector<const Expr *, 4> TaskRHSs;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    // Process only the clause family requested by ForInscan.
    if (ForInscan != (C->getModifier() == OMPC_REDUCTION_inscan))
      continue;
    Shareds.append(in_start: C->varlist_begin(), in_end: C->varlist_end());
    Privates.append(in_start: C->privates().begin(), in_end: C->privates().end());
    ReductionOps.append(in_start: C->reduction_ops().begin(), in_end: C->reduction_ops().end());
    LHSs.append(in_start: C->lhs_exprs().begin(), in_end: C->lhs_exprs().end());
    RHSs.append(in_start: C->rhs_exprs().begin(), in_end: C->rhs_exprs().end());
    if (C->getModifier() == OMPC_REDUCTION_task) {
      Data.ReductionVars.append(in_start: C->privates().begin(), in_end: C->privates().end());
      Data.ReductionOrigs.append(in_start: C->varlist_begin(), in_end: C->varlist_end());
      Data.ReductionCopies.append(in_start: C->privates().begin(), in_end: C->privates().end());
      Data.ReductionOps.append(in_start: C->reduction_ops().begin(),
                               in_end: C->reduction_ops().end());
      TaskLHSs.append(in_start: C->lhs_exprs().begin(), in_end: C->lhs_exprs().end());
      TaskRHSs.append(in_start: C->rhs_exprs().begin(), in_end: C->rhs_exprs().end());
    }
  }
  // RedCG is indexed by Count in lockstep with the Shareds loop below.
  ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps);
  unsigned Count = 0;
  auto *ILHS = LHSs.begin();
  auto *IRHS = RHSs.begin();
  auto *IPriv = Privates.begin();
  for (const Expr *IRef : Shareds) {
    const auto *PrivateVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IPriv)->getDecl());
    // Emit private VarDecl with reduction init.
    RedCG.emitSharedOrigLValue(CGF&: *this, N: Count);
    RedCG.emitAggregateType(CGF&: *this, N: Count);
    AutoVarEmission Emission = EmitAutoVarAlloca(var: *PrivateVD);
    RedCG.emitInitialization(CGF&: *this, N: Count, PrivateAddr: Emission.getAllocatedAddress(),
                             SharedAddr: RedCG.getSharedLValue(N: Count).getAddress(),
                             DefaultInit: [&Emission](CodeGenFunction &CGF) {
                               CGF.EmitAutoVarInit(emission: Emission);
                               return true;
                             });
    EmitAutoVarCleanups(emission: Emission);
    Address BaseAddr = RedCG.adjustPrivateAddress(
        CGF&: *this, N: Count, PrivateAddr: Emission.getAllocatedAddress());
    bool IsRegistered =
        PrivateScope.addPrivate(LocalVD: RedCG.getBaseDecl(N: Count), Addr: BaseAddr);
    assert(IsRegistered && "private var already registered as private");
    // Silence the warning about unused variable.
    (void)IsRegistered;

    const auto *LHSVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *ILHS)->getDecl());
    const auto *RHSVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IRHS)->getDecl());
    QualType Type = PrivateVD->getType();
    bool isaOMPArraySectionExpr = isa<ArraySectionExpr>(Val: IRef);
    if (isaOMPArraySectionExpr && Type->isVariablyModifiedType()) {
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      PrivateScope.addPrivate(LocalVD: LHSVD, Addr: RedCG.getSharedLValue(N: Count).getAddress());
      PrivateScope.addPrivate(LocalVD: RHSVD, Addr: GetAddrOfLocalVar(VD: PrivateVD));
    } else if ((isaOMPArraySectionExpr && Type->isScalarType()) ||
               isa<ArraySubscriptExpr>(Val: IRef)) {
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      PrivateScope.addPrivate(LocalVD: LHSVD, Addr: RedCG.getSharedLValue(N: Count).getAddress());
      PrivateScope.addPrivate(LocalVD: RHSVD,
                              Addr: GetAddrOfLocalVar(VD: PrivateVD).withElementType(
                                  ElemTy: ConvertTypeForMem(T: RHSVD->getType())));
    } else {
      QualType Type = PrivateVD->getType();
      bool IsArray = getContext().getAsArrayType(T: Type) != nullptr;
      Address OriginalAddr = RedCG.getSharedLValue(N: Count).getAddress();
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      if (IsArray) {
        OriginalAddr =
            OriginalAddr.withElementType(ElemTy: ConvertTypeForMem(T: LHSVD->getType()));
      }
      PrivateScope.addPrivate(LocalVD: LHSVD, Addr: OriginalAddr);
      PrivateScope.addPrivate(
          LocalVD: RHSVD, Addr: IsArray ? GetAddrOfLocalVar(VD: PrivateVD).withElementType(
                               ElemTy: ConvertTypeForMem(T: RHSVD->getType()))
                         : GetAddrOfLocalVar(VD: PrivateVD));
    }
    ++ILHS;
    ++IRHS;
    ++IPriv;
    ++Count;
  }
  if (!Data.ReductionVars.empty()) {
    OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S: D);
    Data.IsReductionWithTaskMod = true;
    Data.IsWorksharingReduction = isOpenMPWorksharingDirective(DKind: EKind);
    // Initialize the task reduction object and store its descriptor into the
    // directive's task-reduction reference variable.
    llvm::Value *ReductionDesc = CGM.getOpenMPRuntime().emitTaskReductionInit(
        CGF&: *this, Loc: D.getBeginLoc(), LHSExprs: TaskLHSs, RHSExprs: TaskRHSs, Data);
    const Expr *TaskRedRef = nullptr;
    // Only directives that can carry reduction(task, ...) provide a
    // task-reduction reference expression; anything else is a bug.
    switch (EKind) {
    case OMPD_parallel:
      TaskRedRef = cast<OMPParallelDirective>(Val: D).getTaskReductionRefExpr();
      break;
    case OMPD_for:
      TaskRedRef = cast<OMPForDirective>(Val: D).getTaskReductionRefExpr();
      break;
    case OMPD_sections:
      TaskRedRef = cast<OMPSectionsDirective>(Val: D).getTaskReductionRefExpr();
      break;
    case OMPD_parallel_for:
      TaskRedRef = cast<OMPParallelForDirective>(Val: D).getTaskReductionRefExpr();
      break;
    case OMPD_parallel_master:
      TaskRedRef =
          cast<OMPParallelMasterDirective>(Val: D).getTaskReductionRefExpr();
      break;
    case OMPD_parallel_sections:
      TaskRedRef =
          cast<OMPParallelSectionsDirective>(Val: D).getTaskReductionRefExpr();
      break;
    case OMPD_target_parallel:
      TaskRedRef =
          cast<OMPTargetParallelDirective>(Val: D).getTaskReductionRefExpr();
      break;
    case OMPD_target_parallel_for:
      TaskRedRef =
          cast<OMPTargetParallelForDirective>(Val: D).getTaskReductionRefExpr();
      break;
    case OMPD_distribute_parallel_for:
      TaskRedRef =
          cast<OMPDistributeParallelForDirective>(Val: D).getTaskReductionRefExpr();
      break;
    case OMPD_teams_distribute_parallel_for:
      TaskRedRef = cast<OMPTeamsDistributeParallelForDirective>(Val: D)
                       .getTaskReductionRefExpr();
      break;
    case OMPD_target_teams_distribute_parallel_for:
      TaskRedRef = cast<OMPTargetTeamsDistributeParallelForDirective>(Val: D)
                       .getTaskReductionRefExpr();
      break;
    case OMPD_simd:
    case OMPD_for_simd:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_parallel_for_simd:
    case OMPD_task:
    case OMPD_taskyield:
    case OMPD_error:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_ordered:
    case OMPD_atomic:
    case OMPD_teams:
    case OMPD_target:
    case OMPD_cancellation_point:
    case OMPD_cancel:
    case OMPD_target_data:
    case OMPD_target_enter_data:
    case OMPD_target_exit_data:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_distribute:
    case OMPD_target_update:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_distribute_simd:
    case OMPD_target_parallel_for_simd:
    case OMPD_target_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams_distribute_simd:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_declare_simd:
    case OMPD_requires:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected directive with task reductions.");
    }

    const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: TaskRedRef)->getDecl());
    EmitVarDecl(D: *VD);
    EmitStoreOfScalar(Value: ReductionDesc, Addr: GetAddrOfLocalVar(VD),
                      /*Volatile=*/false, Ty: TaskRedRef->getType());
  }
}
1501
/// Emits the finalization step for all reduction clauses attached to
/// directive \p D: combines the per-thread partial results via the OpenMP
/// runtime. Inscan-modified reductions are skipped here - they are finalized
/// by the 'scan' directive codegen. \p ReductionKind identifies the kind of
/// region the reduction is emitted for (e.g. OMPD_parallel, OMPD_simd).
void CodeGenFunction::EmitOMPReductionClauseFinal(
    const OMPExecutableDirective &D, const OpenMPDirectiveKind ReductionKind) {
  if (!HaveInsertPoint())
    return;
  // Expression lists accumulated over all (non-inscan) reduction clauses so
  // that a single runtime reduction call below can process them together.
  llvm::SmallVector<const Expr *, 8> Privates;
  llvm::SmallVector<const Expr *, 8> LHSExprs;
  llvm::SmallVector<const Expr *, 8> RHSExprs;
  llvm::SmallVector<const Expr *, 8> ReductionOps;
  llvm::SmallVector<bool, 8> IsPrivateVarReduction;
  bool HasAtLeastOneReduction = false;
  bool IsReductionWithTaskMod = false;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    // Do not emit for inscan reductions.
    if (C->getModifier() == OMPC_REDUCTION_inscan)
      continue;
    HasAtLeastOneReduction = true;
    Privates.append(in_start: C->privates().begin(), in_end: C->privates().end());
    LHSExprs.append(in_start: C->lhs_exprs().begin(), in_end: C->lhs_exprs().end());
    RHSExprs.append(in_start: C->rhs_exprs().begin(), in_end: C->rhs_exprs().end());
    IsPrivateVarReduction.append(in_start: C->private_var_reduction_flags().begin(),
                                 in_end: C->private_var_reduction_flags().end());
    ReductionOps.append(in_start: C->reduction_ops().begin(), in_end: C->reduction_ops().end());
    // Remember whether any clause carries the 'task' modifier - that requires
    // an extra task-reduction finalization call below.
    IsReductionWithTaskMod =
        IsReductionWithTaskMod || C->getModifier() == OMPC_REDUCTION_task;
  }
  if (HasAtLeastOneReduction) {
    OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S: D);
    if (IsReductionWithTaskMod) {
      CGM.getOpenMPRuntime().emitTaskReductionFini(
          CGF&: *this, Loc: D.getBeginLoc(), IsWorksharingReduction: isOpenMPWorksharingDirective(DKind: EKind));
    }
    // A 'target teams loop' that can be lowered as 'parallel for' also takes
    // the nowait form of the reduction.
    bool TeamsLoopCanBeParallel = false;
    if (auto *TTLD = dyn_cast<OMPTargetTeamsGenericLoopDirective>(Val: &D))
      TeamsLoopCanBeParallel = TTLD->canBeParallelFor();
    bool WithNowait = D.getSingleClause<OMPNowaitClause>() ||
                      isOpenMPParallelDirective(DKind: EKind) ||
                      TeamsLoopCanBeParallel || ReductionKind == OMPD_simd;
    bool SimpleReduction = ReductionKind == OMPD_simd;
    // Emit nowait reduction if nowait clause is present or directive is a
    // parallel directive (it always has implicit barrier).
    CGM.getOpenMPRuntime().emitReduction(
        CGF&: *this, Loc: D.getEndLoc(), Privates, LHSExprs, RHSExprs, ReductionOps,
        Options: {.WithNowait: WithNowait, .SimpleReduction: SimpleReduction, .IsPrivateVarReduction: IsPrivateVarReduction, .ReductionKind: ReductionKind});
  }
}
1547
/// Emits the post-update expressions of all reduction clauses on \p D, if
/// any. The updates are optionally guarded: when the first post-update
/// expression is found, \p CondGen is invoked and, if it returns a non-null
/// condition, a conditional block is created lazily so that all post-updates
/// execute only when the condition holds.
static void emitPostUpdateForReductionClause(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
  if (!CGF.HaveInsertPoint())
    return;
  // Non-null once the guarded region has been opened.
  llvm::BasicBlock *DoneBB = nullptr;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    if (const Expr *PostUpdate = C->getPostUpdateExpr()) {
      if (!DoneBB) {
        if (llvm::Value *Cond = CondGen(CGF)) {
          // If the first post-update expression is found, emit conditional
          // block if it was requested.
          llvm::BasicBlock *ThenBB = CGF.createBasicBlock(name: ".omp.reduction.pu");
          DoneBB = CGF.createBasicBlock(name: ".omp.reduction.pu.done");
          CGF.Builder.CreateCondBr(Cond, True: ThenBB, False: DoneBB);
          CGF.EmitBlock(BB: ThenBB);
        }
      }
      CGF.EmitIgnoredExpr(E: PostUpdate);
    }
  }
  // Close the conditional region if one was opened above.
  if (DoneBB)
    CGF.EmitBlock(BB: DoneBB, /*IsFinished=*/true);
}
1572
namespace {
/// Codegen lambda for appending distribute lower and upper bounds to outlined
/// parallel function. This is necessary for combined constructs such as
/// 'distribute parallel for'. The callback receives the directive and the
/// captured-variables vector to which the bound values are appended.
typedef llvm::function_ref<void(CodeGenFunction &,
                                const OMPExecutableDirective &,
                                llvm::SmallVectorImpl<llvm::Value *> &)>
    CodeGenBoundParametersTy;
} // anonymous namespace
1582
1583static void
1584checkForLastprivateConditionalUpdate(CodeGenFunction &CGF,
1585 const OMPExecutableDirective &S) {
1586 if (CGF.getLangOpts().OpenMP < 50)
1587 return;
1588 llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> PrivateDecls;
1589 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
1590 for (const Expr *Ref : C->varlist()) {
1591 if (!Ref->getType()->isScalarType())
1592 continue;
1593 const auto *DRE = dyn_cast<DeclRefExpr>(Val: Ref->IgnoreParenImpCasts());
1594 if (!DRE)
1595 continue;
1596 PrivateDecls.insert(V: cast<VarDecl>(Val: DRE->getDecl()));
1597 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, LHS: Ref);
1598 }
1599 }
1600 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
1601 for (const Expr *Ref : C->varlist()) {
1602 if (!Ref->getType()->isScalarType())
1603 continue;
1604 const auto *DRE = dyn_cast<DeclRefExpr>(Val: Ref->IgnoreParenImpCasts());
1605 if (!DRE)
1606 continue;
1607 PrivateDecls.insert(V: cast<VarDecl>(Val: DRE->getDecl()));
1608 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, LHS: Ref);
1609 }
1610 }
1611 for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
1612 for (const Expr *Ref : C->varlist()) {
1613 if (!Ref->getType()->isScalarType())
1614 continue;
1615 const auto *DRE = dyn_cast<DeclRefExpr>(Val: Ref->IgnoreParenImpCasts());
1616 if (!DRE)
1617 continue;
1618 PrivateDecls.insert(V: cast<VarDecl>(Val: DRE->getDecl()));
1619 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, LHS: Ref);
1620 }
1621 }
1622 // Privates should ne analyzed since they are not captured at all.
1623 // Task reductions may be skipped - tasks are ignored.
1624 // Firstprivates do not return value but may be passed by reference - no need
1625 // to check for updated lastprivate conditional.
1626 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
1627 for (const Expr *Ref : C->varlist()) {
1628 if (!Ref->getType()->isScalarType())
1629 continue;
1630 const auto *DRE = dyn_cast<DeclRefExpr>(Val: Ref->IgnoreParenImpCasts());
1631 if (!DRE)
1632 continue;
1633 PrivateDecls.insert(V: cast<VarDecl>(Val: DRE->getDecl()));
1634 }
1635 }
1636 CGF.CGM.getOpenMPRuntime().checkAndEmitSharedLastprivateConditional(
1637 CGF, D: S, IgnoredDecls: PrivateDecls);
1638}
1639
/// Common codegen for directives that contain a 'parallel' region: emits the
/// outlined parallel function for the captured statement, evaluates the
/// num_threads (with message/severity), proc_bind and if clauses, collects
/// the captured variables (plus any bound parameters for combined
/// 'distribute parallel for' constructs), and emits the runtime call that
/// starts the parallel region.
static void emitCommonOMPParallelDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &S,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    const CodeGenBoundParametersTy &CodeGenBoundParameters) {
  const CapturedStmt *CS = S.getCapturedStmt(RegionKind: OMPD_parallel);
  llvm::Value *NumThreads = nullptr;
  OpenMPNumThreadsClauseModifier Modifier = OMPC_NUMTHREADS_unknown;
  // OpenMP 6.0, 10.4: "If no severity clause is specified then the effect is as
  // if sev-level is fatal."
  OpenMPSeverityClauseKind Severity = OMPC_SEVERITY_fatal;
  clang::Expr *Message = nullptr;
  SourceLocation SeverityLoc = SourceLocation();
  SourceLocation MessageLoc = SourceLocation();

  llvm::Function *OutlinedFn =
      CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
          CGF, D: S, ThreadIDVar: *CS->getCapturedDecl()->param_begin(), InnermostKind,
          CodeGen);

  if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) {
    CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
    NumThreads = CGF.EmitScalarExpr(E: NumThreadsClause->getNumThreads(),
                                    /*IgnoreResultAssign=*/true);
    Modifier = NumThreadsClause->getModifier();
    // Message/severity clauses are only evaluated when a num_threads clause
    // is present; they feed into the same runtime call.
    if (const auto *MessageClause = S.getSingleClause<OMPMessageClause>()) {
      Message = MessageClause->getMessageString();
      MessageLoc = MessageClause->getBeginLoc();
    }
    if (const auto *SeverityClause = S.getSingleClause<OMPSeverityClause>()) {
      Severity = SeverityClause->getSeverityKind();
      SeverityLoc = SeverityClause->getBeginLoc();
    }
    CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
        CGF, NumThreads, Loc: NumThreadsClause->getBeginLoc(), Modifier, Severity,
        SeverityLoc, Message, MessageLoc);
  }
  if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) {
    CodeGenFunction::RunCleanupsScope ProcBindScope(CGF);
    CGF.CGM.getOpenMPRuntime().emitProcBindClause(
        CGF, ProcBind: ProcBindClause->getProcBindKind(), Loc: ProcBindClause->getBeginLoc());
  }
  // Pick the if clause that applies to 'parallel': either unmodified or with
  // the 'parallel' name modifier.
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_parallel) {
      IfCond = C->getCondition();
      break;
    }
  }

  OMPParallelScope Scope(CGF, S);
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  // Combining 'distribute' with 'for' requires sharing each 'distribute' chunk
  // lower and upper bounds with the pragma 'for' chunking mechanism.
  // The following lambda takes care of appending the lower and upper bound
  // parameters when necessary
  CodeGenBoundParameters(CGF, S, CapturedVars);
  CGF.GenerateOpenMPCapturedVars(S: *CS, CapturedVars);
  CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, Loc: S.getBeginLoc(), OutlinedFn,
                                              CapturedVars, IfCond, NumThreads,
                                              NumThreadsModifier: Modifier, Severity, Message);
}
1702
1703static bool isAllocatableDecl(const VarDecl *VD) {
1704 const VarDecl *CVD = VD->getCanonicalDecl();
1705 if (!CVD->hasAttr<OMPAllocateDeclAttr>())
1706 return false;
1707 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
1708 // Use the default allocation.
1709 return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
1710 AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
1711 !AA->getAllocator());
1712}
1713
/// No-op CodeGenBoundParametersTy callback for directives that have no
/// 'distribute' loop bounds to forward to the outlined parallel function.
static void emitEmptyBoundParameters(CodeGenFunction &,
                                     const OMPExecutableDirective &,
                                     llvm::SmallVectorImpl<llvm::Value *> &) {}
1717
1718static void emitOMPCopyinClause(CodeGenFunction &CGF,
1719 const OMPExecutableDirective &S) {
1720 bool Copyins = CGF.EmitOMPCopyinClause(D: S);
1721 if (Copyins) {
1722 // Emit implicit barrier to synchronize threads and avoid data races on
1723 // propagation master's thread values of threadprivate variables to local
1724 // instances of that variables of all other implicit threads.
1725 CGF.CGM.getOpenMPRuntime().emitBarrierCall(
1726 CGF, Loc: S.getBeginLoc(), Kind: OMPD_unknown, /*EmitChecks=*/false,
1727 /*ForceSimpleCall=*/true);
1728 }
1729}
1730
/// Returns the runtime-allocated address for a local variable declared with
/// 'omp allocate' and a non-default allocator: computes the (aligned) size,
/// emits the allocation through the OpenMPIRBuilder, registers a cleanup
/// emitting the matching free, and returns the address cast to the
/// variable's type. Returns an invalid Address when default allocation
/// should be used instead.
Address CodeGenFunction::OMPBuilderCBHelpers::getAddressOfLocalVariable(
    CodeGenFunction &CGF, const VarDecl *VD) {
  CodeGenModule &CGM = CGF.CGM;
  auto &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();

  if (!VD)
    return Address::invalid();
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (!isAllocatableDecl(VD: CVD))
    return Address::invalid();
  llvm::Value *Size;
  CharUnits Align = CGM.getContext().getDeclAlign(D: CVD);
  if (CVD->getType()->isVariablyModifiedType()) {
    // Variably-modified (VLA-like) types: the size is a runtime value.
    Size = CGF.getTypeSize(Ty: CVD->getType());
    // Align the size: ((size + align - 1) / align) * align
    Size = CGF.Builder.CreateNUWAdd(
        LHS: Size, RHS: CGM.getSize(numChars: Align - CharUnits::fromQuantity(Quantity: 1)));
    Size = CGF.Builder.CreateUDiv(LHS: Size, RHS: CGM.getSize(numChars: Align));
    Size = CGF.Builder.CreateNUWMul(LHS: Size, RHS: CGM.getSize(numChars: Align));
  } else {
    // Constant-size types: align at compile time.
    CharUnits Sz = CGM.getContext().getTypeSizeInChars(T: CVD->getType());
    Size = CGM.getSize(numChars: Sz.alignTo(Align));
  }

  const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
  assert(AA->getAllocator() &&
         "Expected allocator expression for non-default allocator.");
  llvm::Value *Allocator = CGF.EmitScalarExpr(E: AA->getAllocator());
  // According to the standard, the original allocator type is a enum (integer).
  // Convert to pointer type, if required.
  if (Allocator->getType()->isIntegerTy())
    Allocator = CGF.Builder.CreateIntToPtr(V: Allocator, DestTy: CGM.VoidPtrTy);
  else if (Allocator->getType()->isPointerTy())
    Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(V: Allocator,
                                                                DestTy: CGM.VoidPtrTy);

  llvm::Value *Addr = OMPBuilder.createOMPAlloc(
      Loc: CGF.Builder, Size, Allocator,
      Name: getNameWithSeparators(Parts: {CVD->getName(), ".void.addr"}, FirstSeparator: ".", Separator: "."));
  llvm::CallInst *FreeCI =
      OMPBuilder.createOMPFree(Loc: CGF.Builder, Addr, Allocator);

  // Free the allocation when the variable's scope is left, including on EH
  // unwind paths.
  CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(Kind: NormalAndEHCleanup, A: FreeCI);
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      V: Addr,
      DestTy: CGF.ConvertTypeForMem(T: CGM.getContext().getPointerType(T: CVD->getType())),
      Name: getNameWithSeparators(Parts: {CVD->getName(), ".addr"}, FirstSeparator: ".", Separator: "."));
  return Address(Addr, CGF.ConvertTypeForMem(T: CVD->getType()), Align);
}
1780
/// Returns the address of the thread-private copy of \p VD. When TLS use is
/// enabled and supported by the target, \p VDAddr is returned unchanged;
/// otherwise a cached runtime-managed thread-private copy is obtained via
/// the OpenMPIRBuilder.
Address CodeGenFunction::OMPBuilderCBHelpers::getAddrOfThreadPrivate(
    CodeGenFunction &CGF, const VarDecl *VD, Address VDAddr,
    SourceLocation Loc) {
  CodeGenModule &CGM = CGF.CGM;
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return VDAddr;

  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();

  llvm::Type *VarTy = VDAddr.getElementType();
  // The runtime cache call takes an i8* to the original storage plus its
  // store size, keyed by a name derived from the mangled variable name.
  llvm::Value *Data =
      CGF.Builder.CreatePointerCast(V: VDAddr.emitRawPointer(CGF), DestTy: CGM.Int8PtrTy);
  llvm::ConstantInt *Size = CGM.getSize(numChars: CGM.GetTargetTypeStoreSize(Ty: VarTy));
  std::string Suffix = getNameWithSeparators(Parts: {"cache", ""});
  llvm::Twine CacheName = Twine(CGM.getMangledName(GD: VD)).concat(Suffix);

  llvm::CallInst *ThreadPrivateCacheCall =
      OMPBuilder.createCachedThreadPrivate(Loc: CGF.Builder, Pointer: Data, Size, Name: CacheName);

  // The result is addressed with i8 element type; callers cast as needed.
  return Address(ThreadPrivateCacheCall, CGM.Int8Ty, VDAddr.getAlignment());
}
1803
1804std::string CodeGenFunction::OMPBuilderCBHelpers::getNameWithSeparators(
1805 ArrayRef<StringRef> Parts, StringRef FirstSeparator, StringRef Separator) {
1806 SmallString<128> Buffer;
1807 llvm::raw_svector_ostream OS(Buffer);
1808 StringRef Sep = FirstSeparator;
1809 for (StringRef Part : Parts) {
1810 OS << Sep << Part;
1811 Sep = Separator;
1812 }
1813 return OS.str().str();
1814}
1815
/// Emits \p RegionBodyStmt as the body of an inlined OpenMP region at
/// \p CodeGenIP: splits off a "<RegionName>.after" finalization block and
/// branches to it once the body has been emitted.
void CodeGenFunction::OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
    CodeGenFunction &CGF, const Stmt *RegionBodyStmt, InsertPointTy AllocaIP,
    InsertPointTy CodeGenIP, Twine RegionName) {
  CGBuilderTy &Builder = CGF.Builder;
  Builder.restoreIP(IP: CodeGenIP);
  llvm::BasicBlock *FiniBB = splitBBWithSuffix(Builder, /*CreateBranch=*/false,
                                               Suffix: "." + RegionName + ".after");

  {
    // Scope the RAII so the region context is torn down before the branch.
    OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(CGF, AllocaIP, *FiniBB);
    CGF.EmitStmt(S: RegionBodyStmt);
  }

  // Branch to the finalization block only if the body left the builder with a
  // live insertion point (i.e. did not already terminate the block).
  if (Builder.saveIP().isSet())
    Builder.CreateBr(Dest: FiniBB);
}
1832
1833void CodeGenFunction::OMPBuilderCBHelpers::EmitOMPOutlinedRegionBody(
1834 CodeGenFunction &CGF, const Stmt *RegionBodyStmt, InsertPointTy AllocaIP,
1835 InsertPointTy CodeGenIP, Twine RegionName) {
1836 CGBuilderTy &Builder = CGF.Builder;
1837 Builder.restoreIP(IP: CodeGenIP);
1838 llvm::BasicBlock *FiniBB = splitBBWithSuffix(Builder, /*CreateBranch=*/false,
1839 Suffix: "." + RegionName + ".after");
1840
1841 {
1842 OMPBuilderCBHelpers::OutlinedRegionBodyRAII IRB(CGF, AllocaIP, *FiniBB);
1843 CGF.EmitStmt(S: RegionBodyStmt);
1844 }
1845
1846 if (Builder.saveIP().isSet())
1847 Builder.CreateBr(Dest: FiniBB);
1848}
1849
/// Emits '#pragma omp parallel'. With the OpenMPIRBuilder enabled, the region
/// is built via OpenMPIRBuilder::createParallel with body/privatization/
/// finalization callbacks; otherwise the region is emitted as a standalone
/// outlined region through the OpenMP runtime.
void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
    // Check if we have any if clause associated with the directive.
    llvm::Value *IfCond = nullptr;
    if (const auto *C = S.getSingleClause<OMPIfClause>())
      IfCond = EmitScalarExpr(E: C->getCondition(),
                              /*IgnoreResultAssign=*/true);

    llvm::Value *NumThreads = nullptr;
    if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>())
      NumThreads = EmitScalarExpr(E: NumThreadsClause->getNumThreads(),
                                  /*IgnoreResultAssign=*/true);

    ProcBindKind ProcBind = OMP_PROC_BIND_default;
    if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>())
      ProcBind = ProcBindClause->getProcBindKind();

    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;

    // The cleanup callback that finalizes all variables at the given location,
    // thus calls destructors etc.
    auto FiniCB = [this](InsertPointTy IP) {
      OMPBuilderCBHelpers::FinalizeOMPRegion(CGF&: *this, IP);
      return llvm::Error::success();
    };

    // Privatization callback that performs appropriate action for
    // shared/private/firstprivate/lastprivate/copyin/... variables.
    //
    // TODO: This defaults to shared right now.
    auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
                     llvm::Value &, llvm::Value &Val, llvm::Value *&ReplVal) {
      // The next line is appropriate only for variables (Val) with the
      // data-sharing attribute "shared".
      ReplVal = &Val;

      return CodeGenIP;
    };

    const CapturedStmt *CS = S.getCapturedStmt(RegionKind: OMPD_parallel);
    const Stmt *ParallelRegionBodyStmt = CS->getCapturedStmt();

    // Body callback: emits the captured statement as the outlined region.
    auto BodyGenCB = [&, this](InsertPointTy AllocaIP,
                               InsertPointTy CodeGenIP) {
      OMPBuilderCBHelpers::EmitOMPOutlinedRegionBody(
          CGF&: *this, RegionBodyStmt: ParallelRegionBodyStmt, AllocaIP, CodeGenIP, RegionName: "parallel");
      return llvm::Error::success();
    };

    CGCapturedStmtInfo CGSI(*CS, CR_OpenMP);
    CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI);
    llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
        AllocaInsertPt->getParent(), AllocaInsertPt->getIterator());
    llvm::OpenMPIRBuilder::InsertPointTy AfterIP = cantFail(
        ValOrErr: OMPBuilder.createParallel(Loc: Builder, AllocaIP, BodyGenCB, PrivCB, FiniCB,
                                  IfCondition: IfCond, NumThreads, ProcBind, IsCancellable: S.hasCancel()));
    Builder.restoreIP(IP: AfterIP);
    return;
  }

  // Emit parallel region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    // Handle data-sharing clauses before emitting the captured body.
    emitOMPCopyinClause(CGF, S);
    (void)CGF.EmitOMPFirstprivateClause(D: S, PrivateScope);
    CGF.EmitOMPPrivateClause(D: S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(D: S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.EmitStmt(S: S.getCapturedStmt(RegionKind: OMPD_parallel)->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_parallel);
  };
  {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
    emitCommonOMPParallelDirective(CGF&: *this, S, InnermostKind: OMPD_parallel, CodeGen,
                                   CodeGenBoundParameters: emitEmptyBoundParameters);
    emitPostUpdateForReductionClause(CGF&: *this, D: S,
                                     CondGen: [](CodeGenFunction &) { return nullptr; });
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(CGF&: *this, S);
}
1934
/// Emits a metadirective by emitting the statement selected for it, which the
/// directive stores as an if-stmt.
void CodeGenFunction::EmitOMPMetaDirective(const OMPMetaDirective &S) {
  EmitStmt(S: S.getIfStmt());
}
1938
1939namespace {
1940/// RAII to handle scopes for loop transformation directives.
1941class OMPTransformDirectiveScopeRAII {
1942 OMPLoopScope *Scope = nullptr;
1943 CodeGenFunction::CGCapturedStmtInfo *CGSI = nullptr;
1944 CodeGenFunction::CGCapturedStmtRAII *CapInfoRAII = nullptr;
1945
1946 OMPTransformDirectiveScopeRAII(const OMPTransformDirectiveScopeRAII &) =
1947 delete;
1948 OMPTransformDirectiveScopeRAII &
1949 operator=(const OMPTransformDirectiveScopeRAII &) = delete;
1950
1951public:
1952 OMPTransformDirectiveScopeRAII(CodeGenFunction &CGF, const Stmt *S) {
1953 if (const auto *Dir = dyn_cast<OMPLoopBasedDirective>(Val: S)) {
1954 Scope = new OMPLoopScope(CGF, *Dir);
1955 CGSI = new CodeGenFunction::CGCapturedStmtInfo(CR_OpenMP);
1956 CapInfoRAII = new CodeGenFunction::CGCapturedStmtRAII(CGF, CGSI);
1957 } else if (const auto *Dir =
1958 dyn_cast<OMPCanonicalLoopSequenceTransformationDirective>(
1959 Val: S)) {
1960 // For simplicity we reuse the loop scope similarly to what we do with
1961 // OMPCanonicalLoopNestTransformationDirective do by being a subclass
1962 // of OMPLoopBasedDirective.
1963 Scope = new OMPLoopScope(CGF, *Dir);
1964 CGSI = new CodeGenFunction::CGCapturedStmtInfo(CR_OpenMP);
1965 CapInfoRAII = new CodeGenFunction::CGCapturedStmtRAII(CGF, CGSI);
1966 }
1967 }
1968 ~OMPTransformDirectiveScopeRAII() {
1969 if (!Scope)
1970 return;
1971 delete CapInfoRAII;
1972 delete CGSI;
1973 delete Scope;
1974 }
1975};
1976} // namespace
1977
/// Recursively emits the body of a (possibly collapsed) loop nest: walks into
/// compound statements and, while fewer than \p MaxLevel loop levels have
/// been entered, descends into the next associated loop \p NextLoop -
/// stepping through loop-transformation directives and OMPCanonicalLoop
/// wrappers, and emitting the loop-variable statement of range-based for
/// loops along the way. The innermost remaining statement is emitted as-is.
static void emitBody(CodeGenFunction &CGF, const Stmt *S, const Stmt *NextLoop,
                     int MaxLevel, int Level = 0) {
  assert(Level < MaxLevel && "Too deep lookup during loop body codegen.");
  const Stmt *SimplifiedS = S->IgnoreContainers();
  if (const auto *CS = dyn_cast<CompoundStmt>(Val: SimplifiedS)) {
    PrettyStackTraceLoc CrashInfo(
        CGF.getContext().getSourceManager(), CS->getLBracLoc(),
        "LLVM IR generation of compound statement ('{}')");

    // Keep track of the current cleanup stack depth, including debug scopes.
    CodeGenFunction::LexicalScope Scope(CGF, S->getSourceRange());
    // Recurse into each child at the same loop level.
    for (const Stmt *CurStmt : CS->body())
      emitBody(CGF, S: CurStmt, NextLoop, MaxLevel, Level);
    return;
  }
  if (SimplifiedS == NextLoop) {
    // A loop-transformation directive stands for its transformed statement.
    if (auto *Dir = dyn_cast<OMPLoopTransformationDirective>(Val: SimplifiedS))
      SimplifiedS = Dir->getTransformedStmt();
    if (const auto *CanonLoop = dyn_cast<OMPCanonicalLoop>(Val: SimplifiedS))
      SimplifiedS = CanonLoop->getLoopStmt();
    if (const auto *For = dyn_cast<ForStmt>(Val: SimplifiedS)) {
      S = For->getBody();
    } else {
      assert(isa<CXXForRangeStmt>(SimplifiedS) &&
             "Expected canonical for loop or range-based for loop.");
      const auto *CXXFor = cast<CXXForRangeStmt>(Val: SimplifiedS);
      // Emit the loop-variable declaration of the range-based for explicitly
      // before descending into its body.
      CGF.EmitStmt(S: CXXFor->getLoopVarStmt());
      S = CXXFor->getBody();
    }
    if (Level + 1 < MaxLevel) {
      // Look for the next innermost associated loop and keep descending.
      NextLoop = OMPLoopDirective::tryToFindNextInnerLoop(
          CurStmt: S, /*TryImperfectlyNestedLoops=*/true);
      emitBody(CGF, S, NextLoop, MaxLevel, Level: Level + 1);
      return;
    }
  }
  CGF.EmitStmt(S);
}
2016
/// Emits one iteration of an OpenMP loop directive's body: loop-counter and
/// linear-variable updates, non-rectangular bound checks, inscan-reduction
/// block setup/dispatch, and finally the (possibly collapsed) loop body.
/// A 'continue' inside the body jumps to the block that performs the
/// increment.
void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
                                      JumpDest LoopExit) {
  RunCleanupsScope BodyScope(*this);
  // Update counters values on current iteration.
  for (const Expr *UE : D.updates())
    EmitIgnoredExpr(E: UE);
  // Update the linear variables.
  // In distribute directives only loop counters may be marked as linear, no
  // need to generate the code for them.
  OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S: D);
  if (!isOpenMPDistributeDirective(DKind: EKind)) {
    for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
      for (const Expr *UE : C->updates())
        EmitIgnoredExpr(E: UE);
    }
  }

  // On a continue in the body, jump to the end.
  JumpDest Continue = getJumpDestInCurrentScope(Name: "omp.body.continue");
  BreakContinueStack.push_back(Elt: BreakContinue(D, LoopExit, Continue));
  for (const Expr *E : D.finals_conditions()) {
    if (!E)
      continue;
    // Check that loop counter in non-rectangular nest fits into the iteration
    // space.
    llvm::BasicBlock *NextBB = createBasicBlock(name: "omp.body.next");
    EmitBranchOnBoolExpr(Cond: E, TrueBlock: NextBB, FalseBlock: Continue.getBlock(),
                         TrueCount: getProfileCount(S: D.getBody()));
    EmitBlock(BB: NextBB);
  }

  OMPPrivateScope InscanScope(*this);
  EmitOMPReductionClauseInit(D, PrivateScope&: InscanScope, /*ForInscan=*/true);
  bool IsInscanRegion = InscanScope.Privatize();
  if (IsInscanRegion) {
    // Need to remember the block before and after scan directive
    // to dispatch them correctly depending on the clause used in
    // this directive, inclusive or exclusive. For inclusive scan the natural
    // order of the blocks is used, for exclusive clause the blocks must be
    // executed in reverse order.
    OMPBeforeScanBlock = createBasicBlock(name: "omp.before.scan.bb");
    OMPAfterScanBlock = createBasicBlock(name: "omp.after.scan.bb");
    // No need to allocate inscan exit block, in simd mode it is selected in the
    // codegen for the scan directive.
    if (EKind != OMPD_simd && !getLangOpts().OpenMPSimd)
      OMPScanExitBlock = createBasicBlock(name: "omp.exit.inscan.bb");
    OMPScanDispatch = createBasicBlock(name: "omp.inscan.dispatch");
    EmitBranch(Block: OMPScanDispatch);
    EmitBlock(BB: OMPBeforeScanBlock);
  }

  // Emit loop variables for C++ range loops.
  const Stmt *Body =
      D.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers();
  // Emit loop body.
  emitBody(CGF&: *this, S: Body,
           NextLoop: OMPLoopBasedDirective::tryToFindNextInnerLoop(
               CurStmt: Body, /*TryImperfectlyNestedLoops=*/true),
           MaxLevel: D.getLoopsNumber());

  // Jump to the dispatcher at the end of the loop body.
  if (IsInscanRegion)
    EmitBranch(Block: OMPScanExitBlock);

  // The end (updates/cleanups).
  EmitBlock(BB: Continue.getBlock());
  BreakContinueStack.pop_back();
}
2085
/// An emitted closure: the outlined function together with the pointer to the
/// record holding its captured variables.
using EmittedClosureTy = std::pair<llvm::Function *, llvm::Value *>;

/// Emit a captured statement and return the function as well as its captured
/// closure context.
static EmittedClosureTy emitCapturedStmtFunc(CodeGenFunction &ParentCGF,
                                             const CapturedStmt *S) {
  // Materialize the capture record in the parent function.
  LValue CapStruct = ParentCGF.InitCapturedStruct(S: *S);
  // Generate the outlined function in a fresh CodeGenFunction context.
  CodeGenFunction CGF(ParentCGF.CGM, /*suppressNewContext=*/true);
  std::unique_ptr<CodeGenFunction::CGCapturedStmtInfo> CSI =
      std::make_unique<CodeGenFunction::CGCapturedStmtInfo>(args: *S);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, CSI.get());
  llvm::Function *F = CGF.GenerateCapturedStmtFunction(S: *S);

  return {F, CapStruct.getPointer(CGF&: ParentCGF)};
}
2101
2102/// Emit a call to a previously captured closure.
2103static llvm::CallInst *
2104emitCapturedStmtCall(CodeGenFunction &ParentCGF, EmittedClosureTy Cap,
2105 llvm::ArrayRef<llvm::Value *> Args) {
2106 // Append the closure context to the argument.
2107 SmallVector<llvm::Value *> EffectiveArgs;
2108 EffectiveArgs.reserve(N: Args.size() + 1);
2109 llvm::append_range(C&: EffectiveArgs, R&: Args);
2110 EffectiveArgs.push_back(Elt: Cap.second);
2111
2112 return ParentCGF.Builder.CreateCall(Callee: Cap.first, Args: EffectiveArgs);
2113}
2114
/// Emits the \p Depth loops nested in \p S via the OpenMPIRBuilder and
/// returns the CanonicalLoopInfo of the outermost one, popping the emitted
/// loops off OMPLoopNestStack. Only Depth == 1 is currently supported.
llvm::CanonicalLoopInfo *
CodeGenFunction::EmitOMPCollapsedCanonicalLoopNest(const Stmt *S, int Depth) {
  assert(Depth == 1 && "Nested loops with OpenMPIRBuilder not yet implemented");

  // The caller is processing the loop-associated directive processing the \p
  // Depth loops nested in \p S. Put the previous pending loop-associated
  // directive to the stack. If the current loop-associated directive is a loop
  // transformation directive, it will push its generated loops onto the stack
  // such that together with the loops left here they form the combined loop
  // nest for the parent loop-associated directive.
  int ParentExpectedOMPLoopDepth = ExpectedOMPLoopDepth;
  ExpectedOMPLoopDepth = Depth;

  EmitStmt(S);
  assert(OMPLoopNestStack.size() >= (size_t)Depth && "Found too few loops");

  // The last added loop is the outermost one.
  llvm::CanonicalLoopInfo *Result = OMPLoopNestStack.back();

  // Pop the \p Depth loops requested by the call from that stack and restore
  // the previous context.
  OMPLoopNestStack.pop_back_n(NumItems: Depth);
  ExpectedOMPLoopDepth = ParentExpectedOMPLoopDepth;

  return Result;
}
2141
/// Emits an OMPCanonicalLoop with the OpenMPIRBuilder: emits the loop's init
/// statements, outlines the distance and loop-variable closures, calls the
/// distance closure for the trip count, builds the canonical loop whose body
/// maps the logical iteration number to the loop variable, and pushes the
/// resulting CanonicalLoopInfo for consuming parent directives. Without the
/// IRBuilder the syntactic loop is emitted directly.
void CodeGenFunction::EmitOMPCanonicalLoop(const OMPCanonicalLoop *S) {
  const Stmt *SyntacticalLoop = S->getLoopStmt();
  if (!getLangOpts().OpenMPIRBuilder) {
    // Ignore if OpenMPIRBuilder is not enabled.
    EmitStmt(S: SyntacticalLoop);
    return;
  }

  LexicalScope ForScope(*this, S->getSourceRange());

  // Emit init statements. The Distance/LoopVar funcs may reference variable
  // declarations they contain.
  const Stmt *BodyStmt;
  if (const auto *For = dyn_cast<ForStmt>(Val: SyntacticalLoop)) {
    if (const Stmt *InitStmt = For->getInit())
      EmitStmt(S: InitStmt);
    BodyStmt = For->getBody();
  } else if (const auto *RangeFor =
                 dyn_cast<CXXForRangeStmt>(Val: SyntacticalLoop)) {
    if (const DeclStmt *RangeStmt = RangeFor->getRangeStmt())
      EmitStmt(S: RangeStmt);
    if (const DeclStmt *BeginStmt = RangeFor->getBeginStmt())
      EmitStmt(S: BeginStmt);
    if (const DeclStmt *EndStmt = RangeFor->getEndStmt())
      EmitStmt(S: EndStmt);
    if (const DeclStmt *LoopVarStmt = RangeFor->getLoopVarStmt())
      EmitStmt(S: LoopVarStmt);
    BodyStmt = RangeFor->getBody();
  } else
    llvm_unreachable("Expected for-stmt or range-based for-stmt");

  // Emit closure for later use. By-value captures will be captured here.
  const CapturedStmt *DistanceFunc = S->getDistanceFunc();
  EmittedClosureTy DistanceClosure = emitCapturedStmtFunc(ParentCGF&: *this, S: DistanceFunc);
  const CapturedStmt *LoopVarFunc = S->getLoopVarFunc();
  EmittedClosureTy LoopVarClosure = emitCapturedStmtFunc(ParentCGF&: *this, S: LoopVarFunc);

  // Call the distance function to get the number of iterations of the loop to
  // come.
  QualType LogicalTy = DistanceFunc->getCapturedDecl()
                           ->getParam(i: 0)
                           ->getType()
                           .getNonReferenceType();
  RawAddress CountAddr = CreateMemTemp(T: LogicalTy, Name: ".count.addr");
  emitCapturedStmtCall(ParentCGF&: *this, Cap: DistanceClosure, Args: {CountAddr.getPointer()});
  llvm::Value *DistVal = Builder.CreateLoad(Addr: CountAddr, Name: ".count");

  // Emit the loop structure.
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  auto BodyGen = [&, this](llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP,
                           llvm::Value *IndVar) {
    Builder.restoreIP(IP: CodeGenIP);

    // Emit the loop body: Convert the logical iteration number to the loop
    // variable and emit the body.
    const DeclRefExpr *LoopVarRef = S->getLoopVarRef();
    LValue LCVal = EmitLValue(E: LoopVarRef);
    Address LoopVarAddress = LCVal.getAddress();
    emitCapturedStmtCall(ParentCGF&: *this, Cap: LoopVarClosure,
                         Args: {LoopVarAddress.emitRawPointer(CGF&: *this), IndVar});

    RunCleanupsScope BodyScope(*this);
    EmitStmt(S: BodyStmt);
    return llvm::Error::success();
  };

  llvm::CanonicalLoopInfo *CL =
      cantFail(ValOrErr: OMPBuilder.createCanonicalLoop(Loc: Builder, BodyGenCB: BodyGen, TripCount: DistVal));

  // Finish up the loop.
  Builder.restoreIP(IP: CL->getAfterIP());
  ForScope.ForceCleanup();

  // Remember the CanonicalLoopInfo for parent AST nodes consuming it.
  OMPLoopNestStack.push_back(Elt: CL);
}
2218
/// Emit the generic skeleton of an OpenMP inner loop:
///   omp.inner.for.cond:  branch on LoopCond to body or exit
///   omp.inner.for.body:  BodyGen(*this)
///   omp.inner.for.inc:   IncExpr; PostIncGen(*this); back-edge to cond
///   omp.inner.for.end:   fall-through
///
/// \param RequiresCleanup If true, stage a dedicated cleanup block between the
///        condition and the loop exit so pending cleanups run on loop exit.
/// \param LoopCond Condition expression controlling the loop.
/// \param IncExpr Increment expression emitted in the "inc" block.
/// \param BodyGen Callback that emits the loop body.
/// \param PostIncGen Callback that emits extra code after the increment.
void CodeGenFunction::EmitOMPInnerLoop(
    const OMPExecutableDirective &S, bool RequiresCleanup, const Expr *LoopCond,
    const Expr *IncExpr,
    const llvm::function_ref<void(CodeGenFunction &)> BodyGen,
    const llvm::function_ref<void(CodeGenFunction &)> PostIncGen) {
  auto LoopExit = getJumpDestInCurrentScope(Name: "omp.inner.for.end");

  // Start the loop with a block that tests the condition.
  auto CondBlock = createBasicBlock(name: "omp.inner.for.cond");
  EmitBlock(BB: CondBlock);
  const SourceRange R = S.getSourceRange();

  // If attributes are attached, push to the basic block with them.
  const auto &OMPED = cast<OMPExecutableDirective>(Val: S);
  const CapturedStmt *ICS = OMPED.getInnermostCapturedStmt();
  const Stmt *SS = ICS->getCapturedStmt();
  const AttributedStmt *AS = dyn_cast_or_null<AttributedStmt>(Val: SS);
  // Any pending CanonicalLoopInfos belong to loops now being emitted manually;
  // clear them so enclosing constructs do not consume stale entries.
  OMPLoopNestStack.clear();
  if (AS)
    LoopStack.push(Header: CondBlock, Ctx&: CGM.getContext(), CGOpts: CGM.getCodeGenOpts(),
                   Attrs: AS->getAttrs(), StartLoc: SourceLocToDebugLoc(Location: R.getBegin()),
                   EndLoc: SourceLocToDebugLoc(Location: R.getEnd()));
  else
    LoopStack.push(Header: CondBlock, StartLoc: SourceLocToDebugLoc(Location: R.getBegin()),
                   EndLoc: SourceLocToDebugLoc(Location: R.getEnd()));

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  llvm::BasicBlock *ExitBlock = LoopExit.getBlock();
  if (RequiresCleanup)
    ExitBlock = createBasicBlock(name: "omp.inner.for.cond.cleanup");

  llvm::BasicBlock *LoopBody = createBasicBlock(name: "omp.inner.for.body");

  // Emit condition.
  EmitBranchOnBoolExpr(Cond: LoopCond, TrueBlock: LoopBody, FalseBlock: ExitBlock, TrueCount: getProfileCount(S: &S));
  if (ExitBlock != LoopExit.getBlock()) {
    // Route the false edge through the staged cleanup block.
    EmitBlock(BB: ExitBlock);
    EmitBranchThroughCleanup(Dest: LoopExit);
  }

  EmitBlock(BB: LoopBody);
  incrementProfileCounter(S: &S);

  // Create a block for the increment. break/continue inside the body target
  // LoopExit/Continue respectively.
  JumpDest Continue = getJumpDestInCurrentScope(Name: "omp.inner.for.inc");
  BreakContinueStack.push_back(Elt: BreakContinue(S, LoopExit, Continue));

  BodyGen(*this);

  // Emit "IV = IV + 1" and a back-edge to the condition block.
  EmitBlock(BB: Continue.getBlock());
  EmitIgnoredExpr(E: IncExpr);
  PostIncGen(*this);
  BreakContinueStack.pop_back();
  EmitBranch(Block: CondBlock);
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(BB: LoopExit.getBlock());
}
2279
/// Emit initialization of private copies for variables in 'linear' clauses of
/// \p D, plus the pre-calculated linear step variables when the step is not a
/// constant.
/// \returns true if at least one linear variable init was emitted.
bool CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) {
  if (!HaveInsertPoint())
    return false;
  // Emit inits for the linear variables.
  bool HasLinears = false;
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    for (const Expr *Init : C->inits()) {
      HasLinears = true;
      const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: Init)->getDecl());
      if (const auto *Ref =
              dyn_cast<DeclRefExpr>(Val: VD->getInit()->IgnoreImpCasts())) {
        // Init refers directly to the original variable: allocate the private
        // copy and initialize it from the original, rebuilding the reference
        // so captured originals are resolved correctly.
        AutoVarEmission Emission = EmitAutoVarAlloca(var: *VD);
        const auto *OrigVD = cast<VarDecl>(Val: Ref->getDecl());
        DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
                        CapturedStmtInfo->lookup(VD: OrigVD) != nullptr,
                        VD->getInit()->getType(), VK_LValue,
                        VD->getInit()->getExprLoc());
        EmitExprAsInit(
            init: &DRE, D: VD,
            lvalue: MakeAddrLValue(Addr: Emission.getAllocatedAddress(), T: VD->getType()),
            /*capturedByInit=*/false);
        EmitAutoVarCleanups(emission: Emission);
      } else {
        // Arbitrary initializer: emit the declaration as written.
        EmitVarDecl(D: *VD);
      }
    }
    // Emit the linear steps for the linear clauses.
    // If a step is not constant, it is pre-calculated before the loop.
    if (const auto *CS = cast_or_null<BinaryOperator>(Val: C->getCalcStep()))
      if (const auto *SaveRef = cast<DeclRefExpr>(Val: CS->getLHS())) {
        EmitVarDecl(D: *cast<VarDecl>(Val: SaveRef->getDecl()));
        // Emit calculation of the linear step.
        EmitIgnoredExpr(E: CS);
      }
  }
  return HasLinears;
}
2317
/// Emit final updates of linear variables after the loop: copies the value
/// computed in the private copy back to the original variable, optionally
/// guarded by a condition produced by \p CondGen (e.g. "ran the last
/// iteration"), followed by any clause post-update expressions.
void CodeGenFunction::EmitOMPLinearClauseFinal(
    const OMPLoopDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
  if (!HaveInsertPoint())
    return;
  llvm::BasicBlock *DoneBB = nullptr;
  // Emit the final values of the linear variables.
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    auto IC = C->varlist_begin();
    for (const Expr *F : C->finals()) {
      if (!DoneBB) {
        if (llvm::Value *Cond = CondGen(*this)) {
          // If the first post-update expression is found, emit conditional
          // block if it was requested.
          llvm::BasicBlock *ThenBB = createBasicBlock(name: ".omp.linear.pu");
          DoneBB = createBasicBlock(name: ".omp.linear.pu.done");
          Builder.CreateCondBr(Cond, True: ThenBB, False: DoneBB);
          EmitBlock(BB: ThenBB);
        }
      }
      // Temporarily map the original variable onto its own address so the
      // final expression (written in terms of the original) stores there.
      const auto *OrigVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IC)->getDecl());
      DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
                      CapturedStmtInfo->lookup(VD: OrigVD) != nullptr,
                      (*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
      Address OrigAddr = EmitLValue(E: &DRE).getAddress();
      CodeGenFunction::OMPPrivateScope VarScope(*this);
      VarScope.addPrivate(LocalVD: OrigVD, Addr: OrigAddr);
      (void)VarScope.Privatize();
      EmitIgnoredExpr(E: F);
      ++IC;
    }
    if (const Expr *PostUpdate = C->getPostUpdateExpr())
      EmitIgnoredExpr(E: PostUpdate);
  }
  if (DoneBB)
    EmitBlock(BB: DoneBB, /*IsFinished=*/true);
}
2355
/// Emit llvm.assume-based alignment assumptions for all pointers listed in
/// 'aligned' clauses of \p D. When the clause carries no explicit alignment,
/// the target's default SIMD alignment for the pointee type is used.
static void emitAlignedClause(CodeGenFunction &CGF,
                              const OMPExecutableDirective &D) {
  if (!CGF.HaveInsertPoint())
    return;
  for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) {
    llvm::APInt ClauseAlignment(64, 0);
    if (const Expr *AlignmentExpr = Clause->getAlignment()) {
      // The alignment expression is a compile-time constant by construction.
      auto *AlignmentCI =
          cast<llvm::ConstantInt>(Val: CGF.EmitScalarExpr(E: AlignmentExpr));
      ClauseAlignment = AlignmentCI->getValue();
    }
    for (const Expr *E : Clause->varlist()) {
      llvm::APInt Alignment(ClauseAlignment);
      if (Alignment == 0) {
        // OpenMP [2.8.1, Description]
        // If no optional parameter is specified, implementation-defined default
        // alignments for SIMD instructions on the target platforms are assumed.
        Alignment =
            CGF.getContext()
                .toCharUnitsFromBits(BitSize: CGF.getContext().getOpenMPDefaultSimdAlign(
                    T: E->getType()->getPointeeType()))
                .getQuantity();
      }
      assert((Alignment == 0 || Alignment.isPowerOf2()) &&
             "alignment is not power of 2");
      if (Alignment != 0) {
        llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
        CGF.emitAlignmentAssumption(
            PtrValue, E, /*No second loc needed*/ AssumptionLoc: SourceLocation(),
            Alignment: llvm::ConstantInt::get(Context&: CGF.getLLVMContext(), V: Alignment));
      }
    }
  }
}
2390
/// Emit private copies of the loop counters of \p S and register them in
/// \p LoopScope. Also privatizes the extra counters required by ordered(n)
/// clauses when n exceeds the collapsed loop depth.
void CodeGenFunction::EmitOMPPrivateLoopCounters(
    const OMPLoopDirective &S, CodeGenFunction::OMPPrivateScope &LoopScope) {
  if (!HaveInsertPoint())
    return;
  auto I = S.private_counters().begin();
  for (const Expr *E : S.counters()) {
    const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl());
    const auto *PrivateVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *I)->getDecl());
    // Emit var without initialization.
    AutoVarEmission VarEmission = EmitAutoVarAlloca(var: *PrivateVD);
    EmitAutoVarCleanups(emission: VarEmission);
    // Remove the direct mapping so the private counter can be re-mapped via
    // the scope below.
    LocalDeclMap.erase(Val: PrivateVD);
    (void)LoopScope.addPrivate(LocalVD: VD, Addr: VarEmission.getAllocatedAddress());
    if (LocalDeclMap.count(Val: VD) || CapturedStmtInfo->lookup(VD) ||
        VD->hasGlobalStorage()) {
      // The original counter is addressable here: map the private counter to
      // the original's address (counter value flows through the original).
      DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD),
                      LocalDeclMap.count(Val: VD) || CapturedStmtInfo->lookup(VD),
                      E->getType(), VK_LValue, E->getExprLoc());
      (void)LoopScope.addPrivate(LocalVD: PrivateVD, Addr: EmitLValue(E: &DRE).getAddress());
    } else {
      (void)LoopScope.addPrivate(LocalVD: PrivateVD, Addr: VarEmission.getAllocatedAddress());
    }
    ++I;
  }
  // Privatize extra loop counters used in loops for ordered(n) clauses.
  for (const auto *C : S.getClausesOfKind<OMPOrderedClause>()) {
    if (!C->getNumForLoops())
      continue;
    for (unsigned I = S.getLoopsNumber(), E = C->getLoopNumIterations().size();
         I < E; ++I) {
      const auto *DRE = cast<DeclRefExpr>(Val: C->getLoopCounter(NumLoop: I));
      const auto *VD = cast<VarDecl>(Val: DRE->getDecl());
      // Override only those variables that can be captured to avoid re-emission
      // of the variables declared within the loops.
      if (DRE->refersToEnclosingVariableOrCapture()) {
        (void)LoopScope.addPrivate(
            LocalVD: VD, Addr: CreateMemTemp(T: DRE->getType(), Name: VD->getName()));
      }
    }
  }
}
2432
/// Emit the loop precondition check for \p S: branches to \p TrueBlock if the
/// loop executes at least once, otherwise to \p FalseBlock. Evaluates the
/// condition on temporary private counters so the real counters are untouched.
static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S,
                        const Expr *Cond, llvm::BasicBlock *TrueBlock,
                        llvm::BasicBlock *FalseBlock, uint64_t TrueCount) {
  if (!CGF.HaveInsertPoint())
    return;
  {
    CodeGenFunction::OMPPrivateScope PreCondScope(CGF);
    CGF.EmitOMPPrivateLoopCounters(S, LoopScope&: PreCondScope);
    (void)PreCondScope.Privatize();
    // Get initial values of real counters.
    for (const Expr *I : S.inits()) {
      CGF.EmitIgnoredExpr(E: I);
    }
  }
  // Create temp loop control variables with their init values to support
  // non-rectangular loops.
  CodeGenFunction::OMPMapVars PreCondVars;
  for (const Expr *E : S.dependent_counters()) {
    if (!E)
      continue;
    assert(!E->getType().getNonReferenceType()->isRecordType() &&
           "dependent counter must not be an iterator.");
    const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl());
    Address CounterAddr =
        CGF.CreateMemTemp(T: VD->getType().getNonReferenceType());
    (void)PreCondVars.setVarAddr(CGF, LocalVD: VD, TempAddr: CounterAddr);
  }
  (void)PreCondVars.apply(CGF);
  for (const Expr *E : S.dependent_inits()) {
    if (!E)
      continue;
    CGF.EmitIgnoredExpr(E);
  }
  // Check that loop is executed at least one time.
  CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount);
  PreCondVars.restore(CGF);
}
2470
/// Emit private copies of the variables in 'linear' clauses of \p D and
/// register them in \p PrivateScope. Linear variables that are also SIMD loop
/// counters are emitted but not registered (the counter privatization already
/// covers them).
void CodeGenFunction::EmitOMPLinearClause(
    const OMPLoopDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  // Collect loop counters of a simd directive: they must not be re-registered
  // as linear privates below.
  llvm::DenseSet<const VarDecl *> SIMDLCVs;
  OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S: D);
  if (isOpenMPSimdDirective(DKind: EKind)) {
    const auto *LoopDirective = cast<OMPLoopDirective>(Val: &D);
    for (const Expr *C : LoopDirective->counters()) {
      SIMDLCVs.insert(
          V: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: C)->getDecl())->getCanonicalDecl());
    }
  }
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    auto CurPrivate = C->privates().begin();
    for (const Expr *E : C->varlist()) {
      const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl());
      const auto *PrivateVD =
          cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *CurPrivate)->getDecl());
      if (!SIMDLCVs.count(V: VD->getCanonicalDecl())) {
        // Emit private VarDecl with copy init.
        EmitVarDecl(D: *PrivateVD);
        bool IsRegistered =
            PrivateScope.addPrivate(LocalVD: VD, Addr: GetAddrOfLocalVar(VD: PrivateVD));
        assert(IsRegistered && "linear var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      } else {
        EmitVarDecl(D: *PrivateVD);
      }
      ++CurPrivate;
    }
  }
}
2505
/// Translate 'simdlen'/'safelen' clauses of \p D into loop metadata: both set
/// the vectorize width; a 'safelen' clause additionally disables marking
/// memory instructions as parallel.
static void emitSimdlenSafelenClause(CodeGenFunction &CGF,
                                     const OMPExecutableDirective &D) {
  if (!CGF.HaveInsertPoint())
    return;
  if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) {
    RValue Len = CGF.EmitAnyExpr(E: C->getSimdlen(), aggSlot: AggValueSlot::ignored(),
                                 /*ignoreResult=*/true);
    auto *Val = cast<llvm::ConstantInt>(Val: Len.getScalarVal());
    CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
    // In presence of finite 'safelen', it may be unsafe to mark all
    // the memory instructions parallel, because loop-carried
    // dependences of 'safelen' iterations are possible.
    CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>());
  } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) {
    RValue Len = CGF.EmitAnyExpr(E: C->getSafelen(), aggSlot: AggValueSlot::ignored(),
                                 /*ignoreResult=*/true);
    auto *Val = cast<llvm::ConstantInt>(Val: Len.getScalarVal());
    CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
    // In presence of finite 'safelen', it may be unsafe to mark all
    // the memory instructions parallel, because loop-carried
    // dependences of 'safelen' iterations are possible.
    CGF.LoopStack.setParallel(/*Enable=*/false);
  }
}
2530
2531// Check for the presence of an `OMPOrderedDirective`,
2532// i.e., `ordered` in `#pragma omp ordered simd`.
2533//
2534// Consider the following source code:
2535// ```
2536// __attribute__((noinline)) void omp_simd_loop(float X[ARRAY_SIZE][ARRAY_SIZE])
2537// {
2538// for (int r = 1; r < ARRAY_SIZE; ++r) {
2539// for (int c = 1; c < ARRAY_SIZE; ++c) {
2540// #pragma omp simd
2541// for (int k = 2; k < ARRAY_SIZE; ++k) {
2542// #pragma omp ordered simd
2543// X[r][k] = X[r][k - 2] + sinf((float)(r / c));
2544// }
2545// }
2546// }
2547// }
2548// ```
2549//
2550// Suppose we are in `CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective
2551// &D)`. By examining `D.dump()` we have the following AST containing
2552// `OMPOrderedDirective`:
2553//
2554// ```
2555// OMPSimdDirective 0x1c32950
2556// `-CapturedStmt 0x1c32028
2557// |-CapturedDecl 0x1c310e8
2558// | |-ForStmt 0x1c31e30
2559// | | |-DeclStmt 0x1c31298
2560// | | | `-VarDecl 0x1c31208 used k 'int' cinit
2561// | | | `-IntegerLiteral 0x1c31278 'int' 2
2562// | | |-<<<NULL>>>
2563// | | |-BinaryOperator 0x1c31308 'int' '<'
2564// | | | |-ImplicitCastExpr 0x1c312f0 'int' <LValueToRValue>
2565// | | | | `-DeclRefExpr 0x1c312b0 'int' lvalue Var 0x1c31208 'k' 'int'
2566// | | | `-IntegerLiteral 0x1c312d0 'int' 256
2567// | | |-UnaryOperator 0x1c31348 'int' prefix '++'
2568// | | | `-DeclRefExpr 0x1c31328 'int' lvalue Var 0x1c31208 'k' 'int'
2569// | | `-CompoundStmt 0x1c31e18
2570// | | `-OMPOrderedDirective 0x1c31dd8
2571// | | |-OMPSimdClause 0x1c31380
2572// | | `-CapturedStmt 0x1c31cd0
2573// ```
2574//
2575// Note the presence of `OMPOrderedDirective` above:
2576// It's (transitively) nested in a `CapturedStmt` representing the pragma
2577// annotated compound statement. Thus, we need to consider this nesting and
2578// include checking the `getCapturedStmt` in this case.
2579static bool hasOrderedDirective(const Stmt *S) {
2580 if (isa<OMPOrderedDirective>(Val: S))
2581 return true;
2582
2583 if (const auto *CS = dyn_cast<CapturedStmt>(Val: S))
2584 return hasOrderedDirective(S: CS->getCapturedStmt());
2585
2586 for (const Stmt *Child : S->children()) {
2587 if (Child && hasOrderedDirective(S: Child))
2588 return true;
2589 }
2590
2591 return false;
2592}
2593
2594static void applyConservativeSimdOrderedDirective(const Stmt &AssociatedStmt,
2595 LoopInfoStack &LoopStack) {
2596 // Check for the presence of an `OMPOrderedDirective`
2597 // i.e., `ordered` in `#pragma omp ordered simd`
2598 bool HasOrderedDirective = hasOrderedDirective(S: &AssociatedStmt);
2599 // If present then conservatively disable loop vectorization
2600 // analogously to how `emitSimdlenSafelenClause` does.
2601 if (HasOrderedDirective)
2602 LoopStack.setParallel(/*Enable=*/false);
2603}
2604
/// Set up loop metadata for a simd region: enables vectorization and parallel
/// accesses, then conservatively narrows those settings based on nested
/// 'ordered' directives and the safelen/simdlen/order/reduction clauses.
void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D) {
  // Walk clauses and process safelen/lastprivate.
  LoopStack.setParallel(/*Enable=*/true);
  LoopStack.setVectorizeEnable();
  const Stmt *AssociatedStmt = D.getAssociatedStmt();
  applyConservativeSimdOrderedDirective(AssociatedStmt: *AssociatedStmt, LoopStack);
  emitSimdlenSafelenClause(CGF&: *this, D);
  // order(concurrent) re-enables parallel accesses even after the clauses
  // above disabled them.
  if (const auto *C = D.getSingleClause<OMPOrderClause>())
    if (C->getKind() == OMPC_ORDER_concurrent)
      LoopStack.setParallel(/*Enable=*/true);
  OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S: D);
  if ((EKind == OMPD_simd ||
       (getLangOpts().OpenMPSimd && isOpenMPSimdDirective(DKind: EKind))) &&
      llvm::any_of(Range: D.getClausesOfKind<OMPReductionClause>(),
                   P: [](const OMPReductionClause *C) {
                     return C->getModifier() == OMPC_REDUCTION_inscan;
                   }))
    // Disable parallel access in case of prefix sum.
    LoopStack.setParallel(/*Enable=*/false);
}
2625
/// Emit the final values of the loop counters of \p D back into the original
/// variables after a simd loop, optionally guarded by a condition produced by
/// \p CondGen. Only counters visible outside the region (locals in scope,
/// captures, globals, or captured expressions) are updated.
void CodeGenFunction::EmitOMPSimdFinal(
    const OMPLoopDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
  if (!HaveInsertPoint())
    return;
  llvm::BasicBlock *DoneBB = nullptr;
  auto IC = D.counters().begin();
  auto IPC = D.private_counters().begin();
  for (const Expr *F : D.finals()) {
    const auto *OrigVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: (*IC))->getDecl());
    const auto *PrivateVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: (*IPC))->getDecl());
    const auto *CED = dyn_cast<OMPCapturedExprDecl>(Val: OrigVD);
    if (LocalDeclMap.count(Val: OrigVD) || CapturedStmtInfo->lookup(VD: OrigVD) ||
        OrigVD->hasGlobalStorage() || CED) {
      if (!DoneBB) {
        if (llvm::Value *Cond = CondGen(*this)) {
          // If the first post-update expression is found, emit conditional
          // block if it was requested.
          llvm::BasicBlock *ThenBB = createBasicBlock(name: ".omp.final.then");
          DoneBB = createBasicBlock(name: ".omp.final.done");
          Builder.CreateCondBr(Cond, True: ThenBB, False: DoneBB);
          EmitBlock(BB: ThenBB);
        }
      }
      // Find the address the final expression should write through: the
      // captured expression's underlying lvalue, or the private counter.
      Address OrigAddr = Address::invalid();
      if (CED) {
        OrigAddr = EmitLValue(E: CED->getInit()->IgnoreImpCasts()).getAddress();
      } else {
        DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(PrivateVD),
                        /*RefersToEnclosingVariableOrCapture=*/false,
                        (*IPC)->getType(), VK_LValue, (*IPC)->getExprLoc());
        OrigAddr = EmitLValue(E: &DRE).getAddress();
      }
      // Map the original counter onto that address while emitting the final
      // update expression.
      OMPPrivateScope VarScope(*this);
      VarScope.addPrivate(LocalVD: OrigVD, Addr: OrigAddr);
      (void)VarScope.Privatize();
      EmitIgnoredExpr(E: F);
    }
    ++IC;
    ++IPC;
  }
  if (DoneBB)
    EmitBlock(BB: DoneBB, /*IsFinished=*/true);
}
2670
/// Emit the body of loop directive \p S, then a debug stop point located at
/// the directive itself.
static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF,
                                         const OMPLoopDirective &S,
                                         CodeGenFunction::JumpDest LoopExit) {
  CGF.EmitOMPLoopBody(D: S, LoopExit);
  CGF.EmitStopPoint(S: &S);
}
2677
2678/// Emit a helper variable and return corresponding lvalue.
2679static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
2680 const DeclRefExpr *Helper) {
2681 auto VDecl = cast<VarDecl>(Val: Helper->getDecl());
2682 CGF.EmitVarDecl(D: *VDecl);
2683 return CGF.EmitLValue(E: Helper);
2684}
2685
/// Emit a simd loop body, honoring an applicable if-clause: the "then" version
/// runs \p SimdInitGen (simd metadata enabled), the "else" version emits the
/// same body with vectorization disabled. Without an if-clause only the
/// "then" version is emitted.
static void emitCommonSimdLoop(CodeGenFunction &CGF, const OMPLoopDirective &S,
                               const RegionCodeGenTy &SimdInitGen,
                               const RegionCodeGenTy &BodyCodeGen) {
  auto &&ThenGen = [&S, &SimdInitGen, &BodyCodeGen](CodeGenFunction &CGF,
                                                    PrePostActionTy &) {
    CGOpenMPRuntime::NontemporalDeclsRAII NontemporalsRegion(CGF.CGM, S);
    CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
    SimdInitGen(CGF);

    BodyCodeGen(CGF);
  };
  auto &&ElseGen = [&BodyCodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
    CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
    CGF.LoopStack.setVectorizeEnable(/*Enable=*/false);

    BodyCodeGen(CGF);
  };
  // Find an if-clause that applies to the simd part of the directive
  // (OpenMP 5.0 'if(simd: ...)' or an unmodified 'if').
  const Expr *IfCond = nullptr;
  OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S);
  if (isOpenMPSimdDirective(DKind: EKind)) {
    for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
      if (CGF.getLangOpts().OpenMP >= 50 &&
          (C->getNameModifier() == OMPD_unknown ||
           C->getNameModifier() == OMPD_simd)) {
        IfCond = C->getCondition();
        break;
      }
    }
  }
  if (IfCond) {
    CGF.CGM.getOpenMPRuntime().emitIfClause(CGF, Cond: IfCond, ThenGen, ElseGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}
2722
/// Emit the complete codegen for a simd region: precondition check, iteration
/// variable and trip count, clause privatization (private/linear/reduction/
/// lastprivate), the inner loop itself, and the final updates for counters,
/// lastprivates, linears and reductions.
static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S,
                              PrePostActionTy &Action) {
  Action.Enter(CGF);
  OMPLoopScope PreInitScope(CGF, S);
  // if (PreCond) {
  //   for (IV in 0..LastIteration) BODY;
  //   <Final counter/linear vars updates>;
  // }

  // The presence of lower/upper bound variable depends on the actual directive
  // kind in the AST node. The variables must be emitted because some of the
  // expressions associated with the loop will use them.
  OpenMPDirectiveKind DKind = S.getDirectiveKind();
  if (isOpenMPDistributeDirective(DKind) ||
      isOpenMPWorksharingDirective(DKind) || isOpenMPTaskLoopDirective(DKind) ||
      isOpenMPGenericLoopDirective(DKind)) {
    (void)EmitOMPHelperVar(CGF, Helper: cast<DeclRefExpr>(Val: S.getLowerBoundVariable()));
    (void)EmitOMPHelperVar(CGF, Helper: cast<DeclRefExpr>(Val: S.getUpperBoundVariable()));
  }

  OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S);
  // Emit: if (PreCond) - begin.
  // If the condition constant folds and can be elided, avoid emitting the
  // whole loop.
  bool CondConstant;
  llvm::BasicBlock *ContBlock = nullptr;
  if (CGF.ConstantFoldsToSimpleInteger(Cond: S.getPreCond(), Result&: CondConstant)) {
    if (!CondConstant)
      return;
  } else {
    llvm::BasicBlock *ThenBlock = CGF.createBasicBlock(name: "simd.if.then");
    ContBlock = CGF.createBasicBlock(name: "simd.if.end");
    emitPreCond(CGF, S, Cond: S.getPreCond(), TrueBlock: ThenBlock, FalseBlock: ContBlock,
                TrueCount: CGF.getProfileCount(S: &S));
    CGF.EmitBlock(BB: ThenBlock);
    CGF.incrementProfileCounter(S: &S);
  }

  // Emit the loop iteration variable.
  const Expr *IVExpr = S.getIterationVariable();
  const auto *IVDecl = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: IVExpr)->getDecl());
  CGF.EmitVarDecl(D: *IVDecl);
  CGF.EmitIgnoredExpr(E: S.getInit());

  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate iterations count on
  // each iteration (e.g., it is foldable into a constant).
  if (const auto *LIExpr = dyn_cast<DeclRefExpr>(Val: S.getLastIteration())) {
    CGF.EmitVarDecl(D: *cast<VarDecl>(Val: LIExpr->getDecl()));
    // Emit calculation of the iterations count.
    CGF.EmitIgnoredExpr(E: S.getCalcLastIteration());
  }

  emitAlignedClause(CGF, D: S);
  (void)CGF.EmitOMPLinearClauseInit(D: S);
  {
    // Privatize all clause variables for the duration of the loop.
    CodeGenFunction::OMPPrivateScope LoopScope(CGF);
    CGF.EmitOMPPrivateClause(D: S, PrivateScope&: LoopScope);
    CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
    CGF.EmitOMPLinearClause(D: S, PrivateScope&: LoopScope);
    CGF.EmitOMPReductionClauseInit(D: S, PrivateScope&: LoopScope);
    CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(
        CGF, S, CGF.EmitLValue(E: S.getIterationVariable()));
    bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(D: S, PrivateScope&: LoopScope);
    (void)LoopScope.Privatize();
    if (isOpenMPTargetExecutionDirective(DKind: EKind))
      CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, D: S);

    emitCommonSimdLoop(
        CGF, S,
        SimdInitGen: [&S](CodeGenFunction &CGF, PrePostActionTy &) {
          CGF.EmitOMPSimdInit(D: S);
        },
        BodyCodeGen: [&S, &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
          CGF.EmitOMPInnerLoop(
              S, RequiresCleanup: LoopScope.requiresCleanups(), LoopCond: S.getCond(), IncExpr: S.getInc(),
              BodyGen: [&S](CodeGenFunction &CGF) {
                emitOMPLoopBodyWithStopPoint(CGF, S,
                                             LoopExit: CodeGenFunction::JumpDest());
              },
              PostIncGen: [](CodeGenFunction &) {});
        });
    CGF.EmitOMPSimdFinal(D: S, CondGen: [](CodeGenFunction &) { return nullptr; });
    // Emit final copy of the lastprivate variables at the end of loops.
    if (HasLastprivateClause)
      CGF.EmitOMPLastprivateClauseFinal(D: S, /*NoFinals=*/true);
    CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_simd);
    emitPostUpdateForReductionClause(CGF, D: S,
                                     CondGen: [](CodeGenFunction &) { return nullptr; });
    LoopScope.restoreMap();
    CGF.EmitOMPLinearClauseFinal(D: S, CondGen: [](CodeGenFunction &) { return nullptr; });
  }
  // Emit: if (PreCond) - end.
  if (ContBlock) {
    CGF.EmitBranch(Block: ContBlock);
    CGF.EmitBlock(BB: ContBlock, IsFinished: true);
  }
}
2821
2822// Pass OMPLoopDirective (instead of OMPSimdDirective) to make this function
2823// available for "loop bind(thread)", which maps to "simd".
2824static bool isSimdSupportedByOpenMPIRBuilder(const OMPLoopDirective &S) {
2825 // Check for unsupported clauses
2826 for (OMPClause *C : S.clauses()) {
2827 // Currently only order, simdlen and safelen clauses are supported
2828 if (!(isa<OMPSimdlenClause>(Val: C) || isa<OMPSafelenClause>(Val: C) ||
2829 isa<OMPOrderClause>(Val: C) || isa<OMPAlignedClause>(Val: C)))
2830 return false;
2831 }
2832
2833 // Check if we have a statement with the ordered directive.
2834 // Visit the statement hierarchy to find a compound statement
2835 // with a ordered directive in it.
2836 if (const auto *CanonLoop = dyn_cast<OMPCanonicalLoop>(Val: S.getRawStmt())) {
2837 if (const Stmt *SyntacticalLoop = CanonLoop->getLoopStmt()) {
2838 for (const Stmt *SubStmt : SyntacticalLoop->children()) {
2839 if (!SubStmt)
2840 continue;
2841 if (const CompoundStmt *CS = dyn_cast<CompoundStmt>(Val: SubStmt)) {
2842 for (const Stmt *CSSubStmt : CS->children()) {
2843 if (!CSSubStmt)
2844 continue;
2845 if (isa<OMPOrderedDirective>(Val: CSSubStmt)) {
2846 return false;
2847 }
2848 }
2849 }
2850 }
2851 }
2852 }
2853 return true;
2854}
2855
/// Collect a pointer-value -> alignment mapping from the 'aligned' clauses of
/// \p S, for the OpenMPIRBuilder's applySimd. Falls back to the target's
/// default SIMD alignment for the pointee type when the clause specifies none.
static llvm::MapVector<llvm::Value *, llvm::Value *>
GetAlignedMapping(const OMPLoopDirective &S, CodeGenFunction &CGF) {
  llvm::MapVector<llvm::Value *, llvm::Value *> AlignedVars;
  for (const auto *Clause : S.getClausesOfKind<OMPAlignedClause>()) {
    llvm::APInt ClauseAlignment(64, 0);
    if (const Expr *AlignmentExpr = Clause->getAlignment()) {
      // The alignment expression is a compile-time constant by construction.
      auto *AlignmentCI =
          cast<llvm::ConstantInt>(Val: CGF.EmitScalarExpr(E: AlignmentExpr));
      ClauseAlignment = AlignmentCI->getValue();
    }
    for (const Expr *E : Clause->varlist()) {
      llvm::APInt Alignment(ClauseAlignment);
      if (Alignment == 0) {
        // OpenMP [2.8.1, Description]
        // If no optional parameter is specified, implementation-defined default
        // alignments for SIMD instructions on the target platforms are assumed.
        Alignment =
            CGF.getContext()
                .toCharUnitsFromBits(BitSize: CGF.getContext().getOpenMPDefaultSimdAlign(
                    T: E->getType()->getPointeeType()))
                .getQuantity();
      }
      assert((Alignment == 0 || Alignment.isPowerOf2()) &&
             "alignment is not power of 2");
      llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
      AlignedVars[PtrValue] = CGF.Builder.getInt64(C: Alignment.getSExtValue());
    }
  }
  return AlignedVars;
}
2886
// Pass OMPLoopDirective (instead of OMPSimdDirective) to make this function
// available for "loop bind(thread)", which maps to "simd".
/// Emit a simd directive either through the OpenMPIRBuilder (when enabled and
/// the directive is supported — see isSimdSupportedByOpenMPIRBuilder) or via
/// the classic clang codegen path (emitOMPSimdRegion).
static void emitOMPSimdDirective(const OMPLoopDirective &S,
                                 CodeGenFunction &CGF, CodeGenModule &CGM) {
  bool UseOMPIRBuilder =
      CGM.getLangOpts().OpenMPIRBuilder && isSimdSupportedByOpenMPIRBuilder(S);
  if (UseOMPIRBuilder) {
    auto &&CodeGenIRBuilder = [&S, &CGM, UseOMPIRBuilder](CodeGenFunction &CGF,
                                                          PrePostActionTy &) {
      // Use the OpenMPIRBuilder if enabled.
      // NOTE(review): this check is always true here — the lambda is only
      // emitted inside the outer `if (UseOMPIRBuilder)` branch.
      if (UseOMPIRBuilder) {
        llvm::MapVector<llvm::Value *, llvm::Value *> AlignedVars =
            GetAlignedMapping(S, CGF);
        // Emit the associated statement and get its loop representation.
        const Stmt *Inner = S.getRawStmt();
        llvm::CanonicalLoopInfo *CLI =
            CGF.EmitOMPCollapsedCanonicalLoopNest(S: Inner, Depth: 1);

        llvm::OpenMPIRBuilder &OMPBuilder =
            CGM.getOpenMPRuntime().getOMPBuilder();
        // Add SIMD specific metadata
        llvm::ConstantInt *Simdlen = nullptr;
        if (const auto *C = S.getSingleClause<OMPSimdlenClause>()) {
          RValue Len = CGF.EmitAnyExpr(E: C->getSimdlen(), aggSlot: AggValueSlot::ignored(),
                                       /*ignoreResult=*/true);
          auto *Val = cast<llvm::ConstantInt>(Val: Len.getScalarVal());
          Simdlen = Val;
        }
        llvm::ConstantInt *Safelen = nullptr;
        if (const auto *C = S.getSingleClause<OMPSafelenClause>()) {
          RValue Len = CGF.EmitAnyExpr(E: C->getSafelen(), aggSlot: AggValueSlot::ignored(),
                                       /*ignoreResult=*/true);
          auto *Val = cast<llvm::ConstantInt>(Val: Len.getScalarVal());
          Safelen = Val;
        }
        llvm::omp::OrderKind Order = llvm::omp::OrderKind::OMP_ORDER_unknown;
        if (const auto *C = S.getSingleClause<OMPOrderClause>()) {
          if (C->getKind() == OpenMPOrderClauseKind::OMPC_ORDER_concurrent) {
            Order = llvm::omp::OrderKind::OMP_ORDER_concurrent;
          }
        }
        // Add simd metadata to the collapsed loop. Do not generate
        // another loop for if clause. Support for if clause is done earlier.
        OMPBuilder.applySimd(Loop: CLI, AlignedVars,
                             /*IfCond*/ nullptr, Order, Simdlen, Safelen);
        return;
      }
    };
    {
      auto LPCRegion =
          CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF, S);
      OMPLexicalScope Scope(CGF, S, OMPD_unknown);
      CGM.getOpenMPRuntime().emitInlinedDirective(CGF, InnermostKind: OMPD_simd,
                                                  CodeGen: CodeGenIRBuilder);
    }
    return;
  }

  // Classic codegen path.
  CodeGenFunction::ParentLoopDirectiveForScanRegion ScanRegion(CGF, S);
  CGF.OMPFirstScanLoop = true;
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitOMPSimdRegion(CGF, S, Action);
  };
  {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF, S);
    OMPLexicalScope Scope(CGF, S, OMPD_unknown);
    CGM.getOpenMPRuntime().emitInlinedDirective(CGF, InnermostKind: OMPD_simd, CodeGen);
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(CGF, S);
}
2959
/// Emit code for '#pragma omp simd' by forwarding to the shared helper that
/// also serves "loop bind(thread)".
void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
  emitOMPSimdDirective(S, CGF&: *this, CGM);
}
2963
2964void CodeGenFunction::EmitOMPTileDirective(const OMPTileDirective &S) {
2965 // Emit the de-sugared statement.
2966 OMPTransformDirectiveScopeRAII TileScope(*this, &S);
2967 EmitStmt(S: S.getTransformedStmt());
2968}
2969
2970void CodeGenFunction::EmitOMPStripeDirective(const OMPStripeDirective &S) {
2971 // Emit the de-sugared statement.
2972 OMPTransformDirectiveScopeRAII StripeScope(*this, &S);
2973 EmitStmt(S: S.getTransformedStmt());
2974}
2975
2976void CodeGenFunction::EmitOMPReverseDirective(const OMPReverseDirective &S) {
2977 // Emit the de-sugared statement.
2978 OMPTransformDirectiveScopeRAII ReverseScope(*this, &S);
2979 EmitStmt(S: S.getTransformedStmt());
2980}
2981
2982void CodeGenFunction::EmitOMPInterchangeDirective(
2983 const OMPInterchangeDirective &S) {
2984 // Emit the de-sugared statement.
2985 OMPTransformDirectiveScopeRAII InterchangeScope(*this, &S);
2986 EmitStmt(S: S.getTransformedStmt());
2987}
2988
2989void CodeGenFunction::EmitOMPFuseDirective(const OMPFuseDirective &S) {
2990 // Emit the de-sugared statement
2991 OMPTransformDirectiveScopeRAII FuseScope(*this, &S);
2992 EmitStmt(S: S.getTransformedStmt());
2993}
2994
/// Emit '#pragma omp unroll': with the OpenMPIRBuilder, consume the nested
/// canonical loop and apply full/partial/heuristic unrolling; otherwise attach
/// unroll metadata to the next emitted loop and emit the associated statement.
void CodeGenFunction::EmitOMPUnrollDirective(const OMPUnrollDirective &S) {
  bool UseOMPIRBuilder = CGM.getLangOpts().OpenMPIRBuilder;

  if (UseOMPIRBuilder) {
    auto DL = SourceLocToDebugLoc(Location: S.getBeginLoc());
    const Stmt *Inner = S.getRawStmt();

    // Consume nested loop. Clear the entire remaining loop stack because a
    // fully unrolled loop is non-transformable. For partial unrolling the
    // generated outer loop is pushed back to the stack.
    llvm::CanonicalLoopInfo *CLI = EmitOMPCollapsedCanonicalLoopNest(S: Inner, Depth: 1);
    OMPLoopNestStack.clear();

    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();

    // A parent loop-associated construct still expects a loop to consume, so
    // partial unrolling must hand back the generated outer loop's info.
    bool NeedsUnrolledCLI = ExpectedOMPLoopDepth >= 1;
    llvm::CanonicalLoopInfo *UnrolledCLI = nullptr;

    if (S.hasClausesOfKind<OMPFullClause>()) {
      assert(ExpectedOMPLoopDepth == 0);
      OMPBuilder.unrollLoopFull(DL, Loop: CLI);
    } else if (auto *PartialClause = S.getSingleClause<OMPPartialClause>()) {
      // Factor == 0 means "let the builder choose".
      uint64_t Factor = 0;
      if (Expr *FactorExpr = PartialClause->getFactor()) {
        Factor = FactorExpr->EvaluateKnownConstInt(Ctx: getContext()).getZExtValue();
        assert(Factor >= 1 && "Only positive factors are valid");
      }
      OMPBuilder.unrollLoopPartial(DL, Loop: CLI, Factor,
                                   UnrolledCLI: NeedsUnrolledCLI ? &UnrolledCLI : nullptr);
    } else {
      OMPBuilder.unrollLoopHeuristic(DL, Loop: CLI);
    }

    assert((!NeedsUnrolledCLI || UnrolledCLI) &&
           "NeedsUnrolledCLI implies UnrolledCLI to be set");
    if (UnrolledCLI)
      OMPLoopNestStack.push_back(Elt: UnrolledCLI);

    return;
  }

  // This function is only called if the unrolled loop is not consumed by any
  // other loop-associated construct. Such a loop-associated construct will have
  // used the transformed AST.

  // Set the unroll metadata for the next emitted loop.
  LoopStack.setUnrollState(LoopAttributes::Enable);

  if (S.hasClausesOfKind<OMPFullClause>()) {
    LoopStack.setUnrollState(LoopAttributes::Full);
  } else if (auto *PartialClause = S.getSingleClause<OMPPartialClause>()) {
    if (Expr *FactorExpr = PartialClause->getFactor()) {
      uint64_t Factor =
          FactorExpr->EvaluateKnownConstInt(Ctx: getContext()).getZExtValue();
      assert(Factor >= 1 && "Only positive factors are valid");
      LoopStack.setUnrollCount(Factor);
    }
  }

  EmitStmt(S: S.getAssociatedStmt());
}
3056
/// Emit the outer "dispatch" loop shared by worksharing and distribute
/// constructs: it either requests chunks of iterations from the OpenMP
/// runtime (dynamic/ordered schedules, via emitForNext) or steps through
/// statically computed chunks (updating LB/UB by the stride each round),
/// running the inner loop (CodeGenLoop) once per chunk.
void CodeGenFunction::EmitOMPOuterLoop(
    bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S,
    CodeGenFunction::OMPPrivateScope &LoopScope,
    const CodeGenFunction::OMPLoopArguments &LoopArgs,
    const CodeGenFunction::CodeGenLoopTy &CodeGenLoop,
    const CodeGenFunction::CodeGenOrderedTy &CodeGenOrdered) {
  CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();

  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(T: IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

  JumpDest LoopExit = getJumpDestInCurrentScope(Name: "omp.dispatch.end");

  // Start the loop with a block that tests the condition.
  llvm::BasicBlock *CondBlock = createBasicBlock(name: "omp.dispatch.cond");
  EmitBlock(BB: CondBlock);
  const SourceRange R = S.getSourceRange();
  OMPLoopNestStack.clear();
  LoopStack.push(Header: CondBlock, StartLoc: SourceLocToDebugLoc(Location: R.getBegin()),
                 EndLoc: SourceLocToDebugLoc(Location: R.getEnd()));

  llvm::Value *BoolCondVal = nullptr;
  if (!DynamicOrOrdered) {
    // UB = min(UB, GlobalUB) or
    // UB = min(UB, PrevUB) for combined loop sharing constructs (e.g.
    // 'distribute parallel for')
    EmitIgnoredExpr(E: LoopArgs.EUB);
    // IV = LB
    EmitIgnoredExpr(E: LoopArgs.Init);
    // IV < UB
    BoolCondVal = EvaluateExprAsBool(E: LoopArgs.Cond);
  } else {
    // Ask the runtime for the next chunk; it returns false when no chunks
    // remain.
    BoolCondVal =
        RT.emitForNext(CGF&: *this, Loc: S.getBeginLoc(), IVSize, IVSigned, IL: LoopArgs.IL,
                       LB: LoopArgs.LB, UB: LoopArgs.UB, ST: LoopArgs.ST);
  }

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  llvm::BasicBlock *ExitBlock = LoopExit.getBlock();
  if (LoopScope.requiresCleanups())
    ExitBlock = createBasicBlock(name: "omp.dispatch.cleanup");

  llvm::BasicBlock *LoopBody = createBasicBlock(name: "omp.dispatch.body");
  Builder.CreateCondBr(Cond: BoolCondVal, True: LoopBody, False: ExitBlock);
  if (ExitBlock != LoopExit.getBlock()) {
    EmitBlock(BB: ExitBlock);
    EmitBranchThroughCleanup(Dest: LoopExit);
  }
  EmitBlock(BB: LoopBody);

  // Emit "IV = LB" (in case of static schedule, we have already calculated new
  // LB for loop condition and emitted it above).
  if (DynamicOrOrdered)
    EmitIgnoredExpr(E: LoopArgs.Init);

  // Create a block for the increment.
  JumpDest Continue = getJumpDestInCurrentScope(Name: "omp.dispatch.inc");
  BreakContinueStack.push_back(Elt: BreakContinue(S, LoopExit, Continue));

  OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S);
  emitCommonSimdLoop(
      CGF&: *this, S,
      SimdInitGen: [&S, IsMonotonic, EKind](CodeGenFunction &CGF, PrePostActionTy &) {
        // Generate !llvm.loop.parallel metadata for loads and stores for loops
        // with dynamic/guided scheduling and without ordered clause.
        if (!isOpenMPSimdDirective(DKind: EKind)) {
          CGF.LoopStack.setParallel(!IsMonotonic);
          if (const auto *C = S.getSingleClause<OMPOrderClause>())
            if (C->getKind() == OMPC_ORDER_concurrent)
              CGF.LoopStack.setParallel(/*Enable=*/true);
        } else {
          CGF.EmitOMPSimdInit(D: S);
        }
      },
      BodyCodeGen: [&S, &LoopArgs, LoopExit, &CodeGenLoop, IVSize, IVSigned, &CodeGenOrdered,
       &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
        SourceLocation Loc = S.getBeginLoc();
        // when 'distribute' is not combined with a 'for':
        // while (idx <= UB) { BODY; ++idx; }
        // when 'distribute' is combined with a 'for'
        // (e.g. 'distribute parallel for')
        // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
        CGF.EmitOMPInnerLoop(
            S, RequiresCleanup: LoopScope.requiresCleanups(), LoopCond: LoopArgs.Cond, IncExpr: LoopArgs.IncExpr,
            BodyGen: [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
              CodeGenLoop(CGF, S, LoopExit);
            },
            PostIncGen: [IVSize, IVSigned, Loc, &CodeGenOrdered](CodeGenFunction &CGF) {
              CodeGenOrdered(CGF, Loc, IVSize, IVSigned);
            });
      });

  EmitBlock(BB: Continue.getBlock());
  BreakContinueStack.pop_back();
  if (!DynamicOrOrdered) {
    // Emit "LB = LB + Stride", "UB = UB + Stride".
    EmitIgnoredExpr(E: LoopArgs.NextLB);
    EmitIgnoredExpr(E: LoopArgs.NextUB);
  }

  EmitBranch(Block: CondBlock);
  OMPLoopNestStack.clear();
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(BB: LoopExit.getBlock());

  // Tell the runtime we are done.
  auto &&CodeGen = [DynamicOrOrdered, &S, &LoopArgs](CodeGenFunction &CGF) {
    if (!DynamicOrOrdered)
      CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, Loc: S.getEndLoc(),
                                                     DKind: LoopArgs.DKind);
  };
  OMPCancelStack.emitExit(CGF&: *this, Kind: EKind, CodeGen);
}
3173
/// Emit the outer loop for a worksharing 'for' whose schedule requires one:
/// dynamic/guided/auto/runtime (dispatch-based), ordered, or static chunked.
/// Initializes the runtime (dispatch_init or for_static_init), delegates the
/// chunk loop to EmitOMPOuterLoop, and for dispatch schedules emits the
/// matching deinit call afterwards.
void CodeGenFunction::EmitOMPForOuterLoop(
    const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic,
    const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered,
    const OMPLoopArguments &LoopArgs,
    const CodeGenDispatchBoundsTy &CGDispatchBounds) {
  CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();

  // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime).
  const bool DynamicOrOrdered = Ordered || RT.isDynamic(ScheduleKind: ScheduleKind.Schedule);

  assert((Ordered || !RT.isStaticNonchunked(ScheduleKind.Schedule,
                                            LoopArgs.Chunk != nullptr)) &&
         "static non-chunked schedule does not need outer loop");

  // Emit outer loop.
  //
  // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
  // When schedule(dynamic,chunk_size) is specified, the iterations are
  // distributed to threads in the team in chunks as the threads request them.
  // Each thread executes a chunk of iterations, then requests another chunk,
  // until no chunks remain to be distributed. Each chunk contains chunk_size
  // iterations, except for the last chunk to be distributed, which may have
  // fewer iterations. When no chunk_size is specified, it defaults to 1.
  //
  // When schedule(guided,chunk_size) is specified, the iterations are assigned
  // to threads in the team in chunks as the executing threads request them.
  // Each thread executes a chunk of iterations, then requests another chunk,
  // until no chunks remain to be assigned. For a chunk_size of 1, the size of
  // each chunk is proportional to the number of unassigned iterations divided
  // by the number of threads in the team, decreasing to 1. For a chunk_size
  // with value k (greater than 1), the size of each chunk is determined in the
  // same way, with the restriction that the chunks do not contain fewer than k
  // iterations (except for the last chunk to be assigned, which may have fewer
  // than k iterations).
  //
  // When schedule(auto) is specified, the decision regarding scheduling is
  // delegated to the compiler and/or runtime system. The programmer gives the
  // implementation the freedom to choose any possible mapping of iterations to
  // threads in the team.
  //
  // When schedule(runtime) is specified, the decision regarding scheduling is
  // deferred until run time, and the schedule and chunk size are taken from the
  // run-sched-var ICV. If the ICV is set to auto, the schedule is
  // implementation defined
  //
  // __kmpc_dispatch_init();
  // while(__kmpc_dispatch_next(&LB, &UB)) {
  //   idx = LB;
  //   while (idx <= UB) { BODY; ++idx;
  //   __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only.
  //   } // inner loop
  // }
  // __kmpc_dispatch_deinit();
  //
  // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
  // When schedule(static, chunk_size) is specified, iterations are divided into
  // chunks of size chunk_size, and the chunks are assigned to the threads in
  // the team in a round-robin fashion in the order of the thread number.
  //
  // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
  //   while (idx <= UB) { BODY; ++idx; } // inner loop
  //   LB = LB + ST;
  //   UB = UB + ST;
  // }
  //

  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(T: IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

  if (DynamicOrOrdered) {
    // Dispatch schedules take bound *values* (not addresses); compute them via
    // the directive-specific callback.
    const std::pair<llvm::Value *, llvm::Value *> DispatchBounds =
        CGDispatchBounds(*this, S, LoopArgs.LB, LoopArgs.UB);
    llvm::Value *LBVal = DispatchBounds.first;
    llvm::Value *UBVal = DispatchBounds.second;
    CGOpenMPRuntime::DispatchRTInput DipatchRTInputValues = {LBVal, UBVal,
                                                             LoopArgs.Chunk};
    RT.emitForDispatchInit(CGF&: *this, Loc: S.getBeginLoc(), ScheduleKind, IVSize,
                           IVSigned, Ordered, DispatchValues: DipatchRTInputValues);
  } else {
    CGOpenMPRuntime::StaticRTInput StaticInit(
        IVSize, IVSigned, Ordered, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB,
        LoopArgs.ST, LoopArgs.Chunk);
    OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S);
    RT.emitForStaticInit(CGF&: *this, Loc: S.getBeginLoc(), DKind: EKind, ScheduleKind,
                         Values: StaticInit);
  }

  // For ordered loops, notify the runtime at the end of each iteration.
  auto &&CodeGenOrdered = [Ordered](CodeGenFunction &CGF, SourceLocation Loc,
                                    const unsigned IVSize,
                                    const bool IVSigned) {
    if (Ordered) {
      CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(CGF, Loc, IVSize,
                                                            IVSigned);
    }
  };

  OMPLoopArguments OuterLoopArgs(LoopArgs.LB, LoopArgs.UB, LoopArgs.ST,
                                 LoopArgs.IL, LoopArgs.Chunk, LoopArgs.EUB);
  OuterLoopArgs.IncExpr = S.getInc();
  OuterLoopArgs.Init = S.getInit();
  OuterLoopArgs.Cond = S.getCond();
  OuterLoopArgs.NextLB = S.getNextLowerBound();
  OuterLoopArgs.NextUB = S.getNextUpperBound();
  OuterLoopArgs.DKind = LoopArgs.DKind;
  EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, LoopArgs: OuterLoopArgs,
                   CodeGenLoop: emitOMPLoopBodyWithStopPoint, CodeGenOrdered);
  if (DynamicOrOrdered) {
    RT.emitForDispatchDeinit(CGF&: *this, Loc: S.getBeginLoc());
  }
}
3285
/// No-op CodeGenOrderedTy callback, used by constructs (e.g. 'distribute')
/// that never need end-of-iteration ordered-runtime calls.
static void emitEmptyOrdered(CodeGenFunction &, SourceLocation Loc,
                             const unsigned IVSize, const bool IVSigned) {}
3288
/// Emit the outer loop for a 'distribute' construct with a chunked (or
/// otherwise non-trivial) dist_schedule. For combined constructs that share
/// loop bounds with an inner worksharing loop (e.g. 'distribute parallel
/// for'), the "combined" variants of the init/cond/increment expressions are
/// selected.
void CodeGenFunction::EmitOMPDistributeOuterLoop(
    OpenMPDistScheduleClauseKind ScheduleKind, const OMPLoopDirective &S,
    OMPPrivateScope &LoopScope, const OMPLoopArguments &LoopArgs,
    const CodeGenLoopTy &CodeGenLoopContent) {

  CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();

  // Emit outer loop.
  // Same behavior as a OMPForOuterLoop, except that schedule cannot be
  // dynamic
  //

  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(T: IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
  OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S);

  CGOpenMPRuntime::StaticRTInput StaticInit(
      IVSize, IVSigned, /* Ordered = */ false, LoopArgs.IL, LoopArgs.LB,
      LoopArgs.UB, LoopArgs.ST, LoopArgs.Chunk);
  RT.emitDistributeStaticInit(CGF&: *this, Loc: S.getBeginLoc(), SchedKind: ScheduleKind, Values: StaticInit);

  // for combined 'distribute' and 'for' the increment expression of distribute
  // is stored in DistInc. For 'distribute' alone, it is in Inc.
  Expr *IncExpr;
  if (isOpenMPLoopBoundSharingDirective(Kind: EKind))
    IncExpr = S.getDistInc();
  else
    IncExpr = S.getInc();

  // this routine is shared by 'omp distribute parallel for' and
  // 'omp distribute': select the right EUB expression depending on the
  // directive
  OMPLoopArguments OuterLoopArgs;
  OuterLoopArgs.LB = LoopArgs.LB;
  OuterLoopArgs.UB = LoopArgs.UB;
  OuterLoopArgs.ST = LoopArgs.ST;
  OuterLoopArgs.IL = LoopArgs.IL;
  OuterLoopArgs.Chunk = LoopArgs.Chunk;
  OuterLoopArgs.EUB = isOpenMPLoopBoundSharingDirective(Kind: EKind)
                          ? S.getCombinedEnsureUpperBound()
                          : S.getEnsureUpperBound();
  OuterLoopArgs.IncExpr = IncExpr;
  OuterLoopArgs.Init = isOpenMPLoopBoundSharingDirective(Kind: EKind)
                           ? S.getCombinedInit()
                           : S.getInit();
  OuterLoopArgs.Cond = isOpenMPLoopBoundSharingDirective(Kind: EKind)
                           ? S.getCombinedCond()
                           : S.getCond();
  OuterLoopArgs.NextLB = isOpenMPLoopBoundSharingDirective(Kind: EKind)
                             ? S.getCombinedNextLowerBound()
                             : S.getNextLowerBound();
  OuterLoopArgs.NextUB = isOpenMPLoopBoundSharingDirective(Kind: EKind)
                             ? S.getCombinedNextUpperBound()
                             : S.getNextUpperBound();
  OuterLoopArgs.DKind = OMPD_distribute;

  EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, S,
                   LoopScope, LoopArgs: OuterLoopArgs, CodeGenLoop: CodeGenLoopContent,
                   CodeGenOrdered: emitEmptyOrdered);
}
3350
/// Emit the lower/upper bound helper variables for the inner 'for' of a
/// combined 'distribute parallel for', initializing them from the enclosing
/// 'distribute' chunk bounds (PrevLB/PrevUB) rather than the full iteration
/// space. Returns the {LB, UB} lvalues.
static std::pair<LValue, LValue>
emitDistributeParallelForInnerBounds(CodeGenFunction &CGF,
                                     const OMPExecutableDirective &S) {
  const OMPLoopDirective &LS = cast<OMPLoopDirective>(Val: S);
  LValue LB =
      EmitOMPHelperVar(CGF, Helper: cast<DeclRefExpr>(Val: LS.getLowerBoundVariable()));
  LValue UB =
      EmitOMPHelperVar(CGF, Helper: cast<DeclRefExpr>(Val: LS.getUpperBoundVariable()));

  // When composing 'distribute' with 'for' (e.g. as in 'distribute
  // parallel for') we need to use the 'distribute'
  // chunk lower and upper bounds rather than the whole loop iteration
  // space. These are parameters to the outlined function for 'parallel'
  // and we copy the bounds of the previous schedule into the
  // the current ones.
  LValue PrevLB = CGF.EmitLValue(E: LS.getPrevLowerBoundVariable());
  LValue PrevUB = CGF.EmitLValue(E: LS.getPrevUpperBoundVariable());
  llvm::Value *PrevLBVal = CGF.EmitLoadOfScalar(
      lvalue: PrevLB, Loc: LS.getPrevLowerBoundVariable()->getExprLoc());
  // The previous bounds may have a different type than the iteration
  // variable; convert before storing.
  PrevLBVal = CGF.EmitScalarConversion(
      Src: PrevLBVal, SrcTy: LS.getPrevLowerBoundVariable()->getType(),
      DstTy: LS.getIterationVariable()->getType(),
      Loc: LS.getPrevLowerBoundVariable()->getExprLoc());
  llvm::Value *PrevUBVal = CGF.EmitLoadOfScalar(
      lvalue: PrevUB, Loc: LS.getPrevUpperBoundVariable()->getExprLoc());
  PrevUBVal = CGF.EmitScalarConversion(
      Src: PrevUBVal, SrcTy: LS.getPrevUpperBoundVariable()->getType(),
      DstTy: LS.getIterationVariable()->getType(),
      Loc: LS.getPrevUpperBoundVariable()->getExprLoc());

  CGF.EmitStoreOfScalar(value: PrevLBVal, lvalue: LB);
  CGF.EmitStoreOfScalar(value: PrevUBVal, lvalue: UB);

  return {LB, UB};
}
3386
/// if the 'for' loop has a dispatch schedule (e.g. dynamic, guided) then
/// we need to use the LB and UB expressions generated by the worksharing
/// code generation support, whereas in non combined situations we would
/// just emit 0 and the LastIteration expression
/// This function is necessary due to the difference of the LB and UB
/// types for the RT emission routines for 'for_static_init' and
/// 'for_dispatch_init'
/// Returns the loaded {LB, UB} values, typed as the iteration variable.
static std::pair<llvm::Value *, llvm::Value *>
emitDistributeParallelForDispatchBounds(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &S,
                                        Address LB, Address UB) {
  const OMPLoopDirective &LS = cast<OMPLoopDirective>(Val: S);
  const Expr *IVExpr = LS.getIterationVariable();
  // when implementing a dynamic schedule for a 'for' combined with a
  // 'distribute' (e.g. 'distribute parallel for'), the 'for' loop
  // is not normalized as each team only executes its own assigned
  // distribute chunk
  QualType IteratorTy = IVExpr->getType();
  llvm::Value *LBVal =
      CGF.EmitLoadOfScalar(Addr: LB, /*Volatile=*/false, Ty: IteratorTy, Loc: S.getBeginLoc());
  llvm::Value *UBVal =
      CGF.EmitLoadOfScalar(Addr: UB, /*Volatile=*/false, Ty: IteratorTy, Loc: S.getBeginLoc());
  return {LBVal, UBVal};
}
3411
3412static void emitDistributeParallelForDistributeInnerBoundParams(
3413 CodeGenFunction &CGF, const OMPExecutableDirective &S,
3414 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars) {
3415 const auto &Dir = cast<OMPLoopDirective>(Val: S);
3416 LValue LB =
3417 CGF.EmitLValue(E: cast<DeclRefExpr>(Val: Dir.getCombinedLowerBoundVariable()));
3418 llvm::Value *LBCast = CGF.Builder.CreateIntCast(
3419 V: CGF.Builder.CreateLoad(Addr: LB.getAddress()), DestTy: CGF.SizeTy, /*isSigned=*/false);
3420 CapturedVars.push_back(Elt: LBCast);
3421 LValue UB =
3422 CGF.EmitLValue(E: cast<DeclRefExpr>(Val: Dir.getCombinedUpperBoundVariable()));
3423
3424 llvm::Value *UBCast = CGF.Builder.CreateIntCast(
3425 V: CGF.Builder.CreateLoad(Addr: UB.getAddress()), DestTy: CGF.SizeTy, /*isSigned=*/false);
3426 CapturedVars.push_back(Elt: UBCast);
3427}
3428
/// Emit the inner 'parallel for' of a combined 'distribute parallel for'
/// (or its simd/teams/target variants): outlines a parallel region whose
/// body runs the worksharing loop over the enclosing distribute chunk's
/// bounds.
static void
emitInnerParallelForWhenCombined(CodeGenFunction &CGF,
                                 const OMPLoopDirective &S,
                                 CodeGenFunction::JumpDest LoopExit) {
  OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S);
  auto &&CGInlinedWorksharingLoop = [&S, EKind](CodeGenFunction &CGF,
                                                PrePostActionTy &Action) {
    Action.Enter(CGF);
    bool HasCancel = false;
    // 'cancel' is only relevant for the non-simd combined forms; query the
    // concrete directive type for its cancel flag.
    if (!isOpenMPSimdDirective(DKind: EKind)) {
      if (const auto *D = dyn_cast<OMPTeamsDistributeParallelForDirective>(Val: &S))
        HasCancel = D->hasCancel();
      else if (const auto *D = dyn_cast<OMPDistributeParallelForDirective>(Val: &S))
        HasCancel = D->hasCancel();
      else if (const auto *D =
                   dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(Val: &S))
        HasCancel = D->hasCancel();
    }
    CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, EKind, HasCancel);
    CGF.EmitOMPWorksharingLoop(S, EUB: S.getPrevEnsureUpperBound(),
                               CodeGenLoopBounds: emitDistributeParallelForInnerBounds,
                               CGDispatchBounds: emitDistributeParallelForDispatchBounds);
  };

  emitCommonOMPParallelDirective(
      CGF, S, InnermostKind: isOpenMPSimdDirective(DKind: EKind) ? OMPD_for_simd : OMPD_for,
      CodeGen: CGInlinedWorksharingLoop,
      CodeGenBoundParameters: emitDistributeParallelForDistributeInnerBoundParams);
}
3458
3459void CodeGenFunction::EmitOMPDistributeParallelForDirective(
3460 const OMPDistributeParallelForDirective &S) {
3461 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3462 CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitInnerParallelForWhenCombined,
3463 IncExpr: S.getDistInc());
3464 };
3465 OMPLexicalScope Scope(*this, S, OMPD_parallel);
3466 CGM.getOpenMPRuntime().emitInlinedDirective(CGF&: *this, InnermostKind: OMPD_distribute, CodeGen);
3467}
3468
3469void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective(
3470 const OMPDistributeParallelForSimdDirective &S) {
3471 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3472 CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitInnerParallelForWhenCombined,
3473 IncExpr: S.getDistInc());
3474 };
3475 OMPLexicalScope Scope(*this, S, OMPD_parallel);
3476 CGM.getOpenMPRuntime().emitInlinedDirective(CGF&: *this, InnermostKind: OMPD_distribute, CodeGen);
3477}
3478
3479void CodeGenFunction::EmitOMPDistributeSimdDirective(
3480 const OMPDistributeSimdDirective &S) {
3481 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3482 CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitOMPLoopBodyWithStopPoint, IncExpr: S.getInc());
3483 };
3484 OMPLexicalScope Scope(*this, S, OMPD_unknown);
3485 CGM.getOpenMPRuntime().emitInlinedDirective(CGF&: *this, InnermostKind: OMPD_simd, CodeGen);
3486}
3487
/// Emit the device-side outlined function for a 'target simd' directive,
/// registered as an offload entry under \p ParentName.
void CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName, const OMPTargetSimdDirective &S) {
  // Emit the simd region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitOMPSimdRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      D: S, ParentName, OutlinedFn&: Fn, OutlinedFnID&: Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}
3501
3502void CodeGenFunction::EmitOMPTargetSimdDirective(
3503 const OMPTargetSimdDirective &S) {
3504 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3505 emitOMPSimdRegion(CGF, S, Action);
3506 };
3507 emitCommonOMPTargetDirective(CGF&: *this, S, CodeGen);
3508}
3509
namespace {
/// Bundles an OpenMP 'schedule' clause kind together with its (up to two)
/// modifiers (e.g. monotonic, nonmonotonic).
struct ScheduleKindModifiersTy {
  OpenMPScheduleClauseKind Kind; // static, dynamic, guided, auto, runtime.
  OpenMPScheduleClauseModifier M1; // First schedule modifier, if any.
  OpenMPScheduleClauseModifier M2; // Second schedule modifier, if any.
  ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind,
                          OpenMPScheduleClauseModifier M1,
                          OpenMPScheduleClauseModifier M2)
      : Kind(Kind), M1(M1), M2(M2) {}
};
} // namespace
3521
/// Emit a worksharing loop ('for', 'for simd', and their combined forms).
///
/// Emits the precondition check, clause init (private/firstprivate/
/// lastprivate/reduction/linear), then either a single statically scheduled
/// inner loop (static non-chunked, or static chunk-size-1 in combined
/// constructs) or an outer dispatch loop via EmitOMPForOuterLoop, followed by
/// the clause finalization.
///
/// \param S the loop directive.
/// \param EUB ensure-upper-bound expression forwarded to the outer loop.
/// \param CodeGenLoopBounds callback producing the LB/UB helper lvalues.
/// \param CGDispatchBounds callback producing the LB/UB values for
///        dispatch-init.
/// \returns true if the directive has a lastprivate clause (and the
///          precondition holds).
bool CodeGenFunction::EmitOMPWorksharingLoop(
    const OMPLoopDirective &S, Expr *EUB,
    const CodeGenLoopBoundsTy &CodeGenLoopBounds,
    const CodeGenDispatchBoundsTy &CGDispatchBounds) {
  // Emit the loop iteration variable.
  const auto *IVExpr = cast<DeclRefExpr>(Val: S.getIterationVariable());
  const auto *IVDecl = cast<VarDecl>(Val: IVExpr->getDecl());
  EmitVarDecl(D: *IVDecl);

  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate iterations count on each
  // iteration (e.g., it is foldable into a constant).
  if (const auto *LIExpr = dyn_cast<DeclRefExpr>(Val: S.getLastIteration())) {
    EmitVarDecl(D: *cast<VarDecl>(Val: LIExpr->getDecl()));
    // Emit calculation of the iterations count.
    EmitIgnoredExpr(E: S.getCalcLastIteration());
  }

  CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();

  bool HasLastprivateClause;
  // Check pre-condition.
  {
    OMPLoopScope PreInitScope(*this, S);
    // Skip the entire loop if we don't meet the precondition.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    if (ConstantFoldsToSimpleInteger(Cond: S.getPreCond(), Result&: CondConstant)) {
      if (!CondConstant)
        return false;
    } else {
      llvm::BasicBlock *ThenBlock = createBasicBlock(name: "omp.precond.then");
      ContBlock = createBasicBlock(name: "omp.precond.end");
      emitPreCond(CGF&: *this, S, Cond: S.getPreCond(), TrueBlock: ThenBlock, FalseBlock: ContBlock,
                  TrueCount: getProfileCount(S: &S));
      EmitBlock(BB: ThenBlock);
      incrementProfileCounter(S: &S);
    }

    RunCleanupsScope DoacrossCleanupScope(*this);
    bool Ordered = false;
    // 'ordered' with a parameter means doacross loops; without one it forces
    // ordered scheduling below.
    if (const auto *OrderedClause = S.getSingleClause<OMPOrderedClause>()) {
      if (OrderedClause->getNumForLoops())
        RT.emitDoacrossInit(CGF&: *this, D: S, NumIterations: OrderedClause->getLoopNumIterations());
      else
        Ordered = true;
    }

    emitAlignedClause(CGF&: *this, D: S);
    bool HasLinears = EmitOMPLinearClauseInit(D: S);
    // Emit helper vars inits.

    std::pair<LValue, LValue> Bounds = CodeGenLoopBounds(*this, S);
    LValue LB = Bounds.first;
    LValue UB = Bounds.second;
    LValue ST =
        EmitOMPHelperVar(CGF&: *this, Helper: cast<DeclRefExpr>(Val: S.getStrideVariable()));
    LValue IL =
        EmitOMPHelperVar(CGF&: *this, Helper: cast<DeclRefExpr>(Val: S.getIsLastIterVariable()));

    // Emit 'then' code.
    {
      OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S);
      OMPPrivateScope LoopScope(*this);
      if (EmitOMPFirstprivateClause(D: S, PrivateScope&: LoopScope) || HasLinears) {
        // Emit implicit barrier to synchronize threads and avoid data races on
        // initialization of firstprivate variables and post-update of
        // lastprivate variables.
        CGM.getOpenMPRuntime().emitBarrierCall(
            CGF&: *this, Loc: S.getBeginLoc(), Kind: OMPD_unknown, /*EmitChecks=*/false,
            /*ForceSimpleCall=*/true);
      }
      EmitOMPPrivateClause(D: S, PrivateScope&: LoopScope);
      CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(
          *this, S, EmitLValue(E: S.getIterationVariable()));
      HasLastprivateClause = EmitOMPLastprivateClauseInit(D: S, PrivateScope&: LoopScope);
      EmitOMPReductionClauseInit(D: S, PrivateScope&: LoopScope);
      EmitOMPPrivateLoopCounters(S, LoopScope);
      EmitOMPLinearClause(D: S, PrivateScope&: LoopScope);
      (void)LoopScope.Privatize();
      if (isOpenMPTargetExecutionDirective(DKind: EKind))
        CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF&: *this, D: S);

      // Detect the loop schedule kind and chunk.
      const Expr *ChunkExpr = nullptr;
      OpenMPScheduleTy ScheduleKind;
      if (const auto *C = S.getSingleClause<OMPScheduleClause>()) {
        ScheduleKind.Schedule = C->getScheduleKind();
        ScheduleKind.M1 = C->getFirstScheduleModifier();
        ScheduleKind.M2 = C->getSecondScheduleModifier();
        ChunkExpr = C->getChunkSize();
      } else {
        // Default behaviour for schedule clause.
        CGM.getOpenMPRuntime().getDefaultScheduleAndChunk(
            CGF&: *this, S, ScheduleKind&: ScheduleKind.Schedule, ChunkExpr);
      }
      bool HasChunkSizeOne = false;
      llvm::Value *Chunk = nullptr;
      if (ChunkExpr) {
        Chunk = EmitScalarExpr(E: ChunkExpr);
        Chunk = EmitScalarConversion(Src: Chunk, SrcTy: ChunkExpr->getType(),
                                     DstTy: S.getIterationVariable()->getType(),
                                     Loc: S.getBeginLoc());
        Expr::EvalResult Result;
        if (ChunkExpr->EvaluateAsInt(Result, Ctx: getContext())) {
          llvm::APSInt EvaluatedChunk = Result.Val.getInt();
          HasChunkSizeOne = (EvaluatedChunk.getLimitedValue() == 1);
        }
      }
      const unsigned IVSize = getContext().getTypeSize(T: IVExpr->getType());
      const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
      // OpenMP 4.5, 2.7.1 Loop Construct, Description.
      // If the static schedule kind is specified or if the ordered clause is
      // specified, and if no monotonic modifier is specified, the effect will
      // be as if the monotonic modifier was specified.
      bool StaticChunkedOne =
          RT.isStaticChunked(ScheduleKind: ScheduleKind.Schedule,
                             /* Chunked */ Chunk != nullptr) &&
          HasChunkSizeOne && isOpenMPLoopBoundSharingDirective(Kind: EKind);
      bool IsMonotonic =
          Ordered ||
          (ScheduleKind.Schedule == OMPC_SCHEDULE_static &&
           !(ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_nonmonotonic ||
             ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_nonmonotonic)) ||
          ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic ||
          ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic;
      if ((RT.isStaticNonchunked(ScheduleKind: ScheduleKind.Schedule,
                                 /* Chunked */ Chunk != nullptr) ||
           StaticChunkedOne) &&
          !Ordered) {
        // Static schedule with no outer loop needed: a single static_init /
        // inner loop / static_fini sequence.
        JumpDest LoopExit =
            getJumpDestInCurrentScope(Target: createBasicBlock(name: "omp.loop.exit"));
        emitCommonSimdLoop(
            CGF&: *this, S,
            SimdInitGen: [&S, EKind](CodeGenFunction &CGF, PrePostActionTy &) {
              if (isOpenMPSimdDirective(DKind: EKind)) {
                CGF.EmitOMPSimdInit(D: S);
              } else if (const auto *C = S.getSingleClause<OMPOrderClause>()) {
                if (C->getKind() == OMPC_ORDER_concurrent)
                  CGF.LoopStack.setParallel(/*Enable=*/true);
              }
            },
            BodyCodeGen: [IVSize, IVSigned, Ordered, IL, LB, UB, ST, StaticChunkedOne, Chunk,
             &S, ScheduleKind, LoopExit, EKind,
             &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
              // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
              // When no chunk_size is specified, the iteration space is divided
              // into chunks that are approximately equal in size, and at most
              // one chunk is distributed to each thread. Note that the size of
              // the chunks is unspecified in this case.
              CGOpenMPRuntime::StaticRTInput StaticInit(
                  IVSize, IVSigned, Ordered, IL.getAddress(), LB.getAddress(),
                  UB.getAddress(), ST.getAddress(),
                  StaticChunkedOne ? Chunk : nullptr);
              CGF.CGM.getOpenMPRuntime().emitForStaticInit(
                  CGF, Loc: S.getBeginLoc(), DKind: EKind, ScheduleKind, Values: StaticInit);
              // UB = min(UB, GlobalUB);
              if (!StaticChunkedOne)
                CGF.EmitIgnoredExpr(E: S.getEnsureUpperBound());
              // IV = LB;
              CGF.EmitIgnoredExpr(E: S.getInit());
              // For unchunked static schedule generate:
              //
              // while (idx <= UB) {
              //   BODY;
              //   ++idx;
              // }
              //
              // For static schedule with chunk one:
              //
              // while (IV <= PrevUB) {
              //   BODY;
              //   IV += ST;
              // }
              CGF.EmitOMPInnerLoop(
                  S, RequiresCleanup: LoopScope.requiresCleanups(),
                  LoopCond: StaticChunkedOne ? S.getCombinedParForInDistCond()
                                   : S.getCond(),
                  IncExpr: StaticChunkedOne ? S.getDistInc() : S.getInc(),
                  BodyGen: [&S, LoopExit](CodeGenFunction &CGF) {
                    emitOMPLoopBodyWithStopPoint(CGF, S, LoopExit);
                  },
                  PostIncGen: [](CodeGenFunction &) {});
            });
        EmitBlock(BB: LoopExit.getBlock());
        // Tell the runtime we are done.
        auto &&CodeGen = [&S](CodeGenFunction &CGF) {
          CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, Loc: S.getEndLoc(),
                                                         DKind: OMPD_for);
        };
        OMPCancelStack.emitExit(CGF&: *this, Kind: EKind, CodeGen);
      } else {
        // Emit the outer loop, which requests its work chunk [LB..UB] from
        // runtime and runs the inner loop to process it.
        OMPLoopArguments LoopArguments(LB.getAddress(), UB.getAddress(),
                                       ST.getAddress(), IL.getAddress(), Chunk,
                                       EUB);
        LoopArguments.DKind = OMPD_for;
        EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered,
                            LoopArgs: LoopArguments, CGDispatchBounds);
      }
      if (isOpenMPSimdDirective(DKind: EKind)) {
        EmitOMPSimdFinal(D: S, CondGen: [IL, &S](CodeGenFunction &CGF) {
          return CGF.Builder.CreateIsNotNull(
              Arg: CGF.EmitLoadOfScalar(lvalue: IL, Loc: S.getBeginLoc()));
        });
      }
      EmitOMPReductionClauseFinal(
          D: S, /*ReductionKind=*/isOpenMPSimdDirective(DKind: EKind)
                 ? /*Parallel and Simd*/ OMPD_parallel_for_simd
                 : /*Parallel only*/ OMPD_parallel);
      // Emit post-update of the reduction variables if IsLastIter != 0.
      emitPostUpdateForReductionClause(
          CGF&: *this, D: S, CondGen: [IL, &S](CodeGenFunction &CGF) {
            return CGF.Builder.CreateIsNotNull(
                Arg: CGF.EmitLoadOfScalar(lvalue: IL, Loc: S.getBeginLoc()));
          });
      // Emit final copy of the lastprivate variables if IsLastIter != 0.
      if (HasLastprivateClause)
        EmitOMPLastprivateClauseFinal(
            D: S, NoFinals: isOpenMPSimdDirective(DKind: EKind),
            IsLastIterCond: Builder.CreateIsNotNull(Arg: EmitLoadOfScalar(lvalue: IL, Loc: S.getBeginLoc())));
      LoopScope.restoreMap();
      EmitOMPLinearClauseFinal(D: S, CondGen: [IL, &S](CodeGenFunction &CGF) {
        return CGF.Builder.CreateIsNotNull(
            Arg: CGF.EmitLoadOfScalar(lvalue: IL, Loc: S.getBeginLoc()));
      });
    }
    DoacrossCleanupScope.ForceCleanup();
    // We're now done with the loop, so jump to the continuation block.
    if (ContBlock) {
      EmitBranch(Block: ContBlock);
      EmitBlock(BB: ContBlock, /*IsFinished=*/true);
    }
  }
  return HasLastprivateClause;
}
3761
3762/// The following two functions generate expressions for the loop lower
3763/// and upper bounds in case of static and dynamic (dispatch) schedule
3764/// of the associated 'for' or 'distribute' loop.
3765static std::pair<LValue, LValue>
3766emitForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
3767 const auto &LS = cast<OMPLoopDirective>(Val: S);
3768 LValue LB =
3769 EmitOMPHelperVar(CGF, Helper: cast<DeclRefExpr>(Val: LS.getLowerBoundVariable()));
3770 LValue UB =
3771 EmitOMPHelperVar(CGF, Helper: cast<DeclRefExpr>(Val: LS.getUpperBoundVariable()));
3772 return {LB, UB};
3773}
3774
3775/// When dealing with dispatch schedules (e.g. dynamic, guided) we do not
3776/// consider the lower and upper bound expressions generated by the
3777/// worksharing loop support, but we use 0 and the iteration space size as
3778/// constants
3779static std::pair<llvm::Value *, llvm::Value *>
3780emitDispatchForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S,
3781 Address LB, Address UB) {
3782 const auto &LS = cast<OMPLoopDirective>(Val: S);
3783 const Expr *IVExpr = LS.getIterationVariable();
3784 const unsigned IVSize = CGF.getContext().getTypeSize(T: IVExpr->getType());
3785 llvm::Value *LBVal = CGF.Builder.getIntN(N: IVSize, C: 0);
3786 llvm::Value *UBVal = CGF.EmitScalarExpr(E: LS.getLastIteration());
3787 return {LBVal, UBVal};
3788}
3789
/// Emits internal temp array declarations for the directive with inscan
/// reductions.
/// The code is the following:
/// \code
/// size num_iters = <num_iters>;
/// <type> buffer[num_iters];
/// \endcode
///
/// \param CGF Current codegen function state.
/// \param S Loop directive carrying the inscan reduction clauses.
/// \param NumIteratorsGen Callback that emits the loop's trip count.
static void emitScanBasedDirectiveDecls(
    CodeGenFunction &CGF, const OMPLoopDirective &S,
    llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen) {
  // The temp buffers have one slot per iteration, so size them with the trip
  // count cast to size_t.
  llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast(
      V: NumIteratorsGen(CGF), DestTy: CGF.SizeTy, /*isSigned=*/false);
  // Collect the reduction items from every inscan reduction clause.
  SmallVector<const Expr *, 4> Shareds;
  SmallVector<const Expr *, 4> Privates;
  SmallVector<const Expr *, 4> ReductionOps;
  SmallVector<const Expr *, 4> CopyArrayTemps;
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
    assert(C->getModifier() == OMPC_REDUCTION_inscan &&
           "Only inscan reductions are expected.");
    Shareds.append(in_start: C->varlist_begin(), in_end: C->varlist_end());
    Privates.append(in_start: C->privates().begin(), in_end: C->privates().end());
    ReductionOps.append(in_start: C->reduction_ops().begin(), in_end: C->reduction_ops().end());
    CopyArrayTemps.append(in_start: C->copy_array_temps().begin(),
                          in_end: C->copy_array_temps().end());
  }
  {
    // Emit buffers for each reduction variables.
    // ReductionCodeGen is required to emit correctly the code for array
    // reductions.
    ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps);
    unsigned Count = 0;
    auto *ITA = CopyArrayTemps.begin();
    for (const Expr *IRef : Privates) {
      const auto *PrivateVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: IRef)->getDecl());
      // Emit variably modified arrays, used for arrays/array sections
      // reductions.
      if (PrivateVD->getType()->isVariablyModifiedType()) {
        RedCG.emitSharedOrigLValue(CGF, N: Count);
        RedCG.emitAggregateType(CGF, N: Count);
      }
      // Each temp buffer is declared as a VLA whose size expression is an
      // OpaqueValueExpr; bind that opaque value to the trip count so the
      // subsequent EmitVarDecl allocates the right number of elements.
      CodeGenFunction::OpaqueValueMapping DimMapping(
          CGF,
          cast<OpaqueValueExpr>(
              Val: cast<VariableArrayType>(Val: (*ITA)->getType()->getAsArrayTypeUnsafe())
                  ->getSizeExpr()),
          RValue::get(V: OMPScanNumIterations));
      // Emit temp buffer.
      CGF.EmitVarDecl(D: *cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *ITA)->getDecl()));
      ++ITA;
      ++Count;
    }
  }
}
3843
/// Copies final inscan reductions values to the original variables.
/// The code is the following:
/// \code
/// <orig_var> = buffer[num_iters-1];
/// \endcode
///
/// \param CGF Current codegen function state.
/// \param S Loop directive carrying the inscan reduction clauses.
/// \param NumIteratorsGen Callback that emits the loop's trip count.
static void emitScanBasedDirectiveFinals(
    CodeGenFunction &CGF, const OMPLoopDirective &S,
    llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen) {
  // Trip count, widened/cast to size_t for indexing the temp buffers.
  llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast(
      V: NumIteratorsGen(CGF), DestTy: CGF.SizeTy, /*isSigned=*/false);
  // Collect the reduction items from every inscan reduction clause.
  SmallVector<const Expr *, 4> Shareds;
  SmallVector<const Expr *, 4> LHSs;
  SmallVector<const Expr *, 4> RHSs;
  SmallVector<const Expr *, 4> Privates;
  SmallVector<const Expr *, 4> CopyOps;
  SmallVector<const Expr *, 4> CopyArrayElems;
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
    assert(C->getModifier() == OMPC_REDUCTION_inscan &&
           "Only inscan reductions are expected.");
    Shareds.append(in_start: C->varlist_begin(), in_end: C->varlist_end());
    LHSs.append(in_start: C->lhs_exprs().begin(), in_end: C->lhs_exprs().end());
    RHSs.append(in_start: C->rhs_exprs().begin(), in_end: C->rhs_exprs().end());
    Privates.append(in_start: C->privates().begin(), in_end: C->privates().end());
    CopyOps.append(in_start: C->copy_ops().begin(), in_end: C->copy_ops().end());
    CopyArrayElems.append(in_start: C->copy_array_elems().begin(),
                          in_end: C->copy_array_elems().end());
  }
  // Create temp var and copy LHS value to this temp value.
  // LHS = TMP[LastIter];
  // LastIter = num_iters - 1, the index holding the final scan result.
  llvm::Value *OMPLast = CGF.Builder.CreateNSWSub(
      LHS: OMPScanNumIterations,
      RHS: llvm::ConstantInt::get(Ty: CGF.SizeTy, V: 1, /*isSigned=*/IsSigned: false));
  for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
    const Expr *PrivateExpr = Privates[I];
    const Expr *OrigExpr = Shareds[I];
    const Expr *CopyArrayElem = CopyArrayElems[I];
    // The subscript index of buffer[...] is an OpaqueValueExpr; bind it to
    // LastIter while emitting the source lvalue below.
    CodeGenFunction::OpaqueValueMapping IdxMapping(
        CGF,
        cast<OpaqueValueExpr>(
            Val: cast<ArraySubscriptExpr>(Val: CopyArrayElem)->getIdx()),
        RValue::get(V: OMPLast));
    LValue DestLVal = CGF.EmitLValue(E: OrigExpr);
    LValue SrcLVal = CGF.EmitLValue(E: CopyArrayElem);
    // Copy buffer[num_iters-1] into the original variable using the clause's
    // copy operation (handles aggregate/class types correctly).
    CGF.EmitOMPCopy(
        OriginalType: PrivateExpr->getType(), DestAddr: DestLVal.getAddress(), SrcAddr: SrcLVal.getAddress(),
        DestVD: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: LHSs[I])->getDecl()),
        SrcVD: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: RHSs[I])->getDecl()), Copy: CopyOps[I]);
  }
}
3893
/// Emits the code for the directive with inscan reductions.
/// The code is the following:
/// \code
/// #pragma omp ...
/// for (i: 0..<num_iters>) {
///   <input phase>;
///   buffer[i] = red;
/// }
/// #pragma omp master // in parallel region
/// for (int k = 0; k != ceil(log2(num_iters)); ++k)
/// for (size cnt = last_iter; cnt >= pow(2, k); --cnt)
///   buffer[cnt] op= buffer[cnt-pow(2,k)];
/// #pragma omp barrier // in parallel region
/// #pragma omp ...
/// for (0..<num_iters>) {
///   red = InclusiveScan ? buffer[i] : buffer[i-1];
///   <scan phase>;
/// }
/// \endcode
static void emitScanBasedDirective(
    CodeGenFunction &CGF, const OMPLoopDirective &S,
    llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen,
    llvm::function_ref<void(CodeGenFunction &)> FirstGen,
    llvm::function_ref<void(CodeGenFunction &)> SecondGen) {
  // Trip count, widened/cast to size_t for buffer indexing.
  llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast(
      V: NumIteratorsGen(CGF), DestTy: CGF.SizeTy, /*isSigned=*/false);
  // Collect the reduction items from every inscan reduction clause.
  SmallVector<const Expr *, 4> Privates;
  SmallVector<const Expr *, 4> ReductionOps;
  SmallVector<const Expr *, 4> LHSs;
  SmallVector<const Expr *, 4> RHSs;
  SmallVector<const Expr *, 4> CopyArrayElems;
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
    assert(C->getModifier() == OMPC_REDUCTION_inscan &&
           "Only inscan reductions are expected.");
    Privates.append(in_start: C->privates().begin(), in_end: C->privates().end());
    ReductionOps.append(in_start: C->reduction_ops().begin(), in_end: C->reduction_ops().end());
    LHSs.append(in_start: C->lhs_exprs().begin(), in_end: C->lhs_exprs().end());
    RHSs.append(in_start: C->rhs_exprs().begin(), in_end: C->rhs_exprs().end());
    CopyArrayElems.append(in_start: C->copy_array_elems().begin(),
                          in_end: C->copy_array_elems().end());
  }
  CodeGenFunction::ParentLoopDirectiveForScanRegion ScanRegion(CGF, S);
  {
    // Emit loop with input phase:
    // #pragma omp ...
    // for (i: 0..<num_iters>) {
    //   <input phase>;
    //   buffer[i] = red;
    // }
    CGF.OMPFirstScanLoop = true;
    CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
    FirstGen(CGF);
  }
  // #pragma omp barrier // in parallel region
  // The lambda below emits the log2(n)-round parallel prefix ("up-sweep")
  // over the temp buffers.
  auto &&CodeGen = [&S, OMPScanNumIterations, &LHSs, &RHSs, &CopyArrayElems,
                    &ReductionOps,
                    &Privates](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    // Emit prefix reduction:
    // #pragma omp master // in parallel region
    // for (int k = 0; k != ceil(log2(n)); ++k)
    llvm::BasicBlock *InputBB = CGF.Builder.GetInsertBlock();
    llvm::BasicBlock *LoopBB = CGF.createBasicBlock(name: "omp.outer.log.scan.body");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock(name: "omp.outer.log.scan.exit");
    // Number of outer rounds: ceil(log2(num_iters)), computed via the llvm
    // log2/ceil intrinsics on double and truncated back to an integer.
    llvm::Function *F =
        CGF.CGM.getIntrinsic(IID: llvm::Intrinsic::log2, Tys: CGF.DoubleTy);
    llvm::Value *Arg =
        CGF.Builder.CreateUIToFP(V: OMPScanNumIterations, DestTy: CGF.DoubleTy);
    llvm::Value *LogVal = CGF.EmitNounwindRuntimeCall(callee: F, args: Arg);
    F = CGF.CGM.getIntrinsic(IID: llvm::Intrinsic::ceil, Tys: CGF.DoubleTy);
    LogVal = CGF.EmitNounwindRuntimeCall(callee: F, args: LogVal);
    LogVal = CGF.Builder.CreateFPToUI(V: LogVal, DestTy: CGF.IntTy);
    // n - 1: index of the last buffer element (start of the inner loop).
    llvm::Value *NMin1 = CGF.Builder.CreateNUWSub(
        LHS: OMPScanNumIterations, RHS: llvm::ConstantInt::get(Ty: CGF.SizeTy, V: 1));
    auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, TemporaryLocation: S.getBeginLoc());
    CGF.EmitBlock(BB: LoopBB);
    auto *Counter = CGF.Builder.CreatePHI(Ty: CGF.IntTy, NumReservedValues: 2);
    // size pow2k = 1;
    auto *Pow2K = CGF.Builder.CreatePHI(Ty: CGF.SizeTy, NumReservedValues: 2);
    Counter->addIncoming(V: llvm::ConstantInt::get(Ty: CGF.IntTy, V: 0), BB: InputBB);
    Pow2K->addIncoming(V: llvm::ConstantInt::get(Ty: CGF.SizeTy, V: 1), BB: InputBB);
    // for (size i = n - 1; i >= 2 ^ k; --i)
    //   tmp[i] op= tmp[i-pow2k];
    llvm::BasicBlock *InnerLoopBB =
        CGF.createBasicBlock(name: "omp.inner.log.scan.body");
    llvm::BasicBlock *InnerExitBB =
        CGF.createBasicBlock(name: "omp.inner.log.scan.exit");
    llvm::Value *CmpI = CGF.Builder.CreateICmpUGE(LHS: NMin1, RHS: Pow2K);
    CGF.Builder.CreateCondBr(Cond: CmpI, True: InnerLoopBB, False: InnerExitBB);
    CGF.EmitBlock(BB: InnerLoopBB);
    auto *IVal = CGF.Builder.CreatePHI(Ty: CGF.SizeTy, NumReservedValues: 2);
    IVal->addIncoming(V: NMin1, BB: LoopBB);
    {
      // Temporarily remap the LHS/RHS reduction variables to buffer[i] and
      // buffer[i-pow2k] so the clause's reduction op can be reused verbatim.
      CodeGenFunction::OMPPrivateScope PrivScope(CGF);
      auto *ILHS = LHSs.begin();
      auto *IRHS = RHSs.begin();
      for (const Expr *CopyArrayElem : CopyArrayElems) {
        const auto *LHSVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *ILHS)->getDecl());
        const auto *RHSVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IRHS)->getDecl());
        Address LHSAddr = Address::invalid();
        {
          CodeGenFunction::OpaqueValueMapping IdxMapping(
              CGF,
              cast<OpaqueValueExpr>(
                  Val: cast<ArraySubscriptExpr>(Val: CopyArrayElem)->getIdx()),
              RValue::get(V: IVal));
          LHSAddr = CGF.EmitLValue(E: CopyArrayElem).getAddress();
        }
        PrivScope.addPrivate(LocalVD: LHSVD, Addr: LHSAddr);
        Address RHSAddr = Address::invalid();
        {
          llvm::Value *OffsetIVal = CGF.Builder.CreateNUWSub(LHS: IVal, RHS: Pow2K);
          CodeGenFunction::OpaqueValueMapping IdxMapping(
              CGF,
              cast<OpaqueValueExpr>(
                  Val: cast<ArraySubscriptExpr>(Val: CopyArrayElem)->getIdx()),
              RValue::get(V: OffsetIVal));
          RHSAddr = CGF.EmitLValue(E: CopyArrayElem).getAddress();
        }
        PrivScope.addPrivate(LocalVD: RHSVD, Addr: RHSAddr);
        ++ILHS;
        ++IRHS;
      }
      PrivScope.Privatize();
      // Emit buffer[i] op= buffer[i-pow2k] as a simple (nowait) reduction.
      CGF.CGM.getOpenMPRuntime().emitReduction(
          CGF, Loc: S.getEndLoc(), Privates, LHSExprs: LHSs, RHSExprs: RHSs, ReductionOps,
          Options: {/*WithNowait=*/true, /*SimpleReduction=*/true,
                    /*IsPrivateVarReduction*/ {}, .ReductionKind: OMPD_unknown});
    }
    // --i; stay in the inner loop while i >= pow2k.
    llvm::Value *NextIVal =
        CGF.Builder.CreateNUWSub(LHS: IVal, RHS: llvm::ConstantInt::get(Ty: CGF.SizeTy, V: 1));
    IVal->addIncoming(V: NextIVal, BB: CGF.Builder.GetInsertBlock());
    CmpI = CGF.Builder.CreateICmpUGE(LHS: NextIVal, RHS: Pow2K);
    CGF.Builder.CreateCondBr(Cond: CmpI, True: InnerLoopBB, False: InnerExitBB);
    CGF.EmitBlock(BB: InnerExitBB);
    // ++k; repeat the outer loop until k == ceil(log2(n)).
    llvm::Value *Next =
        CGF.Builder.CreateNUWAdd(LHS: Counter, RHS: llvm::ConstantInt::get(Ty: CGF.IntTy, V: 1));
    Counter->addIncoming(V: Next, BB: CGF.Builder.GetInsertBlock());
    // pow2k <<= 1;
    llvm::Value *NextPow2K =
        CGF.Builder.CreateShl(LHS: Pow2K, RHS: 1, Name: "", /*HasNUW=*/true);
    Pow2K->addIncoming(V: NextPow2K, BB: CGF.Builder.GetInsertBlock());
    llvm::Value *Cmp = CGF.Builder.CreateICmpNE(LHS: Next, RHS: LogVal);
    CGF.Builder.CreateCondBr(Cond: Cmp, True: LoopBB, False: ExitBB);
    auto DL1 = ApplyDebugLocation::CreateDefaultArtificial(CGF, TemporaryLocation: S.getEndLoc());
    CGF.EmitBlock(BB: ExitBB);
  };
  OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S);
  if (isOpenMPParallelDirective(DKind: EKind)) {
    // In a parallel region the prefix sum runs on the master thread only and
    // is followed by a barrier before the scan-phase loop.
    CGF.CGM.getOpenMPRuntime().emitMasterRegion(CGF, MasterOpGen: CodeGen, Loc: S.getBeginLoc());
    CGF.CGM.getOpenMPRuntime().emitBarrierCall(
        CGF, Loc: S.getBeginLoc(), Kind: OMPD_unknown, /*EmitChecks=*/false,
        /*ForceSimpleCall=*/true);
  } else {
    RegionCodeGenTy RCG(CodeGen);
    RCG(CGF);
  }

  // Emit the second (scan-phase) loop.
  CGF.OMPFirstScanLoop = false;
  SecondGen(CGF);
}
4055
/// Emits a worksharing loop directive ('for'/'for simd'-like), selecting
/// between the plain lowering and the two-pass scan-based lowering required
/// when a reduction clause uses the 'inscan' modifier.
///
/// \returns true if the directive had a lastprivate clause (so the caller can
/// decide whether a barrier is needed).
static bool emitWorksharingDirective(CodeGenFunction &CGF,
                                     const OMPLoopDirective &S,
                                     bool HasCancel) {
  bool HasLastprivates;
  OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S);
  if (llvm::any_of(Range: S.getClausesOfKind<OMPReductionClause>(),
                   P: [](const OMPReductionClause *C) {
                     return C->getModifier() == OMPC_REDUCTION_inscan;
                   })) {
    // Trip-count generator used to size/index the inscan temp buffers.
    const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
      CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
      OMPLoopScope LoopScope(CGF, S);
      return CGF.EmitScalarExpr(E: S.getNumIterations());
    };
    // First pass: input phase; fills the temp buffers.
    const auto &&FirstGen = [&S, HasCancel, EKind](CodeGenFunction &CGF) {
      CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, EKind, HasCancel);
      (void)CGF.EmitOMPWorksharingLoop(S, EUB: S.getEnsureUpperBound(),
                                       CodeGenLoopBounds: emitForLoopBounds,
                                       CGDispatchBounds: emitDispatchForLoopBounds);
      // Emit an implicit barrier at the end.
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, Loc: S.getBeginLoc(),
                                                 Kind: OMPD_for);
    };
    // Second pass: scan phase; reads the prefix-summed buffers and records
    // whether a lastprivate clause was emitted.
    const auto &&SecondGen = [&S, HasCancel, EKind,
                              &HasLastprivates](CodeGenFunction &CGF) {
      CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, EKind, HasCancel);
      HasLastprivates = CGF.EmitOMPWorksharingLoop(S, EUB: S.getEnsureUpperBound(),
                                                   CodeGenLoopBounds: emitForLoopBounds,
                                                   CGDispatchBounds: emitDispatchForLoopBounds);
    };
    // Buffer declarations/final copies are emitted here only for the
    // non-parallel case; NOTE(review): for parallel directives they are
    // presumably emitted by an enclosing region's codegen — confirm.
    if (!isOpenMPParallelDirective(DKind: EKind))
      emitScanBasedDirectiveDecls(CGF, S, NumIteratorsGen);
    emitScanBasedDirective(CGF, S, NumIteratorsGen, FirstGen, SecondGen);
    if (!isOpenMPParallelDirective(DKind: EKind))
      emitScanBasedDirectiveFinals(CGF, S, NumIteratorsGen);
  } else {
    // Regular lowering: a single worksharing loop emission.
    CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, EKind, HasCancel);
    HasLastprivates = CGF.EmitOMPWorksharingLoop(S, EUB: S.getEnsureUpperBound(),
                                                 CodeGenLoopBounds: emitForLoopBounds,
                                                 CGDispatchBounds: emitDispatchForLoopBounds);
  }
  return HasLastprivates;
}
4099
4100// Pass OMPLoopDirective (instead of OMPForDirective) to make this check
4101// available for "loop bind(parallel)", which maps to "for".
4102static bool isForSupportedByOpenMPIRBuilder(const OMPLoopDirective &S,
4103 bool HasCancel) {
4104 if (HasCancel)
4105 return false;
4106 for (OMPClause *C : S.clauses()) {
4107 if (isa<OMPNowaitClause, OMPBindClause>(Val: C))
4108 continue;
4109
4110 if (auto *SC = dyn_cast<OMPScheduleClause>(Val: C)) {
4111 if (SC->getFirstScheduleModifier() != OMPC_SCHEDULE_MODIFIER_unknown)
4112 return false;
4113 if (SC->getSecondScheduleModifier() != OMPC_SCHEDULE_MODIFIER_unknown)
4114 return false;
4115 switch (SC->getScheduleKind()) {
4116 case OMPC_SCHEDULE_auto:
4117 case OMPC_SCHEDULE_dynamic:
4118 case OMPC_SCHEDULE_runtime:
4119 case OMPC_SCHEDULE_guided:
4120 case OMPC_SCHEDULE_static:
4121 continue;
4122 case OMPC_SCHEDULE_unknown:
4123 return false;
4124 }
4125 }
4126
4127 return false;
4128 }
4129
4130 return true;
4131}
4132
4133static llvm::omp::ScheduleKind
4134convertClauseKindToSchedKind(OpenMPScheduleClauseKind ScheduleClauseKind) {
4135 switch (ScheduleClauseKind) {
4136 case OMPC_SCHEDULE_unknown:
4137 return llvm::omp::OMP_SCHEDULE_Default;
4138 case OMPC_SCHEDULE_auto:
4139 return llvm::omp::OMP_SCHEDULE_Auto;
4140 case OMPC_SCHEDULE_dynamic:
4141 return llvm::omp::OMP_SCHEDULE_Dynamic;
4142 case OMPC_SCHEDULE_guided:
4143 return llvm::omp::OMP_SCHEDULE_Guided;
4144 case OMPC_SCHEDULE_runtime:
4145 return llvm::omp::OMP_SCHEDULE_Runtime;
4146 case OMPC_SCHEDULE_static:
4147 return llvm::omp::OMP_SCHEDULE_Static;
4148 }
4149 llvm_unreachable("Unhandled schedule kind");
4150}
4151
// Pass OMPLoopDirective (instead of OMPForDirective) to make this function
// available for "loop bind(parallel)", which maps to "for".
//
// Lowers a '#pragma omp for'-style directive, either through the
// OpenMPIRBuilder (when enabled and the clause set is supported) or through
// the classic CGOpenMPRuntime path.
static void emitOMPForDirective(const OMPLoopDirective &S, CodeGenFunction &CGF,
                                CodeGenModule &CGM, bool HasCancel) {
  bool HasLastprivates = false;
  // The IRBuilder path is usable only for a restricted clause set and never
  // with cancellation.
  bool UseOMPIRBuilder = CGM.getLangOpts().OpenMPIRBuilder &&
                         isForSupportedByOpenMPIRBuilder(S, HasCancel);
  auto &&CodeGen = [&S, &CGM, HasCancel, &HasLastprivates,
                    UseOMPIRBuilder](CodeGenFunction &CGF, PrePostActionTy &) {
    // Use the OpenMPIRBuilder if enabled.
    if (UseOMPIRBuilder) {
      // A trailing barrier is needed unless 'nowait' was given.
      bool NeedsBarrier = !S.getSingleClause<OMPNowaitClause>();

      // Translate the schedule clause (if any) into IRBuilder terms.
      llvm::omp::ScheduleKind SchedKind = llvm::omp::OMP_SCHEDULE_Default;
      llvm::Value *ChunkSize = nullptr;
      if (auto *SchedClause = S.getSingleClause<OMPScheduleClause>()) {
        SchedKind =
            convertClauseKindToSchedKind(ScheduleClauseKind: SchedClause->getScheduleKind());
        if (const Expr *ChunkSizeExpr = SchedClause->getChunkSize())
          ChunkSize = CGF.EmitScalarExpr(E: ChunkSizeExpr);
      }

      // Emit the associated statement and get its loop representation.
      const Stmt *Inner = S.getRawStmt();
      llvm::CanonicalLoopInfo *CLI =
          CGF.EmitOMPCollapsedCanonicalLoopNest(S: Inner, Depth: 1);

      // Apply the worksharing-loop transformation to the canonical loop.
      llvm::OpenMPIRBuilder &OMPBuilder =
          CGM.getOpenMPRuntime().getOMPBuilder();
      llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
          CGF.AllocaInsertPt->getParent(), CGF.AllocaInsertPt->getIterator());
      cantFail(ValOrErr: OMPBuilder.applyWorkshareLoop(
          DL: CGF.Builder.getCurrentDebugLocation(), CLI, AllocaIP, NeedsBarrier,
          SchedKind, ChunkSize, /*HasSimdModifier=*/false,
          /*HasMonotonicModifier=*/false, /*HasNonmonotonicModifier=*/false,
          /*HasOrderedClause=*/false));
      return;
    }

    // Classic lowering path.
    HasLastprivates = emitWorksharingDirective(CGF, S, HasCancel);
  };
  {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF, S);
    OMPLexicalScope Scope(CGF, S, OMPD_unknown);
    CGM.getOpenMPRuntime().emitInlinedDirective(CGF, InnermostKind: OMPD_for, CodeGen,
                                                HasCancel);
  }

  if (!UseOMPIRBuilder) {
    // Emit an implicit barrier at the end.
    if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates)
      CGM.getOpenMPRuntime().emitBarrierCall(CGF, Loc: S.getBeginLoc(), Kind: OMPD_for);
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(CGF, S);
}
4209
4210void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
4211 return emitOMPForDirective(S, CGF&: *this, CGM, HasCancel: S.hasCancel());
4212}
4213
// Lowers '#pragma omp for simd'. Reuses the worksharing-loop lowering; the
// 'for simd' construct has no cancellation.
void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) {
  bool HasLastprivates = false;
  auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
                                          PrePostActionTy &) {
    HasLastprivates = emitWorksharingDirective(CGF, S, /*HasCancel=*/false);
  };
  {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
    OMPLexicalScope Scope(*this, S, OMPD_unknown);
    CGM.getOpenMPRuntime().emitInlinedDirective(CGF&: *this, InnermostKind: OMPD_simd, CodeGen);
  }

  // Emit an implicit barrier at the end.
  if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates)
    CGM.getOpenMPRuntime().emitBarrierCall(CGF&: *this, Loc: S.getBeginLoc(), Kind: OMPD_for);
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(CGF&: *this, S);
}
4233
4234static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty,
4235 const Twine &Name,
4236 llvm::Value *Init = nullptr) {
4237 LValue LVal = CGF.MakeAddrLValue(Addr: CGF.CreateMemTemp(T: Ty, Name), T: Ty);
4238 if (Init)
4239 CGF.EmitStoreThroughLValue(Src: RValue::get(V: Init), Dst: LVal, /*isInit*/ true);
4240 return LVal;
4241}
4242
// Lowers the body of a 'sections'-like region: the section statements are
// turned into the cases of a switch driven by a statically-scheduled loop, so
// each thread executes the chunk of sections the runtime assigns to it.
void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
  const Stmt *CapturedStmt = S.getInnermostCapturedStmt()->getCapturedStmt();
  // CS is null when the region body is a single statement rather than a
  // compound statement of sections.
  const auto *CS = dyn_cast<CompoundStmt>(Val: CapturedStmt);
  bool HasLastprivates = false;
  OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S);
  auto &&CodeGen = [&S, CapturedStmt, CS, EKind,
                    &HasLastprivates](CodeGenFunction &CGF, PrePostActionTy &) {
    const ASTContext &C = CGF.getContext();
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    // Emit helper vars inits.
    LValue LB = createSectionLVal(CGF, Ty: KmpInt32Ty, Name: ".omp.sections.lb.",
                                  Init: CGF.Builder.getInt32(C: 0));
    // Global upper bound: number of sections minus one (or 0 for a single
    // statement).
    llvm::ConstantInt *GlobalUBVal = CS != nullptr
                                         ? CGF.Builder.getInt32(C: CS->size() - 1)
                                         : CGF.Builder.getInt32(C: 0);
    LValue UB =
        createSectionLVal(CGF, Ty: KmpInt32Ty, Name: ".omp.sections.ub.", Init: GlobalUBVal);
    LValue ST = createSectionLVal(CGF, Ty: KmpInt32Ty, Name: ".omp.sections.st.",
                                  Init: CGF.Builder.getInt32(C: 1));
    // "is last iteration" flag, set by the runtime.
    LValue IL = createSectionLVal(CGF, Ty: KmpInt32Ty, Name: ".omp.sections.il.",
                                  Init: CGF.Builder.getInt32(C: 0));
    // Loop counter.
    LValue IV = createSectionLVal(CGF, Ty: KmpInt32Ty, Name: ".omp.sections.iv.");
    // Synthesize AST nodes (IV <= UB and ++IV) so EmitOMPInnerLoop can drive
    // the loop over the assigned section indices.
    OpaqueValueExpr IVRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue);
    CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV);
    OpaqueValueExpr UBRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue);
    CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB);
    // Generate condition for loop.
    BinaryOperator *Cond = BinaryOperator::Create(
        C, lhs: &IVRefExpr, rhs: &UBRefExpr, opc: BO_LE, ResTy: C.BoolTy, VK: VK_PRValue, OK: OK_Ordinary,
        opLoc: S.getBeginLoc(), FPFeatures: FPOptionsOverride());
    // Increment for loop counter.
    UnaryOperator *Inc = UnaryOperator::Create(
        C, input: &IVRefExpr, opc: UO_PreInc, type: KmpInt32Ty, VK: VK_PRValue, OK: OK_Ordinary,
        l: S.getBeginLoc(), CanOverflow: true, FPFeatures: FPOptionsOverride());
    auto &&BodyGen = [CapturedStmt, CS, &S, &IV](CodeGenFunction &CGF) {
      // Iterate through all sections and emit a switch construct:
      // switch (IV) {
      //   case 0:
      //     <SectionStmt[0]>;
      //     break;
      // ...
      //   case <NumSection> - 1:
      //     <SectionStmt[<NumSection> - 1]>;
      //     break;
      // }
      // .omp.sections.exit:
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(name: ".omp.sections.exit");
      llvm::SwitchInst *SwitchStmt =
          CGF.Builder.CreateSwitch(V: CGF.EmitLoadOfScalar(lvalue: IV, Loc: S.getBeginLoc()),
                                   Dest: ExitBB, NumCases: CS == nullptr ? 1 : CS->size());
      if (CS) {
        unsigned CaseNumber = 0;
        for (const Stmt *SubStmt : CS->children()) {
          auto CaseBB = CGF.createBasicBlock(name: ".omp.sections.case");
          CGF.EmitBlock(BB: CaseBB);
          SwitchStmt->addCase(OnVal: CGF.Builder.getInt32(C: CaseNumber), Dest: CaseBB);
          CGF.EmitStmt(S: SubStmt);
          CGF.EmitBranch(Block: ExitBB);
          ++CaseNumber;
        }
      } else {
        // Single statement body: emit it as the only section (case 0).
        llvm::BasicBlock *CaseBB = CGF.createBasicBlock(name: ".omp.sections.case");
        CGF.EmitBlock(BB: CaseBB);
        SwitchStmt->addCase(OnVal: CGF.Builder.getInt32(C: 0), Dest: CaseBB);
        CGF.EmitStmt(S: CapturedStmt);
        CGF.EmitBranch(Block: ExitBB);
      }
      CGF.EmitBlock(BB: ExitBB, /*IsFinished=*/true);
    };

    CodeGenFunction::OMPPrivateScope LoopScope(CGF);
    if (CGF.EmitOMPFirstprivateClause(D: S, PrivateScope&: LoopScope)) {
      // Emit implicit barrier to synchronize threads and avoid data races on
      // initialization of firstprivate variables and post-update of lastprivate
      // variables.
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(
          CGF, Loc: S.getBeginLoc(), Kind: OMPD_unknown, /*EmitChecks=*/false,
          /*ForceSimpleCall=*/true);
    }
    CGF.EmitOMPPrivateClause(D: S, PrivateScope&: LoopScope);
    CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(CGF, S, IV);
    HasLastprivates = CGF.EmitOMPLastprivateClauseInit(D: S, PrivateScope&: LoopScope);
    CGF.EmitOMPReductionClauseInit(D: S, PrivateScope&: LoopScope);
    (void)LoopScope.Privatize();
    if (isOpenMPTargetExecutionDirective(DKind: EKind))
      CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, D: S);

    // Emit static non-chunked loop.
    OpenMPScheduleTy ScheduleKind;
    ScheduleKind.Schedule = OMPC_SCHEDULE_static;
    CGOpenMPRuntime::StaticRTInput StaticInit(
        /*IVSize=*/32, /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(),
        LB.getAddress(), UB.getAddress(), ST.getAddress());
    CGF.CGM.getOpenMPRuntime().emitForStaticInit(CGF, Loc: S.getBeginLoc(), DKind: EKind,
                                                 ScheduleKind, Values: StaticInit);
    // UB = min(UB, GlobalUB);
    llvm::Value *UBVal = CGF.EmitLoadOfScalar(lvalue: UB, Loc: S.getBeginLoc());
    llvm::Value *MinUBGlobalUB = CGF.Builder.CreateSelect(
        C: CGF.Builder.CreateICmpSLT(LHS: UBVal, RHS: GlobalUBVal), True: UBVal, False: GlobalUBVal);
    CGF.EmitStoreOfScalar(value: MinUBGlobalUB, lvalue: UB);
    // IV = LB;
    CGF.EmitStoreOfScalar(value: CGF.EmitLoadOfScalar(lvalue: LB, Loc: S.getBeginLoc()), lvalue: IV);
    // while (idx <= UB) { BODY; ++idx; }
    CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, LoopCond: Cond, IncExpr: Inc, BodyGen,
                         PostIncGen: [](CodeGenFunction &) {});
    // Tell the runtime we are done.
    auto &&CodeGen = [&S](CodeGenFunction &CGF) {
      CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, Loc: S.getEndLoc(),
                                                     DKind: OMPD_sections);
    };
    CGF.OMPCancelStack.emitExit(CGF, Kind: EKind, CodeGen);
    CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_parallel);
    // Emit post-update of the reduction variables if IsLastIter != 0.
    emitPostUpdateForReductionClause(CGF, D: S, CondGen: [IL, &S](CodeGenFunction &CGF) {
      return CGF.Builder.CreateIsNotNull(
          Arg: CGF.EmitLoadOfScalar(lvalue: IL, Loc: S.getBeginLoc()));
    });

    // Emit final copy of the lastprivate variables if IsLastIter != 0.
    if (HasLastprivates)
      CGF.EmitOMPLastprivateClauseFinal(
          D: S, /*NoFinals=*/false,
          IsLastIterCond: CGF.Builder.CreateIsNotNull(
              Arg: CGF.EmitLoadOfScalar(lvalue: IL, Loc: S.getBeginLoc())));
  };

  // Only 'sections' and 'parallel sections' may carry a cancel region.
  bool HasCancel = false;
  if (auto *OSD = dyn_cast<OMPSectionsDirective>(Val: &S))
    HasCancel = OSD->hasCancel();
  else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(Val: &S))
    HasCancel = OPSD->hasCancel();
  OMPCancelStackRAII CancelRegion(*this, EKind, HasCancel);
  CGM.getOpenMPRuntime().emitInlinedDirective(CGF&: *this, InnermostKind: OMPD_sections, CodeGen,
                                              HasCancel);
  // Emit barrier for lastprivates only if 'sections' directive has 'nowait'
  // clause. Otherwise the barrier will be generated by the codegen for the
  // directive.
  if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) {
    // Emit implicit barrier to synchronize threads and avoid data races on
    // initialization of firstprivate variables.
    CGM.getOpenMPRuntime().emitBarrierCall(CGF&: *this, Loc: S.getBeginLoc(),
                                           Kind: OMPD_unknown);
  }
}
4389
// Lowers '#pragma omp scope': privatizes the clause variables, emits the
// captured body inline, finalizes reductions, and emits a trailing barrier
// unless 'nowait' is present.
void CodeGenFunction::EmitOMPScopeDirective(const OMPScopeDirective &S) {
  {
    // Emit code for 'scope' region
    auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      OMPPrivateScope PrivateScope(CGF);
      (void)CGF.EmitOMPFirstprivateClause(D: S, PrivateScope);
      CGF.EmitOMPPrivateClause(D: S, PrivateScope);
      CGF.EmitOMPReductionClauseInit(D: S, PrivateScope);
      (void)PrivateScope.Privatize();
      CGF.EmitStmt(S: S.getInnermostCapturedStmt()->getCapturedStmt());
      CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_parallel);
    };
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
    OMPLexicalScope Scope(*this, S, OMPD_unknown);
    CGM.getOpenMPRuntime().emitInlinedDirective(CGF&: *this, InnermostKind: OMPD_scope, CodeGen);
  }
  // Emit an implicit barrier at the end.
  if (!S.getSingleClause<OMPNowaitClause>()) {
    CGM.getOpenMPRuntime().emitBarrierCall(CGF&: *this, Loc: S.getBeginLoc(), Kind: OMPD_scope);
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(CGF&: *this, S);
}
4415
// Lower '#pragma omp sections'. When the OpenMPIRBuilder is enabled, the
// whole construct is delegated to OMPBuilder.createSections(); otherwise the
// classic clang codegen path emits the worksharing via EmitSections() and an
// implicit barrier afterwards unless a 'nowait' clause is present.
void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
    using BodyGenCallbackTy = llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy;

    auto FiniCB = [](InsertPointTy IP) {
      // Don't FinalizeOMPRegion because this is done inside of OMPIRBuilder for
      // sections.
      return llvm::Error::success();
    };

    const CapturedStmt *ICS = S.getInnermostCapturedStmt();
    const Stmt *CapturedStmt = S.getInnermostCapturedStmt()->getCapturedStmt();
    const auto *CS = dyn_cast<CompoundStmt>(Val: CapturedStmt);
    llvm::SmallVector<BodyGenCallbackTy, 4> SectionCBVector;
    if (CS) {
      // One body-generation callback per child statement (each child of the
      // compound statement corresponds to one section).
      for (const Stmt *SubStmt : CS->children()) {
        auto SectionCB = [this, SubStmt](InsertPointTy AllocaIP,
                                         InsertPointTy CodeGenIP) {
          OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
              CGF&: *this, RegionBodyStmt: SubStmt, AllocaIP, CodeGenIP, RegionName: "section");
          return llvm::Error::success();
        };
        SectionCBVector.push_back(Elt: SectionCB);
      }
    } else {
      // A single non-compound statement forms exactly one implicit section.
      auto SectionCB = [this, CapturedStmt](InsertPointTy AllocaIP,
                                            InsertPointTy CodeGenIP) {
        OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
            CGF&: *this, RegionBodyStmt: CapturedStmt, AllocaIP, CodeGenIP, RegionName: "section");
        return llvm::Error::success();
      };
      SectionCBVector.push_back(Elt: SectionCB);
    }

    // Privatization callback that performs appropriate action for
    // shared/private/firstprivate/lastprivate/copyin/... variables.
    //
    // TODO: This defaults to shared right now.
    auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
                     llvm::Value &, llvm::Value &Val, llvm::Value *&ReplVal) {
      // The next line is appropriate only for variables (Val) with the
      // data-sharing attribute "shared".
      ReplVal = &Val;

      return CodeGenIP;
    };

    // Make the captured-statement info available while the builder emits the
    // region bodies; the RAII restores the previous info on scope exit.
    CGCapturedStmtInfo CGSI(*ICS, CR_OpenMP);
    CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI);
    llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
        AllocaInsertPt->getParent(), AllocaInsertPt->getIterator());
    llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
        cantFail(ValOrErr: OMPBuilder.createSections(
            Loc: Builder, AllocaIP, SectionCBs: SectionCBVector, PrivCB, FiniCB, IsCancellable: S.hasCancel(),
            IsNowait: S.getSingleClause<OMPNowaitClause>()));
    Builder.restoreIP(IP: AfterIP);
    return;
  }
  {
    // Classic codegen path. Both RAII objects must be destroyed before the
    // trailing barrier/lastprivate-conditional check below.
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
    OMPLexicalScope Scope(*this, S, OMPD_unknown);
    EmitSections(S);
  }
  // Emit an implicit barrier at the end.
  if (!S.getSingleClause<OMPNowaitClause>()) {
    CGM.getOpenMPRuntime().emitBarrierCall(CGF&: *this, Loc: S.getBeginLoc(),
                                           Kind: OMPD_sections);
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(CGF&: *this, S);
}
4490
// Lower '#pragma omp section' (a single section inside a 'sections' region).
// With the OpenMPIRBuilder the body goes through OMPBuilder.createSection();
// the classic path just emits the associated statement — the enclosing
// 'sections' lowering handles the actual work distribution.
void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;

    const Stmt *SectionRegionBodyStmt = S.getAssociatedStmt();
    // Finalization callback: run pending cleanups when leaving the region.
    auto FiniCB = [this](InsertPointTy IP) {
      OMPBuilderCBHelpers::FinalizeOMPRegion(CGF&: *this, IP);
      return llvm::Error::success();
    };

    // Body callback: emit the associated statement as an inlined region.
    auto BodyGenCB = [SectionRegionBodyStmt, this](InsertPointTy AllocaIP,
                                                   InsertPointTy CodeGenIP) {
      OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
          CGF&: *this, RegionBodyStmt: SectionRegionBodyStmt, AllocaIP, CodeGenIP, RegionName: "section");
      return llvm::Error::success();
    };

    LexicalScope Scope(*this, S.getSourceRange());
    EmitStopPoint(S: &S);
    llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
        cantFail(ValOrErr: OMPBuilder.createSection(Loc: Builder, BodyGenCB, FiniCB));
    Builder.restoreIP(IP: AfterIP);

    return;
  }
  LexicalScope Scope(*this, S.getSourceRange());
  EmitStopPoint(S: &S);
  EmitStmt(S: S.getAssociatedStmt());
}
4521
4522void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
4523 llvm::SmallVector<const Expr *, 8> CopyprivateVars;
4524 llvm::SmallVector<const Expr *, 8> DestExprs;
4525 llvm::SmallVector<const Expr *, 8> SrcExprs;
4526 llvm::SmallVector<const Expr *, 8> AssignmentOps;
4527 // Check if there are any 'copyprivate' clauses associated with this
4528 // 'single' construct.
4529 // Build a list of copyprivate variables along with helper expressions
4530 // (<source>, <destination>, <destination>=<source> expressions)
4531 for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) {
4532 CopyprivateVars.append(in_start: C->varlist_begin(), in_end: C->varlist_end());
4533 DestExprs.append(in_start: C->destination_exprs().begin(),
4534 in_end: C->destination_exprs().end());
4535 SrcExprs.append(in_start: C->source_exprs().begin(), in_end: C->source_exprs().end());
4536 AssignmentOps.append(in_start: C->assignment_ops().begin(),
4537 in_end: C->assignment_ops().end());
4538 }
4539 // Emit code for 'single' region along with 'copyprivate' clauses
4540 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4541 Action.Enter(CGF);
4542 OMPPrivateScope SingleScope(CGF);
4543 (void)CGF.EmitOMPFirstprivateClause(D: S, PrivateScope&: SingleScope);
4544 CGF.EmitOMPPrivateClause(D: S, PrivateScope&: SingleScope);
4545 (void)SingleScope.Privatize();
4546 CGF.EmitStmt(S: S.getInnermostCapturedStmt()->getCapturedStmt());
4547 };
4548 {
4549 auto LPCRegion =
4550 CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
4551 OMPLexicalScope Scope(*this, S, OMPD_unknown);
4552 CGM.getOpenMPRuntime().emitSingleRegion(CGF&: *this, SingleOpGen: CodeGen, Loc: S.getBeginLoc(),
4553 CopyprivateVars, DestExprs,
4554 SrcExprs, AssignmentOps);
4555 }
4556 // Emit an implicit barrier at the end (to avoid data race on firstprivate
4557 // init or if no 'nowait' clause was specified and no 'copyprivate' clause).
4558 if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) {
4559 CGM.getOpenMPRuntime().emitBarrierCall(
4560 CGF&: *this, Loc: S.getBeginLoc(),
4561 Kind: S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single);
4562 }
4563 // Check for outer lastprivate conditional update.
4564 checkForLastprivateConditionalUpdate(CGF&: *this, S);
4565}
4566
4567static void emitMaster(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
4568 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4569 Action.Enter(CGF);
4570 CGF.EmitStmt(S: S.getRawStmt());
4571 };
4572 CGF.CGM.getOpenMPRuntime().emitMasterRegion(CGF, MasterOpGen: CodeGen, Loc: S.getBeginLoc());
4573}
4574
// Lower '#pragma omp master'. With the OpenMPIRBuilder the region is built
// via OMPBuilder.createMaster(); otherwise the classic runtime-call path in
// emitMaster() is used.
void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;

    const Stmt *MasterRegionBodyStmt = S.getAssociatedStmt();

    // Finalization callback: run pending cleanups when leaving the region.
    auto FiniCB = [this](InsertPointTy IP) {
      OMPBuilderCBHelpers::FinalizeOMPRegion(CGF&: *this, IP);
      return llvm::Error::success();
    };

    // Body callback: emit the associated statement as an inlined region.
    auto BodyGenCB = [MasterRegionBodyStmt, this](InsertPointTy AllocaIP,
                                                  InsertPointTy CodeGenIP) {
      OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
          CGF&: *this, RegionBodyStmt: MasterRegionBodyStmt, AllocaIP, CodeGenIP, RegionName: "master");
      return llvm::Error::success();
    };

    LexicalScope Scope(*this, S.getSourceRange());
    EmitStopPoint(S: &S);
    llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
        cantFail(ValOrErr: OMPBuilder.createMaster(Loc: Builder, BodyGenCB, FiniCB));
    Builder.restoreIP(IP: AfterIP);

    return;
  }
  LexicalScope Scope(*this, S.getSourceRange());
  EmitStopPoint(S: &S);
  emitMaster(CGF&: *this, S);
}
4606
4607static void emitMasked(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
4608 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4609 Action.Enter(CGF);
4610 CGF.EmitStmt(S: S.getRawStmt());
4611 };
4612 Expr *Filter = nullptr;
4613 if (const auto *FilterClause = S.getSingleClause<OMPFilterClause>())
4614 Filter = FilterClause->getThreadID();
4615 CGF.CGM.getOpenMPRuntime().emitMaskedRegion(CGF, MaskedOpGen: CodeGen, Loc: S.getBeginLoc(),
4616 Filter);
4617}
4618
// Lower '#pragma omp masked'. With the OpenMPIRBuilder the region is built
// via OMPBuilder.createMasked() with the evaluated 'filter' value (0 when no
// filter clause is present); otherwise the classic path in emitMasked() is
// used.
void CodeGenFunction::EmitOMPMaskedDirective(const OMPMaskedDirective &S) {
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;

    const Stmt *MaskedRegionBodyStmt = S.getAssociatedStmt();
    const Expr *Filter = nullptr;
    if (const auto *FilterClause = S.getSingleClause<OMPFilterClause>())
      Filter = FilterClause->getThreadID();
    // NOTE(review): CGM.Int32Ty is passed to EmitScalarExpr's bool
    // 'IgnoreResultAssign' parameter (pointer-to-bool conversion, always
    // true). Presumably a conversion/cast to i32 was intended — confirm.
    llvm::Value *FilterVal = Filter
                                 ? EmitScalarExpr(E: Filter, IgnoreResultAssign: CGM.Int32Ty)
                                 : llvm::ConstantInt::get(Ty: CGM.Int32Ty, /*V=*/0);

    // Finalization callback: run pending cleanups when leaving the region.
    auto FiniCB = [this](InsertPointTy IP) {
      OMPBuilderCBHelpers::FinalizeOMPRegion(CGF&: *this, IP);
      return llvm::Error::success();
    };

    // Body callback: emit the associated statement as an inlined region.
    auto BodyGenCB = [MaskedRegionBodyStmt, this](InsertPointTy AllocaIP,
                                                  InsertPointTy CodeGenIP) {
      OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
          CGF&: *this, RegionBodyStmt: MaskedRegionBodyStmt, AllocaIP, CodeGenIP, RegionName: "masked");
      return llvm::Error::success();
    };

    LexicalScope Scope(*this, S.getSourceRange());
    EmitStopPoint(S: &S);
    llvm::OpenMPIRBuilder::InsertPointTy AfterIP = cantFail(
        ValOrErr: OMPBuilder.createMasked(Loc: Builder, BodyGenCB, FiniCB, Filter: FilterVal));
    Builder.restoreIP(IP: AfterIP);

    return;
  }
  LexicalScope Scope(*this, S.getSourceRange());
  EmitStopPoint(S: &S);
  emitMasked(CGF&: *this, S);
}
4656
// Lower '#pragma omp critical'. With the OpenMPIRBuilder the region is built
// via OMPBuilder.createCritical() using the directive's name and optional
// 'hint' value; otherwise the classic runtime-call path is used.
void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;

    const Stmt *CriticalRegionBodyStmt = S.getAssociatedStmt();
    const Expr *Hint = nullptr;
    if (const auto *HintClause = S.getSingleClause<OMPHintClause>())
      Hint = HintClause->getHint();

    // TODO: This is slightly different from what's currently being done in
    // clang. Fix the Int32Ty to IntPtrTy (pointer width size) when everything
    // about typing is final.
    llvm::Value *HintInst = nullptr;
    if (Hint)
      HintInst =
          Builder.CreateIntCast(V: EmitScalarExpr(E: Hint), DestTy: CGM.Int32Ty, isSigned: false);

    // Finalization callback: run pending cleanups when leaving the region.
    auto FiniCB = [this](InsertPointTy IP) {
      OMPBuilderCBHelpers::FinalizeOMPRegion(CGF&: *this, IP);
      return llvm::Error::success();
    };

    // Body callback: emit the associated statement as an inlined region.
    auto BodyGenCB = [CriticalRegionBodyStmt, this](InsertPointTy AllocaIP,
                                                    InsertPointTy CodeGenIP) {
      OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
          CGF&: *this, RegionBodyStmt: CriticalRegionBodyStmt, AllocaIP, CodeGenIP, RegionName: "critical");
      return llvm::Error::success();
    };

    LexicalScope Scope(*this, S.getSourceRange());
    EmitStopPoint(S: &S);
    llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
        cantFail(ValOrErr: OMPBuilder.createCritical(Loc: Builder, BodyGenCB, FiniCB,
                                         CriticalName: S.getDirectiveName().getAsString(),
                                         HintInst));
    Builder.restoreIP(IP: AfterIP);

    return;
  }

  // Classic path: the body is emitted inside a runtime-managed critical
  // region keyed by the directive's name (and optional lock hint).
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CGF.EmitStmt(S: S.getAssociatedStmt());
  };
  const Expr *Hint = nullptr;
  if (const auto *HintClause = S.getSingleClause<OMPHintClause>())
    Hint = HintClause->getHint();
  LexicalScope Scope(*this, S.getSourceRange());
  EmitStopPoint(S: &S);
  CGM.getOpenMPRuntime().emitCriticalRegion(CGF&: *this,
                                            CriticalName: S.getDirectiveName().getAsString(),
                                            CriticalOpGen: CodeGen, Loc: S.getBeginLoc(), Hint);
}
4711
void CodeGenFunction::EmitOMPParallelForDirective(
    const OMPParallelForDirective &S) {
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'for' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    emitOMPCopyinClause(CGF, S);
    (void)emitWorksharingDirective(CGF, S, HasCancel: S.hasCancel());
  };
  {
    // Helper to (re)compute the number of loop iterations; needed by the
    // inscan-reduction pre/post passes below.
    const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
      CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
      CGCapturedStmtInfo CGSI(CR_OpenMP);
      CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGSI);
      OMPLoopScope LoopScope(CGF, S);
      return CGF.EmitScalarExpr(E: S.getNumIterations());
    };
    // 'reduction(inscan, ...)' requires extra declarations before and a
    // finalization pass after the parallel region.
    bool IsInscan = llvm::any_of(Range: S.getClausesOfKind<OMPReductionClause>(),
                                 P: [](const OMPReductionClause *C) {
                                   return C->getModifier() == OMPC_REDUCTION_inscan;
                                 });
    if (IsInscan)
      emitScanBasedDirectiveDecls(CGF&: *this, S, NumIteratorsGen);
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
    emitCommonOMPParallelDirective(CGF&: *this, S, InnermostKind: OMPD_for, CodeGen,
                                   CodeGenBoundParameters: emitEmptyBoundParameters);
    if (IsInscan)
      emitScanBasedDirectiveFinals(CGF&: *this, S, NumIteratorsGen);
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(CGF&: *this, S);
}
4745
void CodeGenFunction::EmitOMPParallelForSimdDirective(
    const OMPParallelForSimdDirective &S) {
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'for' directive.
  // Unlike 'parallel for', cancellation is not supported here
  // (HasCancel=false).
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    emitOMPCopyinClause(CGF, S);
    (void)emitWorksharingDirective(CGF, S, /*HasCancel=*/false);
  };
  {
    // Helper to (re)compute the number of loop iterations; needed by the
    // inscan-reduction pre/post passes below.
    const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
      CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
      CGCapturedStmtInfo CGSI(CR_OpenMP);
      CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGSI);
      OMPLoopScope LoopScope(CGF, S);
      return CGF.EmitScalarExpr(E: S.getNumIterations());
    };
    // 'reduction(inscan, ...)' requires extra declarations before and a
    // finalization pass after the parallel region.
    bool IsInscan = llvm::any_of(Range: S.getClausesOfKind<OMPReductionClause>(),
                                 P: [](const OMPReductionClause *C) {
                                   return C->getModifier() == OMPC_REDUCTION_inscan;
                                 });
    if (IsInscan)
      emitScanBasedDirectiveDecls(CGF&: *this, S, NumIteratorsGen);
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
    emitCommonOMPParallelDirective(CGF&: *this, S, InnermostKind: OMPD_for_simd, CodeGen,
                                   CodeGenBoundParameters: emitEmptyBoundParameters);
    if (IsInscan)
      emitScanBasedDirectiveFinals(CGF&: *this, S, NumIteratorsGen);
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(CGF&: *this, S);
}
4779
void CodeGenFunction::EmitOMPParallelMasterDirective(
    const OMPParallelMasterDirective &S) {
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'master' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    // Privatize clause variables and set up reductions before emitting the
    // guarded 'master' body; finalize the reductions afterwards.
    OMPPrivateScope PrivateScope(CGF);
    emitOMPCopyinClause(CGF, S);
    (void)CGF.EmitOMPFirstprivateClause(D: S, PrivateScope);
    CGF.EmitOMPPrivateClause(D: S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(D: S, PrivateScope);
    (void)PrivateScope.Privatize();
    emitMaster(CGF, S);
    CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_parallel);
  };
  {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
    emitCommonOMPParallelDirective(CGF&: *this, S, InnermostKind: OMPD_master, CodeGen,
                                   CodeGenBoundParameters: emitEmptyBoundParameters);
    emitPostUpdateForReductionClause(CGF&: *this, D: S,
                                     CondGen: [](CodeGenFunction &) { return nullptr; });
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(CGF&: *this, S);
}
4806
void CodeGenFunction::EmitOMPParallelMaskedDirective(
    const OMPParallelMaskedDirective &S) {
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'masked' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    // Privatize clause variables and set up reductions before emitting the
    // filtered 'masked' body; finalize the reductions afterwards.
    OMPPrivateScope PrivateScope(CGF);
    emitOMPCopyinClause(CGF, S);
    (void)CGF.EmitOMPFirstprivateClause(D: S, PrivateScope);
    CGF.EmitOMPPrivateClause(D: S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(D: S, PrivateScope);
    (void)PrivateScope.Privatize();
    emitMasked(CGF, S);
    CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_parallel);
  };
  {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
    emitCommonOMPParallelDirective(CGF&: *this, S, InnermostKind: OMPD_masked, CodeGen,
                                   CodeGenBoundParameters: emitEmptyBoundParameters);
    emitPostUpdateForReductionClause(CGF&: *this, D: S,
                                     CondGen: [](CodeGenFunction &) { return nullptr; });
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(CGF&: *this, S);
}
4833
4834void CodeGenFunction::EmitOMPParallelSectionsDirective(
4835 const OMPParallelSectionsDirective &S) {
4836 // Emit directive as a combined directive that consists of two implicit
4837 // directives: 'parallel' with 'sections' directive.
4838 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4839 Action.Enter(CGF);
4840 emitOMPCopyinClause(CGF, S);
4841 CGF.EmitSections(S);
4842 };
4843 {
4844 auto LPCRegion =
4845 CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
4846 emitCommonOMPParallelDirective(CGF&: *this, S, InnermostKind: OMPD_sections, CodeGen,
4847 CodeGenBoundParameters: emitEmptyBoundParameters);
4848 }
4849 // Check for outer lastprivate conditional update.
4850 checkForLastprivateConditionalUpdate(CGF&: *this, S);
4851}
4852
namespace {
/// Get the list of variables declared in the context of the untied tasks.
/// Walks the task body and records every locally-stored VarDecl, while
/// deliberately not descending into nested OpenMP directives, captured
/// statements, lambdas, or blocks (their declarations belong to a different
/// context).
class CheckVarsEscapingUntiedTaskDeclContext final
    : public ConstStmtVisitor<CheckVarsEscapingUntiedTaskDeclContext> {
  // Local variables collected during the traversal.
  llvm::SmallVector<const VarDecl *, 4> PrivateDecls;

public:
  explicit CheckVarsEscapingUntiedTaskDeclContext() = default;
  ~CheckVarsEscapingUntiedTaskDeclContext() = default;
  /// Record every locally-stored variable introduced by a declaration
  /// statement.
  void VisitDeclStmt(const DeclStmt *S) {
    if (!S)
      return;
    // Need to privatize only local vars, static locals can be processed as is.
    for (const Decl *D : S->decls()) {
      if (const auto *VD = dyn_cast_or_null<VarDecl>(Val: D))
        if (VD->hasLocalStorage())
          PrivateDecls.push_back(Elt: VD);
    }
  }
  // Do not descend into nested regions/closures: their local declarations
  // are not part of the untied task's own declaration context.
  void VisitOMPExecutableDirective(const OMPExecutableDirective *) {}
  void VisitCapturedStmt(const CapturedStmt *) {}
  void VisitLambdaExpr(const LambdaExpr *) {}
  void VisitBlockExpr(const BlockExpr *) {}
  /// Generic fallback: recurse into all non-null children.
  void VisitStmt(const Stmt *S) {
    if (!S)
      return;
    for (const Stmt *Child : S->children())
      if (Child)
        Visit(S: Child);
  }

  /// Returns the list of local variables collected so far.
  ArrayRef<const VarDecl *> getPrivateDecls() const { return PrivateDecls; }
};
} // anonymous namespace
4888
4889static void buildDependences(const OMPExecutableDirective &S,
4890 OMPTaskDataTy &Data) {
4891
4892 // First look for 'omp_all_memory' and add this first.
4893 bool OmpAllMemory = false;
4894 if (llvm::any_of(
4895 Range: S.getClausesOfKind<OMPDependClause>(), P: [](const OMPDependClause *C) {
4896 return C->getDependencyKind() == OMPC_DEPEND_outallmemory ||
4897 C->getDependencyKind() == OMPC_DEPEND_inoutallmemory;
4898 })) {
4899 OmpAllMemory = true;
4900 // Since both OMPC_DEPEND_outallmemory and OMPC_DEPEND_inoutallmemory are
4901 // equivalent to the runtime, always use OMPC_DEPEND_outallmemory to
4902 // simplify.
4903 OMPTaskDataTy::DependData &DD =
4904 Data.Dependences.emplace_back(Args: OMPC_DEPEND_outallmemory,
4905 /*IteratorExpr=*/Args: nullptr);
4906 // Add a nullptr Expr to simplify the codegen in emitDependData.
4907 DD.DepExprs.push_back(Elt: nullptr);
4908 }
4909 // Add remaining dependences skipping any 'out' or 'inout' if they are
4910 // overridden by 'omp_all_memory'.
4911 for (const auto *C : S.getClausesOfKind<OMPDependClause>()) {
4912 OpenMPDependClauseKind Kind = C->getDependencyKind();
4913 if (Kind == OMPC_DEPEND_outallmemory || Kind == OMPC_DEPEND_inoutallmemory)
4914 continue;
4915 if (OmpAllMemory && (Kind == OMPC_DEPEND_out || Kind == OMPC_DEPEND_inout))
4916 continue;
4917 OMPTaskDataTy::DependData &DD =
4918 Data.Dependences.emplace_back(Args: C->getDependencyKind(), Args: C->getModifier());
4919 DD.DepExprs.append(in_start: C->varlist_begin(), in_end: C->varlist_end());
4920 }
4921}
4922
4923void CodeGenFunction::EmitOMPTaskBasedDirective(
4924 const OMPExecutableDirective &S, const OpenMPDirectiveKind CapturedRegion,
4925 const RegionCodeGenTy &BodyGen, const TaskGenTy &TaskGen,
4926 OMPTaskDataTy &Data) {
4927 // Emit outlined function for task construct.
4928 const CapturedStmt *CS = S.getCapturedStmt(RegionKind: CapturedRegion);
4929 auto I = CS->getCapturedDecl()->param_begin();
4930 auto PartId = std::next(x: I);
4931 auto TaskT = std::next(x: I, n: 4);
4932 // Check if the task is final
4933 if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) {
4934 // If the condition constant folds and can be elided, try to avoid emitting
4935 // the condition and the dead arm of the if/else.
4936 const Expr *Cond = Clause->getCondition();
4937 bool CondConstant;
4938 if (ConstantFoldsToSimpleInteger(Cond, Result&: CondConstant))
4939 Data.Final.setInt(CondConstant);
4940 else
4941 Data.Final.setPointer(EvaluateExprAsBool(E: Cond));
4942 } else {
4943 // By default the task is not final.
4944 Data.Final.setInt(/*IntVal=*/false);
4945 }
4946 // Check if the task has 'priority' clause.
4947 if (const auto *Clause = S.getSingleClause<OMPPriorityClause>()) {
4948 const Expr *Prio = Clause->getPriority();
4949 Data.Priority.setInt(/*IntVal=*/true);
4950 Data.Priority.setPointer(EmitScalarConversion(
4951 Src: EmitScalarExpr(E: Prio), SrcTy: Prio->getType(),
4952 DstTy: getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1),
4953 Loc: Prio->getExprLoc()));
4954 }
4955 // The first function argument for tasks is a thread id, the second one is a
4956 // part id (0 for tied tasks, >=0 for untied task).
4957 llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
4958 // Get list of private variables.
4959 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
4960 auto IRef = C->varlist_begin();
4961 for (const Expr *IInit : C->private_copies()) {
4962 const auto *OrigVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IRef)->getDecl());
4963 if (EmittedAsPrivate.insert(V: OrigVD->getCanonicalDecl()).second) {
4964 Data.PrivateVars.push_back(Elt: *IRef);
4965 Data.PrivateCopies.push_back(Elt: IInit);
4966 }
4967 ++IRef;
4968 }
4969 }
4970 EmittedAsPrivate.clear();
4971 // Get list of firstprivate variables.
4972 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
4973 auto IRef = C->varlist_begin();
4974 auto IElemInitRef = C->inits().begin();
4975 for (const Expr *IInit : C->private_copies()) {
4976 const auto *OrigVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IRef)->getDecl());
4977 if (EmittedAsPrivate.insert(V: OrigVD->getCanonicalDecl()).second) {
4978 Data.FirstprivateVars.push_back(Elt: *IRef);
4979 Data.FirstprivateCopies.push_back(Elt: IInit);
4980 Data.FirstprivateInits.push_back(Elt: *IElemInitRef);
4981 }
4982 ++IRef;
4983 ++IElemInitRef;
4984 }
4985 }
4986 // Get list of lastprivate variables (for taskloops).
4987 llvm::MapVector<const VarDecl *, const DeclRefExpr *> LastprivateDstsOrigs;
4988 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
4989 auto IRef = C->varlist_begin();
4990 auto ID = C->destination_exprs().begin();
4991 for (const Expr *IInit : C->private_copies()) {
4992 const auto *OrigVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IRef)->getDecl());
4993 if (EmittedAsPrivate.insert(V: OrigVD->getCanonicalDecl()).second) {
4994 Data.LastprivateVars.push_back(Elt: *IRef);
4995 Data.LastprivateCopies.push_back(Elt: IInit);
4996 }
4997 LastprivateDstsOrigs.insert(
4998 KV: std::make_pair(x: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *ID)->getDecl()),
4999 y: cast<DeclRefExpr>(Val: *IRef)));
5000 ++IRef;
5001 ++ID;
5002 }
5003 }
5004 SmallVector<const Expr *, 4> LHSs;
5005 SmallVector<const Expr *, 4> RHSs;
5006 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
5007 Data.ReductionVars.append(in_start: C->varlist_begin(), in_end: C->varlist_end());
5008 Data.ReductionOrigs.append(in_start: C->varlist_begin(), in_end: C->varlist_end());
5009 Data.ReductionCopies.append(in_start: C->privates().begin(), in_end: C->privates().end());
5010 Data.ReductionOps.append(in_start: C->reduction_ops().begin(),
5011 in_end: C->reduction_ops().end());
5012 LHSs.append(in_start: C->lhs_exprs().begin(), in_end: C->lhs_exprs().end());
5013 RHSs.append(in_start: C->rhs_exprs().begin(), in_end: C->rhs_exprs().end());
5014 }
5015 Data.Reductions = CGM.getOpenMPRuntime().emitTaskReductionInit(
5016 CGF&: *this, Loc: S.getBeginLoc(), LHSExprs: LHSs, RHSExprs: RHSs, Data);
5017 // Build list of dependences.
5018 buildDependences(S, Data);
5019 // Get list of local vars for untied tasks.
5020 if (!Data.Tied) {
5021 CheckVarsEscapingUntiedTaskDeclContext Checker;
5022 Checker.Visit(S: S.getInnermostCapturedStmt()->getCapturedStmt());
5023 Data.PrivateLocals.append(in_start: Checker.getPrivateDecls().begin(),
5024 in_end: Checker.getPrivateDecls().end());
5025 }
5026 auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs,
5027 CapturedRegion](CodeGenFunction &CGF,
5028 PrePostActionTy &Action) {
5029 llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
5030 std::pair<Address, Address>>
5031 UntiedLocalVars;
5032 // Set proper addresses for generated private copies.
5033 OMPPrivateScope Scope(CGF);
5034 // Generate debug info for variables present in shared clause.
5035 if (auto *DI = CGF.getDebugInfo()) {
5036 llvm::SmallDenseMap<const VarDecl *, FieldDecl *> CaptureFields =
5037 CGF.CapturedStmtInfo->getCaptureFields();
5038 llvm::Value *ContextValue = CGF.CapturedStmtInfo->getContextValue();
5039 if (CaptureFields.size() && ContextValue) {
5040 unsigned CharWidth = CGF.getContext().getCharWidth();
5041 // The shared variables are packed together as members of structure.
5042 // So the address of each shared variable can be computed by adding
5043 // offset of it (within record) to the base address of record. For each
5044 // shared variable, debug intrinsic llvm.dbg.declare is generated with
5045 // appropriate expressions (DIExpression).
5046 // Ex:
5047 // %12 = load %struct.anon*, %struct.anon** %__context.addr.i
5048 // call void @llvm.dbg.declare(metadata %struct.anon* %12,
5049 // metadata !svar1,
5050 // metadata !DIExpression(DW_OP_deref))
5051 // call void @llvm.dbg.declare(metadata %struct.anon* %12,
5052 // metadata !svar2,
5053 // metadata !DIExpression(DW_OP_plus_uconst, 8, DW_OP_deref))
5054 for (auto It = CaptureFields.begin(); It != CaptureFields.end(); ++It) {
5055 const VarDecl *SharedVar = It->first;
5056 RecordDecl *CaptureRecord = It->second->getParent();
5057 const ASTRecordLayout &Layout =
5058 CGF.getContext().getASTRecordLayout(D: CaptureRecord);
5059 unsigned Offset =
5060 Layout.getFieldOffset(FieldNo: It->second->getFieldIndex()) / CharWidth;
5061 if (CGF.CGM.getCodeGenOpts().hasReducedDebugInfo())
5062 (void)DI->EmitDeclareOfAutoVariable(Decl: SharedVar, AI: ContextValue,
5063 Builder&: CGF.Builder, UsePointerValue: false);
5064 // Get the call dbg.declare instruction we just created and update
5065 // its DIExpression to add offset to base address.
5066 auto UpdateExpr = [](llvm::LLVMContext &Ctx, auto *Declare,
5067 unsigned Offset) {
5068 SmallVector<uint64_t, 8> Ops;
5069 // Add offset to the base address if non zero.
5070 if (Offset) {
5071 Ops.push_back(Elt: llvm::dwarf::DW_OP_plus_uconst);
5072 Ops.push_back(Elt: Offset);
5073 }
5074 Ops.push_back(Elt: llvm::dwarf::DW_OP_deref);
5075 Declare->setExpression(llvm::DIExpression::get(Context&: Ctx, Elements: Ops));
5076 };
5077 llvm::Instruction &Last = CGF.Builder.GetInsertBlock()->back();
5078 if (auto DDI = dyn_cast<llvm::DbgVariableIntrinsic>(Val: &Last))
5079 UpdateExpr(DDI->getContext(), DDI, Offset);
5080 // If we're emitting using the new debug info format into a block
5081 // without a terminator, the record will be "trailing".
5082 assert(!Last.isTerminator() && "unexpected terminator");
5083 if (auto *Marker =
5084 CGF.Builder.GetInsertBlock()->getTrailingDbgRecords()) {
5085 for (llvm::DbgVariableRecord &DVR : llvm::reverse(
5086 C: llvm::filterDbgVars(R: Marker->getDbgRecordRange()))) {
5087 UpdateExpr(Last.getContext(), &DVR, Offset);
5088 break;
5089 }
5090 }
5091 }
5092 }
5093 }
5094 llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> FirstprivatePtrs;
5095 if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() ||
5096 !Data.LastprivateVars.empty() || !Data.PrivateLocals.empty()) {
5097 enum { PrivatesParam = 2, CopyFnParam = 3 };
5098 llvm::Value *CopyFn = CGF.Builder.CreateLoad(
5099 Addr: CGF.GetAddrOfLocalVar(VD: CS->getCapturedDecl()->getParam(i: CopyFnParam)));
5100 llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(Addr: CGF.GetAddrOfLocalVar(
5101 VD: CS->getCapturedDecl()->getParam(i: PrivatesParam)));
5102 // Map privates.
5103 llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
5104 llvm::SmallVector<llvm::Value *, 16> CallArgs;
5105 llvm::SmallVector<llvm::Type *, 4> ParamTypes;
5106 CallArgs.push_back(Elt: PrivatesPtr);
5107 ParamTypes.push_back(Elt: PrivatesPtr->getType());
5108 for (const Expr *E : Data.PrivateVars) {
5109 const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl());
5110 RawAddress PrivatePtr = CGF.CreateMemTemp(
5111 T: CGF.getContext().getPointerType(T: E->getType()), Name: ".priv.ptr.addr");
5112 PrivatePtrs.emplace_back(Args&: VD, Args&: PrivatePtr);
5113 CallArgs.push_back(Elt: PrivatePtr.getPointer());
5114 ParamTypes.push_back(Elt: PrivatePtr.getType());
5115 }
5116 for (const Expr *E : Data.FirstprivateVars) {
5117 const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl());
5118 RawAddress PrivatePtr =
5119 CGF.CreateMemTemp(T: CGF.getContext().getPointerType(T: E->getType()),
5120 Name: ".firstpriv.ptr.addr");
5121 PrivatePtrs.emplace_back(Args&: VD, Args&: PrivatePtr);
5122 FirstprivatePtrs.emplace_back(Args&: VD, Args&: PrivatePtr);
5123 CallArgs.push_back(Elt: PrivatePtr.getPointer());
5124 ParamTypes.push_back(Elt: PrivatePtr.getType());
5125 }
5126 for (const Expr *E : Data.LastprivateVars) {
5127 const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl());
5128 RawAddress PrivatePtr =
5129 CGF.CreateMemTemp(T: CGF.getContext().getPointerType(T: E->getType()),
5130 Name: ".lastpriv.ptr.addr");
5131 PrivatePtrs.emplace_back(Args&: VD, Args&: PrivatePtr);
5132 CallArgs.push_back(Elt: PrivatePtr.getPointer());
5133 ParamTypes.push_back(Elt: PrivatePtr.getType());
5134 }
5135 for (const VarDecl *VD : Data.PrivateLocals) {
5136 QualType Ty = VD->getType().getNonReferenceType();
5137 if (VD->getType()->isLValueReferenceType())
5138 Ty = CGF.getContext().getPointerType(T: Ty);
5139 if (isAllocatableDecl(VD))
5140 Ty = CGF.getContext().getPointerType(T: Ty);
5141 RawAddress PrivatePtr = CGF.CreateMemTemp(
5142 T: CGF.getContext().getPointerType(T: Ty), Name: ".local.ptr.addr");
5143 auto Result = UntiedLocalVars.insert(
5144 KV: std::make_pair(x&: VD, y: std::make_pair(x&: PrivatePtr, y: Address::invalid())));
5145 // If key exists update in place.
5146 if (Result.second == false)
5147 *Result.first = std::make_pair(
5148 x&: VD, y: std::make_pair(x&: PrivatePtr, y: Address::invalid()));
5149 CallArgs.push_back(Elt: PrivatePtr.getPointer());
5150 ParamTypes.push_back(Elt: PrivatePtr.getType());
5151 }
5152 auto *CopyFnTy = llvm::FunctionType::get(Result: CGF.Builder.getVoidTy(),
5153 Params: ParamTypes, /*isVarArg=*/false);
5154 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(
5155 CGF, Loc: S.getBeginLoc(), OutlinedFn: {CopyFnTy, CopyFn}, Args: CallArgs);
5156 for (const auto &Pair : LastprivateDstsOrigs) {
5157 const auto *OrigVD = cast<VarDecl>(Val: Pair.second->getDecl());
5158 DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(OrigVD),
5159 /*RefersToEnclosingVariableOrCapture=*/
5160 CGF.CapturedStmtInfo->lookup(VD: OrigVD) != nullptr,
5161 Pair.second->getType(), VK_LValue,
5162 Pair.second->getExprLoc());
5163 Scope.addPrivate(LocalVD: Pair.first, Addr: CGF.EmitLValue(E: &DRE).getAddress());
5164 }
5165 for (const auto &Pair : PrivatePtrs) {
5166 Address Replacement = Address(
5167 CGF.Builder.CreateLoad(Addr: Pair.second),
5168 CGF.ConvertTypeForMem(T: Pair.first->getType().getNonReferenceType()),
5169 CGF.getContext().getDeclAlign(D: Pair.first));
5170 Scope.addPrivate(LocalVD: Pair.first, Addr: Replacement);
5171 if (auto *DI = CGF.getDebugInfo())
5172 if (CGF.CGM.getCodeGenOpts().hasReducedDebugInfo())
5173 (void)DI->EmitDeclareOfAutoVariable(
5174 Decl: Pair.first, AI: Pair.second.getBasePointer(), Builder&: CGF.Builder,
5175 /*UsePointerValue*/ true);
5176 }
5177 // Adjust mapping for internal locals by mapping actual memory instead of
5178 // a pointer to this memory.
5179 for (auto &Pair : UntiedLocalVars) {
5180 QualType VDType = Pair.first->getType().getNonReferenceType();
5181 if (Pair.first->getType()->isLValueReferenceType())
5182 VDType = CGF.getContext().getPointerType(T: VDType);
5183 if (isAllocatableDecl(VD: Pair.first)) {
5184 llvm::Value *Ptr = CGF.Builder.CreateLoad(Addr: Pair.second.first);
5185 Address Replacement(
5186 Ptr,
5187 CGF.ConvertTypeForMem(T: CGF.getContext().getPointerType(T: VDType)),
5188 CGF.getPointerAlign());
5189 Pair.second.first = Replacement;
5190 Ptr = CGF.Builder.CreateLoad(Addr: Replacement);
5191 Replacement = Address(Ptr, CGF.ConvertTypeForMem(T: VDType),
5192 CGF.getContext().getDeclAlign(D: Pair.first));
5193 Pair.second.second = Replacement;
5194 } else {
5195 llvm::Value *Ptr = CGF.Builder.CreateLoad(Addr: Pair.second.first);
5196 Address Replacement(Ptr, CGF.ConvertTypeForMem(T: VDType),
5197 CGF.getContext().getDeclAlign(D: Pair.first));
5198 Pair.second.first = Replacement;
5199 }
5200 }
5201 }
5202 if (Data.Reductions) {
5203 OMPPrivateScope FirstprivateScope(CGF);
5204 for (const auto &Pair : FirstprivatePtrs) {
5205 Address Replacement(
5206 CGF.Builder.CreateLoad(Addr: Pair.second),
5207 CGF.ConvertTypeForMem(T: Pair.first->getType().getNonReferenceType()),
5208 CGF.getContext().getDeclAlign(D: Pair.first));
5209 FirstprivateScope.addPrivate(LocalVD: Pair.first, Addr: Replacement);
5210 }
5211 (void)FirstprivateScope.Privatize();
5212 OMPLexicalScope LexScope(CGF, S, CapturedRegion);
5213 ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionVars,
5214 Data.ReductionCopies, Data.ReductionOps);
5215 llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad(
5216 Addr: CGF.GetAddrOfLocalVar(VD: CS->getCapturedDecl()->getParam(i: 9)));
5217 for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) {
5218 RedCG.emitSharedOrigLValue(CGF, N: Cnt);
5219 RedCG.emitAggregateType(CGF, N: Cnt);
5220 // FIXME: This must removed once the runtime library is fixed.
5221 // Emit required threadprivate variables for
5222 // initializer/combiner/finalizer.
5223 CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, Loc: S.getBeginLoc(),
5224 RCG&: RedCG, N: Cnt);
5225 Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
5226 CGF, Loc: S.getBeginLoc(), ReductionsPtr, SharedLVal: RedCG.getSharedLValue(N: Cnt));
5227 Replacement = Address(
5228 CGF.EmitScalarConversion(Src: Replacement.emitRawPointer(CGF),
5229 SrcTy: CGF.getContext().VoidPtrTy,
5230 DstTy: CGF.getContext().getPointerType(
5231 T: Data.ReductionCopies[Cnt]->getType()),
5232 Loc: Data.ReductionCopies[Cnt]->getExprLoc()),
5233 CGF.ConvertTypeForMem(T: Data.ReductionCopies[Cnt]->getType()),
5234 Replacement.getAlignment());
5235 Replacement = RedCG.adjustPrivateAddress(CGF, N: Cnt, PrivateAddr: Replacement);
5236 Scope.addPrivate(LocalVD: RedCG.getBaseDecl(N: Cnt), Addr: Replacement);
5237 }
5238 }
5239 // Privatize all private variables except for in_reduction items.
5240 (void)Scope.Privatize();
5241 SmallVector<const Expr *, 4> InRedVars;
5242 SmallVector<const Expr *, 4> InRedPrivs;
5243 SmallVector<const Expr *, 4> InRedOps;
5244 SmallVector<const Expr *, 4> TaskgroupDescriptors;
5245 for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) {
5246 auto IPriv = C->privates().begin();
5247 auto IRed = C->reduction_ops().begin();
5248 auto ITD = C->taskgroup_descriptors().begin();
5249 for (const Expr *Ref : C->varlist()) {
5250 InRedVars.emplace_back(Args&: Ref);
5251 InRedPrivs.emplace_back(Args: *IPriv);
5252 InRedOps.emplace_back(Args: *IRed);
5253 TaskgroupDescriptors.emplace_back(Args: *ITD);
5254 std::advance(i&: IPriv, n: 1);
5255 std::advance(i&: IRed, n: 1);
5256 std::advance(i&: ITD, n: 1);
5257 }
5258 }
5259 // Privatize in_reduction items here, because taskgroup descriptors must be
5260 // privatized earlier.
5261 OMPPrivateScope InRedScope(CGF);
5262 if (!InRedVars.empty()) {
5263 ReductionCodeGen RedCG(InRedVars, InRedVars, InRedPrivs, InRedOps);
5264 for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) {
5265 RedCG.emitSharedOrigLValue(CGF, N: Cnt);
5266 RedCG.emitAggregateType(CGF, N: Cnt);
5267 // The taskgroup descriptor variable is always implicit firstprivate and
5268 // privatized already during processing of the firstprivates.
5269 // FIXME: This must removed once the runtime library is fixed.
5270 // Emit required threadprivate variables for
5271 // initializer/combiner/finalizer.
5272 CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, Loc: S.getBeginLoc(),
5273 RCG&: RedCG, N: Cnt);
5274 llvm::Value *ReductionsPtr;
5275 if (const Expr *TRExpr = TaskgroupDescriptors[Cnt]) {
5276 ReductionsPtr = CGF.EmitLoadOfScalar(lvalue: CGF.EmitLValue(E: TRExpr),
5277 Loc: TRExpr->getExprLoc());
5278 } else {
5279 ReductionsPtr = llvm::ConstantPointerNull::get(T: CGF.VoidPtrTy);
5280 }
5281 Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
5282 CGF, Loc: S.getBeginLoc(), ReductionsPtr, SharedLVal: RedCG.getSharedLValue(N: Cnt));
5283 Replacement = Address(
5284 CGF.EmitScalarConversion(
5285 Src: Replacement.emitRawPointer(CGF), SrcTy: CGF.getContext().VoidPtrTy,
5286 DstTy: CGF.getContext().getPointerType(T: InRedPrivs[Cnt]->getType()),
5287 Loc: InRedPrivs[Cnt]->getExprLoc()),
5288 CGF.ConvertTypeForMem(T: InRedPrivs[Cnt]->getType()),
5289 Replacement.getAlignment());
5290 Replacement = RedCG.adjustPrivateAddress(CGF, N: Cnt, PrivateAddr: Replacement);
5291 InRedScope.addPrivate(LocalVD: RedCG.getBaseDecl(N: Cnt), Addr: Replacement);
5292 }
5293 }
5294 (void)InRedScope.Privatize();
5295
5296 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII LocalVarsScope(CGF,
5297 UntiedLocalVars);
5298 Action.Enter(CGF);
5299 BodyGen(CGF);
5300 };
5301 OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S);
5302 llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
5303 D: S, ThreadIDVar: *I, PartIDVar: *PartId, TaskTVar: *TaskT, InnermostKind: EKind, CodeGen, Tied: Data.Tied, NumberOfParts&: Data.NumberOfParts);
5304 OMPLexicalScope Scope(*this, S, std::nullopt,
5305 !isOpenMPParallelDirective(DKind: EKind) &&
5306 !isOpenMPSimdDirective(DKind: EKind));
5307 TaskGen(*this, OutlinedFn, Data);
5308}
5309
/// Creates an implicit firstprivate variable of type \p Ty for a task-based
/// directive and registers the corresponding original/private/init
/// expressions in \p Data, so the common task firstprivate codegen emits the
/// capture and the private copy for it.
///
/// \param C       AST context used to build the implicit declarations.
/// \param Data    Task data clause lists the new firstprivate entry is
///                appended to.
/// \param Ty      Type of the implicit variable (typically a constant array
///                of pointers/sizes for target task arguments).
/// \param CD      Captured declaration that owns the implicit parameters.
/// \param Loc     Source location attached to all generated nodes.
/// \returns the "original" variable declaration; the caller privatizes it
///          with the address whose contents must be captured by value.
static ImplicitParamDecl *
createImplicitFirstprivateForType(ASTContext &C, OMPTaskDataTy &Data,
                                  QualType Ty, CapturedDecl *CD,
                                  SourceLocation Loc) {
  // The "original" (shared) variable and a reference to it.
  auto *OrigVD = ImplicitParamDecl::Create(C, DC: CD, IdLoc: Loc, /*Id=*/nullptr, T: Ty,
                                           ParamKind: ImplicitParamKind::Other);
  auto *OrigRef = DeclRefExpr::Create(
      Context: C, QualifierLoc: NestedNameSpecifierLoc(), TemplateKWLoc: SourceLocation(), D: OrigVD,
      /*RefersToEnclosingVariableOrCapture=*/false, NameLoc: Loc, T: Ty, VK: VK_LValue);
  // The private copy and a reference to it.
  auto *PrivateVD = ImplicitParamDecl::Create(C, DC: CD, IdLoc: Loc, /*Id=*/nullptr, T: Ty,
                                              ParamKind: ImplicitParamKind::Other);
  auto *PrivateRef = DeclRefExpr::Create(
      Context: C, QualifierLoc: NestedNameSpecifierLoc(), TemplateKWLoc: SourceLocation(), D: PrivateVD,
      /*RefersToEnclosingVariableOrCapture=*/false, NameLoc: Loc, T: Ty, VK: VK_LValue);
  // The init expression operates on the base element type (for array types
  // the element is what is copy-initialized element-by-element).
  QualType ElemType = C.getBaseElementType(QT: Ty);
  auto *InitVD = ImplicitParamDecl::Create(C, DC: CD, IdLoc: Loc, /*Id=*/nullptr, T: ElemType,
                                           ParamKind: ImplicitParamKind::Other);
  auto *InitRef = DeclRefExpr::Create(
      Context: C, QualifierLoc: NestedNameSpecifierLoc(), TemplateKWLoc: SourceLocation(), D: InitVD,
      /*RefersToEnclosingVariableOrCapture=*/false, NameLoc: Loc, T: ElemType, VK: VK_LValue);
  // Private copy is copy-initialized (CInit) from the init variable's rvalue.
  PrivateVD->setInitStyle(VarDecl::CInit);
  PrivateVD->setInit(ImplicitCastExpr::Create(Context: C, T: ElemType, Kind: CK_LValueToRValue,
                                              Operand: InitRef, /*BasePath=*/nullptr,
                                              Cat: VK_PRValue, FPO: FPOptionsOverride()));
  Data.FirstprivateVars.emplace_back(Args&: OrigRef);
  Data.FirstprivateCopies.emplace_back(Args&: PrivateRef);
  Data.FirstprivateInits.emplace_back(Args&: InitRef);
  return OrigVD;
}
5339
/// Emits a target task-based directive: outlines the body as a task, makes
/// the target argument arrays (base pointers, pointers, sizes, and optionally
/// mappers) implicit firstprivates of that task, and emits the task call.
/// \p InputInfo is updated in place inside the outlined region so that
/// \p BodyGen reads the privatized copies of the arrays.
void CodeGenFunction::EmitOMPTargetTaskBasedDirective(
    const OMPExecutableDirective &S, const RegionCodeGenTy &BodyGen,
    OMPTargetDataInfo &InputInfo) {
  // Emit outlined function for task construct.
  const CapturedStmt *CS = S.getCapturedStmt(RegionKind: OMPD_task);
  Address CapturedStruct = GenerateCapturedStmtArgument(S: *CS);
  CanQualType SharedsTy =
      getContext().getCanonicalTagType(TD: CS->getCapturedRecordDecl());
  // Captured parameter layout: the part id is the second parameter and the
  // task descriptor the fifth (see the std::next offsets below).
  auto I = CS->getCapturedDecl()->param_begin();
  auto PartId = std::next(x: I);
  auto TaskT = std::next(x: I, n: 4);
  OMPTaskDataTy Data;
  // The task is not final.
  Data.Final.setInt(/*IntVal=*/false);
  // Get list of firstprivate variables.
  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto IElemInitRef = C->inits().begin();
    for (auto *IInit : C->private_copies()) {
      Data.FirstprivateVars.push_back(Elt: *IRef);
      Data.FirstprivateCopies.push_back(Elt: IInit);
      Data.FirstprivateInits.push_back(Elt: *IElemInitRef);
      ++IRef;
      ++IElemInitRef;
    }
  }
  // Collect in_reduction items; they are handled later by processInReduction.
  SmallVector<const Expr *, 4> LHSs;
  SmallVector<const Expr *, 4> RHSs;
  for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) {
    Data.ReductionVars.append(in_start: C->varlist_begin(), in_end: C->varlist_end());
    Data.ReductionOrigs.append(in_start: C->varlist_begin(), in_end: C->varlist_end());
    Data.ReductionCopies.append(in_start: C->privates().begin(), in_end: C->privates().end());
    Data.ReductionOps.append(in_start: C->reduction_ops().begin(),
                             in_end: C->reduction_ops().end());
    LHSs.append(in_start: C->lhs_exprs().begin(), in_end: C->lhs_exprs().end());
    RHSs.append(in_start: C->rhs_exprs().begin(), in_end: C->rhs_exprs().end());
  }
  // Turn the target argument arrays into implicit firstprivate variables of
  // the task so they survive until the (possibly deferred) task executes.
  OMPPrivateScope TargetScope(*this);
  VarDecl *BPVD = nullptr; // base-pointers array
  VarDecl *PVD = nullptr;  // pointers array
  VarDecl *SVD = nullptr;  // sizes array
  VarDecl *MVD = nullptr;  // mappers array (may stay null, see below)
  if (InputInfo.NumberOfTargetItems > 0) {
    auto *CD = CapturedDecl::Create(
        C&: getContext(), DC: getContext().getTranslationUnitDecl(), /*NumParams=*/0);
    llvm::APInt ArrSize(/*numBits=*/32, InputInfo.NumberOfTargetItems);
    // Base pointers, pointers and mappers all share the same type:
    // void *x[NumberOfTargetItems].
    QualType BaseAndPointerAndMapperType = getContext().getConstantArrayType(
        EltTy: getContext().VoidPtrTy, ArySize: ArrSize, SizeExpr: nullptr, ASM: ArraySizeModifier::Normal,
        /*IndexTypeQuals=*/0);
    BPVD = createImplicitFirstprivateForType(
        C&: getContext(), Data, Ty: BaseAndPointerAndMapperType, CD, Loc: S.getBeginLoc());
    PVD = createImplicitFirstprivateForType(
        C&: getContext(), Data, Ty: BaseAndPointerAndMapperType, CD, Loc: S.getBeginLoc());
    // The sizes array uses 64-bit signed integers.
    QualType SizesType = getContext().getConstantArrayType(
        EltTy: getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1),
        ArySize: ArrSize, SizeExpr: nullptr, ASM: ArraySizeModifier::Normal,
        /*IndexTypeQuals=*/0);
    SVD = createImplicitFirstprivateForType(C&: getContext(), Data, Ty: SizesType, CD,
                                            Loc: S.getBeginLoc());
    TargetScope.addPrivate(LocalVD: BPVD, Addr: InputInfo.BasePointersArray);
    TargetScope.addPrivate(LocalVD: PVD, Addr: InputInfo.PointersArray);
    TargetScope.addPrivate(LocalVD: SVD, Addr: InputInfo.SizesArray);
    // If there is no user-defined mapper, the mapper array will be nullptr. In
    // this case, we don't need to privatize it.
    if (!isa_and_nonnull<llvm::ConstantPointerNull>(
            Val: InputInfo.MappersArray.emitRawPointer(CGF&: *this))) {
      MVD = createImplicitFirstprivateForType(
          C&: getContext(), Data, Ty: BaseAndPointerAndMapperType, CD, Loc: S.getBeginLoc());
      TargetScope.addPrivate(LocalVD: MVD, Addr: InputInfo.MappersArray);
    }
  }
  (void)TargetScope.Privatize();
  buildDependences(S, Data);
  OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S);
  // Body of the outlined task function.
  auto &&CodeGen = [&Data, &S, CS, &BodyGen, BPVD, PVD, SVD, MVD, EKind,
                    &InputInfo](CodeGenFunction &CGF, PrePostActionTy &Action) {
    // Set proper addresses for generated private copies.
    OMPPrivateScope Scope(CGF);
    if (!Data.FirstprivateVars.empty()) {
      // Captured parameter indices of the privates block and the copy
      // function inside the task entry.
      enum { PrivatesParam = 2, CopyFnParam = 3 };
      llvm::Value *CopyFn = CGF.Builder.CreateLoad(
          Addr: CGF.GetAddrOfLocalVar(VD: CS->getCapturedDecl()->getParam(i: CopyFnParam)));
      llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(Addr: CGF.GetAddrOfLocalVar(
          VD: CS->getCapturedDecl()->getParam(i: PrivatesParam)));
      // Map privates: one out-pointer temp per firstprivate variable, all
      // filled in by a single call to the copy function.
      llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
      llvm::SmallVector<llvm::Value *, 16> CallArgs;
      llvm::SmallVector<llvm::Type *, 4> ParamTypes;
      CallArgs.push_back(Elt: PrivatesPtr);
      ParamTypes.push_back(Elt: PrivatesPtr->getType());
      for (const Expr *E : Data.FirstprivateVars) {
        const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl());
        RawAddress PrivatePtr =
            CGF.CreateMemTemp(T: CGF.getContext().getPointerType(T: E->getType()),
                              Name: ".firstpriv.ptr.addr");
        PrivatePtrs.emplace_back(Args&: VD, Args&: PrivatePtr);
        CallArgs.push_back(Elt: PrivatePtr.getPointer());
        ParamTypes.push_back(Elt: PrivatePtr.getType());
      }
      auto *CopyFnTy = llvm::FunctionType::get(Result: CGF.Builder.getVoidTy(),
                                               Params: ParamTypes, /*isVarArg=*/false);
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(
          CGF, Loc: S.getBeginLoc(), OutlinedFn: {CopyFnTy, CopyFn}, Args: CallArgs);
      // Remap each firstprivate decl to the address the copy function wrote.
      for (const auto &Pair : PrivatePtrs) {
        Address Replacement(
            CGF.Builder.CreateLoad(Addr: Pair.second),
            CGF.ConvertTypeForMem(T: Pair.first->getType().getNonReferenceType()),
            CGF.getContext().getDeclAlign(D: Pair.first));
        Scope.addPrivate(LocalVD: Pair.first, Addr: Replacement);
      }
    }
    CGF.processInReduction(S, Data, CGF, CS, Scope);
    // Rewrite InputInfo to point at the privatized (task-local) arrays so
    // BodyGen uses the captured copies.
    if (InputInfo.NumberOfTargetItems > 0) {
      InputInfo.BasePointersArray = CGF.Builder.CreateConstArrayGEP(
          Addr: CGF.GetAddrOfLocalVar(VD: BPVD), /*Index=*/0);
      InputInfo.PointersArray = CGF.Builder.CreateConstArrayGEP(
          Addr: CGF.GetAddrOfLocalVar(VD: PVD), /*Index=*/0);
      InputInfo.SizesArray = CGF.Builder.CreateConstArrayGEP(
          Addr: CGF.GetAddrOfLocalVar(VD: SVD), /*Index=*/0);
      // If MVD is nullptr, the mapper array is not privatized
      if (MVD)
        InputInfo.MappersArray = CGF.Builder.CreateConstArrayGEP(
            Addr: CGF.GetAddrOfLocalVar(VD: MVD), /*Index=*/0);
    }

    Action.Enter(CGF);
    OMPLexicalScope LexScope(CGF, S, OMPD_task, /*EmitPreInitStmt=*/false);
    auto *TL = S.getSingleClause<OMPThreadLimitClause>();
    if (CGF.CGM.getLangOpts().OpenMP >= 51 &&
        needsTaskBasedThreadLimit(DKind: EKind) && TL) {
      // Emit __kmpc_set_thread_limit() to set the thread_limit for the task
      // enclosing this target region. This will indirectly set the thread_limit
      // for every applicable construct within target region.
      CGF.CGM.getOpenMPRuntime().emitThreadLimitClause(
          CGF, ThreadLimit: TL->getThreadLimit().front(), Loc: S.getBeginLoc());
    }
    BodyGen(CGF);
  };
  llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
      D: S, ThreadIDVar: *I, PartIDVar: *PartId, TaskTVar: *TaskT, InnermostKind: EKind, CodeGen, /*Tied=*/true,
      NumberOfParts&: Data.NumberOfParts);
  // The "if" condition of the task is the negation of 'nowait': without a
  // nowait clause the task is emitted as an if(1) (undeferred) task.
  llvm::APInt TrueOrFalse(32, S.hasClausesOfKind<OMPNowaitClause>() ? 1 : 0);
  IntegerLiteral IfCond(getContext(), TrueOrFalse,
                        getContext().getIntTypeForBitwidth(DestWidth: 32, /*Signed=*/0),
                        SourceLocation());
  CGM.getOpenMPRuntime().emitTaskCall(CGF&: *this, Loc: S.getBeginLoc(), D: S, TaskFunction: OutlinedFn,
                                      SharedsTy, Shareds: CapturedStruct, IfCond: &IfCond, Data);
}
5488
/// Privatizes reduction and in_reduction items inside an outlined task
/// region: remaps each reduction variable to the per-task reduction item
/// returned by the runtime. Note that this member operates on the passed-in
/// \p CGF (callers pass themselves), not on *this.
///
/// \param S     The task-based directive being emitted.
/// \param Data  Collected reduction clause lists for the directive.
/// \param CGF   The function in which the outlined body is being emitted.
/// \param CS    Captured statement of the task region (provides the captured
///              parameter that carries the reductions descriptor).
/// \param Scope Private scope that receives the reduction remappings.
void CodeGenFunction::processInReduction(const OMPExecutableDirective &S,
                                         OMPTaskDataTy &Data,
                                         CodeGenFunction &CGF,
                                         const CapturedStmt *CS,
                                         OMPPrivateScope &Scope) {
  OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S);
  if (Data.Reductions) {
    OpenMPDirectiveKind CapturedRegion = EKind;
    OMPLexicalScope LexScope(CGF, S, CapturedRegion);
    ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionVars,
                           Data.ReductionCopies, Data.ReductionOps);
    // The reductions descriptor is passed as the fifth captured parameter.
    llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad(
        Addr: CGF.GetAddrOfLocalVar(VD: CS->getCapturedDecl()->getParam(i: 4)));
    for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) {
      RedCG.emitSharedOrigLValue(CGF, N: Cnt);
      RedCG.emitAggregateType(CGF, N: Cnt);
      // FIXME: This must removed once the runtime library is fixed.
      // Emit required threadprivate variables for
      // initializer/combiner/finalizer.
      CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, Loc: S.getBeginLoc(),
                                                         RCG&: RedCG, N: Cnt);
      // Ask the runtime for this task's private copy of the reduction item
      // and cast it from void* to the item's own type.
      Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
          CGF, Loc: S.getBeginLoc(), ReductionsPtr, SharedLVal: RedCG.getSharedLValue(N: Cnt));
      Replacement = Address(
          CGF.EmitScalarConversion(Src: Replacement.emitRawPointer(CGF),
                                   SrcTy: CGF.getContext().VoidPtrTy,
                                   DstTy: CGF.getContext().getPointerType(
                                       T: Data.ReductionCopies[Cnt]->getType()),
                                   Loc: Data.ReductionCopies[Cnt]->getExprLoc()),
          CGF.ConvertTypeForMem(T: Data.ReductionCopies[Cnt]->getType()),
          Replacement.getAlignment());
      Replacement = RedCG.adjustPrivateAddress(CGF, N: Cnt, PrivateAddr: Replacement);
      Scope.addPrivate(LocalVD: RedCG.getBaseDecl(N: Cnt), Addr: Replacement);
    }
  }
  (void)Scope.Privatize();
  // Now handle in_reduction clauses: collect the items and their taskgroup
  // descriptors, then privatize them against the enclosing taskgroup's
  // reduction data.
  SmallVector<const Expr *, 4> InRedVars;
  SmallVector<const Expr *, 4> InRedPrivs;
  SmallVector<const Expr *, 4> InRedOps;
  SmallVector<const Expr *, 4> TaskgroupDescriptors;
  for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) {
    auto IPriv = C->privates().begin();
    auto IRed = C->reduction_ops().begin();
    auto ITD = C->taskgroup_descriptors().begin();
    for (const Expr *Ref : C->varlist()) {
      InRedVars.emplace_back(Args&: Ref);
      InRedPrivs.emplace_back(Args: *IPriv);
      InRedOps.emplace_back(Args: *IRed);
      TaskgroupDescriptors.emplace_back(Args: *ITD);
      std::advance(i&: IPriv, n: 1);
      std::advance(i&: IRed, n: 1);
      std::advance(i&: ITD, n: 1);
    }
  }
  OMPPrivateScope InRedScope(CGF);
  if (!InRedVars.empty()) {
    ReductionCodeGen RedCG(InRedVars, InRedVars, InRedPrivs, InRedOps);
    for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) {
      RedCG.emitSharedOrigLValue(CGF, N: Cnt);
      RedCG.emitAggregateType(CGF, N: Cnt);
      // FIXME: This must removed once the runtime library is fixed.
      // Emit required threadprivate variables for
      // initializer/combiner/finalizer.
      CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, Loc: S.getBeginLoc(),
                                                         RCG&: RedCG, N: Cnt);
      // Load the taskgroup reduction descriptor if one was recorded;
      // otherwise pass a null pointer to the runtime.
      llvm::Value *ReductionsPtr;
      if (const Expr *TRExpr = TaskgroupDescriptors[Cnt]) {
        ReductionsPtr =
            CGF.EmitLoadOfScalar(lvalue: CGF.EmitLValue(E: TRExpr), Loc: TRExpr->getExprLoc());
      } else {
        ReductionsPtr = llvm::ConstantPointerNull::get(T: CGF.VoidPtrTy);
      }
      Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
          CGF, Loc: S.getBeginLoc(), ReductionsPtr, SharedLVal: RedCG.getSharedLValue(N: Cnt));
      Replacement = Address(
          CGF.EmitScalarConversion(
              Src: Replacement.emitRawPointer(CGF), SrcTy: CGF.getContext().VoidPtrTy,
              DstTy: CGF.getContext().getPointerType(T: InRedPrivs[Cnt]->getType()),
              Loc: InRedPrivs[Cnt]->getExprLoc()),
          CGF.ConvertTypeForMem(T: InRedPrivs[Cnt]->getType()),
          Replacement.getAlignment());
      Replacement = RedCG.adjustPrivateAddress(CGF, N: Cnt, PrivateAddr: Replacement);
      InRedScope.addPrivate(LocalVD: RedCG.getBaseDecl(N: Cnt), Addr: Replacement);
    }
  }
  (void)InRedScope.Privatize();
}
5576
5577void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
5578 // Emit outlined function for task construct.
5579 const CapturedStmt *CS = S.getCapturedStmt(RegionKind: OMPD_task);
5580 Address CapturedStruct = GenerateCapturedStmtArgument(S: *CS);
5581 CanQualType SharedsTy =
5582 getContext().getCanonicalTagType(TD: CS->getCapturedRecordDecl());
5583 const Expr *IfCond = nullptr;
5584 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
5585 if (C->getNameModifier() == OMPD_unknown ||
5586 C->getNameModifier() == OMPD_task) {
5587 IfCond = C->getCondition();
5588 break;
5589 }
5590 }
5591
5592 OMPTaskDataTy Data;
5593 // Check if we should emit tied or untied task.
5594 Data.Tied = !S.getSingleClause<OMPUntiedClause>();
5595 auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) {
5596 CGF.EmitStmt(S: CS->getCapturedStmt());
5597 };
5598 auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
5599 IfCond](CodeGenFunction &CGF, llvm::Function *OutlinedFn,
5600 const OMPTaskDataTy &Data) {
5601 CGF.CGM.getOpenMPRuntime().emitTaskCall(CGF, Loc: S.getBeginLoc(), D: S, TaskFunction: OutlinedFn,
5602 SharedsTy, Shareds: CapturedStruct, IfCond,
5603 Data);
5604 };
5605 auto LPCRegion =
5606 CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
5607 EmitOMPTaskBasedDirective(S, CapturedRegion: OMPD_task, BodyGen, TaskGen, Data);
5608}
5609
/// Emits a 'taskyield' directive: lowers directly to the runtime taskyield
/// call; the directive carries no clauses to process.
void CodeGenFunction::EmitOMPTaskyieldDirective(
    const OMPTaskyieldDirective &S) {
  CGM.getOpenMPRuntime().emitTaskyieldCall(CGF&: *this, Loc: S.getBeginLoc());
}
5614
5615void CodeGenFunction::EmitOMPErrorDirective(const OMPErrorDirective &S) {
5616 const OMPMessageClause *MC = S.getSingleClause<OMPMessageClause>();
5617 Expr *ME = MC ? MC->getMessageString() : nullptr;
5618 const OMPSeverityClause *SC = S.getSingleClause<OMPSeverityClause>();
5619 bool IsFatal = false;
5620 if (!SC || SC->getSeverityKind() == OMPC_SEVERITY_fatal)
5621 IsFatal = true;
5622 CGM.getOpenMPRuntime().emitErrorCall(CGF&: *this, Loc: S.getBeginLoc(), ME, IsFatal);
5623}
5624
/// Emits a 'barrier' directive: lowers directly to the runtime barrier call
/// with the explicit-barrier kind.
void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) {
  CGM.getOpenMPRuntime().emitBarrierCall(CGF&: *this, Loc: S.getBeginLoc(), Kind: OMPD_barrier);
}
5628
5629void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) {
5630 OMPTaskDataTy Data;
5631 // Build list of dependences
5632 buildDependences(S, Data);
5633 Data.HasNowaitClause = S.hasClausesOfKind<OMPNowaitClause>();
5634 CGM.getOpenMPRuntime().emitTaskwaitCall(CGF&: *this, Loc: S.getBeginLoc(), Data);
5635}
5636
/// Returns true if this 'taskgroup' can be lowered via the OpenMPIRBuilder:
/// only the clause-free form is supported there; any clause forces the
/// classic CGOpenMPRuntime path.
static bool isSupportedByOpenMPIRBuilder(const OMPTaskgroupDirective &T) {
  return T.clauses().empty();
}
5640
/// Emits a 'taskgroup' directive. Clause-free taskgroups can be lowered via
/// the OpenMPIRBuilder; otherwise the classic runtime path is used, which
/// also materializes the task_reduction descriptor when present.
void CodeGenFunction::EmitOMPTaskgroupDirective(
    const OMPTaskgroupDirective &S) {
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  if (CGM.getLangOpts().OpenMPIRBuilder && isSupportedByOpenMPIRBuilder(T: S)) {
    // OpenMPIRBuilder path: emit the body at the builder-provided insertion
    // point inside the taskgroup region.
    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
    InsertPointTy AllocaIP(AllocaInsertPt->getParent(),
                           AllocaInsertPt->getIterator());

    auto BodyGenCB = [&, this](InsertPointTy AllocaIP,
                               InsertPointTy CodeGenIP) {
      Builder.restoreIP(IP: CodeGenIP);
      EmitStmt(S: S.getInnermostCapturedStmt()->getCapturedStmt());
      return llvm::Error::success();
    };
    // Provide a captured-stmt info if the current function has none, so the
    // emitted body sees a valid context.
    CodeGenFunction::CGCapturedStmtInfo CapStmtInfo;
    if (!CapturedStmtInfo)
      CapturedStmtInfo = &CapStmtInfo;
    llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
        cantFail(ValOrErr: OMPBuilder.createTaskgroup(Loc: Builder, AllocaIP, BodyGenCB));
    Builder.restoreIP(IP: AfterIP);
    return;
  }
  // Classic path: the region codegen first initializes the task_reduction
  // descriptor variable (if any), then emits the body.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    if (const Expr *E = S.getReductionRef()) {
      SmallVector<const Expr *, 4> LHSs;
      SmallVector<const Expr *, 4> RHSs;
      OMPTaskDataTy Data;
      // Gather all task_reduction items from the directive.
      for (const auto *C : S.getClausesOfKind<OMPTaskReductionClause>()) {
        Data.ReductionVars.append(in_start: C->varlist_begin(), in_end: C->varlist_end());
        Data.ReductionOrigs.append(in_start: C->varlist_begin(), in_end: C->varlist_end());
        Data.ReductionCopies.append(in_start: C->privates().begin(), in_end: C->privates().end());
        Data.ReductionOps.append(in_start: C->reduction_ops().begin(),
                                 in_end: C->reduction_ops().end());
        LHSs.append(in_start: C->lhs_exprs().begin(), in_end: C->lhs_exprs().end());
        RHSs.append(in_start: C->rhs_exprs().begin(), in_end: C->rhs_exprs().end());
      }
      // Initialize the runtime reduction data and store its handle into the
      // taskgroup's reduction reference variable.
      llvm::Value *ReductionDesc =
          CGF.CGM.getOpenMPRuntime().emitTaskReductionInit(CGF, Loc: S.getBeginLoc(),
                                                           LHSExprs: LHSs, RHSExprs: RHSs, Data);
      const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl());
      CGF.EmitVarDecl(D: *VD);
      CGF.EmitStoreOfScalar(Value: ReductionDesc, Addr: CGF.GetAddrOfLocalVar(VD),
                            /*Volatile=*/false, Ty: E->getType());
    }
    CGF.EmitStmt(S: S.getInnermostCapturedStmt()->getCapturedStmt());
  };
  CGM.getOpenMPRuntime().emitTaskgroupRegion(CGF&: *this, TaskgroupOpGen: CodeGen, Loc: S.getBeginLoc());
}
5691
5692void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
5693 llvm::AtomicOrdering AO = S.getSingleClause<OMPFlushClause>()
5694 ? llvm::AtomicOrdering::NotAtomic
5695 : llvm::AtomicOrdering::AcquireRelease;
5696 CGM.getOpenMPRuntime().emitFlush(
5697 CGF&: *this,
5698 Vars: [&S]() -> ArrayRef<const Expr *> {
5699 if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>())
5700 return llvm::ArrayRef(FlushClause->varlist_begin(),
5701 FlushClause->varlist_end());
5702 return {};
5703 }(),
5704 Loc: S.getBeginLoc(), AO);
5705}
5706
5707void CodeGenFunction::EmitOMPDepobjDirective(const OMPDepobjDirective &S) {
5708 const auto *DO = S.getSingleClause<OMPDepobjClause>();
5709 LValue DOLVal = EmitLValue(E: DO->getDepobj());
5710 if (const auto *DC = S.getSingleClause<OMPDependClause>()) {
5711 // Build list and emit dependences
5712 OMPTaskDataTy Data;
5713 buildDependences(S, Data);
5714 for (auto &Dep : Data.Dependences) {
5715 Address DepAddr = CGM.getOpenMPRuntime().emitDepobjDependClause(
5716 CGF&: *this, Dependencies: Dep, Loc: DC->getBeginLoc());
5717 EmitStoreOfScalar(value: DepAddr.emitRawPointer(CGF&: *this), lvalue: DOLVal);
5718 }
5719 return;
5720 }
5721 if (const auto *DC = S.getSingleClause<OMPDestroyClause>()) {
5722 CGM.getOpenMPRuntime().emitDestroyClause(CGF&: *this, DepobjLVal: DOLVal, Loc: DC->getBeginLoc());
5723 return;
5724 }
5725 if (const auto *UC = S.getSingleClause<OMPUpdateClause>()) {
5726 CGM.getOpenMPRuntime().emitUpdateClause(
5727 CGF&: *this, DepobjLVal: DOLVal, NewDepKind: UC->getDependencyKind(), Loc: UC->getBeginLoc());
5728 return;
5729 }
5730}
5731
5732void CodeGenFunction::EmitOMPScanDirective(const OMPScanDirective &S) {
5733 if (!OMPParentLoopDirectiveForScan)
5734 return;
5735 const OMPExecutableDirective &ParentDir = *OMPParentLoopDirectiveForScan;
5736 bool IsInclusive = S.hasClausesOfKind<OMPInclusiveClause>();
5737 SmallVector<const Expr *, 4> Shareds;
5738 SmallVector<const Expr *, 4> Privates;
5739 SmallVector<const Expr *, 4> LHSs;
5740 SmallVector<const Expr *, 4> RHSs;
5741 SmallVector<const Expr *, 4> ReductionOps;
5742 SmallVector<const Expr *, 4> CopyOps;
5743 SmallVector<const Expr *, 4> CopyArrayTemps;
5744 SmallVector<const Expr *, 4> CopyArrayElems;
5745 for (const auto *C : ParentDir.getClausesOfKind<OMPReductionClause>()) {
5746 if (C->getModifier() != OMPC_REDUCTION_inscan)
5747 continue;
5748 Shareds.append(in_start: C->varlist_begin(), in_end: C->varlist_end());
5749 Privates.append(in_start: C->privates().begin(), in_end: C->privates().end());
5750 LHSs.append(in_start: C->lhs_exprs().begin(), in_end: C->lhs_exprs().end());
5751 RHSs.append(in_start: C->rhs_exprs().begin(), in_end: C->rhs_exprs().end());
5752 ReductionOps.append(in_start: C->reduction_ops().begin(), in_end: C->reduction_ops().end());
5753 CopyOps.append(in_start: C->copy_ops().begin(), in_end: C->copy_ops().end());
5754 CopyArrayTemps.append(in_start: C->copy_array_temps().begin(),
5755 in_end: C->copy_array_temps().end());
5756 CopyArrayElems.append(in_start: C->copy_array_elems().begin(),
5757 in_end: C->copy_array_elems().end());
5758 }
5759 if (ParentDir.getDirectiveKind() == OMPD_simd ||
5760 (getLangOpts().OpenMPSimd &&
5761 isOpenMPSimdDirective(DKind: ParentDir.getDirectiveKind()))) {
5762 // For simd directive and simd-based directives in simd only mode, use the
5763 // following codegen:
5764 // int x = 0;
5765 // #pragma omp simd reduction(inscan, +: x)
5766 // for (..) {
5767 // <first part>
5768 // #pragma omp scan inclusive(x)
5769 // <second part>
5770 // }
5771 // is transformed to:
5772 // int x = 0;
5773 // for (..) {
5774 // int x_priv = 0;
5775 // <first part>
5776 // x = x_priv + x;
5777 // x_priv = x;
5778 // <second part>
5779 // }
5780 // and
5781 // int x = 0;
5782 // #pragma omp simd reduction(inscan, +: x)
5783 // for (..) {
5784 // <first part>
5785 // #pragma omp scan exclusive(x)
5786 // <second part>
5787 // }
5788 // to
5789 // int x = 0;
5790 // for (..) {
5791 // int x_priv = 0;
5792 // <second part>
5793 // int temp = x;
5794 // x = x_priv + x;
5795 // x_priv = temp;
5796 // <first part>
5797 // }
5798 llvm::BasicBlock *OMPScanReduce = createBasicBlock(name: "omp.inscan.reduce");
5799 EmitBranch(Block: IsInclusive
5800 ? OMPScanReduce
5801 : BreakContinueStack.back().ContinueBlock.getBlock());
5802 EmitBlock(BB: OMPScanDispatch);
5803 {
5804 // New scope for correct construction/destruction of temp variables for
5805 // exclusive scan.
5806 LexicalScope Scope(*this, S.getSourceRange());
5807 EmitBranch(Block: IsInclusive ? OMPBeforeScanBlock : OMPAfterScanBlock);
5808 EmitBlock(BB: OMPScanReduce);
5809 if (!IsInclusive) {
5810 // Create temp var and copy LHS value to this temp value.
5811 // TMP = LHS;
5812 for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
5813 const Expr *PrivateExpr = Privates[I];
5814 const Expr *TempExpr = CopyArrayTemps[I];
5815 EmitAutoVarDecl(
5816 D: *cast<VarDecl>(Val: cast<DeclRefExpr>(Val: TempExpr)->getDecl()));
5817 LValue DestLVal = EmitLValue(E: TempExpr);
5818 LValue SrcLVal = EmitLValue(E: LHSs[I]);
5819 EmitOMPCopy(OriginalType: PrivateExpr->getType(), DestAddr: DestLVal.getAddress(),
5820 SrcAddr: SrcLVal.getAddress(),
5821 DestVD: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: LHSs[I])->getDecl()),
5822 SrcVD: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: RHSs[I])->getDecl()),
5823 Copy: CopyOps[I]);
5824 }
5825 }
5826 CGM.getOpenMPRuntime().emitReduction(
5827 CGF&: *this, Loc: ParentDir.getEndLoc(), Privates, LHSExprs: LHSs, RHSExprs: RHSs, ReductionOps,
5828 Options: {/*WithNowait=*/true, /*SimpleReduction=*/true,
5829 /*IsPrivateVarReduction*/ {}, .ReductionKind: OMPD_simd});
5830 for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
5831 const Expr *PrivateExpr = Privates[I];
5832 LValue DestLVal;
5833 LValue SrcLVal;
5834 if (IsInclusive) {
5835 DestLVal = EmitLValue(E: RHSs[I]);
5836 SrcLVal = EmitLValue(E: LHSs[I]);
5837 } else {
5838 const Expr *TempExpr = CopyArrayTemps[I];
5839 DestLVal = EmitLValue(E: RHSs[I]);
5840 SrcLVal = EmitLValue(E: TempExpr);
5841 }
5842 EmitOMPCopy(
5843 OriginalType: PrivateExpr->getType(), DestAddr: DestLVal.getAddress(), SrcAddr: SrcLVal.getAddress(),
5844 DestVD: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: LHSs[I])->getDecl()),
5845 SrcVD: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: RHSs[I])->getDecl()), Copy: CopyOps[I]);
5846 }
5847 }
5848 EmitBranch(Block: IsInclusive ? OMPAfterScanBlock : OMPBeforeScanBlock);
5849 OMPScanExitBlock = IsInclusive
5850 ? BreakContinueStack.back().ContinueBlock.getBlock()
5851 : OMPScanReduce;
5852 EmitBlock(BB: OMPAfterScanBlock);
5853 return;
5854 }
5855 if (!IsInclusive) {
5856 EmitBranch(Block: BreakContinueStack.back().ContinueBlock.getBlock());
5857 EmitBlock(BB: OMPScanExitBlock);
5858 }
5859 if (OMPFirstScanLoop) {
5860 // Emit buffer[i] = red; at the end of the input phase.
5861 const auto *IVExpr = cast<OMPLoopDirective>(Val: ParentDir)
5862 .getIterationVariable()
5863 ->IgnoreParenImpCasts();
5864 LValue IdxLVal = EmitLValue(E: IVExpr);
5865 llvm::Value *IdxVal = EmitLoadOfScalar(lvalue: IdxLVal, Loc: IVExpr->getExprLoc());
5866 IdxVal = Builder.CreateIntCast(V: IdxVal, DestTy: SizeTy, /*isSigned=*/false);
5867 for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
5868 const Expr *PrivateExpr = Privates[I];
5869 const Expr *OrigExpr = Shareds[I];
5870 const Expr *CopyArrayElem = CopyArrayElems[I];
5871 OpaqueValueMapping IdxMapping(
5872 *this,
5873 cast<OpaqueValueExpr>(
5874 Val: cast<ArraySubscriptExpr>(Val: CopyArrayElem)->getIdx()),
5875 RValue::get(V: IdxVal));
5876 LValue DestLVal = EmitLValue(E: CopyArrayElem);
5877 LValue SrcLVal = EmitLValue(E: OrigExpr);
5878 EmitOMPCopy(
5879 OriginalType: PrivateExpr->getType(), DestAddr: DestLVal.getAddress(), SrcAddr: SrcLVal.getAddress(),
5880 DestVD: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: LHSs[I])->getDecl()),
5881 SrcVD: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: RHSs[I])->getDecl()), Copy: CopyOps[I]);
5882 }
5883 }
5884 EmitBranch(Block: BreakContinueStack.back().ContinueBlock.getBlock());
5885 if (IsInclusive) {
5886 EmitBlock(BB: OMPScanExitBlock);
5887 EmitBranch(Block: BreakContinueStack.back().ContinueBlock.getBlock());
5888 }
5889 EmitBlock(BB: OMPScanDispatch);
5890 if (!OMPFirstScanLoop) {
5891 // Emit red = buffer[i]; at the entrance to the scan phase.
5892 const auto *IVExpr = cast<OMPLoopDirective>(Val: ParentDir)
5893 .getIterationVariable()
5894 ->IgnoreParenImpCasts();
5895 LValue IdxLVal = EmitLValue(E: IVExpr);
5896 llvm::Value *IdxVal = EmitLoadOfScalar(lvalue: IdxLVal, Loc: IVExpr->getExprLoc());
5897 IdxVal = Builder.CreateIntCast(V: IdxVal, DestTy: SizeTy, /*isSigned=*/false);
5898 llvm::BasicBlock *ExclusiveExitBB = nullptr;
5899 if (!IsInclusive) {
5900 llvm::BasicBlock *ContBB = createBasicBlock(name: "omp.exclusive.dec");
5901 ExclusiveExitBB = createBasicBlock(name: "omp.exclusive.copy.exit");
5902 llvm::Value *Cmp = Builder.CreateIsNull(Arg: IdxVal);
5903 Builder.CreateCondBr(Cond: Cmp, True: ExclusiveExitBB, False: ContBB);
5904 EmitBlock(BB: ContBB);
5905 // Use idx - 1 iteration for exclusive scan.
5906 IdxVal = Builder.CreateNUWSub(LHS: IdxVal, RHS: llvm::ConstantInt::get(Ty: SizeTy, V: 1));
5907 }
5908 for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
5909 const Expr *PrivateExpr = Privates[I];
5910 const Expr *OrigExpr = Shareds[I];
5911 const Expr *CopyArrayElem = CopyArrayElems[I];
5912 OpaqueValueMapping IdxMapping(
5913 *this,
5914 cast<OpaqueValueExpr>(
5915 Val: cast<ArraySubscriptExpr>(Val: CopyArrayElem)->getIdx()),
5916 RValue::get(V: IdxVal));
5917 LValue SrcLVal = EmitLValue(E: CopyArrayElem);
5918 LValue DestLVal = EmitLValue(E: OrigExpr);
5919 EmitOMPCopy(
5920 OriginalType: PrivateExpr->getType(), DestAddr: DestLVal.getAddress(), SrcAddr: SrcLVal.getAddress(),
5921 DestVD: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: LHSs[I])->getDecl()),
5922 SrcVD: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: RHSs[I])->getDecl()), Copy: CopyOps[I]);
5923 }
5924 if (!IsInclusive) {
5925 EmitBlock(BB: ExclusiveExitBB);
5926 }
5927 }
5928 EmitBranch(Block: (OMPFirstScanLoop == IsInclusive) ? OMPBeforeScanBlock
5929 : OMPAfterScanBlock);
5930 EmitBlock(BB: OMPAfterScanBlock);
5931}
5932
/// Emit the loop nest for an OpenMP 'distribute' construct (also reached for
/// combined constructs and for "loop bind(teams)").
///
/// \param S the loop directive being emitted.
/// \param CodeGenLoop callback that emits the innermost loop body (or the
///        rest of a combined pragma).
/// \param IncExpr expression used to advance the iteration variable on each
///        trip of the inner loop.
///
/// Privatizes loop bounds/counters, selects the dist_schedule, then emits
/// either a single statically scheduled loop or an outer dispatch loop that
/// repeatedly requests chunks from the runtime.
void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,
                                            const CodeGenLoopTy &CodeGenLoop,
                                            Expr *IncExpr) {
  // Emit the loop iteration variable.
  const auto *IVExpr = cast<DeclRefExpr>(Val: S.getIterationVariable());
  const auto *IVDecl = cast<VarDecl>(Val: IVExpr->getDecl());
  EmitVarDecl(D: *IVDecl);

  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate iterations count on each
  // iteration (e.g., it is foldable into a constant).
  if (const auto *LIExpr = dyn_cast<DeclRefExpr>(Val: S.getLastIteration())) {
    EmitVarDecl(D: *cast<VarDecl>(Val: LIExpr->getDecl()));
    // Emit calculation of the iterations count.
    EmitIgnoredExpr(E: S.getCalcLastIteration());
  }

  CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();

  // Set when a lastprivate clause is present; the final copies are emitted
  // after the loop, guarded by the is-last-iteration flag (IL).
  bool HasLastprivateClause = false;
  // Check pre-condition.
  {
    OMPLoopScope PreInitScope(*this, S);
    // Skip the entire loop if we don't meet the precondition.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    if (ConstantFoldsToSimpleInteger(Cond: S.getPreCond(), Result&: CondConstant)) {
      if (!CondConstant)
        return;
    } else {
      llvm::BasicBlock *ThenBlock = createBasicBlock(name: "omp.precond.then");
      ContBlock = createBasicBlock(name: "omp.precond.end");
      emitPreCond(CGF&: *this, S, Cond: S.getPreCond(), TrueBlock: ThenBlock, FalseBlock: ContBlock,
                  TrueCount: getProfileCount(S: &S));
      EmitBlock(BB: ThenBlock);
      incrementProfileCounter(S: &S);
    }

    emitAlignedClause(CGF&: *this, D: S);
    // Emit 'then' code.
    {
      // Emit helper vars inits.
      // For loop-bound-sharing directives (e.g. 'distribute parallel for')
      // the combined bounds are used so the inner 'for' shares the chunk.

      LValue LB = EmitOMPHelperVar(
          CGF&: *this, Helper: cast<DeclRefExpr>(
                     Val: (isOpenMPLoopBoundSharingDirective(Kind: S.getDirectiveKind())
                          ? S.getCombinedLowerBoundVariable()
                          : S.getLowerBoundVariable())));
      LValue UB = EmitOMPHelperVar(
          CGF&: *this, Helper: cast<DeclRefExpr>(
                     Val: (isOpenMPLoopBoundSharingDirective(Kind: S.getDirectiveKind())
                          ? S.getCombinedUpperBoundVariable()
                          : S.getUpperBoundVariable())));
      LValue ST =
          EmitOMPHelperVar(CGF&: *this, Helper: cast<DeclRefExpr>(Val: S.getStrideVariable()));
      LValue IL =
          EmitOMPHelperVar(CGF&: *this, Helper: cast<DeclRefExpr>(Val: S.getIsLastIterVariable()));

      OMPPrivateScope LoopScope(*this);
      if (EmitOMPFirstprivateClause(D: S, PrivateScope&: LoopScope)) {
        // Emit implicit barrier to synchronize threads and avoid data races
        // on initialization of firstprivate variables and post-update of
        // lastprivate variables.
        CGM.getOpenMPRuntime().emitBarrierCall(
            CGF&: *this, Loc: S.getBeginLoc(), Kind: OMPD_unknown, /*EmitChecks=*/false,
            /*ForceSimpleCall=*/true);
      }
      EmitOMPPrivateClause(D: S, PrivateScope&: LoopScope);
      // Reductions are initialized here only for the standalone simd case;
      // parallel/teams variants handle them in their own emitters.
      if (isOpenMPSimdDirective(DKind: S.getDirectiveKind()) &&
          !isOpenMPParallelDirective(DKind: S.getDirectiveKind()) &&
          !isOpenMPTeamsDirective(DKind: S.getDirectiveKind()))
        EmitOMPReductionClauseInit(D: S, PrivateScope&: LoopScope);
      HasLastprivateClause = EmitOMPLastprivateClauseInit(D: S, PrivateScope&: LoopScope);
      EmitOMPPrivateLoopCounters(S, LoopScope);
      (void)LoopScope.Privatize();
      if (isOpenMPTargetExecutionDirective(DKind: S.getDirectiveKind()))
        CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF&: *this, D: S);

      // Detect the distribute schedule kind and chunk.
      llvm::Value *Chunk = nullptr;
      OpenMPDistScheduleClauseKind ScheduleKind = OMPC_DIST_SCHEDULE_unknown;
      if (const auto *C = S.getSingleClause<OMPDistScheduleClause>()) {
        ScheduleKind = C->getDistScheduleKind();
        if (const Expr *Ch = C->getChunkSize()) {
          Chunk = EmitScalarExpr(E: Ch);
          // Normalize the chunk to the iteration variable's type.
          Chunk = EmitScalarConversion(Src: Chunk, SrcTy: Ch->getType(),
                                       DstTy: S.getIterationVariable()->getType(),
                                       Loc: S.getBeginLoc());
        }
      } else {
        // Default behaviour for dist_schedule clause.
        CGM.getOpenMPRuntime().getDefaultDistScheduleAndChunk(
            CGF&: *this, S, ScheduleKind, Chunk);
      }
      const unsigned IVSize = getContext().getTypeSize(T: IVExpr->getType());
      const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

      // OpenMP [2.10.8, distribute Construct, Description]
      // If dist_schedule is specified, kind must be static. If specified,
      // iterations are divided into chunks of size chunk_size, chunks are
      // assigned to the teams of the league in a round-robin fashion in the
      // order of the team number. When no chunk_size is specified, the
      // iteration space is divided into chunks that are approximately equal
      // in size, and at most one chunk is distributed to each team of the
      // league. The size of the chunks is unspecified in this case.
      bool StaticChunked =
          RT.isStaticChunked(ScheduleKind, /* Chunked */ Chunk != nullptr) &&
          isOpenMPLoopBoundSharingDirective(Kind: S.getDirectiveKind());
      if (RT.isStaticNonchunked(ScheduleKind,
                                /* Chunked */ Chunk != nullptr) ||
          StaticChunked) {
        CGOpenMPRuntime::StaticRTInput StaticInit(
            IVSize, IVSigned, /* Ordered = */ false, IL.getAddress(),
            LB.getAddress(), UB.getAddress(), ST.getAddress(),
            StaticChunked ? Chunk : nullptr);
        RT.emitDistributeStaticInit(CGF&: *this, Loc: S.getBeginLoc(), SchedKind: ScheduleKind,
                                    Values: StaticInit);
        JumpDest LoopExit =
            getJumpDestInCurrentScope(Target: createBasicBlock(name: "omp.loop.exit"));
        // UB = min(UB, GlobalUB);
        EmitIgnoredExpr(E: isOpenMPLoopBoundSharingDirective(Kind: S.getDirectiveKind())
                            ? S.getCombinedEnsureUpperBound()
                            : S.getEnsureUpperBound());
        // IV = LB;
        EmitIgnoredExpr(E: isOpenMPLoopBoundSharingDirective(Kind: S.getDirectiveKind())
                            ? S.getCombinedInit()
                            : S.getInit());

        const Expr *Cond =
            isOpenMPLoopBoundSharingDirective(Kind: S.getDirectiveKind())
                ? S.getCombinedCond()
                : S.getCond();

        if (StaticChunked)
          Cond = S.getCombinedDistCond();

        // For static unchunked schedules generate:
        //
        //  1. For distribute alone, codegen
        //    while (idx <= UB) {
        //      BODY;
        //      ++idx;
        //    }
        //
        //  2. When combined with 'for' (e.g. as in 'distribute parallel for')
        //    while (idx <= UB) {
        //      <CodeGen rest of pragma>(LB, UB);
        //      idx += ST;
        //    }
        //
        // For static chunk one schedule generate:
        //
        // while (IV <= GlobalUB) {
        //   <CodeGen rest of pragma>(LB, UB);
        //   LB += ST;
        //   UB += ST;
        //   UB = min(UB, GlobalUB);
        //   IV = LB;
        // }
        //
        emitCommonSimdLoop(
            CGF&: *this, S,
            SimdInitGen: [&S](CodeGenFunction &CGF, PrePostActionTy &) {
              if (isOpenMPSimdDirective(DKind: S.getDirectiveKind()))
                CGF.EmitOMPSimdInit(D: S);
            },
            BodyCodeGen: [&S, &LoopScope, Cond, IncExpr, LoopExit, &CodeGenLoop,
             StaticChunked](CodeGenFunction &CGF, PrePostActionTy &) {
              CGF.EmitOMPInnerLoop(
                  S, RequiresCleanup: LoopScope.requiresCleanups(), LoopCond: Cond, IncExpr,
                  BodyGen: [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
                    CodeGenLoop(CGF, S, LoopExit);
                  },
                  PostIncGen: [&S, StaticChunked](CodeGenFunction &CGF) {
                    // For chunked schedules advance to the next chunk after
                    // each trip of the outer while-loop.
                    if (StaticChunked) {
                      CGF.EmitIgnoredExpr(E: S.getCombinedNextLowerBound());
                      CGF.EmitIgnoredExpr(E: S.getCombinedNextUpperBound());
                      CGF.EmitIgnoredExpr(E: S.getCombinedEnsureUpperBound());
                      CGF.EmitIgnoredExpr(E: S.getCombinedInit());
                    }
                  });
            });
        EmitBlock(BB: LoopExit.getBlock());
        // Tell the runtime we are done.
        RT.emitForStaticFinish(CGF&: *this, Loc: S.getEndLoc(), DKind: OMPD_distribute);
      } else {
        // Emit the outer loop, which requests its work chunk [LB..UB] from
        // runtime and runs the inner loop to process it.
        const OMPLoopArguments LoopArguments = {
            LB.getAddress(), UB.getAddress(), ST.getAddress(), IL.getAddress(),
            Chunk};
        EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, LoopArgs: LoopArguments,
                                   CodeGenLoopContent: CodeGenLoop);
      }
      if (isOpenMPSimdDirective(DKind: S.getDirectiveKind())) {
        EmitOMPSimdFinal(D: S, CondGen: [IL, &S](CodeGenFunction &CGF) {
          return CGF.Builder.CreateIsNotNull(
              Arg: CGF.EmitLoadOfScalar(lvalue: IL, Loc: S.getBeginLoc()));
        });
      }
      if (isOpenMPSimdDirective(DKind: S.getDirectiveKind()) &&
          !isOpenMPParallelDirective(DKind: S.getDirectiveKind()) &&
          !isOpenMPTeamsDirective(DKind: S.getDirectiveKind())) {
        EmitOMPReductionClauseFinal(D: S, ReductionKind: OMPD_simd);
        // Emit post-update of the reduction variables if IsLastIter != 0.
        emitPostUpdateForReductionClause(
            CGF&: *this, D: S, CondGen: [IL, &S](CodeGenFunction &CGF) {
              return CGF.Builder.CreateIsNotNull(
                  Arg: CGF.EmitLoadOfScalar(lvalue: IL, Loc: S.getBeginLoc()));
            });
      }
      // Emit final copy of the lastprivate variables if IsLastIter != 0.
      if (HasLastprivateClause) {
        EmitOMPLastprivateClauseFinal(
            D: S, /*NoFinals=*/false,
            IsLastIterCond: Builder.CreateIsNotNull(Arg: EmitLoadOfScalar(lvalue: IL, Loc: S.getBeginLoc())));
      }
    }

    // We're now done with the loop, so jump to the continuation block.
    if (ContBlock) {
      EmitBranch(Block: ContBlock);
      EmitBlock(BB: ContBlock, IsFinished: true);
    }
  }
}
6161
6162// Pass OMPLoopDirective (instead of OMPDistributeDirective) to make this
6163// function available for "loop bind(teams)", which maps to "distribute".
6164static void emitOMPDistributeDirective(const OMPLoopDirective &S,
6165 CodeGenFunction &CGF,
6166 CodeGenModule &CGM) {
6167 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
6168 CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitOMPLoopBodyWithStopPoint, IncExpr: S.getInc());
6169 };
6170 OMPLexicalScope Scope(CGF, S, OMPD_unknown);
6171 CGM.getOpenMPRuntime().emitInlinedDirective(CGF, InnermostKind: OMPD_distribute, CodeGen);
6172}
6173
/// Emit a standalone '#pragma omp distribute' directive by forwarding to the
/// shared helper (which also serves "loop bind(teams)").
void CodeGenFunction::EmitOMPDistributeDirective(
    const OMPDistributeDirective &S) {
  emitOMPDistributeDirective(S, CGF&: *this, CGM);
}
6178
6179static llvm::Function *
6180emitOutlinedOrderedFunction(CodeGenModule &CGM, const CapturedStmt *S,
6181 const OMPExecutableDirective &D) {
6182 CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
6183 CodeGenFunction::CGCapturedStmtInfo CapStmtInfo;
6184 CGF.CapturedStmtInfo = &CapStmtInfo;
6185 llvm::Function *Fn = CGF.GenerateOpenMPCapturedStmtFunction(S: *S, D);
6186 Fn->setDoesNotRecurse();
6187 return Fn;
6188}
6189
6190template <typename T>
6191static void emitRestoreIP(CodeGenFunction &CGF, const T *C,
6192 llvm::OpenMPIRBuilder::InsertPointTy AllocaIP,
6193 llvm::OpenMPIRBuilder &OMPBuilder) {
6194
6195 unsigned NumLoops = C->getNumLoops();
6196 QualType Int64Ty = CGF.CGM.getContext().getIntTypeForBitwidth(
6197 /*DestWidth=*/64, /*Signed=*/1);
6198 llvm::SmallVector<llvm::Value *> StoreValues;
6199 for (unsigned I = 0; I < NumLoops; I++) {
6200 const Expr *CounterVal = C->getLoopData(I);
6201 assert(CounterVal);
6202 llvm::Value *StoreValue = CGF.EmitScalarConversion(
6203 Src: CGF.EmitScalarExpr(E: CounterVal), SrcTy: CounterVal->getType(), DstTy: Int64Ty,
6204 Loc: CounterVal->getExprLoc());
6205 StoreValues.emplace_back(Args&: StoreValue);
6206 }
6207 OMPDoacrossKind<T> ODK;
6208 bool IsDependSource = ODK.isSource(C);
6209 CGF.Builder.restoreIP(
6210 IP: OMPBuilder.createOrderedDepend(Loc: CGF.Builder, AllocaIP, NumLoops,
6211 StoreValues, Name: ".cnt.addr", IsDependSource));
6212}
6213
/// Emit '#pragma omp ordered'. Two forms exist:
///  - doacross form (depend/doacross clauses, no associated statement):
///    lowered to doacross runtime calls;
///  - region form (threads/simd clause or no clause): the associated
///    statement is emitted inside an ordered region, outlined when the simd
///    clause is present.
/// Both forms are emitted via the OpenMPIRBuilder when it is enabled,
/// otherwise via the classic CGOpenMPRuntime path.
void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;

    if (S.hasClausesOfKind<OMPDependClause>() ||
        S.hasClausesOfKind<OMPDoacrossClause>()) {
      // The ordered directive with depend clause.
      assert(!S.hasAssociatedStmt() && "No associated statement must be in "
                                       "ordered depend|doacross construct.");
      InsertPointTy AllocaIP(AllocaInsertPt->getParent(),
                             AllocaInsertPt->getIterator());
      for (const auto *DC : S.getClausesOfKind<OMPDependClause>())
        emitRestoreIP(CGF&: *this, C: DC, AllocaIP, OMPBuilder);
      for (const auto *DC : S.getClausesOfKind<OMPDoacrossClause>())
        emitRestoreIP(CGF&: *this, C: DC, AllocaIP, OMPBuilder);
    } else {
      // The ordered directive with threads or simd clause, or without clause.
      // Without clause, it behaves as if the threads clause is specified.
      const auto *C = S.getSingleClause<OMPSIMDClause>();

      // Finalization callback for the region created by the IRBuilder.
      auto FiniCB = [this](InsertPointTy IP) {
        OMPBuilderCBHelpers::FinalizeOMPRegion(CGF&: *this, IP);
        return llvm::Error::success();
      };

      // Body callback: with 'simd' the captured statement is outlined and
      // called; otherwise it is emitted inline in the region.
      auto BodyGenCB = [&S, C, this](InsertPointTy AllocaIP,
                                     InsertPointTy CodeGenIP) {
        Builder.restoreIP(IP: CodeGenIP);

        const CapturedStmt *CS = S.getInnermostCapturedStmt();
        if (C) {
          llvm::BasicBlock *FiniBB = splitBBWithSuffix(
              Builder, /*CreateBranch=*/false, Suffix: ".ordered.after");
          llvm::SmallVector<llvm::Value *, 16> CapturedVars;
          GenerateOpenMPCapturedVars(S: *CS, CapturedVars);
          llvm::Function *OutlinedFn = emitOutlinedOrderedFunction(CGM, S: CS, D: S);
          assert(S.getBeginLoc().isValid() &&
                 "Outlined function call location must be valid.");
          ApplyDebugLocation::CreateDefaultArtificial(CGF&: *this, TemporaryLocation: S.getBeginLoc());
          OMPBuilderCBHelpers::EmitCaptureStmt(CGF&: *this, CodeGenIP, FiniBB&: *FiniBB,
                                               Fn: OutlinedFn, Args: CapturedVars);
        } else {
          OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
              CGF&: *this, RegionBodyStmt: CS->getCapturedStmt(), AllocaIP, CodeGenIP, RegionName: "ordered");
        }
        return llvm::Error::success();
      };

      OMPLexicalScope Scope(*this, S, OMPD_unknown);
      llvm::OpenMPIRBuilder::InsertPointTy AfterIP = cantFail(
          ValOrErr: OMPBuilder.createOrderedThreadsSimd(Loc: Builder, BodyGenCB, FiniCB, IsThreads: !C));
      Builder.restoreIP(IP: AfterIP);
    }
    return;
  }

  // Classic runtime path below.
  if (S.hasClausesOfKind<OMPDependClause>()) {
    assert(!S.hasAssociatedStmt() &&
           "No associated statement must be in ordered depend construct.");
    for (const auto *DC : S.getClausesOfKind<OMPDependClause>())
      CGM.getOpenMPRuntime().emitDoacrossOrdered(CGF&: *this, C: DC);
    return;
  }
  if (S.hasClausesOfKind<OMPDoacrossClause>()) {
    assert(!S.hasAssociatedStmt() &&
           "No associated statement must be in ordered doacross construct.");
    for (const auto *DC : S.getClausesOfKind<OMPDoacrossClause>())
      CGM.getOpenMPRuntime().emitDoacrossOrdered(CGF&: *this, C: DC);
    return;
  }
  const auto *C = S.getSingleClause<OMPSIMDClause>();
  auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF,
                                 PrePostActionTy &Action) {
    const CapturedStmt *CS = S.getInnermostCapturedStmt();
    if (C) {
      // With 'simd' the body is outlined and called directly.
      llvm::SmallVector<llvm::Value *, 16> CapturedVars;
      CGF.GenerateOpenMPCapturedVars(S: *CS, CapturedVars);
      llvm::Function *OutlinedFn = emitOutlinedOrderedFunction(CGM, S: CS, D: S);
      CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc: S.getBeginLoc(),
                                                      OutlinedFn, Args: CapturedVars);
    } else {
      Action.Enter(CGF);
      CGF.EmitStmt(S: CS->getCapturedStmt());
    }
  };
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  CGM.getOpenMPRuntime().emitOrderedRegion(CGF&: *this, OrderedOpGen: CodeGen, Loc: S.getBeginLoc(), IsThreads: !C);
}
6303
6304static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val,
6305 QualType SrcType, QualType DestType,
6306 SourceLocation Loc) {
6307 assert(CGF.hasScalarEvaluationKind(DestType) &&
6308 "DestType must have scalar evaluation kind.");
6309 assert(!Val.isAggregate() && "Must be a scalar or complex.");
6310 return Val.isScalar() ? CGF.EmitScalarConversion(Src: Val.getScalarVal(), SrcTy: SrcType,
6311 DstTy: DestType, Loc)
6312 : CGF.EmitComplexToScalarConversion(
6313 Src: Val.getComplexVal(), SrcTy: SrcType, DstTy: DestType, Loc);
6314}
6315
6316static CodeGenFunction::ComplexPairTy
6317convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType,
6318 QualType DestType, SourceLocation Loc) {
6319 assert(CGF.getEvaluationKind(DestType) == TEK_Complex &&
6320 "DestType must have complex evaluation kind.");
6321 CodeGenFunction::ComplexPairTy ComplexVal;
6322 if (Val.isScalar()) {
6323 // Convert the input element to the element type of the complex.
6324 QualType DestElementType =
6325 DestType->castAs<ComplexType>()->getElementType();
6326 llvm::Value *ScalarVal = CGF.EmitScalarConversion(
6327 Src: Val.getScalarVal(), SrcTy: SrcType, DstTy: DestElementType, Loc);
6328 ComplexVal = CodeGenFunction::ComplexPairTy(
6329 ScalarVal, llvm::Constant::getNullValue(Ty: ScalarVal->getType()));
6330 } else {
6331 assert(Val.isComplex() && "Must be a scalar or complex.");
6332 QualType SrcElementType = SrcType->castAs<ComplexType>()->getElementType();
6333 QualType DestElementType =
6334 DestType->castAs<ComplexType>()->getElementType();
6335 ComplexVal.first = CGF.EmitScalarConversion(
6336 Src: Val.getComplexVal().first, SrcTy: SrcElementType, DstTy: DestElementType, Loc);
6337 ComplexVal.second = CGF.EmitScalarConversion(
6338 Src: Val.getComplexVal().second, SrcTy: SrcElementType, DstTy: DestElementType, Loc);
6339 }
6340 return ComplexVal;
6341}
6342
6343static void emitSimpleAtomicStore(CodeGenFunction &CGF, llvm::AtomicOrdering AO,
6344 LValue LVal, RValue RVal) {
6345 if (LVal.isGlobalReg())
6346 CGF.EmitStoreThroughGlobalRegLValue(Src: RVal, Dst: LVal);
6347 else
6348 CGF.EmitAtomicStore(rvalue: RVal, lvalue: LVal, AO, IsVolatile: LVal.isVolatile(), /*isInit=*/false);
6349}
6350
6351static RValue emitSimpleAtomicLoad(CodeGenFunction &CGF,
6352 llvm::AtomicOrdering AO, LValue LVal,
6353 SourceLocation Loc) {
6354 if (LVal.isGlobalReg())
6355 return CGF.EmitLoadOfLValue(V: LVal, Loc);
6356 return CGF.EmitAtomicLoad(
6357 lvalue: LVal, loc: Loc, AO: llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(SuccessOrdering: AO),
6358 IsVolatile: LVal.isVolatile());
6359}
6360
6361void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal,
6362 QualType RValTy, SourceLocation Loc) {
6363 switch (getEvaluationKind(T: LVal.getType())) {
6364 case TEK_Scalar:
6365 EmitStoreThroughLValue(Src: RValue::get(V: convertToScalarValue(
6366 CGF&: *this, Val: RVal, SrcType: RValTy, DestType: LVal.getType(), Loc)),
6367 Dst: LVal);
6368 break;
6369 case TEK_Complex:
6370 EmitStoreOfComplex(
6371 V: convertToComplexValue(CGF&: *this, Val: RVal, SrcType: RValTy, DestType: LVal.getType(), Loc), dest: LVal,
6372 /*isInit=*/false);
6373 break;
6374 case TEK_Aggregate:
6375 llvm_unreachable("Must be a scalar or complex.");
6376 }
6377}
6378
/// Emit 'v = x;' for '#pragma omp atomic read': load X atomically with the
/// requested ordering, emit any spec-mandated acquire flush, then store the
/// loaded value into V non-atomically.
static void emitOMPAtomicReadExpr(CodeGenFunction &CGF, llvm::AtomicOrdering AO,
                                  const Expr *X, const Expr *V,
                                  SourceLocation Loc) {
  // v = x;
  assert(V->isLValue() && "V of 'omp atomic read' is not lvalue");
  assert(X->isLValue() && "X of 'omp atomic read' is not lvalue");
  LValue XLValue = CGF.EmitLValue(E: X);
  LValue VLValue = CGF.EmitLValue(E: V);
  RValue Res = emitSimpleAtomicLoad(CGF, AO, LVal: XLValue, Loc);
  // OpenMP, 2.17.7, atomic Construct
  // If the read or capture clause is specified and the acquire, acq_rel, or
  // seq_cst clause is specified then the strong flush on exit from the atomic
  // operation is also an acquire flush.
  switch (AO) {
  case llvm::AtomicOrdering::Acquire:
  case llvm::AtomicOrdering::AcquireRelease:
  case llvm::AtomicOrdering::SequentiallyConsistent:
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, Vars: {}, Loc,
                                         AO: llvm::AtomicOrdering::Acquire);
    break;
  case llvm::AtomicOrdering::Monotonic:
  case llvm::AtomicOrdering::Release:
    // No acquire flush required for these orderings.
    break;
  case llvm::AtomicOrdering::NotAtomic:
  case llvm::AtomicOrdering::Unordered:
    llvm_unreachable("Unexpected ordering.");
  }
  CGF.emitOMPSimpleStore(LVal: VLValue, RVal: Res, RValTy: X->getType().getNonReferenceType(), Loc);
  CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, LHS: V);
}
6409
/// Emit 'x = expr;' for '#pragma omp atomic write': store the evaluated
/// expression into X atomically with the requested ordering, then emit any
/// spec-mandated release flush.
static void emitOMPAtomicWriteExpr(CodeGenFunction &CGF,
                                   llvm::AtomicOrdering AO, const Expr *X,
                                   const Expr *E, SourceLocation Loc) {
  // x = expr;
  assert(X->isLValue() && "X of 'omp atomic write' is not lvalue");
  emitSimpleAtomicStore(CGF, AO, LVal: CGF.EmitLValue(E: X), RVal: CGF.EmitAnyExpr(E));
  CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, LHS: X);
  // OpenMP, 2.17.7, atomic Construct
  // If the write, update, or capture clause is specified and the release,
  // acq_rel, or seq_cst clause is specified then the strong flush on entry to
  // the atomic operation is also a release flush.
  switch (AO) {
  case llvm::AtomicOrdering::Release:
  case llvm::AtomicOrdering::AcquireRelease:
  case llvm::AtomicOrdering::SequentiallyConsistent:
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, Vars: {}, Loc,
                                         AO: llvm::AtomicOrdering::Release);
    break;
  case llvm::AtomicOrdering::Acquire:
  case llvm::AtomicOrdering::Monotonic:
    // No release flush required for these orderings.
    break;
  case llvm::AtomicOrdering::NotAtomic:
  case llvm::AtomicOrdering::Unordered:
    llvm_unreachable("Unexpected ordering.");
  }
}
6436
/// Try to lower an OpenMP atomic update on \p X to a single LLVM 'atomicrmw'
/// instruction.
///
/// Returns {true, result-of-the-rmw} on success, or {false, null} when the
/// operation cannot be expressed as an atomicrmw (the caller then falls back
/// to a compare-and-swap loop).
static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X,
                                                RValue Update,
                                                BinaryOperatorKind BO,
                                                llvm::AtomicOrdering AO,
                                                bool IsXLHSInRHSPart) {
  ASTContext &Context = CGF.getContext();
  // Allow atomicrmw only if 'x' and 'update' are integer values, lvalue for 'x'
  // expression is simple and atomic is allowed for the given type for the
  // target platform.
  if (BO == BO_Comma || !Update.isScalar() || !X.isSimple() ||
      (!isa<llvm::ConstantInt>(Val: Update.getScalarVal()) &&
       (Update.getScalarVal()->getType() != X.getAddress().getElementType())) ||
      !Context.getTargetInfo().hasBuiltinAtomic(
          AtomicSizeInBits: Context.getTypeSize(T: X.getType()), AlignmentInBits: Context.toBits(CharSize: X.getAlignment())))
    return std::make_pair(x: false, y: RValue::get(V: nullptr));

  // Integers always qualify; floats only for add/sub with a power-of-two
  // store size.
  auto &&CheckAtomicSupport = [&CGF](llvm::Type *T, BinaryOperatorKind BO) {
    if (T->isIntegerTy())
      return true;

    if (T->isFloatingPointTy() && (BO == BO_Add || BO == BO_Sub))
      return llvm::isPowerOf2_64(Value: CGF.CGM.getDataLayout().getTypeStoreSize(Ty: T));

    return false;
  };

  if (!CheckAtomicSupport(Update.getScalarVal()->getType(), BO) ||
      !CheckAtomicSupport(X.getAddress().getElementType(), BO))
    return std::make_pair(x: false, y: RValue::get(V: nullptr));

  bool IsInteger = X.getAddress().getElementType()->isIntegerTy();
  llvm::AtomicRMWInst::BinOp RMWOp;
  switch (BO) {
  case BO_Add:
    RMWOp = IsInteger ? llvm::AtomicRMWInst::Add : llvm::AtomicRMWInst::FAdd;
    break;
  case BO_Sub:
    // 'x = expr - x' is not a subtraction of 'x'; atomicrmw cannot express it.
    if (!IsXLHSInRHSPart)
      return std::make_pair(x: false, y: RValue::get(V: nullptr));
    RMWOp = IsInteger ? llvm::AtomicRMWInst::Sub : llvm::AtomicRMWInst::FSub;
    break;
  case BO_And:
    RMWOp = llvm::AtomicRMWInst::And;
    break;
  case BO_Or:
    RMWOp = llvm::AtomicRMWInst::Or;
    break;
  case BO_Xor:
    RMWOp = llvm::AtomicRMWInst::Xor;
    break;
  // 'x = x < e ? x : e' is min; 'x = e < x ? e : x' is max; the operand
  // order (IsXLHSInRHSPart) decides which one, and signedness picks the
  // signed/unsigned/float variant.
  case BO_LT:
    if (IsInteger)
      RMWOp = X.getType()->hasSignedIntegerRepresentation()
                  ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min
                                     : llvm::AtomicRMWInst::Max)
                  : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin
                                     : llvm::AtomicRMWInst::UMax);
    else
      RMWOp = IsXLHSInRHSPart ? llvm::AtomicRMWInst::FMin
                              : llvm::AtomicRMWInst::FMax;
    break;
  case BO_GT:
    if (IsInteger)
      RMWOp = X.getType()->hasSignedIntegerRepresentation()
                  ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max
                                     : llvm::AtomicRMWInst::Min)
                  : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax
                                     : llvm::AtomicRMWInst::UMin);
    else
      RMWOp = IsXLHSInRHSPart ? llvm::AtomicRMWInst::FMax
                              : llvm::AtomicRMWInst::FMin;
    break;
  case BO_Assign:
    RMWOp = llvm::AtomicRMWInst::Xchg;
    break;
  // No atomicrmw equivalent; fall back to compare-and-swap.
  case BO_Mul:
  case BO_Div:
  case BO_Rem:
  case BO_Shl:
  case BO_Shr:
  case BO_LAnd:
  case BO_LOr:
    return std::make_pair(x: false, y: RValue::get(V: nullptr));
  // Operators Sema never produces for an atomic update expression.
  case BO_PtrMemD:
  case BO_PtrMemI:
  case BO_LE:
  case BO_GE:
  case BO_EQ:
  case BO_NE:
  case BO_Cmp:
  case BO_AddAssign:
  case BO_SubAssign:
  case BO_AndAssign:
  case BO_OrAssign:
  case BO_XorAssign:
  case BO_MulAssign:
  case BO_DivAssign:
  case BO_RemAssign:
  case BO_ShlAssign:
  case BO_ShrAssign:
  case BO_Comma:
    llvm_unreachable("Unsupported atomic update operation");
  }
  llvm::Value *UpdateVal = Update.getScalarVal();
  if (auto *IC = dyn_cast<llvm::ConstantInt>(Val: UpdateVal)) {
    // Constant operands may need widening/conversion to the element type.
    if (IsInteger)
      UpdateVal = CGF.Builder.CreateIntCast(
          V: IC, DestTy: X.getAddress().getElementType(),
          isSigned: X.getType()->hasSignedIntegerRepresentation());
    else
      UpdateVal = CGF.Builder.CreateCast(Op: llvm::Instruction::CastOps::UIToFP, V: IC,
                                         DestTy: X.getAddress().getElementType());
  }
  llvm::AtomicRMWInst *Res =
      CGF.emitAtomicRMWInst(Op: RMWOp, Addr: X.getAddress(), Val: UpdateVal, Order: AO);
  return std::make_pair(x: true, y: RValue::get(V: Res));
}
6554
6555std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
6556 LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart,
6557 llvm::AtomicOrdering AO, SourceLocation Loc,
6558 const llvm::function_ref<RValue(RValue)> CommonGen) {
6559 // Update expressions are allowed to have the following forms:
6560 // x binop= expr; -> xrval + expr;
6561 // x++, ++x -> xrval + 1;
6562 // x--, --x -> xrval - 1;
6563 // x = x binop expr; -> xrval binop expr
6564 // x = expr Op x; - > expr binop xrval;
6565 auto Res = emitOMPAtomicRMW(CGF&: *this, X, Update: E, BO, AO, IsXLHSInRHSPart);
6566 if (!Res.first) {
6567 if (X.isGlobalReg()) {
6568 // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop
6569 // 'xrval'.
6570 EmitStoreThroughLValue(Src: CommonGen(EmitLoadOfLValue(V: X, Loc)), Dst: X);
6571 } else {
6572 // Perform compare-and-swap procedure.
6573 EmitAtomicUpdate(LVal: X, AO, UpdateOp: CommonGen, IsVolatile: X.getType().isVolatileQualified());
6574 }
6575 }
6576 return Res;
6577}
6578
/// Emit an '#pragma omp atomic update' (or clause-less 'atomic') construct.
///
/// \param AO Atomic ordering required for the update.
/// \param X The lvalue expression being atomically updated.
/// \param E The 'expr' operand of the update.
/// \param UE The update expression rewritten by Sema as a binary operator
///        whose operands are OpaqueValueExpr placeholders for 'x' and 'expr'.
/// \param IsXLHSInRHSPart True if 'x' is the LHS operand inside \p UE.
static void emitOMPAtomicUpdateExpr(CodeGenFunction &CGF,
                                    llvm::AtomicOrdering AO, const Expr *X,
                                    const Expr *E, const Expr *UE,
                                    bool IsXLHSInRHSPart, SourceLocation Loc) {
  assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
         "Update expr in 'atomic update' must be a binary operator.");
  const auto *BOUE = cast<BinaryOperator>(Val: UE->IgnoreImpCasts());
  // Update expressions are allowed to have the following forms:
  // x binop= expr; -> xrval + expr;
  // x++, ++x -> xrval + 1;
  // x--, --x -> xrval - 1;
  // x = x binop expr; -> xrval binop expr
  // x = expr Op x; - > expr binop xrval;
  assert(X->isLValue() && "X of 'omp atomic update' is not lvalue");
  LValue XLValue = CGF.EmitLValue(E: X);
  RValue ExprRValue = CGF.EmitAnyExpr(E);
  // Determine which opaque placeholder inside UE stands for 'x' and which
  // for 'expr', based on the operand order Sema recorded.
  const auto *LHS = cast<OpaqueValueExpr>(Val: BOUE->getLHS()->IgnoreImpCasts());
  const auto *RHS = cast<OpaqueValueExpr>(Val: BOUE->getRHS()->IgnoreImpCasts());
  const OpaqueValueExpr *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
  const OpaqueValueExpr *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
  // Fallback generator: re-evaluates UE with the current value of 'x' bound
  // to its placeholder; used when the update cannot become an atomicrmw.
  auto &&Gen = [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) {
    CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
    CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
    return CGF.EmitAnyExpr(E: UE);
  };
  (void)CGF.EmitOMPAtomicSimpleUpdateExpr(
      X: XLValue, E: ExprRValue, BO: BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, CommonGen: Gen);
  CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, LHS: X);
  // OpenMP, 2.17.7, atomic Construct
  // If the write, update, or capture clause is specified and the release,
  // acq_rel, or seq_cst clause is specified then the strong flush on entry to
  // the atomic operation is also a release flush.
  switch (AO) {
  case llvm::AtomicOrdering::Release:
  case llvm::AtomicOrdering::AcquireRelease:
  case llvm::AtomicOrdering::SequentiallyConsistent:
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, Vars: {}, Loc,
                                         AO: llvm::AtomicOrdering::Release);
    break;
  case llvm::AtomicOrdering::Acquire:
  case llvm::AtomicOrdering::Monotonic:
    break;
  case llvm::AtomicOrdering::NotAtomic:
  case llvm::AtomicOrdering::Unordered:
    llvm_unreachable("Unexpected ordering.");
  }
}
6626
6627static RValue convertToType(CodeGenFunction &CGF, RValue Value,
6628 QualType SourceType, QualType ResType,
6629 SourceLocation Loc) {
6630 switch (CGF.getEvaluationKind(T: ResType)) {
6631 case TEK_Scalar:
6632 return RValue::get(
6633 V: convertToScalarValue(CGF, Val: Value, SrcType: SourceType, DestType: ResType, Loc));
6634 case TEK_Complex: {
6635 auto Res = convertToComplexValue(CGF, Val: Value, SrcType: SourceType, DestType: ResType, Loc);
6636 return RValue::getComplex(V1: Res.first, V2: Res.second);
6637 }
6638 case TEK_Aggregate:
6639 break;
6640 }
6641 llvm_unreachable("Must be a scalar or complex.");
6642}
6643
/// Emit an '#pragma omp atomic capture' construct: atomically update (or
/// overwrite) 'x' and store either its old or new value into 'v'.
///
/// \param IsPostfixUpdate True when 'v' must receive the value of 'x'
///        *before* the update (e.g. 'v = x++'); false for the updated value.
/// \param V Lvalue expression receiving the captured value.
/// \param X Lvalue expression being atomically modified.
/// \param E The 'expr' operand.
/// \param UE Sema-rewritten update expression (binary operator over opaque
///        placeholders), or null when 'x' is simply overwritten with 'expr'.
/// \param IsXLHSInRHSPart True if 'x' is the LHS operand inside \p UE.
static void emitOMPAtomicCaptureExpr(CodeGenFunction &CGF,
                                     llvm::AtomicOrdering AO,
                                     bool IsPostfixUpdate, const Expr *V,
                                     const Expr *X, const Expr *E,
                                     const Expr *UE, bool IsXLHSInRHSPart,
                                     SourceLocation Loc) {
  assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue");
  assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue");
  RValue NewVVal;
  LValue VLValue = CGF.EmitLValue(E: V);
  LValue XLValue = CGF.EmitLValue(E: X);
  RValue ExprRValue = CGF.EmitAnyExpr(E);
  QualType NewVValType;
  if (UE) {
    // 'x' is updated with some additional value.
    assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
           "Update expr in 'atomic capture' must be a binary operator.");
    const auto *BOUE = cast<BinaryOperator>(Val: UE->IgnoreImpCasts());
    // Update expressions are allowed to have the following forms:
    // x binop= expr; -> xrval + expr;
    // x++, ++x -> xrval + 1;
    // x--, --x -> xrval - 1;
    // x = x binop expr; -> xrval binop expr
    // x = expr Op x; - > expr binop xrval;
    const auto *LHS = cast<OpaqueValueExpr>(Val: BOUE->getLHS()->IgnoreImpCasts());
    const auto *RHS = cast<OpaqueValueExpr>(Val: BOUE->getRHS()->IgnoreImpCasts());
    // Map the opaque placeholders back to 'x' and 'expr'.
    const OpaqueValueExpr *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
    NewVValType = XRValExpr->getType();
    const OpaqueValueExpr *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
    // Fallback generator for the non-atomicrmw path; also records the value
    // to be captured into 'v' (old or new 'x' depending on postfix-ness).
    auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr,
                  IsPostfixUpdate](RValue XRValue) {
      CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
      CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
      RValue Res = CGF.EmitAnyExpr(E: UE);
      NewVVal = IsPostfixUpdate ? XRValue : Res;
      return Res;
    };
    auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
        X: XLValue, E: ExprRValue, BO: BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, CommonGen: Gen);
    CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, LHS: X);
    if (Res.first) {
      // 'atomicrmw' instruction was generated.
      if (IsPostfixUpdate) {
        // Use old value from 'atomicrmw'.
        NewVVal = Res.second;
      } else {
        // 'atomicrmw' does not provide new value, so evaluate it using old
        // value of 'x'.
        CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
        CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, Res.second);
        NewVVal = CGF.EmitAnyExpr(E: UE);
      }
    }
  } else {
    // 'x' is simply rewritten with some 'expr'.
    NewVValType = X->getType().getNonReferenceType();
    ExprRValue = convertToType(CGF, Value: ExprRValue, SourceType: E->getType(),
                               ResType: X->getType().getNonReferenceType(), Loc);
    // The captured value is the old 'x' handed to us by the update machinery.
    auto &&Gen = [&NewVVal, ExprRValue](RValue XRValue) {
      NewVVal = XRValue;
      return ExprRValue;
    };
    // Try to perform atomicrmw xchg, otherwise simple exchange.
    auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
        X: XLValue, E: ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO,
        Loc, CommonGen: Gen);
    CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, LHS: X);
    if (Res.first) {
      // 'atomicrmw' instruction was generated.
      NewVVal = IsPostfixUpdate ? Res.second : ExprRValue;
    }
  }
  // Emit post-update store to 'v' of old/new 'x' value.
  CGF.emitOMPSimpleStore(LVal: VLValue, RVal: NewVVal, RValTy: NewVValType, Loc);
  CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, LHS: V);
  // OpenMP 5.1 removes the required flush for capture clause.
  if (CGF.CGM.getLangOpts().OpenMP < 51) {
    // OpenMP, 2.17.7, atomic Construct
    // If the write, update, or capture clause is specified and the release,
    // acq_rel, or seq_cst clause is specified then the strong flush on entry to
    // the atomic operation is also a release flush.
    // If the read or capture clause is specified and the acquire, acq_rel, or
    // seq_cst clause is specified then the strong flush on exit from the atomic
    // operation is also an acquire flush.
    switch (AO) {
    case llvm::AtomicOrdering::Release:
      CGF.CGM.getOpenMPRuntime().emitFlush(CGF, Vars: {}, Loc,
                                           AO: llvm::AtomicOrdering::Release);
      break;
    case llvm::AtomicOrdering::Acquire:
      CGF.CGM.getOpenMPRuntime().emitFlush(CGF, Vars: {}, Loc,
                                           AO: llvm::AtomicOrdering::Acquire);
      break;
    case llvm::AtomicOrdering::AcquireRelease:
    case llvm::AtomicOrdering::SequentiallyConsistent:
      CGF.CGM.getOpenMPRuntime().emitFlush(
          CGF, Vars: {}, Loc, AO: llvm::AtomicOrdering::AcquireRelease);
      break;
    case llvm::AtomicOrdering::Monotonic:
      break;
    case llvm::AtomicOrdering::NotAtomic:
    case llvm::AtomicOrdering::Unordered:
      llvm_unreachable("Unexpected ordering.");
    }
  }
}
6750
/// Emit an '#pragma omp atomic compare' construct via the OpenMPIRBuilder.
///
/// \param AO Ordering of the atomic operation; \p FailAO is the ordering for
///        the failure path of the compare-exchange, or NotAtomic when no
///        'fail' clause was given.
/// \param X The atomically compared/updated location; \p V and \p R are the
///        optional capture destinations for value and comparison result.
/// \param E The comparison operand; \p D the optional alternative value;
///        \p CE the condition expression (must be ==, <, or >).
static void emitOMPAtomicCompareExpr(
    CodeGenFunction &CGF, llvm::AtomicOrdering AO, llvm::AtomicOrdering FailAO,
    const Expr *X, const Expr *V, const Expr *R, const Expr *E, const Expr *D,
    const Expr *CE, bool IsXBinopExpr, bool IsPostfixUpdate, bool IsFailOnly,
    SourceLocation Loc) {
  llvm::OpenMPIRBuilder &OMPBuilder =
      CGF.CGM.getOpenMPRuntime().getOMPBuilder();

  // Classify the comparison: '==' maps to a compare-exchange, '<'/'>' map to
  // atomic min/max forms.
  OMPAtomicCompareOp Op;
  assert(isa<BinaryOperator>(CE) && "CE is not a BinaryOperator");
  switch (cast<BinaryOperator>(Val: CE)->getOpcode()) {
  case BO_EQ:
    Op = OMPAtomicCompareOp::EQ;
    break;
  case BO_LT:
    Op = OMPAtomicCompareOp::MIN;
    break;
  case BO_GT:
    Op = OMPAtomicCompareOp::MAX;
    break;
  default:
    llvm_unreachable("unsupported atomic compare binary operator");
  }

  LValue XLVal = CGF.EmitLValue(E: X);
  Address XAddr = XLVal.getAddress();

  // Emit an operand, converting it to the type of 'x' when needed so the
  // builder sees matching operand types.
  auto EmitRValueWithCastIfNeeded = [&CGF, Loc](const Expr *X, const Expr *E) {
    if (X->getType() == E->getType())
      return CGF.EmitScalarExpr(E);
    const Expr *NewE = E->IgnoreImplicitAsWritten();
    llvm::Value *V = CGF.EmitScalarExpr(E: NewE);
    if (NewE->getType() == X->getType())
      return V;
    return CGF.EmitScalarConversion(Src: V, SrcTy: NewE->getType(), DstTy: X->getType(), Loc);
  };

  llvm::Value *EVal = EmitRValueWithCastIfNeeded(X, E);
  llvm::Value *DVal = D ? EmitRValueWithCastIfNeeded(X, D) : nullptr;
  // Constant integer operands are additionally cast to the in-memory width
  // of 'x'.
  if (auto *CI = dyn_cast<llvm::ConstantInt>(Val: EVal))
    EVal = CGF.Builder.CreateIntCast(
        V: CI, DestTy: XLVal.getAddress().getElementType(),
        isSigned: E->getType()->hasSignedIntegerRepresentation());
  if (DVal)
    if (auto *CI = dyn_cast<llvm::ConstantInt>(Val: DVal))
      DVal = CGF.Builder.CreateIntCast(
          V: CI, DestTy: XLVal.getAddress().getElementType(),
          isSigned: D->getType()->hasSignedIntegerRepresentation());

  llvm::OpenMPIRBuilder::AtomicOpValue XOpVal{
      .Var: XAddr.emitRawPointer(CGF), .ElemTy: XAddr.getElementType(),
      .IsSigned: X->getType()->hasSignedIntegerRepresentation(),
      .IsVolatile: X->getType().isVolatileQualified()};
  llvm::OpenMPIRBuilder::AtomicOpValue VOpVal, ROpVal;
  if (V) {
    LValue LV = CGF.EmitLValue(E: V);
    Address Addr = LV.getAddress();
    VOpVal = {.Var: Addr.emitRawPointer(CGF), .ElemTy: Addr.getElementType(),
              .IsSigned: V->getType()->hasSignedIntegerRepresentation(),
              .IsVolatile: V->getType().isVolatileQualified()};
  }
  if (R) {
    LValue LV = CGF.EmitLValue(E: R);
    Address Addr = LV.getAddress();
    ROpVal = {.Var: Addr.emitRawPointer(CGF), .ElemTy: Addr.getElementType(),
              .IsSigned: R->getType()->hasSignedIntegerRepresentation(),
              .IsVolatile: R->getType().isVolatileQualified()};
  }

  if (FailAO == llvm::AtomicOrdering::NotAtomic) {
    // fail clause was not mentioned on the
    // "#pragma omp atomic compare" construct.
    CGF.Builder.restoreIP(IP: OMPBuilder.createAtomicCompare(
        Loc: CGF.Builder, X&: XOpVal, V&: VOpVal, R&: ROpVal, E: EVal, D: DVal, AO, Op, IsXBinopExpr,
        IsPostfixUpdate, IsFailOnly));
  } else
    CGF.Builder.restoreIP(IP: OMPBuilder.createAtomicCompare(
        Loc: CGF.Builder, X&: XOpVal, V&: VOpVal, R&: ROpVal, E: EVal, D: DVal, AO, Op, IsXBinopExpr,
        IsPostfixUpdate, IsFailOnly, Failure: FailAO));
}
6831
6832static void emitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
6833 llvm::AtomicOrdering AO,
6834 llvm::AtomicOrdering FailAO, bool IsPostfixUpdate,
6835 const Expr *X, const Expr *V, const Expr *R,
6836 const Expr *E, const Expr *UE, const Expr *D,
6837 const Expr *CE, bool IsXLHSInRHSPart,
6838 bool IsFailOnly, SourceLocation Loc) {
6839 switch (Kind) {
6840 case OMPC_read:
6841 emitOMPAtomicReadExpr(CGF, AO, X, V, Loc);
6842 break;
6843 case OMPC_write:
6844 emitOMPAtomicWriteExpr(CGF, AO, X, E, Loc);
6845 break;
6846 case OMPC_unknown:
6847 case OMPC_update:
6848 emitOMPAtomicUpdateExpr(CGF, AO, X, E, UE, IsXLHSInRHSPart, Loc);
6849 break;
6850 case OMPC_capture:
6851 emitOMPAtomicCaptureExpr(CGF, AO, IsPostfixUpdate, V, X, E, UE,
6852 IsXLHSInRHSPart, Loc);
6853 break;
6854 case OMPC_compare: {
6855 emitOMPAtomicCompareExpr(CGF, AO, FailAO, X, V, R, E, D, CE,
6856 IsXBinopExpr: IsXLHSInRHSPart, IsPostfixUpdate, IsFailOnly, Loc);
6857 break;
6858 }
6859 default:
6860 llvm_unreachable("Clause is not allowed in 'omp atomic'.");
6861 }
6862}
6863
6864void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
6865 llvm::AtomicOrdering AO = CGM.getOpenMPRuntime().getDefaultMemoryOrdering();
6866 // Fail Memory Clause Ordering.
6867 llvm::AtomicOrdering FailAO = llvm::AtomicOrdering::NotAtomic;
6868 bool MemOrderingSpecified = false;
6869 if (S.getSingleClause<OMPSeqCstClause>()) {
6870 AO = llvm::AtomicOrdering::SequentiallyConsistent;
6871 MemOrderingSpecified = true;
6872 } else if (S.getSingleClause<OMPAcqRelClause>()) {
6873 AO = llvm::AtomicOrdering::AcquireRelease;
6874 MemOrderingSpecified = true;
6875 } else if (S.getSingleClause<OMPAcquireClause>()) {
6876 AO = llvm::AtomicOrdering::Acquire;
6877 MemOrderingSpecified = true;
6878 } else if (S.getSingleClause<OMPReleaseClause>()) {
6879 AO = llvm::AtomicOrdering::Release;
6880 MemOrderingSpecified = true;
6881 } else if (S.getSingleClause<OMPRelaxedClause>()) {
6882 AO = llvm::AtomicOrdering::Monotonic;
6883 MemOrderingSpecified = true;
6884 }
6885 llvm::SmallSet<OpenMPClauseKind, 2> KindsEncountered;
6886 OpenMPClauseKind Kind = OMPC_unknown;
6887 for (const OMPClause *C : S.clauses()) {
6888 // Find first clause (skip seq_cst|acq_rel|aqcuire|release|relaxed clause,
6889 // if it is first).
6890 OpenMPClauseKind K = C->getClauseKind();
6891 // TBD
6892 if (K == OMPC_weak)
6893 return;
6894 if (K == OMPC_seq_cst || K == OMPC_acq_rel || K == OMPC_acquire ||
6895 K == OMPC_release || K == OMPC_relaxed || K == OMPC_hint)
6896 continue;
6897 Kind = K;
6898 KindsEncountered.insert(V: K);
6899 }
6900 // We just need to correct Kind here. No need to set a bool saying it is
6901 // actually compare capture because we can tell from whether V and R are
6902 // nullptr.
6903 if (KindsEncountered.contains(V: OMPC_compare) &&
6904 KindsEncountered.contains(V: OMPC_capture))
6905 Kind = OMPC_compare;
6906 if (!MemOrderingSpecified) {
6907 llvm::AtomicOrdering DefaultOrder =
6908 CGM.getOpenMPRuntime().getDefaultMemoryOrdering();
6909 if (DefaultOrder == llvm::AtomicOrdering::Monotonic ||
6910 DefaultOrder == llvm::AtomicOrdering::SequentiallyConsistent ||
6911 (DefaultOrder == llvm::AtomicOrdering::AcquireRelease &&
6912 Kind == OMPC_capture)) {
6913 AO = DefaultOrder;
6914 } else if (DefaultOrder == llvm::AtomicOrdering::AcquireRelease) {
6915 if (Kind == OMPC_unknown || Kind == OMPC_update || Kind == OMPC_write) {
6916 AO = llvm::AtomicOrdering::Release;
6917 } else if (Kind == OMPC_read) {
6918 assert(Kind == OMPC_read && "Unexpected atomic kind.");
6919 AO = llvm::AtomicOrdering::Acquire;
6920 }
6921 }
6922 }
6923
6924 if (KindsEncountered.contains(V: OMPC_compare) &&
6925 KindsEncountered.contains(V: OMPC_fail)) {
6926 Kind = OMPC_compare;
6927 const auto *FailClause = S.getSingleClause<OMPFailClause>();
6928 if (FailClause) {
6929 OpenMPClauseKind FailParameter = FailClause->getFailParameter();
6930 if (FailParameter == llvm::omp::OMPC_relaxed)
6931 FailAO = llvm::AtomicOrdering::Monotonic;
6932 else if (FailParameter == llvm::omp::OMPC_acquire)
6933 FailAO = llvm::AtomicOrdering::Acquire;
6934 else if (FailParameter == llvm::omp::OMPC_seq_cst)
6935 FailAO = llvm::AtomicOrdering::SequentiallyConsistent;
6936 }
6937 }
6938
6939 LexicalScope Scope(*this, S.getSourceRange());
6940 EmitStopPoint(S: S.getAssociatedStmt());
6941 emitOMPAtomicExpr(CGF&: *this, Kind, AO, FailAO, IsPostfixUpdate: S.isPostfixUpdate(), X: S.getX(),
6942 V: S.getV(), R: S.getR(), E: S.getExpr(), UE: S.getUpdateExpr(),
6943 D: S.getD(), CE: S.getCondExpr(), IsXLHSInRHSPart: S.isXLHSInRHSPart(),
6944 IsFailOnly: S.isFailOnly(), Loc: S.getBeginLoc());
6945}
6946
/// Common emission path for all 'target'-family directives.
///
/// On the device this simply inlines the region. On the host it outlines the
/// region into a kernel, decides whether it is a real offload entry (based on
/// the 'if' clause and the presence of offload targets), and emits the
/// runtime call that launches it.
static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
                                         const OMPExecutableDirective &S,
                                         const RegionCodeGenTy &CodeGen) {
  assert(isOpenMPTargetExecutionDirective(S.getDirectiveKind()));
  CodeGenModule &CGM = CGF.CGM;

  // On device emit this construct as inlined code.
  if (CGM.getLangOpts().OpenMPIsTargetDevice) {
    OMPLexicalScope Scope(CGF, S, OMPD_target);
    CGM.getOpenMPRuntime().emitInlinedDirective(
        CGF, InnermostKind: OMPD_target, CodeGen: [&S](CodeGenFunction &CGF, PrePostActionTy &) {
          CGF.EmitStmt(S: S.getInnermostCapturedStmt()->getCapturedStmt());
        });
    return;
  }

  auto LPCRegion = CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF, S);
  llvm::Function *Fn = nullptr;
  llvm::Constant *FnID = nullptr;

  const Expr *IfCond = nullptr;
  // Check for the at most one if clause associated with the target region.
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_target) {
      IfCond = C->getCondition();
      break;
    }
  }

  // Check if we have any device clause associated with the directive.
  llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device(
      nullptr, OMPC_DEVICE_unknown);
  if (auto *C = S.getSingleClause<OMPDeviceClause>())
    Device.setPointerAndInt(PtrVal: C->getDevice(), IntVal: C->getModifier());

  // Check if we have an if clause whose conditional always evaluates to false
  // or if we do not have any targets specified. If so the target region is not
  // an offload entry point.
  bool IsOffloadEntry = true;
  if (IfCond) {
    bool Val;
    if (CGF.ConstantFoldsToSimpleInteger(Cond: IfCond, Result&: Val) && !Val)
      IsOffloadEntry = false;
  }
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    IsOffloadEntry = false;

  if (CGM.getLangOpts().OpenMPOffloadMandatory && !IsOffloadEntry) {
    CGM.getDiags().Report(DiagID: diag::err_missing_mandatory_offloading);
  }

  assert(CGF.CurFuncDecl && "No parent declaration for target region!");
  StringRef ParentName;
  // In case we have Ctors/Dtors we use the complete type variant to produce
  // the mangling of the device outlined kernel.
  if (const auto *D = dyn_cast<CXXConstructorDecl>(Val: CGF.CurFuncDecl))
    ParentName = CGM.getMangledName(GD: GlobalDecl(D, Ctor_Complete));
  else if (const auto *D = dyn_cast<CXXDestructorDecl>(Val: CGF.CurFuncDecl))
    ParentName = CGM.getMangledName(GD: GlobalDecl(D, Dtor_Complete));
  else
    ParentName =
        CGM.getMangledName(GD: GlobalDecl(cast<FunctionDecl>(Val: CGF.CurFuncDecl)));

  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(D: S, ParentName, OutlinedFn&: Fn, OutlinedFnID&: FnID,
                                                    IsOffloadEntry, CodeGen);
  OMPLexicalScope Scope(CGF, S, OMPD_task);
  // For offload entries, compute the trip count of the associated loop so
  // the runtime can size the launch.
  auto &&SizeEmitter =
      [IsOffloadEntry](CodeGenFunction &CGF,
                       const OMPLoopDirective &D) -> llvm::Value * {
    if (IsOffloadEntry) {
      // NOTE(review): this constructs and immediately destroys an unnamed
      // OMPLoopScope temporary, so any scope state does not cover the
      // EmitScalarExpr below -- confirm whether a named scope was intended.
      OMPLoopScope(CGF, D);
      // Emit calculation of the iterations count.
      llvm::Value *NumIterations = CGF.EmitScalarExpr(E: D.getNumIterations());
      NumIterations = CGF.Builder.CreateIntCast(V: NumIterations, DestTy: CGF.Int64Ty,
                                                /*isSigned=*/false);
      return NumIterations;
    }
    return nullptr;
  };
  CGM.getOpenMPRuntime().emitTargetCall(CGF, D: S, OutlinedFn: Fn, OutlinedFnID: FnID, IfCond, Device,
                                        SizeEmitter);
}
7031
/// Emit the body of a plain 'target' region: privatize firstprivate/private
/// variables, adjust lambda captures for the device, then emit the captured
/// statement.
static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S,
                             PrePostActionTy &Action) {
  Action.Enter(CGF);
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  (void)CGF.EmitOMPFirstprivateClause(D: S, PrivateScope);
  CGF.EmitOMPPrivateClause(D: S, PrivateScope);
  (void)PrivateScope.Privatize();
  if (isOpenMPTargetExecutionDirective(DKind: S.getDirectiveKind()))
    CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, D: S);

  CGF.EmitStmt(S: S.getCapturedStmt(RegionKind: OMPD_target)->getCapturedStmt());
  CGF.EnsureInsertPoint();
}
7045
7046void CodeGenFunction::EmitOMPTargetDeviceFunction(CodeGenModule &CGM,
7047 StringRef ParentName,
7048 const OMPTargetDirective &S) {
7049 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7050 emitTargetRegion(CGF, S, Action);
7051 };
7052 llvm::Function *Fn;
7053 llvm::Constant *Addr;
7054 // Emit target region as a standalone region.
7055 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
7056 D: S, ParentName, OutlinedFn&: Fn, OutlinedFnID&: Addr, /*IsOffloadEntry=*/true, CodeGen);
7057 assert(Fn && Addr && "Target device function emission failed.");
7058}
7059
7060void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) {
7061 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7062 emitTargetRegion(CGF, S, Action);
7063 };
7064 emitCommonOMPTargetDirective(CGF&: *this, S, CodeGen);
7065}
7066
/// Common emission path for 'teams'-family directives: outline the teams
/// region, emit num_teams/thread_limit bounds when present, and emit the
/// runtime call that forks the teams.
static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &S,
                                        OpenMPDirectiveKind InnermostKind,
                                        const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = S.getCapturedStmt(RegionKind: OMPD_teams);
  llvm::Function *OutlinedFn =
      CGF.CGM.getOpenMPRuntime().emitTeamsOutlinedFunction(
          CGF, D: S, ThreadIDVar: *CS->getCapturedDecl()->param_begin(), InnermostKind,
          CodeGen);

  const auto *NT = S.getSingleClause<OMPNumTeamsClause>();
  const auto *TL = S.getSingleClause<OMPThreadLimitClause>();
  if (NT || TL) {
    // Only the first expression of each clause is used here.
    const Expr *NumTeams = NT ? NT->getNumTeams().front() : nullptr;
    const Expr *ThreadLimit = TL ? TL->getThreadLimit().front() : nullptr;

    CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeams, ThreadLimit,
                                                  Loc: S.getBeginLoc());
  }

  OMPTeamsScope Scope(CGF, S);
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  CGF.GenerateOpenMPCapturedVars(S: *CS, CapturedVars);
  CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, D: S, Loc: S.getBeginLoc(), OutlinedFn,
                                           CapturedVars);
}
7093
/// Emit code for a '#pragma omp teams' directive.
void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) {
  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    // Privatize firstprivate/private/reduction variables before emitting the
    // captured body, then finalize reductions.
    OMPPrivateScope PrivateScope(CGF);
    (void)CGF.EmitOMPFirstprivateClause(D: S, PrivateScope);
    CGF.EmitOMPPrivateClause(D: S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(D: S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.EmitStmt(S: S.getCapturedStmt(RegionKind: OMPD_teams)->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(CGF&: *this, S, InnermostKind: OMPD_distribute, CodeGen);
  emitPostUpdateForReductionClause(CGF&: *this, D: S,
                                   CondGen: [](CodeGenFunction &) { return nullptr; });
}
7110
/// Emit the combined 'target teams' region: the teams part is emitted as a
/// standalone region with privatization and reductions around the captured
/// body.
static void emitTargetTeamsRegion(CodeGenFunction &CGF, PrePostActionTy &Action,
                                  const OMPTargetTeamsDirective &S) {
  auto *CS = S.getCapturedStmt(RegionKind: OMPD_teams);
  Action.Enter(CGF);
  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    (void)CGF.EmitOMPFirstprivateClause(D: S, PrivateScope);
    CGF.EmitOMPPrivateClause(D: S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(D: S, PrivateScope);
    (void)PrivateScope.Privatize();
    // Lambda captures must be remapped for device execution.
    if (isOpenMPTargetExecutionDirective(DKind: S.getDirectiveKind()))
      CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, D: S);
    CGF.EmitStmt(S: CS->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(CGF, S, InnermostKind: OMPD_teams, CodeGen);
  emitPostUpdateForReductionClause(CGF, D: S,
                                   CondGen: [](CodeGenFunction &) { return nullptr; });
}
7132
7133void CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
7134 CodeGenModule &CGM, StringRef ParentName,
7135 const OMPTargetTeamsDirective &S) {
7136 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7137 emitTargetTeamsRegion(CGF, Action, S);
7138 };
7139 llvm::Function *Fn;
7140 llvm::Constant *Addr;
7141 // Emit target region as a standalone region.
7142 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
7143 D: S, ParentName, OutlinedFn&: Fn, OutlinedFnID&: Addr, /*IsOffloadEntry=*/true, CodeGen);
7144 assert(Fn && Addr && "Target device function emission failed.");
7145}
7146
7147void CodeGenFunction::EmitOMPTargetTeamsDirective(
7148 const OMPTargetTeamsDirective &S) {
7149 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7150 emitTargetTeamsRegion(CGF, Action, S);
7151 };
7152 emitCommonOMPTargetDirective(CGF&: *this, S, CodeGen);
7153}
7154
/// Emit the combined 'target teams distribute' region: a teams region whose
/// body is an inlined 'distribute' loop.
static void
emitTargetTeamsDistributeRegion(CodeGenFunction &CGF, PrePostActionTy &Action,
                                const OMPTargetTeamsDistributeDirective &S) {
  Action.Enter(CGF);
  // Inner generator: the distribute worksharing loop itself.
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitOMPLoopBodyWithStopPoint, IncExpr: S.getInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(D: S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, InnermostKind: OMPD_distribute,
                                                    CodeGen: CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(CGF, S, InnermostKind: OMPD_distribute, CodeGen);
  emitPostUpdateForReductionClause(CGF, D: S,
                                   CondGen: [](CodeGenFunction &) { return nullptr; });
}
7178
7179void CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
7180 CodeGenModule &CGM, StringRef ParentName,
7181 const OMPTargetTeamsDistributeDirective &S) {
7182 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7183 emitTargetTeamsDistributeRegion(CGF, Action, S);
7184 };
7185 llvm::Function *Fn;
7186 llvm::Constant *Addr;
7187 // Emit target region as a standalone region.
7188 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
7189 D: S, ParentName, OutlinedFn&: Fn, OutlinedFnID&: Addr, /*IsOffloadEntry=*/true, CodeGen);
7190 assert(Fn && Addr && "Target device function emission failed.");
7191}
7192
7193void CodeGenFunction::EmitOMPTargetTeamsDistributeDirective(
7194 const OMPTargetTeamsDistributeDirective &S) {
7195 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7196 emitTargetTeamsDistributeRegion(CGF, Action, S);
7197 };
7198 emitCommonOMPTargetDirective(CGF&: *this, S, CodeGen);
7199}
7200
/// Emit the combined 'target teams distribute simd' region: a teams region
/// whose body is an inlined 'distribute' loop (the simd part is handled by
/// the distribute-simd loop emission).
static void emitTargetTeamsDistributeSimdRegion(
    CodeGenFunction &CGF, PrePostActionTy &Action,
    const OMPTargetTeamsDistributeSimdDirective &S) {
  Action.Enter(CGF);
  // Inner generator: the distribute worksharing loop itself.
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitOMPLoopBodyWithStopPoint, IncExpr: S.getInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(D: S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, InnermostKind: OMPD_distribute,
                                                    CodeGen: CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(CGF, S, InnermostKind: OMPD_distribute_simd, CodeGen);
  emitPostUpdateForReductionClause(CGF, D: S,
                                   CondGen: [](CodeGenFunction &) { return nullptr; });
}
7224
7225void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
7226 CodeGenModule &CGM, StringRef ParentName,
7227 const OMPTargetTeamsDistributeSimdDirective &S) {
7228 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7229 emitTargetTeamsDistributeSimdRegion(CGF, Action, S);
7230 };
7231 llvm::Function *Fn;
7232 llvm::Constant *Addr;
7233 // Emit target region as a standalone region.
7234 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
7235 D: S, ParentName, OutlinedFn&: Fn, OutlinedFnID&: Addr, /*IsOffloadEntry=*/true, CodeGen);
7236 assert(Fn && Addr && "Target device function emission failed.");
7237}
7238
7239void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDirective(
7240 const OMPTargetTeamsDistributeSimdDirective &S) {
7241 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7242 emitTargetTeamsDistributeSimdRegion(CGF, Action, S);
7243 };
7244 emitCommonOMPTargetDirective(CGF&: *this, S, CodeGen);
7245}
7246
/// Emit code for a '#pragma omp teams distribute' directive.
void CodeGenFunction::EmitOMPTeamsDistributeDirective(
    const OMPTeamsDistributeDirective &S) {

  // Inner generator: the distribute worksharing loop itself.
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitOMPLoopBodyWithStopPoint, IncExpr: S.getInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(D: S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, InnermostKind: OMPD_distribute,
                                                    CodeGen: CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(CGF&: *this, S, InnermostKind: OMPD_distribute, CodeGen);
  emitPostUpdateForReductionClause(CGF&: *this, D: S,
                                   CondGen: [](CodeGenFunction &) { return nullptr; });
}
7269
/// Emit code for a '#pragma omp teams distribute simd' directive.
void CodeGenFunction::EmitOMPTeamsDistributeSimdDirective(
    const OMPTeamsDistributeSimdDirective &S) {
  // Inner generator: the distribute worksharing loop itself.
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitOMPLoopBodyWithStopPoint, IncExpr: S.getInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(D: S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, InnermostKind: OMPD_simd,
                                                    CodeGen: CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(CGF&: *this, S, InnermostKind: OMPD_distribute_simd, CodeGen);
  emitPostUpdateForReductionClause(CGF&: *this, D: S,
                                   CondGen: [](CodeGenFunction &) { return nullptr; });
}
7291
/// Emit code for a '#pragma omp teams distribute parallel for' directive.
void CodeGenFunction::EmitOMPTeamsDistributeParallelForDirective(
    const OMPTeamsDistributeParallelForDirective &S) {
  // Inner generator: distribute loop whose body forks the combined inner
  // 'parallel for'.
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitInnerParallelForWhenCombined,
                              IncExpr: S.getDistInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(D: S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, InnermostKind: OMPD_distribute,
                                                    CodeGen: CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(CGF&: *this, S, InnermostKind: OMPD_distribute_parallel_for, CodeGen);
  emitPostUpdateForReductionClause(CGF&: *this, D: S,
                                   CondGen: [](CodeGenFunction &) { return nullptr; });
}
7314
7315void CodeGenFunction::EmitOMPTeamsDistributeParallelForSimdDirective(
7316 const OMPTeamsDistributeParallelForSimdDirective &S) {
7317 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
7318 CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitInnerParallelForWhenCombined,
7319 IncExpr: S.getDistInc());
7320 };
7321
7322 // Emit teams region as a standalone region.
7323 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
7324 PrePostActionTy &Action) {
7325 Action.Enter(CGF);
7326 OMPPrivateScope PrivateScope(CGF);
7327 CGF.EmitOMPReductionClauseInit(D: S, PrivateScope);
7328 (void)PrivateScope.Privatize();
7329 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
7330 CGF, InnermostKind: OMPD_distribute, CodeGen: CodeGenDistribute, /*HasCancel=*/false);
7331 CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_teams);
7332 };
7333 emitCommonOMPTeamsDirective(CGF&: *this, S, InnermostKind: OMPD_distribute_parallel_for_simd,
7334 CodeGen);
7335 emitPostUpdateForReductionClause(CGF&: *this, D: S,
7336 CondGen: [](CodeGenFunction &) { return nullptr; });
7337}
7338
// Generate the instructions for '#pragma omp interop' directive: emit the
// runtime init/destroy/use calls for every interop clause on the directive.
void CodeGenFunction::EmitOMPInteropDirective(const OMPInteropDirective &S) {
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  // Optional arguments shared by all init/destroy/use calls emitted below;
  // they stay null if the corresponding clause is absent.
  llvm::Value *Device = nullptr;
  llvm::Value *NumDependences = nullptr;
  llvm::Value *DependenceList = nullptr;

  if (const auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = EmitScalarExpr(E: C->getDevice());

  // Build list and emit dependences
  OMPTaskDataTy Data;
  buildDependences(S, Data);
  if (!Data.Dependences.empty()) {
    Address DependenciesArray = Address::invalid();
    std::tie(args&: NumDependences, args&: DependenciesArray) =
        CGM.getOpenMPRuntime().emitDependClause(CGF&: *this, Dependencies: Data.Dependences,
                                                Loc: S.getBeginLoc());
    DependenceList = DependenciesArray.emitRawPointer(CGF&: *this);
  }
  Data.HasNowaitClause = S.hasClausesOfKind<OMPNowaitClause>();

  // 'nowait' is only meaningful when accompanied by an action clause
  // (init/destroy/use); Sema should have rejected a bare 'nowait'.
  assert(!(Data.HasNowaitClause && !(S.getSingleClause<OMPInitClause>() ||
                                     S.getSingleClause<OMPDestroyClause>() ||
                                     S.getSingleClause<OMPUseClause>())) &&
         "OMPNowaitClause clause is used separately in OMPInteropDirective.");

  auto ItOMPInitClause = S.getClausesOfKind<OMPInitClause>();
  if (!ItOMPInitClause.empty()) {
    // Look at the multiple init clauses
    for (const OMPInitClause *C : ItOMPInitClause) {
      llvm::Value *InteropvarPtr =
          EmitLValue(E: C->getInteropVar()).getPointer(CGF&: *this);
      // Map the clause's interop-type to the IRBuilder enum; exactly one of
      // target/targetsync must be present.
      llvm::omp::OMPInteropType InteropType =
          llvm::omp::OMPInteropType::Unknown;
      if (C->getIsTarget()) {
        InteropType = llvm::omp::OMPInteropType::Target;
      } else {
        assert(C->getIsTargetSync() &&
               "Expected interop-type target/targetsync");
        InteropType = llvm::omp::OMPInteropType::TargetSync;
      }
      OMPBuilder.createOMPInteropInit(Loc: Builder, InteropVar: InteropvarPtr, InteropType,
                                      Device, NumDependences, DependenceAddress: DependenceList,
                                      HaveNowaitClause: Data.HasNowaitClause);
    }
  }
  auto ItOMPDestroyClause = S.getClausesOfKind<OMPDestroyClause>();
  if (!ItOMPDestroyClause.empty()) {
    // Look at the multiple destroy clauses
    for (const OMPDestroyClause *C : ItOMPDestroyClause) {
      llvm::Value *InteropvarPtr =
          EmitLValue(E: C->getInteropVar()).getPointer(CGF&: *this);
      OMPBuilder.createOMPInteropDestroy(Loc: Builder, InteropVar: InteropvarPtr, Device,
                                         NumDependences, DependenceAddress: DependenceList,
                                         HaveNowaitClause: Data.HasNowaitClause);
    }
  }
  auto ItOMPUseClause = S.getClausesOfKind<OMPUseClause>();
  if (!ItOMPUseClause.empty()) {
    // Look at the multiple use clauses
    for (const OMPUseClause *C : ItOMPUseClause) {
      llvm::Value *InteropvarPtr =
          EmitLValue(E: C->getInteropVar()).getPointer(CGF&: *this);
      OMPBuilder.createOMPInteropUse(Loc: Builder, InteropVar: InteropvarPtr, Device,
                                     NumDependences, DependenceAddress: DependenceList,
                                     HaveNowaitClause: Data.HasNowaitClause);
    }
  }
}
7408
7409static void emitTargetTeamsDistributeParallelForRegion(
7410 CodeGenFunction &CGF, const OMPTargetTeamsDistributeParallelForDirective &S,
7411 PrePostActionTy &Action) {
7412 Action.Enter(CGF);
7413 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
7414 CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitInnerParallelForWhenCombined,
7415 IncExpr: S.getDistInc());
7416 };
7417
7418 // Emit teams region as a standalone region.
7419 auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
7420 PrePostActionTy &Action) {
7421 Action.Enter(CGF);
7422 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
7423 CGF.EmitOMPReductionClauseInit(D: S, PrivateScope);
7424 (void)PrivateScope.Privatize();
7425 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
7426 CGF, InnermostKind: OMPD_distribute, CodeGen: CodeGenDistribute, /*HasCancel=*/false);
7427 CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_teams);
7428 };
7429
7430 emitCommonOMPTeamsDirective(CGF, S, InnermostKind: OMPD_distribute_parallel_for,
7431 CodeGen: CodeGenTeams);
7432 emitPostUpdateForReductionClause(CGF, D: S,
7433 CondGen: [](CodeGenFunction &) { return nullptr; });
7434}
7435
7436void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
7437 CodeGenModule &CGM, StringRef ParentName,
7438 const OMPTargetTeamsDistributeParallelForDirective &S) {
7439 // Emit SPMD target teams distribute parallel for region as a standalone
7440 // region.
7441 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7442 emitTargetTeamsDistributeParallelForRegion(CGF, S, Action);
7443 };
7444 llvm::Function *Fn;
7445 llvm::Constant *Addr;
7446 // Emit target region as a standalone region.
7447 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
7448 D: S, ParentName, OutlinedFn&: Fn, OutlinedFnID&: Addr, /*IsOffloadEntry=*/true, CodeGen);
7449 assert(Fn && Addr && "Target device function emission failed.");
7450}
7451
7452void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDirective(
7453 const OMPTargetTeamsDistributeParallelForDirective &S) {
7454 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7455 emitTargetTeamsDistributeParallelForRegion(CGF, S, Action);
7456 };
7457 emitCommonOMPTargetDirective(CGF&: *this, S, CodeGen);
7458}
7459
7460static void emitTargetTeamsDistributeParallelForSimdRegion(
7461 CodeGenFunction &CGF,
7462 const OMPTargetTeamsDistributeParallelForSimdDirective &S,
7463 PrePostActionTy &Action) {
7464 Action.Enter(CGF);
7465 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
7466 CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitInnerParallelForWhenCombined,
7467 IncExpr: S.getDistInc());
7468 };
7469
7470 // Emit teams region as a standalone region.
7471 auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
7472 PrePostActionTy &Action) {
7473 Action.Enter(CGF);
7474 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
7475 CGF.EmitOMPReductionClauseInit(D: S, PrivateScope);
7476 (void)PrivateScope.Privatize();
7477 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
7478 CGF, InnermostKind: OMPD_distribute, CodeGen: CodeGenDistribute, /*HasCancel=*/false);
7479 CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_teams);
7480 };
7481
7482 emitCommonOMPTeamsDirective(CGF, S, InnermostKind: OMPD_distribute_parallel_for_simd,
7483 CodeGen: CodeGenTeams);
7484 emitPostUpdateForReductionClause(CGF, D: S,
7485 CondGen: [](CodeGenFunction &) { return nullptr; });
7486}
7487
7488void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
7489 CodeGenModule &CGM, StringRef ParentName,
7490 const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
7491 // Emit SPMD target teams distribute parallel for simd region as a standalone
7492 // region.
7493 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7494 emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action);
7495 };
7496 llvm::Function *Fn;
7497 llvm::Constant *Addr;
7498 // Emit target region as a standalone region.
7499 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
7500 D: S, ParentName, OutlinedFn&: Fn, OutlinedFnID&: Addr, /*IsOffloadEntry=*/true, CodeGen);
7501 assert(Fn && Addr && "Target device function emission failed.");
7502}
7503
7504void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDirective(
7505 const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
7506 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7507 emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action);
7508 };
7509 emitCommonOMPTargetDirective(CGF&: *this, S, CodeGen);
7510}
7511
7512void CodeGenFunction::EmitOMPCancellationPointDirective(
7513 const OMPCancellationPointDirective &S) {
7514 CGM.getOpenMPRuntime().emitCancellationPointCall(CGF&: *this, Loc: S.getBeginLoc(),
7515 CancelRegion: S.getCancelRegion());
7516}
7517
void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) {
  // Pick the 'if' clause that applies to 'cancel': either an unmodified 'if'
  // or one whose name modifier is 'cancel'. The first match wins.
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_cancel) {
      IfCond = C->getCondition();
      break;
    }
  }
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
    // TODO: This check is necessary as we only generate `omp parallel` through
    // the OpenMPIRBuilder for now.
    if (S.getCancelRegion() == OMPD_parallel ||
        S.getCancelRegion() == OMPD_sections ||
        S.getCancelRegion() == OMPD_section) {
      llvm::Value *IfCondition = nullptr;
      if (IfCond)
        IfCondition = EmitScalarExpr(E: IfCond,
                                     /*IgnoreResultAssign=*/true);
      // Let the IRBuilder emit the cancellation check/branch and continue
      // emitting at the insert point it returns.
      llvm::OpenMPIRBuilder::InsertPointTy AfterIP = cantFail(
          ValOrErr: OMPBuilder.createCancel(Loc: Builder, IfCondition, CanceledDirective: S.getCancelRegion()));
      return Builder.restoreIP(IP: AfterIP);
    }
  }

  // Classic (non-IRBuilder) lowering path via the OpenMP runtime helper.
  CGM.getOpenMPRuntime().emitCancelCall(CGF&: *this, Loc: S.getBeginLoc(), IfCond,
                                        CancelRegion: S.getCancelRegion());
}
7547
7548CodeGenFunction::JumpDest
7549CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) {
7550 if (Kind == OMPD_parallel || Kind == OMPD_task ||
7551 Kind == OMPD_target_parallel || Kind == OMPD_taskloop ||
7552 Kind == OMPD_master_taskloop || Kind == OMPD_parallel_master_taskloop)
7553 return ReturnBlock;
7554 assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections ||
7555 Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for ||
7556 Kind == OMPD_distribute_parallel_for ||
7557 Kind == OMPD_target_parallel_for ||
7558 Kind == OMPD_teams_distribute_parallel_for ||
7559 Kind == OMPD_target_teams_distribute_parallel_for);
7560 return OMPCancelStack.getExitBlock();
7561}
7562
// Privatize each variable of a 'use_device_ptr' clause: rebind it to the
// device address that the mapping runtime recorded in CaptureDeviceAddrMap.
void CodeGenFunction::EmitOMPUseDevicePtrClause(
    const OMPUseDevicePtrClause &C, OMPPrivateScope &PrivateScope,
    const llvm::DenseMap<const ValueDecl *, llvm::Value *>
        CaptureDeviceAddrMap) {
  // Each variable is handled at most once even if it appears repeatedly.
  llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
  for (const Expr *OrigVarIt : C.varlist()) {
    const auto *OrigVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: OrigVarIt)->getDecl());
    if (!Processed.insert(V: OrigVD).second)
      continue;

    // In order to identify the right initializer we need to match the
    // declaration used by the mapping logic. In some cases we may get
    // OMPCapturedExprDecl that refers to the original declaration.
    const ValueDecl *MatchingVD = OrigVD;
    if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(Val: MatchingVD)) {
      // OMPCapturedExprDecl are used to privative fields of the current
      // structure.
      const auto *ME = cast<MemberExpr>(Val: OED->getInit());
      assert(isa<CXXThisExpr>(ME->getBase()->IgnoreImpCasts()) &&
             "Base should be the current struct!");
      MatchingVD = ME->getMemberDecl();
    }

    // If we don't have information about the current list item, move on to
    // the next one.
    auto InitAddrIt = CaptureDeviceAddrMap.find(Val: MatchingVD);
    if (InitAddrIt == CaptureDeviceAddrMap.end())
      continue;

    llvm::Type *Ty = ConvertTypeForMem(T: OrigVD->getType().getNonReferenceType());

    // Return the address of the private variable.
    bool IsRegistered = PrivateScope.addPrivate(
        LocalVD: OrigVD,
        Addr: Address(InitAddrIt->second, Ty,
                getContext().getTypeAlignInChars(T: getContext().VoidPtrTy)));
    assert(IsRegistered && "firstprivate var already registered as private");
    // Silence the warning about unused variable.
    (void)IsRegistered;
  }
}
7604
7605static const VarDecl *getBaseDecl(const Expr *Ref) {
7606 const Expr *Base = Ref->IgnoreParenImpCasts();
7607 while (const auto *OASE = dyn_cast<ArraySectionExpr>(Val: Base))
7608 Base = OASE->getBase()->IgnoreParenImpCasts();
7609 while (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Val: Base))
7610 Base = ASE->getBase()->IgnoreParenImpCasts();
7611 return cast<VarDecl>(Val: cast<DeclRefExpr>(Val: Base)->getDecl());
7612}
7613
// Privatize each variable of a 'use_device_addr' clause: rebind it to the
// device address that the mapping runtime recorded in CaptureDeviceAddrMap.
void CodeGenFunction::EmitOMPUseDeviceAddrClause(
    const OMPUseDeviceAddrClause &C, OMPPrivateScope &PrivateScope,
    const llvm::DenseMap<const ValueDecl *, llvm::Value *>
        CaptureDeviceAddrMap) {
  // Each base variable is handled at most once even if it appears repeatedly.
  llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
  for (const Expr *Ref : C.varlist()) {
    const VarDecl *OrigVD = getBaseDecl(Ref);
    if (!Processed.insert(V: OrigVD).second)
      continue;
    // In order to identify the right initializer we need to match the
    // declaration used by the mapping logic. In some cases we may get
    // OMPCapturedExprDecl that refers to the original declaration.
    const ValueDecl *MatchingVD = OrigVD;
    if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(Val: MatchingVD)) {
      // OMPCapturedExprDecl are used to privative fields of the current
      // structure.
      const auto *ME = cast<MemberExpr>(Val: OED->getInit());
      assert(isa<CXXThisExpr>(ME->getBase()) &&
             "Base should be the current struct!");
      MatchingVD = ME->getMemberDecl();
    }

    // If we don't have information about the current list item, move on to
    // the next one.
    auto InitAddrIt = CaptureDeviceAddrMap.find(Val: MatchingVD);
    if (InitAddrIt == CaptureDeviceAddrMap.end())
      continue;

    llvm::Type *Ty = ConvertTypeForMem(T: OrigVD->getType().getNonReferenceType());

    Address PrivAddr =
        Address(InitAddrIt->second, Ty,
                getContext().getTypeAlignInChars(T: getContext().VoidPtrTy));
    // For declrefs and variable length array need to load the pointer for
    // correct mapping, since the pointer to the data was passed to the runtime.
    if (isa<DeclRefExpr>(Val: Ref->IgnoreParenImpCasts()) ||
        MatchingVD->getType()->isArrayType()) {
      QualType PtrTy = getContext().getPointerType(
          T: OrigVD->getType().getNonReferenceType());
      PrivAddr =
          EmitLoadOfPointer(Ptr: PrivAddr.withElementType(ElemTy: ConvertTypeForMem(T: PtrTy)),
                            PtrTy: PtrTy->castAs<PointerType>());
    }

    (void)PrivateScope.addPrivate(LocalVD: OrigVD, Addr: PrivAddr);
  }
}
7661
7662// Generate the instructions for '#pragma omp target data' directive.
void CodeGenFunction::EmitOMPTargetDataDirective(
    const OMPTargetDataDirective &S) {
  // Emit vtable only from host for target data directive.
  if (!CGM.getLangOpts().OpenMPIsTargetDevice)
    CGM.getOpenMPRuntime().registerVTable(D: S);

  CGOpenMPRuntime::TargetDataInfo Info(/*RequiresDevicePointerInfo=*/true,
                                       /*SeparateBeginEndCalls=*/true);

  // Create a pre/post action to signal the privatization of the device pointer.
  // This action can be replaced by the OpenMP runtime code generation to
  // deactivate privatization.
  bool PrivatizeDevicePointers = false;
  class DevicePointerPrivActionTy : public PrePostActionTy {
    bool &PrivatizeDevicePointers;

  public:
    explicit DevicePointerPrivActionTy(bool &PrivatizeDevicePointers)
        : PrivatizeDevicePointers(PrivatizeDevicePointers) {}
    // Entering the region flips the flag; if the runtime replaces this action
    // the flag stays false and privatization is skipped.
    void Enter(CodeGenFunction &CGF) override {
      PrivatizeDevicePointers = true;
    }
  };
  DevicePointerPrivActionTy PrivAction(PrivatizeDevicePointers);

  auto &&CodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto &&InnermostCodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
      CGF.EmitStmt(S: S.getInnermostCapturedStmt()->getCapturedStmt());
    };

    // Codegen that selects whether to generate the privatization code or not.
    auto &&PrivCodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) {
      RegionCodeGenTy RCG(InnermostCodeGen);
      PrivatizeDevicePointers = false;

      // Call the pre-action to change the status of PrivatizeDevicePointers if
      // needed.
      Action.Enter(CGF);

      if (PrivatizeDevicePointers) {
        OMPPrivateScope PrivateScope(CGF);
        // Emit all instances of the use_device_ptr clause.
        for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>())
          CGF.EmitOMPUseDevicePtrClause(C: *C, PrivateScope,
                                        CaptureDeviceAddrMap: Info.CaptureDeviceAddrMap);
        for (const auto *C : S.getClausesOfKind<OMPUseDeviceAddrClause>())
          CGF.EmitOMPUseDeviceAddrClause(C: *C, PrivateScope,
                                         CaptureDeviceAddrMap: Info.CaptureDeviceAddrMap);
        (void)PrivateScope.Privatize();
        RCG(CGF);
      } else {
        // If we don't have target devices, don't bother emitting the data
        // mapping code.
        std::optional<OpenMPDirectiveKind> CaptureRegion;
        if (CGM.getLangOpts().OMPTargetTriples.empty()) {
          // Emit helper decls of the use_device_ptr/use_device_addr clauses.
          for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>())
            for (const Expr *E : C->varlist()) {
              const Decl *D = cast<DeclRefExpr>(Val: E)->getDecl();
              if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(Val: D))
                CGF.EmitVarDecl(D: *OED);
            }
          for (const auto *C : S.getClausesOfKind<OMPUseDeviceAddrClause>())
            for (const Expr *E : C->varlist()) {
              const Decl *D = getBaseDecl(Ref: E);
              if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(Val: D))
                CGF.EmitVarDecl(D: *OED);
            }
        } else {
          CaptureRegion = OMPD_unknown;
        }

        OMPLexicalScope Scope(CGF, S, CaptureRegion);
        RCG(CGF);
      }
    };

    // Forward the provided action to the privatization codegen.
    RegionCodeGenTy PrivRCG(PrivCodeGen);
    PrivRCG.setAction(Action);

    // Notwithstanding the body of the region is emitted as inlined directive,
    // we don't use an inline scope as changes in the references inside the
    // region are expected to be visible outside, so we do not privative them.
    OMPLexicalScope Scope(CGF, S);
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, InnermostKind: OMPD_target_data,
                                                    CodeGen: PrivRCG);
  };

  RegionCodeGenTy RCG(CodeGen);

  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty()) {
    RCG(*this);
    return;
  }

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (const auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (const auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  // Set the action to signal privatization of device pointers.
  RCG.setAction(PrivAction);

  // Emit region code.
  CGM.getOpenMPRuntime().emitTargetDataCalls(CGF&: *this, D: S, IfCond, Device, CodeGen: RCG,
                                             Info);
}
7778
7779void CodeGenFunction::EmitOMPTargetEnterDataDirective(
7780 const OMPTargetEnterDataDirective &S) {
7781 // If we don't have target devices, don't bother emitting the data mapping
7782 // code.
7783 if (CGM.getLangOpts().OMPTargetTriples.empty())
7784 return;
7785
7786 // Check if we have any if clause associated with the directive.
7787 const Expr *IfCond = nullptr;
7788 if (const auto *C = S.getSingleClause<OMPIfClause>())
7789 IfCond = C->getCondition();
7790
7791 // Check if we have any device clause associated with the directive.
7792 const Expr *Device = nullptr;
7793 if (const auto *C = S.getSingleClause<OMPDeviceClause>())
7794 Device = C->getDevice();
7795
7796 OMPLexicalScope Scope(*this, S, OMPD_task);
7797 CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(CGF&: *this, D: S, IfCond, Device);
7798}
7799
7800void CodeGenFunction::EmitOMPTargetExitDataDirective(
7801 const OMPTargetExitDataDirective &S) {
7802 // If we don't have target devices, don't bother emitting the data mapping
7803 // code.
7804 if (CGM.getLangOpts().OMPTargetTriples.empty())
7805 return;
7806
7807 // Check if we have any if clause associated with the directive.
7808 const Expr *IfCond = nullptr;
7809 if (const auto *C = S.getSingleClause<OMPIfClause>())
7810 IfCond = C->getCondition();
7811
7812 // Check if we have any device clause associated with the directive.
7813 const Expr *Device = nullptr;
7814 if (const auto *C = S.getSingleClause<OMPDeviceClause>())
7815 Device = C->getDevice();
7816
7817 OMPLexicalScope Scope(*this, S, OMPD_task);
7818 CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(CGF&: *this, D: S, IfCond, Device);
7819}
7820
// Emit the body of a combined 'target parallel' construct as a 'parallel'
// region wrapping the captured statement.
static void emitTargetParallelRegion(CodeGenFunction &CGF,
                                     const OMPTargetParallelDirective &S,
                                     PrePostActionTy &Action) {
  // Get the captured statement associated with the 'parallel' region.
  const CapturedStmt *CS = S.getCapturedStmt(RegionKind: OMPD_parallel);
  Action.Enter(CGF);
  auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    // Privatize firstprivate/private/reduction variables before emitting the
    // region body; the scope unwinds when this lambda returns.
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    (void)CGF.EmitOMPFirstprivateClause(D: S, PrivateScope);
    CGF.EmitOMPPrivateClause(D: S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(D: S, PrivateScope);
    (void)PrivateScope.Privatize();
    if (isOpenMPTargetExecutionDirective(DKind: S.getDirectiveKind()))
      CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, D: S);
    // TODO: Add support for clauses.
    CGF.EmitStmt(S: CS->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_parallel);
  };
  emitCommonOMPParallelDirective(CGF, S, InnermostKind: OMPD_parallel, CodeGen,
                                 CodeGenBoundParameters: emitEmptyBoundParameters);
  emitPostUpdateForReductionClause(CGF, D: S,
                                   CondGen: [](CodeGenFunction &) { return nullptr; });
}
7845
7846void CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
7847 CodeGenModule &CGM, StringRef ParentName,
7848 const OMPTargetParallelDirective &S) {
7849 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7850 emitTargetParallelRegion(CGF, S, Action);
7851 };
7852 llvm::Function *Fn;
7853 llvm::Constant *Addr;
7854 // Emit target region as a standalone region.
7855 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
7856 D: S, ParentName, OutlinedFn&: Fn, OutlinedFnID&: Addr, /*IsOffloadEntry=*/true, CodeGen);
7857 assert(Fn && Addr && "Target device function emission failed.");
7858}
7859
7860void CodeGenFunction::EmitOMPTargetParallelDirective(
7861 const OMPTargetParallelDirective &S) {
7862 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7863 emitTargetParallelRegion(CGF, S, Action);
7864 };
7865 emitCommonOMPTargetDirective(CGF&: *this, S, CodeGen);
7866}
7867
7868static void emitTargetParallelForRegion(CodeGenFunction &CGF,
7869 const OMPTargetParallelForDirective &S,
7870 PrePostActionTy &Action) {
7871 Action.Enter(CGF);
7872 // Emit directive as a combined directive that consists of two implicit
7873 // directives: 'parallel' with 'for' directive.
7874 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7875 Action.Enter(CGF);
7876 CodeGenFunction::OMPCancelStackRAII CancelRegion(
7877 CGF, OMPD_target_parallel_for, S.hasCancel());
7878 CGF.EmitOMPWorksharingLoop(S, EUB: S.getEnsureUpperBound(), CodeGenLoopBounds: emitForLoopBounds,
7879 CGDispatchBounds: emitDispatchForLoopBounds);
7880 };
7881 emitCommonOMPParallelDirective(CGF, S, InnermostKind: OMPD_for, CodeGen,
7882 CodeGenBoundParameters: emitEmptyBoundParameters);
7883}
7884
7885void CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
7886 CodeGenModule &CGM, StringRef ParentName,
7887 const OMPTargetParallelForDirective &S) {
7888 // Emit SPMD target parallel for region as a standalone region.
7889 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7890 emitTargetParallelForRegion(CGF, S, Action);
7891 };
7892 llvm::Function *Fn;
7893 llvm::Constant *Addr;
7894 // Emit target region as a standalone region.
7895 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
7896 D: S, ParentName, OutlinedFn&: Fn, OutlinedFnID&: Addr, /*IsOffloadEntry=*/true, CodeGen);
7897 assert(Fn && Addr && "Target device function emission failed.");
7898}
7899
7900void CodeGenFunction::EmitOMPTargetParallelForDirective(
7901 const OMPTargetParallelForDirective &S) {
7902 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7903 emitTargetParallelForRegion(CGF, S, Action);
7904 };
7905 emitCommonOMPTargetDirective(CGF&: *this, S, CodeGen);
7906}
7907
7908static void
7909emitTargetParallelForSimdRegion(CodeGenFunction &CGF,
7910 const OMPTargetParallelForSimdDirective &S,
7911 PrePostActionTy &Action) {
7912 Action.Enter(CGF);
7913 // Emit directive as a combined directive that consists of two implicit
7914 // directives: 'parallel' with 'for' directive.
7915 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7916 Action.Enter(CGF);
7917 CGF.EmitOMPWorksharingLoop(S, EUB: S.getEnsureUpperBound(), CodeGenLoopBounds: emitForLoopBounds,
7918 CGDispatchBounds: emitDispatchForLoopBounds);
7919 };
7920 emitCommonOMPParallelDirective(CGF, S, InnermostKind: OMPD_simd, CodeGen,
7921 CodeGenBoundParameters: emitEmptyBoundParameters);
7922}
7923
7924void CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
7925 CodeGenModule &CGM, StringRef ParentName,
7926 const OMPTargetParallelForSimdDirective &S) {
7927 // Emit SPMD target parallel for region as a standalone region.
7928 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7929 emitTargetParallelForSimdRegion(CGF, S, Action);
7930 };
7931 llvm::Function *Fn;
7932 llvm::Constant *Addr;
7933 // Emit target region as a standalone region.
7934 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
7935 D: S, ParentName, OutlinedFn&: Fn, OutlinedFnID&: Addr, /*IsOffloadEntry=*/true, CodeGen);
7936 assert(Fn && Addr && "Target device function emission failed.");
7937}
7938
7939void CodeGenFunction::EmitOMPTargetParallelForSimdDirective(
7940 const OMPTargetParallelForSimdDirective &S) {
7941 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7942 emitTargetParallelForSimdRegion(CGF, S, Action);
7943 };
7944 emitCommonOMPTargetDirective(CGF&: *this, S, CodeGen);
7945}
7946
7947/// Emit a helper variable and return corresponding lvalue.
7948static void mapParam(CodeGenFunction &CGF, const DeclRefExpr *Helper,
7949 const ImplicitParamDecl *PVD,
7950 CodeGenFunction::OMPPrivateScope &Privates) {
7951 const auto *VDecl = cast<VarDecl>(Val: Helper->getDecl());
7952 Privates.addPrivate(LocalVD: VDecl, Addr: CGF.GetAddrOfLocalVar(VD: PVD));
7953}
7954
7955void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
7956 assert(isOpenMPTaskLoopDirective(S.getDirectiveKind()));
7957 // Emit outlined function for task construct.
7958 const CapturedStmt *CS = S.getCapturedStmt(RegionKind: OMPD_taskloop);
7959 Address CapturedStruct = Address::invalid();
7960 {
7961 OMPLexicalScope Scope(*this, S, OMPD_taskloop, /*EmitPreInitStmt=*/false);
7962 CapturedStruct = GenerateCapturedStmtArgument(S: *CS);
7963 }
7964 CanQualType SharedsTy =
7965 getContext().getCanonicalTagType(TD: CS->getCapturedRecordDecl());
7966 const Expr *IfCond = nullptr;
7967 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
7968 if (C->getNameModifier() == OMPD_unknown ||
7969 C->getNameModifier() == OMPD_taskloop) {
7970 IfCond = C->getCondition();
7971 break;
7972 }
7973 }
7974
7975 OMPTaskDataTy Data;
7976 // Check if taskloop must be emitted without taskgroup.
7977 Data.Nogroup = S.getSingleClause<OMPNogroupClause>();
7978 // TODO: Check if we should emit tied or untied task.
7979 Data.Tied = true;
7980 // Set scheduling for taskloop
7981 if (const auto *Clause = S.getSingleClause<OMPGrainsizeClause>()) {
7982 // grainsize clause
7983 Data.Schedule.setInt(/*IntVal=*/false);
7984 Data.Schedule.setPointer(EmitScalarExpr(E: Clause->getGrainsize()));
7985 Data.HasModifier =
7986 (Clause->getModifier() == OMPC_GRAINSIZE_strict) ? true : false;
7987 } else if (const auto *Clause = S.getSingleClause<OMPNumTasksClause>()) {
7988 // num_tasks clause
7989 Data.Schedule.setInt(/*IntVal=*/true);
7990 Data.Schedule.setPointer(EmitScalarExpr(E: Clause->getNumTasks()));
7991 Data.HasModifier =
7992 (Clause->getModifier() == OMPC_NUMTASKS_strict) ? true : false;
7993 }
7994
7995 auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) {
7996 // if (PreCond) {
7997 // for (IV in 0..LastIteration) BODY;
7998 // <Final counter/linear vars updates>;
7999 // }
8000 //
8001
8002 // Emit: if (PreCond) - begin.
8003 // If the condition constant folds and can be elided, avoid emitting the
8004 // whole loop.
8005 bool CondConstant;
8006 llvm::BasicBlock *ContBlock = nullptr;
8007 OMPLoopScope PreInitScope(CGF, S);
8008 if (CGF.ConstantFoldsToSimpleInteger(Cond: S.getPreCond(), Result&: CondConstant)) {
8009 if (!CondConstant)
8010 return;
8011 } else {
8012 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock(name: "taskloop.if.then");
8013 ContBlock = CGF.createBasicBlock(name: "taskloop.if.end");
8014 emitPreCond(CGF, S, Cond: S.getPreCond(), TrueBlock: ThenBlock, FalseBlock: ContBlock,
8015 TrueCount: CGF.getProfileCount(S: &S));
8016 CGF.EmitBlock(BB: ThenBlock);
8017 CGF.incrementProfileCounter(S: &S);
8018 }
8019
8020 (void)CGF.EmitOMPLinearClauseInit(D: S);
8021
8022 OMPPrivateScope LoopScope(CGF);
8023 // Emit helper vars inits.
8024 enum { LowerBound = 5, UpperBound, Stride, LastIter };
8025 auto *I = CS->getCapturedDecl()->param_begin();
8026 auto *LBP = std::next(x: I, n: LowerBound);
8027 auto *UBP = std::next(x: I, n: UpperBound);
8028 auto *STP = std::next(x: I, n: Stride);
8029 auto *LIP = std::next(x: I, n: LastIter);
8030 mapParam(CGF, Helper: cast<DeclRefExpr>(Val: S.getLowerBoundVariable()), PVD: *LBP,
8031 Privates&: LoopScope);
8032 mapParam(CGF, Helper: cast<DeclRefExpr>(Val: S.getUpperBoundVariable()), PVD: *UBP,
8033 Privates&: LoopScope);
8034 mapParam(CGF, Helper: cast<DeclRefExpr>(Val: S.getStrideVariable()), PVD: *STP, Privates&: LoopScope);
8035 mapParam(CGF, Helper: cast<DeclRefExpr>(Val: S.getIsLastIterVariable()), PVD: *LIP,
8036 Privates&: LoopScope);
8037 CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
8038 CGF.EmitOMPLinearClause(D: S, PrivateScope&: LoopScope);
8039 bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(D: S, PrivateScope&: LoopScope);
8040 (void)LoopScope.Privatize();
8041 // Emit the loop iteration variable.
8042 const Expr *IVExpr = S.getIterationVariable();
8043 const auto *IVDecl = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: IVExpr)->getDecl());
8044 CGF.EmitVarDecl(D: *IVDecl);
8045 CGF.EmitIgnoredExpr(E: S.getInit());
8046
8047 // Emit the iterations count variable.
8048 // If it is not a variable, Sema decided to calculate iterations count on
8049 // each iteration (e.g., it is foldable into a constant).
8050 if (const auto *LIExpr = dyn_cast<DeclRefExpr>(Val: S.getLastIteration())) {
8051 CGF.EmitVarDecl(D: *cast<VarDecl>(Val: LIExpr->getDecl()));
8052 // Emit calculation of the iterations count.
8053 CGF.EmitIgnoredExpr(E: S.getCalcLastIteration());
8054 }
8055
8056 {
8057 OMPLexicalScope Scope(CGF, S, OMPD_taskloop, /*EmitPreInitStmt=*/false);
8058 emitCommonSimdLoop(
8059 CGF, S,
8060 SimdInitGen: [&S](CodeGenFunction &CGF, PrePostActionTy &) {
8061 if (isOpenMPSimdDirective(DKind: S.getDirectiveKind()))
8062 CGF.EmitOMPSimdInit(D: S);
8063 },
8064 BodyCodeGen: [&S, &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
8065 CGF.EmitOMPInnerLoop(
8066 S, RequiresCleanup: LoopScope.requiresCleanups(), LoopCond: S.getCond(), IncExpr: S.getInc(),
8067 BodyGen: [&S](CodeGenFunction &CGF) {
8068 emitOMPLoopBodyWithStopPoint(CGF, S,
8069 LoopExit: CodeGenFunction::JumpDest());
8070 },
8071 PostIncGen: [](CodeGenFunction &) {});
8072 });
8073 }
8074 // Emit: if (PreCond) - end.
8075 if (ContBlock) {
8076 CGF.EmitBranch(Block: ContBlock);
8077 CGF.EmitBlock(BB: ContBlock, IsFinished: true);
8078 }
8079 // Emit final copy of the lastprivate variables if IsLastIter != 0.
8080 if (HasLastprivateClause) {
8081 CGF.EmitOMPLastprivateClauseFinal(
8082 D: S, NoFinals: isOpenMPSimdDirective(DKind: S.getDirectiveKind()),
8083 IsLastIterCond: CGF.Builder.CreateIsNotNull(Arg: CGF.EmitLoadOfScalar(
8084 Addr: CGF.GetAddrOfLocalVar(VD: *LIP), /*Volatile=*/false,
8085 Ty: (*LIP)->getType(), Loc: S.getBeginLoc())));
8086 }
8087 LoopScope.restoreMap();
8088 CGF.EmitOMPLinearClauseFinal(D: S, CondGen: [LIP, &S](CodeGenFunction &CGF) {
8089 return CGF.Builder.CreateIsNotNull(
8090 Arg: CGF.EmitLoadOfScalar(Addr: CGF.GetAddrOfLocalVar(VD: *LIP), /*Volatile=*/false,
8091 Ty: (*LIP)->getType(), Loc: S.getBeginLoc()));
8092 });
8093 };
8094 auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
8095 IfCond](CodeGenFunction &CGF, llvm::Function *OutlinedFn,
8096 const OMPTaskDataTy &Data) {
8097 auto &&CodeGen = [&S, OutlinedFn, SharedsTy, CapturedStruct, IfCond,
8098 &Data](CodeGenFunction &CGF, PrePostActionTy &) {
8099 OMPLoopScope PreInitScope(CGF, S);
8100 CGF.CGM.getOpenMPRuntime().emitTaskLoopCall(CGF, Loc: S.getBeginLoc(), D: S,
8101 TaskFunction: OutlinedFn, SharedsTy,
8102 Shareds: CapturedStruct, IfCond, Data);
8103 };
8104 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, InnermostKind: OMPD_taskloop,
8105 CodeGen);
8106 };
8107 if (Data.Nogroup) {
8108 EmitOMPTaskBasedDirective(S, CapturedRegion: OMPD_taskloop, BodyGen, TaskGen, Data);
8109 } else {
8110 CGM.getOpenMPRuntime().emitTaskgroupRegion(
8111 CGF&: *this,
8112 TaskgroupOpGen: [&S, &BodyGen, &TaskGen, &Data](CodeGenFunction &CGF,
8113 PrePostActionTy &Action) {
8114 Action.Enter(CGF);
8115 CGF.EmitOMPTaskBasedDirective(S, CapturedRegion: OMPD_taskloop, BodyGen, TaskGen,
8116 Data);
8117 },
8118 Loc: S.getBeginLoc());
8119 }
8120}
8121
// Emit code for the '#pragma omp taskloop' directive. All of the real work is
// done by the shared taskloop-based emission path.
void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) {
  // Disable lastprivate conditional tracking for the duration of the
  // emission of this task-based region.
  auto LPCRegion =
      CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
  EmitOMPTaskLoopBasedDirective(S);
}
8127
// Emit code for the '#pragma omp taskloop simd' directive via the shared
// taskloop-based emission path.
void CodeGenFunction::EmitOMPTaskLoopSimdDirective(
    const OMPTaskLoopSimdDirective &S) {
  // Disable lastprivate conditional tracking for this task-based region.
  auto LPCRegion =
      CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
  // Open a lexical scope with default arguments (unlike the master/masked
  // taskloop variants below, which explicitly suppress pre-init emission).
  OMPLexicalScope Scope(*this, S);
  EmitOMPTaskLoopBasedDirective(S);
}
8135
8136void CodeGenFunction::EmitOMPMasterTaskLoopDirective(
8137 const OMPMasterTaskLoopDirective &S) {
8138 auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8139 Action.Enter(CGF);
8140 EmitOMPTaskLoopBasedDirective(S);
8141 };
8142 auto LPCRegion =
8143 CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
8144 OMPLexicalScope Scope(*this, S, std::nullopt, /*EmitPreInitStmt=*/false);
8145 CGM.getOpenMPRuntime().emitMasterRegion(CGF&: *this, MasterOpGen: CodeGen, Loc: S.getBeginLoc());
8146}
8147
8148void CodeGenFunction::EmitOMPMaskedTaskLoopDirective(
8149 const OMPMaskedTaskLoopDirective &S) {
8150 auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8151 Action.Enter(CGF);
8152 EmitOMPTaskLoopBasedDirective(S);
8153 };
8154 auto LPCRegion =
8155 CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
8156 OMPLexicalScope Scope(*this, S, std::nullopt, /*EmitPreInitStmt=*/false);
8157 CGM.getOpenMPRuntime().emitMaskedRegion(CGF&: *this, MaskedOpGen: CodeGen, Loc: S.getBeginLoc());
8158}
8159
// Emit code for '#pragma omp master taskloop simd': the taskloop is
// generated inside a master region.
void CodeGenFunction::EmitOMPMasterTaskLoopSimdDirective(
    const OMPMasterTaskLoopSimdDirective &S) {
  // Callback invoked inside the master region to emit the taskloop itself.
  auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    EmitOMPTaskLoopBasedDirective(S);
  };
  // Disable lastprivate conditional tracking for this task-based region.
  auto LPCRegion =
      CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
  OMPLexicalScope Scope(*this, S);
  CGM.getOpenMPRuntime().emitMasterRegion(CGF&: *this, MasterOpGen: CodeGen, Loc: S.getBeginLoc());
}
8171
// Emit code for '#pragma omp masked taskloop simd': the taskloop is
// generated inside a masked region.
void CodeGenFunction::EmitOMPMaskedTaskLoopSimdDirective(
    const OMPMaskedTaskLoopSimdDirective &S) {
  // Callback invoked inside the masked region to emit the taskloop itself.
  auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    EmitOMPTaskLoopBasedDirective(S);
  };
  // Disable lastprivate conditional tracking for this task-based region.
  auto LPCRegion =
      CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
  OMPLexicalScope Scope(*this, S);
  CGM.getOpenMPRuntime().emitMaskedRegion(CGF&: *this, MaskedOpGen: CodeGen, Loc: S.getBeginLoc());
}
8183
8184void CodeGenFunction::EmitOMPParallelMasterTaskLoopDirective(
8185 const OMPParallelMasterTaskLoopDirective &S) {
8186 auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8187 auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF,
8188 PrePostActionTy &Action) {
8189 Action.Enter(CGF);
8190 CGF.EmitOMPTaskLoopBasedDirective(S);
8191 };
8192 OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false);
8193 CGM.getOpenMPRuntime().emitMasterRegion(CGF, MasterOpGen: TaskLoopCodeGen,
8194 Loc: S.getBeginLoc());
8195 };
8196 auto LPCRegion =
8197 CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
8198 emitCommonOMPParallelDirective(CGF&: *this, S, InnermostKind: OMPD_master_taskloop, CodeGen,
8199 CodeGenBoundParameters: emitEmptyBoundParameters);
8200}
8201
// Emit code for '#pragma omp parallel masked taskloop': a parallel region
// containing a masked region that runs the taskloop.
void CodeGenFunction::EmitOMPParallelMaskedTaskLoopDirective(
    const OMPParallelMaskedTaskLoopDirective &S) {
  // Body of the parallel region: open a masked region and emit the taskloop
  // inside it.
  auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF,
                                  PrePostActionTy &Action) {
      Action.Enter(CGF);
      CGF.EmitOMPTaskLoopBasedDirective(S);
    };
    OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false);
    CGM.getOpenMPRuntime().emitMaskedRegion(CGF, MaskedOpGen: TaskLoopCodeGen,
                                            Loc: S.getBeginLoc());
  };
  // Disable lastprivate conditional tracking for the combined construct.
  auto LPCRegion =
      CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
  emitCommonOMPParallelDirective(CGF&: *this, S, InnermostKind: OMPD_masked_taskloop, CodeGen,
                                 CodeGenBoundParameters: emitEmptyBoundParameters);
}
8219
// Emit code for '#pragma omp parallel master taskloop simd': a parallel
// region containing a master region that runs the taskloop.
void CodeGenFunction::EmitOMPParallelMasterTaskLoopSimdDirective(
    const OMPParallelMasterTaskLoopSimdDirective &S) {
  // Body of the parallel region: open a master region and emit the taskloop
  // inside it.
  auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF,
                                  PrePostActionTy &Action) {
      Action.Enter(CGF);
      CGF.EmitOMPTaskLoopBasedDirective(S);
    };
    OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false);
    CGM.getOpenMPRuntime().emitMasterRegion(CGF, MasterOpGen: TaskLoopCodeGen,
                                            Loc: S.getBeginLoc());
  };
  // Disable lastprivate conditional tracking for the combined construct.
  auto LPCRegion =
      CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
  emitCommonOMPParallelDirective(CGF&: *this, S, InnermostKind: OMPD_master_taskloop_simd, CodeGen,
                                 CodeGenBoundParameters: emitEmptyBoundParameters);
}
8237
8238void CodeGenFunction::EmitOMPParallelMaskedTaskLoopSimdDirective(
8239 const OMPParallelMaskedTaskLoopSimdDirective &S) {
8240 auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8241 auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF,
8242 PrePostActionTy &Action) {
8243 Action.Enter(CGF);
8244 CGF.EmitOMPTaskLoopBasedDirective(S);
8245 };
8246 OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false);
8247 CGM.getOpenMPRuntime().emitMaskedRegion(CGF, MaskedOpGen: TaskLoopCodeGen,
8248 Loc: S.getBeginLoc());
8249 };
8250 auto LPCRegion =
8251 CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
8252 emitCommonOMPParallelDirective(CGF&: *this, S, InnermostKind: OMPD_masked_taskloop_simd, CodeGen,
8253 CodeGenBoundParameters: emitEmptyBoundParameters);
8254}
8255
8256// Generate the instructions for '#pragma omp target update' directive.
8257void CodeGenFunction::EmitOMPTargetUpdateDirective(
8258 const OMPTargetUpdateDirective &S) {
8259 // If we don't have target devices, don't bother emitting the data mapping
8260 // code.
8261 if (CGM.getLangOpts().OMPTargetTriples.empty())
8262 return;
8263
8264 // Check if we have any if clause associated with the directive.
8265 const Expr *IfCond = nullptr;
8266 if (const auto *C = S.getSingleClause<OMPIfClause>())
8267 IfCond = C->getCondition();
8268
8269 // Check if we have any device clause associated with the directive.
8270 const Expr *Device = nullptr;
8271 if (const auto *C = S.getSingleClause<OMPDeviceClause>())
8272 Device = C->getDevice();
8273
8274 OMPLexicalScope Scope(*this, S, OMPD_task);
8275 CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(CGF&: *this, D: S, IfCond, Device);
8276}
8277
8278void CodeGenFunction::EmitOMPGenericLoopDirective(
8279 const OMPGenericLoopDirective &S) {
8280 // Always expect a bind clause on the loop directive. It it wasn't
8281 // in the source, it should have been added in sema.
8282
8283 OpenMPBindClauseKind BindKind = OMPC_BIND_unknown;
8284 if (const auto *C = S.getSingleClause<OMPBindClause>())
8285 BindKind = C->getBindKind();
8286
8287 switch (BindKind) {
8288 case OMPC_BIND_parallel: // for
8289 return emitOMPForDirective(S, CGF&: *this, CGM, /*HasCancel=*/false);
8290 case OMPC_BIND_teams: // distribute
8291 return emitOMPDistributeDirective(S, CGF&: *this, CGM);
8292 case OMPC_BIND_thread: // simd
8293 return emitOMPSimdDirective(S, CGF&: *this, CGM);
8294 case OMPC_BIND_unknown:
8295 break;
8296 }
8297
8298 // Unimplemented, just inline the underlying statement for now.
8299 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8300 // Emit the loop iteration variable.
8301 const Stmt *CS =
8302 cast<CapturedStmt>(Val: S.getAssociatedStmt())->getCapturedStmt();
8303 const auto *ForS = dyn_cast<ForStmt>(Val: CS);
8304 if (ForS && !isa<DeclStmt>(Val: ForS->getInit())) {
8305 OMPPrivateScope LoopScope(CGF);
8306 CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
8307 (void)LoopScope.Privatize();
8308 CGF.EmitStmt(S: CS);
8309 LoopScope.restoreMap();
8310 } else {
8311 CGF.EmitStmt(S: CS);
8312 }
8313 };
8314 OMPLexicalScope Scope(*this, S, OMPD_unknown);
8315 CGM.getOpenMPRuntime().emitInlinedDirective(CGF&: *this, InnermostKind: OMPD_loop, CodeGen);
8316}
8317
void CodeGenFunction::EmitOMPParallelGenericLoopDirective(
    const OMPLoopDirective &S) {
  // Emit combined directive as if its constituent constructs are 'parallel'
  // and 'for'.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    emitOMPCopyinClause(CGF, S);
    (void)emitWorksharingDirective(CGF, S, /*HasCancel=*/false);
  };
  // The inner braces bound the lifetime of LPCRegion so that it is destroyed
  // before the lastprivate conditional update check below runs.
  {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
    emitCommonOMPParallelDirective(CGF&: *this, S, InnermostKind: OMPD_for, CodeGen,
                                   CodeGenBoundParameters: emitEmptyBoundParameters);
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(CGF&: *this, S);
}
8336
void CodeGenFunction::EmitOMPTeamsGenericLoopDirective(
    const OMPTeamsGenericLoopDirective &S) {
  // To be consistent with current behavior of 'target teams loop', emit
  // 'teams loop' as if its constituent constructs are 'teams' and 'distribute'.
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitOMPLoopBodyWithStopPoint, IncExpr: S.getInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    // Set up team-level reduction privates before emitting the distribute
    // loop; the order (init, privatize, emit, final) is significant.
    OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(D: S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, InnermostKind: OMPD_distribute,
                                                    CodeGen: CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(CGF&: *this, S, InnermostKind: OMPD_distribute, CodeGen);
  // Apply any post-update expressions required by the reduction clauses.
  emitPostUpdateForReductionClause(CGF&: *this, D: S,
                                   CondGen: [](CodeGenFunction &) { return nullptr; });
}
8360
8361#ifndef NDEBUG
8362static void emitTargetTeamsLoopCodegenStatus(CodeGenFunction &CGF,
8363 std::string StatusMsg,
8364 const OMPExecutableDirective &D) {
8365 bool IsDevice = CGF.CGM.getLangOpts().OpenMPIsTargetDevice;
8366 if (IsDevice)
8367 StatusMsg += ": DEVICE";
8368 else
8369 StatusMsg += ": HOST";
8370 SourceLocation L = D.getBeginLoc();
8371 auto &SM = CGF.getContext().getSourceManager();
8372 PresumedLoc PLoc = SM.getPresumedLoc(L);
8373 const char *FileName = PLoc.isValid() ? PLoc.getFilename() : nullptr;
8374 unsigned LineNo =
8375 PLoc.isValid() ? PLoc.getLine() : SM.getExpansionLineNumber(L);
8376 llvm::dbgs() << StatusMsg << ": " << FileName << ": " << LineNo << "\n";
8377}
8378#endif
8379
/// Emit the region of a 'target teams loop' directive lowered as if its
/// constituent constructs were 'distribute parallel for'.
static void emitTargetTeamsGenericLoopRegionAsParallel(
    CodeGenFunction &CGF, PrePostActionTy &Action,
    const OMPTargetTeamsGenericLoopDirective &S) {
  Action.Enter(CGF);
  // Emit 'teams loop' as if its constituent constructs are 'distribute,
  // 'parallel, and 'for'.
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitInnerParallelForWhenCombined,
                              IncExpr: S.getDistInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                                 PrePostActionTy &Action) {
    Action.Enter(CGF);
    // Reduction privates must be initialized and privatized before the
    // distribute loop is emitted, and finalized afterwards.
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(D: S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
        CGF, InnermostKind: OMPD_distribute, CodeGen: CodeGenDistribute, /*HasCancel=*/false);
    CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_teams);
  };
  // Debug-build status line recording which lowering strategy was taken.
  DEBUG_WITH_TYPE(TTL_CODEGEN_TYPE,
                  emitTargetTeamsLoopCodegenStatus(
                      CGF, TTL_CODEGEN_TYPE " as parallel for", S));
  emitCommonOMPTeamsDirective(CGF, S, InnermostKind: OMPD_distribute_parallel_for,
                              CodeGen: CodeGenTeams);
  emitPostUpdateForReductionClause(CGF, D: S,
                                   CondGen: [](CodeGenFunction &) { return nullptr; });
}
8410
/// Emit the region of a 'target teams loop' directive lowered as if its
/// constituent construct were plain 'distribute' (no inner parallelism).
static void emitTargetTeamsGenericLoopRegionAsDistribute(
    CodeGenFunction &CGF, PrePostActionTy &Action,
    const OMPTargetTeamsGenericLoopDirective &S) {
  Action.Enter(CGF);
  // Emit 'teams loop' as if its constituent construct is 'distribute'.
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitOMPLoopBodyWithStopPoint, IncExpr: S.getInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    // Reduction privates must be initialized and privatized before the
    // distribute loop is emitted, and finalized afterwards.
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(D: S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
        CGF, InnermostKind: OMPD_distribute, CodeGen: CodeGenDistribute, /*HasCancel=*/false);
    CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_teams);
  };
  // Debug-build status line recording which lowering strategy was taken.
  DEBUG_WITH_TYPE(TTL_CODEGEN_TYPE,
                  emitTargetTeamsLoopCodegenStatus(
                      CGF, TTL_CODEGEN_TYPE " as distribute", S));
  emitCommonOMPTeamsDirective(CGF, S, InnermostKind: OMPD_distribute, CodeGen);
  emitPostUpdateForReductionClause(CGF, D: S,
                                   CondGen: [](CodeGenFunction &) { return nullptr; });
}
8438
8439void CodeGenFunction::EmitOMPTargetTeamsGenericLoopDirective(
8440 const OMPTargetTeamsGenericLoopDirective &S) {
8441 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8442 if (S.canBeParallelFor())
8443 emitTargetTeamsGenericLoopRegionAsParallel(CGF, Action, S);
8444 else
8445 emitTargetTeamsGenericLoopRegionAsDistribute(CGF, Action, S);
8446 };
8447 emitCommonOMPTargetDirective(CGF&: *this, S, CodeGen);
8448}
8449
// Emit the device-side outlined function for '#pragma omp target teams loop'.
void CodeGenFunction::EmitOMPTargetTeamsGenericLoopDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetTeamsGenericLoopDirective &S) {
  // Emit SPMD target parallel loop region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    // Choose the lowering strategy: 'distribute parallel for' when the loop
    // can safely run in parallel, plain 'distribute' otherwise.
    if (S.canBeParallelFor())
      emitTargetTeamsGenericLoopRegionAsParallel(CGF, Action, S);
    else
      emitTargetTeamsGenericLoopRegionAsDistribute(CGF, Action, S);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      D: S, ParentName, OutlinedFn&: Fn, OutlinedFnID&: Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr &&
         "Target device function emission failed for 'target teams loop'.");
}
8468
/// Emit the region of a 'target parallel loop' directive as if its
/// constituent constructs were 'parallel' and 'for'.
static void emitTargetParallelGenericLoopRegion(
    CodeGenFunction &CGF, const OMPTargetParallelGenericLoopDirective &S,
    PrePostActionTy &Action) {
  Action.Enter(CGF);
  // Emit as 'parallel for'.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    // Set up the cancellation stack for the region (cancellation itself is
    // not supported here, hence hasCancel = false).
    CodeGenFunction::OMPCancelStackRAII CancelRegion(
        CGF, OMPD_target_parallel_loop, /*hasCancel=*/false);
    CGF.EmitOMPWorksharingLoop(S, EUB: S.getEnsureUpperBound(), CodeGenLoopBounds: emitForLoopBounds,
                               CGDispatchBounds: emitDispatchForLoopBounds);
  };
  emitCommonOMPParallelDirective(CGF, S, InnermostKind: OMPD_for, CodeGen,
                                 CodeGenBoundParameters: emitEmptyBoundParameters);
}
8484
8485void CodeGenFunction::EmitOMPTargetParallelGenericLoopDeviceFunction(
8486 CodeGenModule &CGM, StringRef ParentName,
8487 const OMPTargetParallelGenericLoopDirective &S) {
8488 // Emit target parallel loop region as a standalone region.
8489 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8490 emitTargetParallelGenericLoopRegion(CGF, S, Action);
8491 };
8492 llvm::Function *Fn;
8493 llvm::Constant *Addr;
8494 // Emit target region as a standalone region.
8495 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
8496 D: S, ParentName, OutlinedFn&: Fn, OutlinedFnID&: Addr, /*IsOffloadEntry=*/true, CodeGen);
8497 assert(Fn && Addr && "Target device function emission failed.");
8498}
8499
/// Emit combined directive 'target parallel loop' as if its constituent
/// constructs are 'target', 'parallel', and 'for'.
void CodeGenFunction::EmitOMPTargetParallelGenericLoopDirective(
    const OMPTargetParallelGenericLoopDirective &S) {
  // The region body is shared with the device-function emission path above.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelGenericLoopRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(CGF&: *this, S, CodeGen);
}
8509
8510void CodeGenFunction::EmitSimpleOMPExecutableDirective(
8511 const OMPExecutableDirective &D) {
8512 if (const auto *SD = dyn_cast<OMPScanDirective>(Val: &D)) {
8513 EmitOMPScanDirective(S: *SD);
8514 return;
8515 }
8516 if (!D.hasAssociatedStmt() || !D.getAssociatedStmt())
8517 return;
8518 auto &&CodeGen = [&D](CodeGenFunction &CGF, PrePostActionTy &Action) {
8519 OMPPrivateScope GlobalsScope(CGF);
8520 if (isOpenMPTaskingDirective(Kind: D.getDirectiveKind())) {
8521 // Capture global firstprivates to avoid crash.
8522 for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
8523 for (const Expr *Ref : C->varlist()) {
8524 const auto *DRE = cast<DeclRefExpr>(Val: Ref->IgnoreParenImpCasts());
8525 if (!DRE)
8526 continue;
8527 const auto *VD = dyn_cast<VarDecl>(Val: DRE->getDecl());
8528 if (!VD || VD->hasLocalStorage())
8529 continue;
8530 if (!CGF.LocalDeclMap.count(Val: VD)) {
8531 LValue GlobLVal = CGF.EmitLValue(E: Ref);
8532 GlobalsScope.addPrivate(LocalVD: VD, Addr: GlobLVal.getAddress());
8533 }
8534 }
8535 }
8536 }
8537 if (isOpenMPSimdDirective(DKind: D.getDirectiveKind())) {
8538 (void)GlobalsScope.Privatize();
8539 ParentLoopDirectiveForScanRegion ScanRegion(CGF, D);
8540 emitOMPSimdRegion(CGF, S: cast<OMPLoopDirective>(Val: D), Action);
8541 } else {
8542 if (const auto *LD = dyn_cast<OMPLoopDirective>(Val: &D)) {
8543 for (const Expr *E : LD->counters()) {
8544 const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl());
8545 if (!VD->hasLocalStorage() && !CGF.LocalDeclMap.count(Val: VD)) {
8546 LValue GlobLVal = CGF.EmitLValue(E);
8547 GlobalsScope.addPrivate(LocalVD: VD, Addr: GlobLVal.getAddress());
8548 }
8549 if (isa<OMPCapturedExprDecl>(Val: VD)) {
8550 // Emit only those that were not explicitly referenced in clauses.
8551 if (!CGF.LocalDeclMap.count(Val: VD))
8552 CGF.EmitVarDecl(D: *VD);
8553 }
8554 }
8555 for (const auto *C : D.getClausesOfKind<OMPOrderedClause>()) {
8556 if (!C->getNumForLoops())
8557 continue;
8558 for (unsigned I = LD->getLoopsNumber(),
8559 E = C->getLoopNumIterations().size();
8560 I < E; ++I) {
8561 if (const auto *VD = dyn_cast<OMPCapturedExprDecl>(
8562 Val: cast<DeclRefExpr>(Val: C->getLoopCounter(NumLoop: I))->getDecl())) {
8563 // Emit only those that were not explicitly referenced in clauses.
8564 if (!CGF.LocalDeclMap.count(Val: VD))
8565 CGF.EmitVarDecl(D: *VD);
8566 }
8567 }
8568 }
8569 }
8570 (void)GlobalsScope.Privatize();
8571 CGF.EmitStmt(S: D.getInnermostCapturedStmt()->getCapturedStmt());
8572 }
8573 };
8574 if (D.getDirectiveKind() == OMPD_atomic ||
8575 D.getDirectiveKind() == OMPD_critical ||
8576 D.getDirectiveKind() == OMPD_section ||
8577 D.getDirectiveKind() == OMPD_master ||
8578 D.getDirectiveKind() == OMPD_masked ||
8579 D.getDirectiveKind() == OMPD_unroll ||
8580 D.getDirectiveKind() == OMPD_assume) {
8581 EmitStmt(S: D.getAssociatedStmt());
8582 } else {
8583 auto LPCRegion =
8584 CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S: D);
8585 OMPSimdLexicalScope Scope(*this, D);
8586 CGM.getOpenMPRuntime().emitInlinedDirective(
8587 CGF&: *this,
8588 InnermostKind: isOpenMPSimdDirective(DKind: D.getDirectiveKind()) ? OMPD_simd
8589 : D.getDirectiveKind(),
8590 CodeGen);
8591 }
8592 // Check for outer lastprivate conditional update.
8593 checkForLastprivateConditionalUpdate(CGF&: *this, S: D);
8594}
8595
// Emit code for the '#pragma omp assume' directive: the assumption itself has
// no codegen effect here; only the associated statement is emitted.
void CodeGenFunction::EmitOMPAssumeDirective(const OMPAssumeDirective &S) {
  EmitStmt(S: S.getAssociatedStmt());
}
8599