1//===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This contains code to emit OpenMP nodes as LLVM code.
10//
11//===----------------------------------------------------------------------===//
12
13#include "CGCleanup.h"
14#include "CGDebugInfo.h"
15#include "CGOpenMPRuntime.h"
16#include "CodeGenFunction.h"
17#include "CodeGenModule.h"
18#include "CodeGenPGO.h"
19#include "TargetInfo.h"
20#include "clang/AST/ASTContext.h"
21#include "clang/AST/Attr.h"
22#include "clang/AST/DeclOpenMP.h"
23#include "clang/AST/OpenMPClause.h"
24#include "clang/AST/Stmt.h"
25#include "clang/AST/StmtOpenMP.h"
26#include "clang/AST/StmtVisitor.h"
27#include "clang/Basic/DiagnosticFrontend.h"
28#include "clang/Basic/OpenMPKinds.h"
29#include "clang/Basic/PrettyStackTrace.h"
30#include "clang/Basic/SourceManager.h"
31#include "llvm/ADT/SmallSet.h"
32#include "llvm/BinaryFormat/Dwarf.h"
33#include "llvm/Frontend/OpenMP/OMPConstants.h"
34#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
35#include "llvm/IR/Constants.h"
36#include "llvm/IR/DebugInfoMetadata.h"
37#include "llvm/IR/Instructions.h"
38#include "llvm/IR/IntrinsicInst.h"
39#include "llvm/IR/Metadata.h"
40#include "llvm/Support/AtomicOrdering.h"
41#include "llvm/Support/Debug.h"
42#include <optional>
43using namespace clang;
44using namespace CodeGen;
45using namespace llvm::omp;
46
47#define TTL_CODEGEN_TYPE "target-teams-loop-codegen"
48
49static const VarDecl *getBaseDecl(const Expr *Ref);
50static OpenMPDirectiveKind
51getEffectiveDirectiveKind(const OMPExecutableDirective &S);
52
53namespace {
/// Lexical scope for OpenMP executable constructs, that handles correct codegen
/// for captured expressions.
class OMPLexicalScope : public CodeGenFunction::LexicalScope {
  /// Emit the pre-init statements attached to the directive's clauses: helper
  /// variables (captured clause expressions) that must be evaluated before
  /// the construct itself is emitted.
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
    for (const auto *C : S.clauses()) {
      if (const auto *CPI = OMPClauseWithPreInit::get(C)) {
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(Val: CPI->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(D: cast<VarDecl>(Val: *I));
            } else {
              // Variable is marked "no init": allocate it and register
              // cleanups, but do not emit its initializer.
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(var: cast<VarDecl>(Val: *I));
              CGF.EmitAutoVarCleanups(emission: Emission);
            }
          }
        }
      }
    }
  }
  // Privatization scope holding addresses for variables shared into the
  // inlined region.
  CodeGenFunction::OMPPrivateScope InlinedShareds;

  /// Returns true if \p VD is captured by the enclosing lambda, captured
  /// statement, or block, and therefore must be re-resolved through the
  /// capture machinery rather than used directly.
  static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
    return CGF.LambdaCaptureFields.lookup(Val: VD) ||
           (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
           (isa_and_nonnull<BlockDecl>(Val: CGF.CurCodeDecl) &&
            cast<BlockDecl>(Val: CGF.CurCodeDecl)->capturesVariable(var: VD));
  }

public:
  /// \param CapturedRegion When set, the captures of that region's
  ///        CapturedStmt are privatized so the region can be emitted inline.
  /// \param EmitPreInitStmt Whether clause pre-init statements are emitted by
  ///        this scope (subclasses disable this for constructs that handle
  ///        pre-inits elsewhere).
  OMPLexicalScope(
      CodeGenFunction &CGF, const OMPExecutableDirective &S,
      const std::optional<OpenMPDirectiveKind> CapturedRegion = std::nullopt,
      const bool EmitPreInitStmt = true)
      : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
        InlinedShareds(CGF) {
    if (EmitPreInitStmt)
      emitPreInitStmt(CGF, S);
    if (!CapturedRegion)
      return;
    assert(S.hasAssociatedStmt() &&
           "Expected associated statement for inlined directive.");
    const CapturedStmt *CS = S.getCapturedStmt(RegionKind: *CapturedRegion);
    for (const auto &C : CS->captures()) {
      if (C.capturesVariable() || C.capturesVariableByCopy()) {
        auto *VD = C.getCapturedVar();
        assert(VD == VD->getCanonicalDecl() &&
               "Canonical decl must be captured.");
        // Build a fresh DeclRefExpr so EmitLValue resolves the variable
        // through the current capture context, then record its address.
        DeclRefExpr DRE(
            CGF.getContext(), const_cast<VarDecl *>(VD),
            isCapturedVar(CGF, VD) || (CGF.CapturedStmtInfo &&
                                       InlinedShareds.isGlobalVarCaptured(VD)),
            VD->getType().getNonReferenceType(), VK_LValue, C.getLocation());
        InlinedShareds.addPrivate(LocalVD: VD, Addr: CGF.EmitLValue(E: &DRE).getAddress());
      }
    }
    (void)InlinedShareds.Privatize();
  }
};
114
115/// Lexical scope for OpenMP parallel construct, that handles correct codegen
116/// for captured expressions.
117class OMPParallelScope final : public OMPLexicalScope {
118 bool EmitPreInitStmt(const OMPExecutableDirective &S) {
119 OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S);
120 return !(isOpenMPTargetExecutionDirective(DKind: EKind) ||
121 isOpenMPLoopBoundSharingDirective(Kind: EKind)) &&
122 isOpenMPParallelDirective(DKind: EKind);
123 }
124
125public:
126 OMPParallelScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
127 : OMPLexicalScope(CGF, S, /*CapturedRegion=*/std::nullopt,
128 EmitPreInitStmt(S)) {}
129};
130
131/// Lexical scope for OpenMP teams construct, that handles correct codegen
132/// for captured expressions.
133class OMPTeamsScope final : public OMPLexicalScope {
134 bool EmitPreInitStmt(const OMPExecutableDirective &S) {
135 OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S);
136 return !isOpenMPTargetExecutionDirective(DKind: EKind) &&
137 isOpenMPTeamsDirective(DKind: EKind);
138 }
139
140public:
141 OMPTeamsScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
142 : OMPLexicalScope(CGF, S, /*CapturedRegion=*/std::nullopt,
143 EmitPreInitStmt(S)) {}
144};
145
/// Private scope for OpenMP loop-based directives, that supports capturing
/// of used expression from loop statement.
class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
  /// Prepare a loop-based directive for emission: materialize range-for
  /// helper variables and loop-counter temporaries, mark private variables
  /// as undef, and emit the directive's pre-init statements.
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopBasedDirective &S) {
    const Stmt *PreInits;
    CodeGenFunction::OMPMapVars PreCondVars;
    if (auto *LD = dyn_cast<OMPLoopDirective>(Val: &S)) {
      // Emit init, __range, __begin and __end variables for C++ range loops.
      (void)OMPLoopBasedDirective::doForAllLoops(
          CurStmt: LD->getInnermostCapturedStmt()->getCapturedStmt(),
          /*TryImperfectlyNestedLoops=*/true, NumLoops: LD->getLoopsNumber(),
          Callback: [&CGF](unsigned Cnt, const Stmt *CurStmt) {
            if (const auto *CXXFor = dyn_cast<CXXForRangeStmt>(Val: CurStmt)) {
              if (const Stmt *Init = CXXFor->getInit())
                CGF.EmitStmt(S: Init);
              CGF.EmitStmt(S: CXXFor->getRangeStmt());
              CGF.EmitStmt(S: CXXFor->getBeginStmt());
              CGF.EmitStmt(S: CXXFor->getEndStmt());
            }
            return false;
          });
      // Give every loop counter a temporary address so the precondition can
      // be evaluated without touching the real variables.
      llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
      for (const auto *E : LD->counters()) {
        const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl());
        EmittedAsPrivate.insert(V: VD->getCanonicalDecl());
        (void)PreCondVars.setVarAddr(
            CGF, LocalVD: VD, TempAddr: CGF.CreateMemTemp(T: VD->getType().getNonReferenceType()));
      }
      // Mark private vars as undefs.
      for (const auto *C : LD->getClausesOfKind<OMPPrivateClause>()) {
        for (const Expr *IRef : C->varlist()) {
          const auto *OrigVD =
              cast<VarDecl>(Val: cast<DeclRefExpr>(Val: IRef)->getDecl());
          if (EmittedAsPrivate.insert(V: OrigVD->getCanonicalDecl()).second) {
            QualType OrigVDTy = OrigVD->getType().getNonReferenceType();
            (void)PreCondVars.setVarAddr(
                CGF, LocalVD: OrigVD,
                TempAddr: Address(llvm::UndefValue::get(T: CGF.ConvertTypeForMem(
                             T: CGF.getContext().getPointerType(T: OrigVDTy))),
                        CGF.ConvertTypeForMem(T: OrigVDTy),
                        CGF.getContext().getDeclAlign(D: OrigVD)));
          }
        }
      }
      (void)PreCondVars.apply(CGF);
      PreInits = LD->getPreInits();
    } else if (const auto *Tile = dyn_cast<OMPTileDirective>(Val: &S)) {
      PreInits = Tile->getPreInits();
    } else if (const auto *Stripe = dyn_cast<OMPStripeDirective>(Val: &S)) {
      PreInits = Stripe->getPreInits();
    } else if (const auto *Unroll = dyn_cast<OMPUnrollDirective>(Val: &S)) {
      PreInits = Unroll->getPreInits();
    } else if (const auto *Reverse = dyn_cast<OMPReverseDirective>(Val: &S)) {
      PreInits = Reverse->getPreInits();
    } else if (const auto *Interchange =
                   dyn_cast<OMPInterchangeDirective>(Val: &S)) {
      PreInits = Interchange->getPreInits();
    } else {
      llvm_unreachable("Unknown loop-based directive kind.");
    }
    doEmitPreinits(PreInits);
    // Restore the counters' real addresses now that pre-inits are emitted.
    PreCondVars.restore(CGF);
  }

  /// Pre-init emission for canonical loop sequence transformations (fuse).
  void
  emitPreInitStmt(CodeGenFunction &CGF,
                  const OMPCanonicalLoopSequenceTransformationDirective &S) {
    const Stmt *PreInits;
    if (const auto *Fuse = dyn_cast<OMPFuseDirective>(Val: &S)) {
      PreInits = Fuse->getPreInits();
    } else {
      llvm_unreachable(
          "Unknown canonical loop sequence transform directive kind.");
    }
    doEmitPreinits(PreInits);
  }

  /// Emit the list of pre-init statements/declarations, if any.
  void doEmitPreinits(const Stmt *PreInits) {
    if (PreInits) {
      // CompoundStmts and DeclStmts are used as lists of PreInit statements and
      // declarations. Since declarations must be visible to the statements
      // that they initialize, unpack the CompoundStmt they are nested in.
      SmallVector<const Stmt *> PreInitStmts;
      if (auto *PreInitCompound = dyn_cast<CompoundStmt>(Val: PreInits))
        llvm::append_range(C&: PreInitStmts, R: PreInitCompound->body());
      else
        PreInitStmts.push_back(Elt: PreInits);

      for (const Stmt *S : PreInitStmts) {
        // EmitStmt skips any OMPCapturedExprDecls, but needs to be emitted
        // here.
        if (auto *PreInitDecl = dyn_cast<DeclStmt>(Val: S)) {
          for (Decl *I : PreInitDecl->decls())
            CGF.EmitVarDecl(D: cast<VarDecl>(Val&: *I));
          continue;
        }
        CGF.EmitStmt(S);
      }
    }
  }

public:
  OMPLoopScope(CodeGenFunction &CGF, const OMPLoopBasedDirective &S)
      : CodeGenFunction::RunCleanupsScope(CGF) {
    emitPreInitStmt(CGF, S);
  }
  OMPLoopScope(CodeGenFunction &CGF,
               const OMPCanonicalLoopSequenceTransformationDirective &S)
      : CodeGenFunction::RunCleanupsScope(CGF) {
    emitPreInitStmt(CGF, S);
  }
};
258
/// Lexical scope for OpenMP constructs emitted in simd-only mode: emits
/// clause pre-init declarations, privatizes captured variables, and skips
/// helpers (e.g. inscan copy arrays) that simd codegen does not use.
class OMPSimdLexicalScope : public CodeGenFunction::LexicalScope {
  // Privatization scope holding addresses for variables shared into the
  // inlined region.
  CodeGenFunction::OMPPrivateScope InlinedShareds;

  /// Returns true if \p VD is captured by the enclosing lambda, captured
  /// statement, or block.
  static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
    return CGF.LambdaCaptureFields.lookup(Val: VD) ||
           (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
           (isa_and_nonnull<BlockDecl>(Val: CGF.CurCodeDecl) &&
            cast<BlockDecl>(Val: CGF.CurCodeDecl)->capturesVariable(var: VD));
  }

public:
  OMPSimdLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
        InlinedShareds(CGF) {
    // Emit clause pre-init declarations and the captured-expression decls of
    // use_device_ptr/use_device_addr clauses.
    for (const auto *C : S.clauses()) {
      if (const auto *CPI = OMPClauseWithPreInit::get(C)) {
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(Val: CPI->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(D: cast<VarDecl>(Val: *I));
            } else {
              // Variable is marked "no init": allocate it and register
              // cleanups, but do not emit its initializer.
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(var: cast<VarDecl>(Val: *I));
              CGF.EmitAutoVarCleanups(emission: Emission);
            }
          }
        }
      } else if (const auto *UDP = dyn_cast<OMPUseDevicePtrClause>(Val: C)) {
        for (const Expr *E : UDP->varlist()) {
          const Decl *D = cast<DeclRefExpr>(Val: E)->getDecl();
          if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(Val: D))
            CGF.EmitVarDecl(D: *OED);
        }
      } else if (const auto *UDP = dyn_cast<OMPUseDeviceAddrClause>(Val: C)) {
        for (const Expr *E : UDP->varlist()) {
          const Decl *D = getBaseDecl(Ref: E);
          if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(Val: D))
            CGF.EmitVarDecl(D: *OED);
        }
      }
    }
    // Non-simd directives still need their private clause handled here.
    if (!isOpenMPSimdDirective(DKind: getEffectiveDirectiveKind(S)))
      CGF.EmitOMPPrivateClause(D: S, PrivateScope&: InlinedShareds);
    if (const auto *TG = dyn_cast<OMPTaskgroupDirective>(Val: &S)) {
      if (const Expr *E = TG->getReductionRef())
        CGF.EmitVarDecl(D: *cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl()));
    }
    // Temp copy arrays for inscan reductions should not be emitted as they are
    // not used in simd only mode.
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> CopyArrayTemps;
    for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
      if (C->getModifier() != OMPC_REDUCTION_inscan)
        continue;
      for (const Expr *E : C->copy_array_temps())
        CopyArrayTemps.insert(V: cast<DeclRefExpr>(Val: E)->getDecl());
    }
    // Privatize the captures of every (possibly nested) CapturedStmt so the
    // body can be emitted inline.
    const auto *CS = cast_or_null<CapturedStmt>(Val: S.getAssociatedStmt());
    while (CS) {
      for (auto &C : CS->captures()) {
        if (C.capturesVariable() || C.capturesVariableByCopy()) {
          auto *VD = C.getCapturedVar();
          if (CopyArrayTemps.contains(V: VD))
            continue;
          assert(VD == VD->getCanonicalDecl() &&
                 "Canonical decl must be captured.");
          DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                          isCapturedVar(CGF, VD) ||
                              (CGF.CapturedStmtInfo &&
                               InlinedShareds.isGlobalVarCaptured(VD)),
                          VD->getType().getNonReferenceType(), VK_LValue,
                          C.getLocation());
          InlinedShareds.addPrivate(LocalVD: VD, Addr: CGF.EmitLValue(E: &DRE).getAddress());
        }
      }
      CS = dyn_cast<CapturedStmt>(Val: CS->getCapturedStmt());
    }
    (void)InlinedShareds.Privatize();
  }
};
339
340} // namespace
341
342// The loop directive with a bind clause will be mapped to a different
343// directive with corresponding semantics.
344static OpenMPDirectiveKind
345getEffectiveDirectiveKind(const OMPExecutableDirective &S) {
346 OpenMPDirectiveKind Kind = S.getDirectiveKind();
347 if (Kind != OMPD_loop)
348 return Kind;
349
350 OpenMPBindClauseKind BindKind = OMPC_BIND_unknown;
351 if (const auto *C = S.getSingleClause<OMPBindClause>())
352 BindKind = C->getBindKind();
353
354 switch (BindKind) {
355 case OMPC_BIND_parallel:
356 return OMPD_for;
357 case OMPC_BIND_teams:
358 return OMPD_distribute;
359 case OMPC_BIND_thread:
360 return OMPD_simd;
361 default:
362 return OMPD_loop;
363 }
364}
365
366static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
367 const OMPExecutableDirective &S,
368 const RegionCodeGenTy &CodeGen);
369
370LValue CodeGenFunction::EmitOMPSharedLValue(const Expr *E) {
371 if (const auto *OrigDRE = dyn_cast<DeclRefExpr>(Val: E)) {
372 if (const auto *OrigVD = dyn_cast<VarDecl>(Val: OrigDRE->getDecl())) {
373 OrigVD = OrigVD->getCanonicalDecl();
374 bool IsCaptured =
375 LambdaCaptureFields.lookup(Val: OrigVD) ||
376 (CapturedStmtInfo && CapturedStmtInfo->lookup(VD: OrigVD)) ||
377 (isa_and_nonnull<BlockDecl>(Val: CurCodeDecl));
378 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD), IsCaptured,
379 OrigDRE->getType(), VK_LValue, OrigDRE->getExprLoc());
380 return EmitLValue(E: &DRE);
381 }
382 }
383 return EmitLValue(E);
384}
385
386llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) {
387 ASTContext &C = getContext();
388 llvm::Value *Size = nullptr;
389 auto SizeInChars = C.getTypeSizeInChars(T: Ty);
390 if (SizeInChars.isZero()) {
391 // getTypeSizeInChars() returns 0 for a VLA.
392 while (const VariableArrayType *VAT = C.getAsVariableArrayType(T: Ty)) {
393 VlaSizePair VlaSize = getVLASize(vla: VAT);
394 Ty = VlaSize.Type;
395 Size =
396 Size ? Builder.CreateNUWMul(LHS: Size, RHS: VlaSize.NumElts) : VlaSize.NumElts;
397 }
398 SizeInChars = C.getTypeSizeInChars(T: Ty);
399 if (SizeInChars.isZero())
400 return llvm::ConstantInt::get(Ty: SizeTy, /*V=*/0);
401 return Builder.CreateNUWMul(LHS: Size, RHS: CGM.getSize(numChars: SizeInChars));
402 }
403 return CGM.getSize(numChars: SizeInChars);
404}
405
/// Collect the values to pass to the outlined function for each capture of
/// \p S, in field order: VLA sizes, 'this', by-copy values (round-tripped
/// through a uintptr temporary for non-pointers), and addresses for by-ref
/// captures. Results are appended to \p CapturedVars.
void CodeGenFunction::GenerateOpenMPCapturedVars(
    const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) {
  const RecordDecl *RD = S.getCapturedRecordDecl();
  auto CurField = RD->field_begin();
  auto CurCap = S.captures().begin();
  // Fields, captures and capture-inits are parallel sequences; walk them in
  // lockstep.
  for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(),
                                                 E = S.capture_init_end();
       I != E; ++I, ++CurField, ++CurCap) {
    if (CurField->hasCapturedVLAType()) {
      // Pass the previously computed size of this VLA dimension.
      const VariableArrayType *VAT = CurField->getCapturedVLAType();
      llvm::Value *Val = VLASizeMap[VAT->getSizeExpr()];
      CapturedVars.push_back(Elt: Val);
    } else if (CurCap->capturesThis()) {
      CapturedVars.push_back(Elt: CXXThisValue);
    } else if (CurCap->capturesVariableByCopy()) {
      llvm::Value *CV = EmitLoadOfScalar(lvalue: EmitLValue(E: *I), Loc: CurCap->getLocation());

      // If the field is not a pointer, we need to save the actual value
      // and load it as a void pointer.
      if (!CurField->getType()->isAnyPointerType()) {
        ASTContext &Ctx = getContext();
        Address DstAddr = CreateMemTemp(
            T: Ctx.getUIntPtrType(),
            Name: Twine(CurCap->getCapturedVar()->getName(), ".casted"));
        LValue DstLV = MakeAddrLValue(Addr: DstAddr, T: Ctx.getUIntPtrType());

        // View the uintptr temporary through a pointer to the original type
        // so the store below keeps the value's bit pattern.
        llvm::Value *SrcAddrVal = EmitScalarConversion(
            Src: DstAddr.emitRawPointer(CGF&: *this),
            SrcTy: Ctx.getPointerType(T: Ctx.getUIntPtrType()),
            DstTy: Ctx.getPointerType(T: CurField->getType()), Loc: CurCap->getLocation());
        LValue SrcLV =
            MakeNaturalAlignAddrLValue(V: SrcAddrVal, T: CurField->getType());

        // Store the value using the source type pointer.
        EmitStoreThroughLValue(Src: RValue::get(V: CV), Dst: SrcLV);

        // Load the value using the destination type pointer.
        CV = EmitLoadOfScalar(lvalue: DstLV, Loc: CurCap->getLocation());
      }
      CapturedVars.push_back(Elt: CV);
    } else {
      assert(CurCap->capturesVariable() && "Expected capture by reference.");
      CapturedVars.push_back(Elt: EmitLValue(E: *I).getAddress().emitRawPointer(CGF&: *this));
    }
  }
}
452
453static Address castValueFromUintptr(CodeGenFunction &CGF, SourceLocation Loc,
454 QualType DstType, StringRef Name,
455 LValue AddrLV) {
456 ASTContext &Ctx = CGF.getContext();
457
458 llvm::Value *CastedPtr = CGF.EmitScalarConversion(
459 Src: AddrLV.getAddress().emitRawPointer(CGF), SrcTy: Ctx.getUIntPtrType(),
460 DstTy: Ctx.getPointerType(T: DstType), Loc);
461 // FIXME: should the pointee type (DstType) be passed?
462 Address TmpAddr =
463 CGF.MakeNaturalAlignAddrLValue(V: CastedPtr, T: DstType).getAddress();
464 return TmpAddr;
465}
466
467static QualType getCanonicalParamType(ASTContext &C, QualType T) {
468 if (T->isLValueReferenceType())
469 return C.getLValueReferenceType(
470 T: getCanonicalParamType(C, T: T.getNonReferenceType()),
471 /*SpelledAsLValue=*/false);
472 if (T->isPointerType())
473 return C.getPointerType(T: getCanonicalParamType(C, T: T->getPointeeType()));
474 if (const ArrayType *A = T->getAsArrayTypeUnsafe()) {
475 if (const auto *VLA = dyn_cast<VariableArrayType>(Val: A))
476 return getCanonicalParamType(C, T: VLA->getElementType());
477 if (!A->isVariablyModifiedType())
478 return C.getCanonicalType(T);
479 }
480 return C.getCanonicalParamType(T);
481}
482
483namespace {
/// Contains required data for proper outlined function codegen.
struct FunctionOptions {
  /// Captured statement for which the function is generated.
  const CapturedStmt *S = nullptr;
  /// true if cast to/from UIntPtr is required for variables captured by
  /// value.
  const bool UIntPtrCastRequired = true;
  /// true if only casted arguments must be registered as local args or VLA
  /// sizes.
  const bool RegisterCastedArgsOnly = false;
  /// Name of the generated function.
  const StringRef FunctionName;
  /// Location of the non-debug version of the outlined function.
  SourceLocation Loc;
  /// true if the outlined function is emitted as a device kernel (affects
  /// how the call signature is arranged).
  const bool IsDeviceKernel = false;
  // Note: registering only casted args is meaningful solely when the uintptr
  // casts are actually performed, hence the && in the init list.
  explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired,
                           bool RegisterCastedArgsOnly, StringRef FunctionName,
                           SourceLocation Loc, bool IsDeviceKernel)
      : S(S), UIntPtrCastRequired(UIntPtrCastRequired),
        RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly),
        FunctionName(FunctionName), Loc(Loc), IsDeviceKernel(IsDeviceKernel) {}
};
506} // namespace
507
/// Emit the prologue of an outlined function for a captured statement:
/// builds the argument list from the captured record's fields, creates the
/// llvm::Function, starts its body, and maps each argument back to the
/// entity it captures.
///
/// \param Args [out] Full argument list of the outlined function.
/// \param LocalAddrs [out] Maps each argument decl to the captured VarDecl
///        (null for 'this') and its local address inside the function.
/// \param VLASizes [out] Maps VLA-size arguments to their size expression
///        and the loaded size value.
/// \param CXXThisValue [out] Set to the loaded 'this' value when captured.
/// \param FO Options controlling naming, uintptr casting, debug-info decls
///        and device-kernel emission.
/// \returns the newly created function (body not yet emitted).
static llvm::Function *emitOutlinedFunctionPrologue(
    CodeGenFunction &CGF, FunctionArgList &Args,
    llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>>
        &LocalAddrs,
    llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>>
        &VLASizes,
    llvm::Value *&CXXThisValue, const FunctionOptions &FO) {
  const CapturedDecl *CD = FO.S->getCapturedDecl();
  const RecordDecl *RD = FO.S->getCapturedRecordDecl();
  assert(CD->hasBody() && "missing CapturedDecl body");

  CXXThisValue = nullptr;
  // Build the argument list.
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGM.getContext();
  FunctionArgList TargetArgs;
  // Copy the parameters that precede the context parameter unchanged.
  Args.append(in_start: CD->param_begin(),
              in_end: std::next(x: CD->param_begin(), n: CD->getContextParamPosition()));
  TargetArgs.append(
      in_start: CD->param_begin(),
      in_end: std::next(x: CD->param_begin(), n: CD->getContextParamPosition()));
  auto I = FO.S->captures().begin();
  // When no uintptr casting is needed (debug wrapper), create a dummy
  // function decl so parameters can be emitted with proper debug locations.
  FunctionDecl *DebugFunctionDecl = nullptr;
  if (!FO.UIntPtrCastRequired) {
    FunctionProtoType::ExtProtoInfo EPI;
    QualType FunctionTy = Ctx.getFunctionType(ResultTy: Ctx.VoidTy, Args: {}, EPI);
    DebugFunctionDecl = FunctionDecl::Create(
        C&: Ctx, DC: Ctx.getTranslationUnitDecl(), StartLoc: FO.S->getBeginLoc(),
        NLoc: SourceLocation(), N: DeclarationName(), T: FunctionTy,
        TInfo: Ctx.getTrivialTypeSourceInfo(T: FunctionTy), SC: SC_Static,
        /*UsesFPIntrin=*/false, /*isInlineSpecified=*/false,
        /*hasWrittenPrototype=*/false);
  }
  // One argument per field of the captured record, walked in lockstep with
  // the capture list.
  for (const FieldDecl *FD : RD->fields()) {
    QualType ArgType = FD->getType();
    IdentifierInfo *II = nullptr;
    VarDecl *CapVar = nullptr;

    // If this is a capture by copy and the type is not a pointer, the outlined
    // function argument type should be uintptr and the value properly casted to
    // uintptr. This is necessary given that the runtime library is only able to
    // deal with pointers. We can pass in the same way the VLA type sizes to the
    // outlined function.
    if (FO.UIntPtrCastRequired &&
        ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) ||
         I->capturesVariableArrayType()))
      ArgType = Ctx.getUIntPtrType();

    if (I->capturesVariable() || I->capturesVariableByCopy()) {
      CapVar = I->getCapturedVar();
      II = CapVar->getIdentifier();
    } else if (I->capturesThis()) {
      II = &Ctx.Idents.get(Name: "this");
    } else {
      assert(I->capturesVariableArrayType());
      II = &Ctx.Idents.get(Name: "vla");
    }
    if (ArgType->isVariablyModifiedType())
      ArgType = getCanonicalParamType(C&: Ctx, T: ArgType);
    VarDecl *Arg;
    if (CapVar && (CapVar->getTLSKind() != clang::VarDecl::TLS_None)) {
      Arg = ImplicitParamDecl::Create(C&: Ctx, /*DC=*/nullptr, IdLoc: FD->getLocation(),
                                      Id: II, T: ArgType,
                                      ParamKind: ImplicitParamKind::ThreadPrivateVar);
    } else if (DebugFunctionDecl && (CapVar || I->capturesThis())) {
      Arg = ParmVarDecl::Create(
          C&: Ctx, DC: DebugFunctionDecl,
          StartLoc: CapVar ? CapVar->getBeginLoc() : FD->getBeginLoc(),
          IdLoc: CapVar ? CapVar->getLocation() : FD->getLocation(), Id: II, T: ArgType,
          /*TInfo=*/nullptr, S: SC_None, /*DefArg=*/nullptr);
    } else {
      Arg = ImplicitParamDecl::Create(C&: Ctx, /*DC=*/nullptr, IdLoc: FD->getLocation(),
                                      Id: II, T: ArgType, ParamKind: ImplicitParamKind::Other);
    }
    Args.emplace_back(Args&: Arg);
    // Do not cast arguments if we emit function with non-original types.
    TargetArgs.emplace_back(
        Args: FO.UIntPtrCastRequired
            ? Arg
            : CGM.getOpenMPRuntime().translateParameter(FD, NativeParam: Arg));
    ++I;
  }
  // Copy the parameters that follow the context parameter unchanged.
  Args.append(in_start: std::next(x: CD->param_begin(), n: CD->getContextParamPosition() + 1),
              in_end: CD->param_end());
  TargetArgs.append(
      in_start: std::next(x: CD->param_begin(), n: CD->getContextParamPosition() + 1),
      in_end: CD->param_end());

  // Create the function declaration.
  const CGFunctionInfo &FuncInfo =
      FO.IsDeviceKernel
          ? CGM.getTypes().arrangeDeviceKernelCallerDeclaration(resultType: Ctx.VoidTy,
                                                                args: TargetArgs)
          : CGM.getTypes().arrangeBuiltinFunctionDeclaration(resultType: Ctx.VoidTy,
                                                             args: TargetArgs);
  llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(Info: FuncInfo);

  auto *F =
      llvm::Function::Create(Ty: FuncLLVMTy, Linkage: llvm::GlobalValue::InternalLinkage,
                             N: FO.FunctionName, M: &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GD: CD, F, FI: FuncInfo);
  if (CD->isNothrow())
    F->setDoesNotThrow();
  F->setDoesNotRecurse();

  // Always inline the outlined function if optimizations are enabled.
  if (CGM.getCodeGenOpts().OptimizationLevel != 0) {
    F->removeFnAttr(Kind: llvm::Attribute::NoInline);
    F->addFnAttr(Kind: llvm::Attribute::AlwaysInline);
  }
  if (!CGM.getCodeGenOpts().SampleProfileFile.empty())
    F->addFnAttr(Kind: "sample-profile-suffix-elision-policy", Val: "selected");

  // Generate the function.
  CGF.StartFunction(GD: CD, RetTy: Ctx.VoidTy, Fn: F, FnInfo: FuncInfo, Args: TargetArgs,
                    Loc: FO.UIntPtrCastRequired ? FO.Loc : FO.S->getBeginLoc(),
                    StartLoc: FO.UIntPtrCastRequired ? FO.Loc
                                             : CD->getBody()->getBeginLoc());
  unsigned Cnt = CD->getContextParamPosition();
  I = FO.S->captures().begin();
  // Second pass: map each emitted argument back to the captured entity.
  for (const FieldDecl *FD : RD->fields()) {
    // Do not map arguments if we emit function with non-original types.
    Address LocalAddr(Address::invalid());
    if (!FO.UIntPtrCastRequired && Args[Cnt] != TargetArgs[Cnt]) {
      LocalAddr = CGM.getOpenMPRuntime().getParameterAddress(CGF, NativeParam: Args[Cnt],
                                                             TargetParam: TargetArgs[Cnt]);
    } else {
      LocalAddr = CGF.GetAddrOfLocalVar(VD: Args[Cnt]);
    }
    // If we are capturing a pointer by copy we don't need to do anything, just
    // use the value that we get from the arguments.
    if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
      const VarDecl *CurVD = I->getCapturedVar();
      if (!FO.RegisterCastedArgsOnly)
        LocalAddrs.insert(KV: {Args[Cnt], {CurVD, LocalAddr}});
      ++Cnt;
      ++I;
      continue;
    }

    LValue ArgLVal = CGF.MakeAddrLValue(Addr: LocalAddr, T: Args[Cnt]->getType(),
                                        Source: AlignmentSource::Decl);
    if (FD->hasCapturedVLAType()) {
      // Recover the VLA dimension size that was passed as a uintptr.
      if (FO.UIntPtrCastRequired) {
        ArgLVal = CGF.MakeAddrLValue(
            Addr: castValueFromUintptr(CGF, Loc: I->getLocation(), DstType: FD->getType(),
                                 Name: Args[Cnt]->getName(), AddrLV: ArgLVal),
            T: FD->getType(), Source: AlignmentSource::Decl);
      }
      llvm::Value *ExprArg = CGF.EmitLoadOfScalar(lvalue: ArgLVal, Loc: I->getLocation());
      const VariableArrayType *VAT = FD->getCapturedVLAType();
      VLASizes.try_emplace(Key: Args[Cnt], Args: VAT->getSizeExpr(), Args&: ExprArg);
    } else if (I->capturesVariable()) {
      // By-reference capture: dereference down to the variable's address.
      const VarDecl *Var = I->getCapturedVar();
      QualType VarTy = Var->getType();
      Address ArgAddr = ArgLVal.getAddress();
      if (ArgLVal.getType()->isLValueReferenceType()) {
        ArgAddr = CGF.EmitLoadOfReference(RefLVal: ArgLVal);
      } else if (!VarTy->isVariablyModifiedType() || !VarTy->isPointerType()) {
        assert(ArgLVal.getType()->isPointerType());
        ArgAddr = CGF.EmitLoadOfPointer(
            Ptr: ArgAddr, PtrTy: ArgLVal.getType()->castAs<PointerType>());
      }
      if (!FO.RegisterCastedArgsOnly) {
        LocalAddrs.insert(
            KV: {Args[Cnt], {Var, ArgAddr.withAlignment(NewAlignment: Ctx.getDeclAlign(D: Var))}});
      }
    } else if (I->capturesVariableByCopy()) {
      assert(!FD->getType()->isAnyPointerType() &&
             "Not expecting a captured pointer.");
      const VarDecl *Var = I->getCapturedVar();
      LocalAddrs.insert(KV: {Args[Cnt],
                          {Var, FO.UIntPtrCastRequired
                                    ? castValueFromUintptr(
                                          CGF, Loc: I->getLocation(), DstType: FD->getType(),
                                          Name: Args[Cnt]->getName(), AddrLV: ArgLVal)
                                    : ArgLVal.getAddress()}});
    } else {
      // If 'this' is captured, load it into CXXThisValue.
      assert(I->capturesThis());
      CXXThisValue = CGF.EmitLoadOfScalar(lvalue: ArgLVal, Loc: I->getLocation());
      LocalAddrs.insert(KV: {Args[Cnt], {nullptr, ArgLVal.getAddress()}});
    }
    ++Cnt;
    ++I;
  }

  return F;
}
697
/// Emit the prologue of an outlined function whose captures are passed as a
/// single aggregate context argument (a flat array of promoted intptr_t
/// slots) rather than as individual parameters. Creates the function, loads
/// the context pointer into \p ContextV, and maps each captured field's slot
/// to a local address (\p LocalAddrs), VLA size (\p VLASizes) or
/// \p CXXThisValue.
static llvm::Function *emitOutlinedFunctionPrologueAggregate(
    CodeGenFunction &CGF, FunctionArgList &Args,
    llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>>
        &LocalAddrs,
    llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>>
        &VLASizes,
    llvm::Value *&CXXThisValue, llvm::Value *&ContextV, const CapturedStmt &CS,
    SourceLocation Loc, StringRef FunctionName) {
  const CapturedDecl *CD = CS.getCapturedDecl();
  const RecordDecl *RD = CS.getCapturedRecordDecl();

  CXXThisValue = nullptr;
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGM.getContext();
  // The only parameter is the context aggregate itself.
  Args.push_back(Elt: CD->getContextParam());

  const CGFunctionInfo &FuncInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(resultType: Ctx.VoidTy, args: Args);
  llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(Info: FuncInfo);

  auto *F =
      llvm::Function::Create(Ty: FuncLLVMTy, Linkage: llvm::GlobalValue::InternalLinkage,
                             N: FunctionName, M: &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GD: CD, F, FI: FuncInfo);
  if (CD->isNothrow())
    F->setDoesNotThrow();
  F->setDoesNotRecurse();

  CGF.StartFunction(GD: CD, RetTy: Ctx.VoidTy, Fn: F, FnInfo: FuncInfo, Args, Loc, StartLoc: Loc);
  Address ContextAddr = CGF.GetAddrOfLocalVar(VD: CD->getContextParam());
  ContextV = CGF.Builder.CreateLoad(Addr: ContextAddr);

  // The runtime passes arguments as a flat array of promoted intptr_t values.
  llvm::Type *IntPtrTy = CGF.IntPtrTy;
  llvm::Type *PtrTy = CGF.Builder.getPtrTy();
  llvm::Align PtrAlign = CGM.getDataLayout().getPointerABIAlignment(AS: 0);
  CharUnits SlotAlign = CharUnits::fromQuantity(Quantity: PtrAlign.value());

  // Walk the record fields and captures in lockstep; FieldIdx selects the
  // matching slot in the flat context array.
  for (auto [FD, C, FieldIdx] :
       llvm::zip(t: RD->fields(), u: CS.captures(),
                 args: llvm::seq<unsigned>(Size: RD->getNumFields()))) {
    llvm::Value *Slot =
        CGF.Builder.CreateConstInBoundsGEP1_32(Ty: IntPtrTy, Ptr: ContextV, Idx0: FieldIdx);

    // Generate the appropriate load from the GEP into the __context struct.
    // This includes all of the user arguments as well as the implicit kernel
    // argument pointer.
    if (C.capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
      // By-copy pointer: the slot itself is the local storage.
      const VarDecl *CurVD = C.getCapturedVar();
      Slot->setName(CurVD->getName());
      Address SlotAddr(Slot, PtrTy, SlotAlign);
      LocalAddrs.insert(KV: {FD, {CurVD, SlotAddr}});
    } else if (FD->hasCapturedVLAType()) {
      // VLA size is stored as intptr_t directly in the slot.
      Address SlotAddr(Slot, CGF.ConvertTypeForMem(T: FD->getType()), SlotAlign);
      LValue ArgLVal =
          CGF.MakeAddrLValue(Addr: SlotAddr, T: FD->getType(), Source: AlignmentSource::Decl);
      llvm::Value *ExprArg = CGF.EmitLoadOfScalar(lvalue: ArgLVal, Loc: C.getLocation());
      const VariableArrayType *VAT = FD->getCapturedVLAType();
      VLASizes.try_emplace(Key: FD, Args: VAT->getSizeExpr(), Args&: ExprArg);
    } else if (C.capturesVariable()) {
      // By-reference capture: the slot holds the variable's address (or, for
      // VLA pointers, the address of that pointer).
      const VarDecl *Var = C.getCapturedVar();
      QualType VarTy = Var->getType();

      if (VarTy->isVariablyModifiedType() && VarTy->isPointerType()) {
        Slot->setName(Var->getName() + ".addr");
        Address SlotAddr(Slot, PtrTy, SlotAlign);
        LocalAddrs.insert(KV: {FD, {Var, SlotAddr}});
      } else {
        llvm::Value *VarAddr = CGF.Builder.CreateAlignedLoad(
            Ty: PtrTy, Ptr: Slot, Align: PtrAlign, Name: Var->getName());
        LocalAddrs.insert(KV: {FD,
                            {Var, Address(VarAddr, CGF.ConvertTypeForMem(T: VarTy),
                                          Ctx.getDeclAlign(D: Var))}});
      }
    } else if (C.capturesVariableByCopy()) {
      assert(!FD->getType()->isAnyPointerType() &&
             "Not expecting a captured pointer.");
      const VarDecl *Var = C.getCapturedVar();
      QualType FieldTy = FD->getType();

      // Scalar values are promoted and stored directly in the slot.
      Address SlotAddr(Slot, CGF.ConvertTypeForMem(T: FieldTy), SlotAlign);
      Address CopyAddr =
          CGF.CreateMemTemp(T: FieldTy, Align: Ctx.getDeclAlign(D: FD), Name: Var->getName());
      LValue SrcLVal =
          CGF.MakeAddrLValue(Addr: SlotAddr, T: FieldTy, Source: AlignmentSource::Decl);
      LValue CopyLVal =
          CGF.MakeAddrLValue(Addr: CopyAddr, T: FieldTy, Source: AlignmentSource::Decl);

      RValue ArgRVal = CGF.EmitLoadOfLValue(V: SrcLVal, Loc: C.getLocation());
      CGF.EmitStoreThroughLValue(Src: ArgRVal, Dst: CopyLVal);

      LocalAddrs.insert(KV: {FD, {Var, CopyAddr}});
    } else {
      assert(C.capturesThis() && "Default case expected to be CXX 'this'");
      CXXThisValue =
          CGF.Builder.CreateAlignedLoad(Ty: PtrTy, Ptr: Slot, Align: PtrAlign, Name: "this");
      Address SlotAddr(Slot, PtrTy, SlotAlign);
      LocalAddrs.insert(KV: {FD, {nullptr, SlotAddr}});
    }
  }

  return F;
}
803
/// Generate the outlined function for the body of the captured statement \p S
/// of the OpenMP directive \p D.
///
/// When (reduced) debug info is requested, two functions are emitted: an
/// inner function (named with a "_debug__" suffix) whose parameters keep
/// their natural types so they are debuggable, and a wrapper using the
/// uintptr_t-cast argument convention that loads each argument and forwards
/// the call to the inner function.  The wrapper is returned in that case;
/// otherwise the single outlined function is returned.
llvm::Function *CodeGenFunction::GenerateOpenMPCapturedStmtFunction(
    const CapturedStmt &S, const OMPExecutableDirective &D) {
  SourceLocation Loc = D.getBeginLoc();
  assert(
      CapturedStmtInfo &&
      "CapturedStmtInfo should be set when generating the captured function");
  const CapturedDecl *CD = S.getCapturedDecl();
  // Build the argument list.
  // The wrapper is only needed when debug info is emitted: the inner
  // function then keeps the original parameter types for the debugger.
  bool NeedWrapperFunction =
      getDebugInfo() && CGM.getCodeGenOpts().hasReducedDebugInfo();
  FunctionArgList Args, WrapperArgs;
  llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs,
      WrapperLocalAddrs;
  llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes,
      WrapperVLASizes;
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << CapturedStmtInfo->getHelperName();
  OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S: D);
  // Only the outermost captured statement of a target execution directive on
  // the GPU becomes the device kernel entry point.
  bool IsDeviceKernel = CGM.getOpenMPRuntime().isGPU() &&
                        isOpenMPTargetExecutionDirective(DKind: EKind) &&
                        D.getCapturedStmt(RegionKind: OMPD_target) == &S;
  CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true);
  llvm::Function *WrapperF = nullptr;
  if (NeedWrapperFunction) {
    // Emit the final kernel early to allow attributes to be added by the
    // OpenMP-IR-Builder.
    FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true,
                              /*RegisterCastedArgsOnly=*/true,
                              CapturedStmtInfo->getHelperName(), Loc,
                              IsDeviceKernel);
    WrapperCGF.CapturedStmtInfo = CapturedStmtInfo;
    WrapperF =
        emitOutlinedFunctionPrologue(CGF&: WrapperCGF, Args, LocalAddrs, VLASizes,
                                     CXXThisValue&: WrapperCGF.CXXThisValue, FO: WrapperFO);
    Out << "_debug__";
  }
  // NOTE: the naming here is historically swapped: the unprefixed
  // Args/LocalAddrs/VLASizes above were filled in for the wrapper function,
  // while the Wrapper*-prefixed collections below describe the function
  // emitted into *this* CGF (the "_debug__" inner function when a wrapper
  // is used).
  FunctionOptions FO(&S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false,
                     Out.str(), Loc, !NeedWrapperFunction && IsDeviceKernel);
  llvm::Function *F = emitOutlinedFunctionPrologue(
      CGF&: *this, Args&: WrapperArgs, LocalAddrs&: WrapperLocalAddrs, VLASizes&: WrapperVLASizes, CXXThisValue, FO);
  // Map each captured variable to its local address so the body references
  // resolve to the outlined function's parameters.
  CodeGenFunction::OMPPrivateScope LocalScope(*this);
  for (const auto &LocalAddrPair : WrapperLocalAddrs) {
    if (LocalAddrPair.second.first) {
      LocalScope.addPrivate(LocalVD: LocalAddrPair.second.first,
                            Addr: LocalAddrPair.second.second);
    }
  }
  (void)LocalScope.Privatize();
  for (const auto &VLASizePair : WrapperVLASizes)
    VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second;
  PGO->assignRegionCounters(GD: GlobalDecl(CD), Fn: F);
  CapturedStmtInfo->EmitBody(CGF&: *this, S: CD->getBody());
  LocalScope.ForceCleanup();
  FinishFunction(EndLoc: CD->getBodyRBrace());
  if (!NeedWrapperFunction)
    return F;

  // Reverse the order.
  // Keep the wrapper immediately after the inner function in the module.
  WrapperF->removeFromParent();
  F->getParent()->getFunctionList().insertAfter(where: F->getIterator(), New: WrapperF);

  // Emit the wrapper body: reload every argument from its uintptr_t-cast
  // slot (or reuse the recorded VLA size) and forward it to F.
  llvm::SmallVector<llvm::Value *, 4> CallArgs;
  auto *PI = F->arg_begin();
  for (const auto *Arg : Args) {
    llvm::Value *CallArg;
    auto I = LocalAddrs.find(Key: Arg);
    if (I != LocalAddrs.end()) {
      LValue LV = WrapperCGF.MakeAddrLValue(
          Addr: I->second.second,
          T: I->second.first ? I->second.first->getType() : Arg->getType(),
          Source: AlignmentSource::Decl);
      if (LV.getType()->isAnyComplexType())
        LV.setAddress(LV.getAddress().withElementType(ElemTy: PI->getType()));
      CallArg = WrapperCGF.EmitLoadOfScalar(lvalue: LV, Loc: S.getBeginLoc());
    } else {
      auto EI = VLASizes.find(Val: Arg);
      if (EI != VLASizes.end()) {
        CallArg = EI->second.second;
      } else {
        LValue LV =
            WrapperCGF.MakeAddrLValue(Addr: WrapperCGF.GetAddrOfLocalVar(VD: Arg),
                                      T: Arg->getType(), Source: AlignmentSource::Decl);
        CallArg = WrapperCGF.EmitLoadOfScalar(lvalue: LV, Loc: S.getBeginLoc());
      }
    }
    CallArgs.emplace_back(Args: WrapperCGF.EmitFromMemory(Value: CallArg, Ty: Arg->getType()));
    ++PI;
  }
  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF&: WrapperCGF, Loc, OutlinedFn: F, Args: CallArgs);
  WrapperCGF.FinishFunction();
  return WrapperF;
}
897
/// Generate the outlined function for the captured statement \p S using the
/// aggregate argument convention: all captures are passed through a single
/// __context structure instead of individual parameters.
///
/// As in GenerateOpenMPCapturedStmtFunction, a debug ("_debug__") inner
/// function plus a forwarding wrapper are emitted when debug info is
/// requested; the wrapper is returned in that case.
llvm::Function *CodeGenFunction::GenerateOpenMPCapturedStmtFunctionAggregate(
    const CapturedStmt &S, const OMPExecutableDirective &D) {
  SourceLocation Loc = D.getBeginLoc();
  assert(
      CapturedStmtInfo &&
      "CapturedStmtInfo should be set when generating the captured function");
  const CapturedDecl *CD = S.getCapturedDecl();
  const RecordDecl *RD = S.getCapturedRecordDecl();
  StringRef FunctionName = CapturedStmtInfo->getHelperName();
  bool NeedWrapperFunction =
      getDebugInfo() && CGM.getCodeGenOpts().hasReducedDebugInfo();

  CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true);
  llvm::Function *WrapperF = nullptr;
  // Pointer to the wrapper's __context argument; filled in by the aggregate
  // prologue and used below to rebuild the call arguments for F.
  llvm::Value *WrapperContextV = nullptr;
  if (NeedWrapperFunction) {
    WrapperCGF.CapturedStmtInfo = CapturedStmtInfo;
    FunctionArgList WrapperArgs;
    llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>>
        WrapperLocalAddrs;
    llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>>
        WrapperVLASizes;
    WrapperF = emitOutlinedFunctionPrologueAggregate(
        CGF&: WrapperCGF, Args&: WrapperArgs, LocalAddrs&: WrapperLocalAddrs, VLASizes&: WrapperVLASizes,
        CXXThisValue&: WrapperCGF.CXXThisValue, ContextV&: WrapperContextV, CS: S, Loc, FunctionName);
  }

  FunctionArgList Args;
  llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs;
  llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes;
  llvm::Function *F;

  if (NeedWrapperFunction) {
    // The inner function uses individual (uncast) parameters; the wrapper
    // emitted above keeps the aggregate __context convention.
    SmallString<256> Buffer;
    llvm::raw_svector_ostream Out(Buffer);
    Out << FunctionName << "_debug__";

    FunctionOptions FO(&S, /*UIntPtrCastRequired=*/false,
                       /*RegisterCastedArgsOnly=*/false, Out.str(), Loc,
                       /*IsDeviceKernel=*/false);
    F = emitOutlinedFunctionPrologue(CGF&: *this, Args, LocalAddrs, VLASizes,
                                     CXXThisValue, FO);
  } else {
    llvm::Value *ContextV = nullptr;
    F = emitOutlinedFunctionPrologueAggregate(CGF&: *this, Args, LocalAddrs, VLASizes,
                                              CXXThisValue, ContextV, CS: S, Loc,
                                              FunctionName);

    // NOTE(review): this local RD shadows the function-scope RD declared
    // above (same captured record) — consider removing the redeclaration.
    const RecordDecl *RD = S.getCapturedRecordDecl();
    // The implicit parameters (other than the context parameter itself) are
    // stored in __context slots after the captured fields; load each one
    // into a local temporary and register it.
    unsigned FieldIdx = RD->getNumFields();
    for (unsigned I = 0; I < CD->getNumParams(); ++I) {
      const ImplicitParamDecl *Param = CD->getParam(i: I);
      if (Param == CD->getContextParam())
        continue;
      llvm::Value *ParamAddr = Builder.CreateConstInBoundsGEP1_32(
          Ty: IntPtrTy, Ptr: ContextV, Idx0: FieldIdx, Name: Twine(Param->getName()) + ".addr");
      llvm::Value *ParamVal = Builder.CreateAlignedLoad(
          Ty: Builder.getPtrTy(), Ptr: ParamAddr,
          Align: CGM.getDataLayout().getPointerABIAlignment(AS: 0), Name: Param->getName());
      Address ParamLocalAddr =
          CreateMemTemp(T: Param->getType(), Name: Param->getName());
      Builder.CreateStore(Val: ParamVal, Addr: ParamLocalAddr);
      LocalAddrs.insert(KV: {Param, {Param, ParamLocalAddr}});
      ++FieldIdx;
    }
  }

  // Map captured variables to their local addresses and record VLA sizes
  // before emitting the body.
  CodeGenFunction::OMPPrivateScope LocalScope(*this);
  for (const auto &LocalAddrPair : LocalAddrs) {
    if (LocalAddrPair.second.first)
      LocalScope.addPrivate(LocalVD: LocalAddrPair.second.first,
                            Addr: LocalAddrPair.second.second);
  }
  (void)LocalScope.Privatize();
  for (const auto &VLASizePair : VLASizes)
    VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second;
  PGO->assignRegionCounters(GD: GlobalDecl(CD), Fn: F);
  CapturedStmtInfo->EmitBody(CGF&: *this, S: CD->getBody());
  (void)LocalScope.ForceCleanup();
  FinishFunction(EndLoc: CD->getBodyRBrace());

  if (!NeedWrapperFunction)
    return F;

  // Reverse the order.
  // Keep the wrapper immediately after the inner function in the module.
  WrapperF->removeFromParent();
  F->getParent()->getFunctionList().insertAfter(where: F->getIterator(), New: WrapperF);

  // Emit the wrapper body: load each of F's arguments from the
  // corresponding __context slot and forward the call.
  llvm::Align PtrAlign = CGM.getDataLayout().getPointerABIAlignment(AS: 0);
  llvm::SmallVector<llvm::Value *, 16> CallArgs;
  assert(CD->getContextParamPosition() == 0 &&
         "Expected context param at position 0 for target regions");
  // NOTE(review): F->getNumOperands() counts the llvm::Function's operands,
  // not its parameters; the intent here appears to be F->arg_size()
  // (one argument per captured field plus the implicit dyn_ptr) — confirm.
  assert(RD->getNumFields() + 1 == F->getNumOperands() &&
         "Argument count mismatch");

  for (auto [FD, InnerParam, SlotIdx] : llvm::zip(
           t: RD->fields(), u: F->args(), args: llvm::seq<unsigned>(Size: RD->getNumFields()))) {
    llvm::Value *Slot = WrapperCGF.Builder.CreateConstInBoundsGEP1_32(
        Ty: WrapperCGF.IntPtrTy, Ptr: WrapperContextV, Idx0: SlotIdx);
    llvm::Value *Val = WrapperCGF.Builder.CreateAlignedLoad(
        Ty: InnerParam.getType(), Ptr: Slot, Align: PtrAlign, Name: InnerParam.getName());
    CallArgs.push_back(Elt: Val);
  }

  // Handle the load from the implicit dyn_ptr at the end of the __context.
  unsigned SlotIdx = RD->getNumFields();
  auto InnerParam = F->arg_begin() + SlotIdx;
  llvm::Value *Slot = WrapperCGF.Builder.CreateConstInBoundsGEP1_32(
      Ty: WrapperCGF.IntPtrTy, Ptr: WrapperContextV, Idx0: SlotIdx);
  llvm::Value *Val = WrapperCGF.Builder.CreateAlignedLoad(
      Ty: InnerParam->getType(), Ptr: Slot, Align: PtrAlign, Name: InnerParam->getName());
  CallArgs.push_back(Elt: Val);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF&: WrapperCGF, Loc, OutlinedFn: F, Args: CallArgs);
  WrapperCGF.FinishFunction();
  return WrapperF;
}
1015
1016//===----------------------------------------------------------------------===//
1017// OpenMP Directive Emission
1018//===----------------------------------------------------------------------===//
/// Emit an element-by-element copy loop between two arrays.
///
/// \param DestAddr Address of the destination array.
/// \param SrcAddr Address of the source array (rewritten to the destination's
///        element type).
/// \param OriginalType The array type of both operands; drilled down to the
///        base element type, so multi-dimensional arrays are copied as a flat
///        sequence of base elements.
/// \param CopyGen Callback that emits the copy of one element, given the
///        (destination, source) element addresses.
void CodeGenFunction::EmitOMPAggregateAssign(
    Address DestAddr, Address SrcAddr, QualType OriginalType,
    const llvm::function_ref<void(Address, Address)> CopyGen) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = OriginalType->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = emitArrayLength(arrayType: ArrayTy, baseType&: ElementTy, addr&: DestAddr);
  SrcAddr = SrcAddr.withElementType(ElemTy: DestAddr.getElementType());

  llvm::Value *SrcBegin = SrcAddr.emitRawPointer(CGF&: *this);
  llvm::Value *DestBegin = DestAddr.emitRawPointer(CGF&: *this);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd = Builder.CreateInBoundsGEP(Ty: DestAddr.getElementType(),
                                                   Ptr: DestBegin, IdxList: NumElements);

  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = createBasicBlock(name: "omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = createBasicBlock(name: "omp.arraycpy.done");
  // Skip the loop entirely for zero-length (e.g. VLA) arrays.
  llvm::Value *IsEmpty =
      Builder.CreateICmpEQ(LHS: DestBegin, RHS: DestEnd, Name: "omp.arraycpy.isempty");
  Builder.CreateCondBr(Cond: IsEmpty, True: DoneBB, False: BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = Builder.GetInsertBlock();
  EmitBlock(BB: BodyBB);

  CharUnits ElementSize = getContext().getTypeSizeInChars(T: ElementTy);

  // PHIs track the current source/destination element; the back-edge
  // incoming values are added after the body is emitted (see below).
  llvm::PHINode *SrcElementPHI =
      Builder.CreatePHI(Ty: SrcBegin->getType(), NumReservedValues: 2, Name: "omp.arraycpy.srcElementPast");
  SrcElementPHI->addIncoming(V: SrcBegin, BB: EntryBB);
  Address SrcElementCurrent =
      Address(SrcElementPHI, SrcAddr.getElementType(),
              SrcAddr.getAlignment().alignmentOfArrayElement(elementSize: ElementSize));

  llvm::PHINode *DestElementPHI = Builder.CreatePHI(
      Ty: DestBegin->getType(), NumReservedValues: 2, Name: "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(V: DestBegin, BB: EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI, DestAddr.getElementType(),
              DestAddr.getAlignment().alignmentOfArrayElement(elementSize: ElementSize));

  // Emit copy.
  CopyGen(DestElementCurrent, SrcElementCurrent);

  // Shift the address forward by one element.
  llvm::Value *DestElementNext =
      Builder.CreateConstGEP1_32(Ty: DestAddr.getElementType(), Ptr: DestElementPHI,
                                 /*Idx0=*/1, Name: "omp.arraycpy.dest.element");
  llvm::Value *SrcElementNext =
      Builder.CreateConstGEP1_32(Ty: SrcAddr.getElementType(), Ptr: SrcElementPHI,
                                 /*Idx0=*/1, Name: "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      Builder.CreateICmpEQ(LHS: DestElementNext, RHS: DestEnd, Name: "omp.arraycpy.done");
  Builder.CreateCondBr(Cond: Done, True: DoneBB, False: BodyBB);
  // The back-edge predecessor is the builder's current block, which may not
  // be BodyBB if CopyGen emitted additional control flow.
  DestElementPHI->addIncoming(V: DestElementNext, BB: Builder.GetInsertBlock());
  SrcElementPHI->addIncoming(V: SrcElementNext, BB: Builder.GetInsertBlock());

  // Done.
  EmitBlock(BB: DoneBB, /*IsFinished=*/true);
}
1083
1084void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr,
1085 Address SrcAddr, const VarDecl *DestVD,
1086 const VarDecl *SrcVD, const Expr *Copy) {
1087 if (OriginalType->isArrayType()) {
1088 const auto *BO = dyn_cast<BinaryOperator>(Val: Copy);
1089 if (BO && BO->getOpcode() == BO_Assign) {
1090 // Perform simple memcpy for simple copying.
1091 LValue Dest = MakeAddrLValue(Addr: DestAddr, T: OriginalType);
1092 LValue Src = MakeAddrLValue(Addr: SrcAddr, T: OriginalType);
1093 EmitAggregateAssign(Dest, Src, EltTy: OriginalType);
1094 } else {
1095 // For arrays with complex element types perform element by element
1096 // copying.
1097 EmitOMPAggregateAssign(
1098 DestAddr, SrcAddr, OriginalType,
1099 CopyGen: [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) {
1100 // Working with the single array element, so have to remap
1101 // destination and source variables to corresponding array
1102 // elements.
1103 CodeGenFunction::OMPPrivateScope Remap(*this);
1104 Remap.addPrivate(LocalVD: DestVD, Addr: DestElement);
1105 Remap.addPrivate(LocalVD: SrcVD, Addr: SrcElement);
1106 (void)Remap.Privatize();
1107 EmitIgnoredExpr(E: Copy);
1108 });
1109 }
1110 } else {
1111 // Remap pseudo source variable to private copy.
1112 CodeGenFunction::OMPPrivateScope Remap(*this);
1113 Remap.addPrivate(LocalVD: SrcVD, Addr: SrcAddr);
1114 Remap.addPrivate(LocalVD: DestVD, Addr: DestAddr);
1115 (void)Remap.Privatize();
1116 // Emit copying of the whole variable.
1117 EmitIgnoredExpr(E: Copy);
1118 }
1119}
1120
/// Emit the private copies for 'firstprivate' clause variables of \p D and
/// register them in \p PrivateScope.
///
/// \returns true if at least one emitted firstprivate variable also appears
/// in a 'lastprivate' clause, in which case the caller must arrange for the
/// final value to be copied back.
bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
                                                OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S: D);
  bool DeviceConstTarget = getLangOpts().OpenMPIsTargetDevice &&
                           isOpenMPTargetExecutionDirective(DKind: EKind);
  bool FirstprivateIsLastprivate = false;
  // Collect the variables that are also lastprivate (with their modifier) so
  // their firstprivate copies are forced and conditional lastprivates get
  // their special storage.
  llvm::DenseMap<const VarDecl *, OpenMPLastprivateModifier> Lastprivates;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    for (const auto *D : C->varlist())
      Lastprivates.try_emplace(
          Key: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: D)->getDecl())->getCanonicalDecl(),
          Args: C->getKind());
  }
  llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
  llvm::SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
  getOpenMPCaptureRegions(CaptureRegions, DKind: EKind);
  // Force emission of the firstprivate copy if the directive does not emit
  // outlined function, like omp for, omp simd, omp distribute etc.
  bool MustEmitFirstprivateCopy =
      CaptureRegions.size() == 1 && CaptureRegions.back() == OMPD_unknown;
  for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
    const auto *IRef = C->varlist_begin();
    const auto *InitsRef = C->inits().begin();
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IRef)->getDecl());
      bool ThisFirstprivateIsLastprivate =
          Lastprivates.count(Val: OrigVD->getCanonicalDecl()) > 0;
      const FieldDecl *FD = CapturedStmtInfo->lookup(VD: OrigVD);
      const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: IInit)->getDecl());
      // If the variable is captured by value (non-reference field) into an
      // outlined region, the capture itself already acts as the firstprivate
      // copy; skip emitting another one unless forced, also lastprivate, or
      // using a custom allocator.
      if (!MustEmitFirstprivateCopy && !ThisFirstprivateIsLastprivate && FD &&
          !FD->getType()->isReferenceType() &&
          (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) {
        EmittedAsFirstprivate.insert(V: OrigVD->getCanonicalDecl());
        ++IRef;
        ++InitsRef;
        continue;
      }
      // Do not emit copy for firstprivate constant variables in target regions,
      // captured by reference.
      if (DeviceConstTarget && OrigVD->getType().isConstant(Ctx: getContext()) &&
          FD && FD->getType()->isReferenceType() &&
          (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) {
        EmittedAsFirstprivate.insert(V: OrigVD->getCanonicalDecl());
        ++IRef;
        ++InitsRef;
        continue;
      }
      FirstprivateIsLastprivate =
          FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate;
      // Emit each variable at most once even if it appears in several
      // firstprivate clauses.
      if (EmittedAsFirstprivate.insert(V: OrigVD->getCanonicalDecl()).second) {
        const auto *VDInit =
            cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *InitsRef)->getDecl());
        bool IsRegistered;
        DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
                        /*RefersToEnclosingVariableOrCapture=*/FD != nullptr,
                        (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
        LValue OriginalLVal;
        if (!FD) {
          // Check if the firstprivate variable is just a constant value.
          ConstantEmission CE = tryEmitAsConstant(RefExpr: &DRE);
          if (CE && !CE.isReference()) {
            // Constant value, no need to create a copy.
            ++IRef;
            ++InitsRef;
            continue;
          }
          if (CE && CE.isReference()) {
            OriginalLVal = CE.getReferenceLValue(CGF&: *this, RefExpr: &DRE);
          } else {
            assert(!CE && "Expected non-constant firstprivate.");
            OriginalLVal = EmitLValue(E: &DRE);
          }
        } else {
          OriginalLVal = EmitLValue(E: &DRE);
        }
        QualType Type = VD->getType();
        if (Type->isArrayType()) {
          // Emit VarDecl with copy init for arrays.
          // Get the address of the original variable captured in current
          // captured region.
          AutoVarEmission Emission = EmitAutoVarAlloca(var: *VD);
          const Expr *Init = VD->getInit();
          if (!isa<CXXConstructExpr>(Val: Init) || isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            LValue Dest = MakeAddrLValue(Addr: Emission.getAllocatedAddress(), T: Type);
            EmitAggregateAssign(Dest, Src: OriginalLVal, EltTy: Type);
          } else {
            EmitOMPAggregateAssign(
                DestAddr: Emission.getAllocatedAddress(), SrcAddr: OriginalLVal.getAddress(), OriginalType: Type,
                CopyGen: [this, VDInit, Init](Address DestElement, Address SrcElement) {
                  // Clean up any temporaries needed by the
                  // initialization.
                  RunCleanupsScope InitScope(*this);
                  // Emit initialization for single element.
                  setAddrOfLocalVar(VD: VDInit, Addr: SrcElement);
                  EmitAnyExprToMem(E: Init, Location: DestElement,
                                   Quals: Init->getType().getQualifiers(),
                                   /*IsInitializer*/ false);
                  LocalDeclMap.erase(Val: VDInit);
                });
          }
          EmitAutoVarCleanups(emission: Emission);
          IsRegistered =
              PrivateScope.addPrivate(LocalVD: OrigVD, Addr: Emission.getAllocatedAddress());
        } else {
          Address OriginalAddr = OriginalLVal.getAddress();
          // Emit private VarDecl with copy init.
          // Remap temp VDInit variable to the address of the original
          // variable (for proper handling of captured global variables).
          setAddrOfLocalVar(VD: VDInit, Addr: OriginalAddr);
          EmitDecl(D: *VD);
          LocalDeclMap.erase(Val: VDInit);
          Address VDAddr = GetAddrOfLocalVar(VD);
          if (ThisFirstprivateIsLastprivate &&
              Lastprivates[OrigVD->getCanonicalDecl()] ==
                  OMPC_LASTPRIVATE_conditional) {
            // Create/init special variable for lastprivate conditionals.
            // The freshly initialized value is transferred into the
            // runtime-provided conditional storage.
            llvm::Value *V =
                EmitLoadOfScalar(lvalue: MakeAddrLValue(Addr: VDAddr, T: (*IRef)->getType(),
                                                Source: AlignmentSource::Decl),
                                 Loc: (*IRef)->getExprLoc());
            VDAddr = CGM.getOpenMPRuntime().emitLastprivateConditionalInit(
                CGF&: *this, VD: OrigVD);
            EmitStoreOfScalar(value: V, lvalue: MakeAddrLValue(Addr: VDAddr, T: (*IRef)->getType(),
                                                 Source: AlignmentSource::Decl));
            LocalDeclMap.erase(Val: VD);
            setAddrOfLocalVar(VD, Addr: VDAddr);
          }
          IsRegistered = PrivateScope.addPrivate(LocalVD: OrigVD, Addr: VDAddr);
        }
        assert(IsRegistered &&
               "firstprivate var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
      ++InitsRef;
    }
  }
  return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty();
}
1264
1265void CodeGenFunction::EmitOMPPrivateClause(
1266 const OMPExecutableDirective &D,
1267 CodeGenFunction::OMPPrivateScope &PrivateScope) {
1268 if (!HaveInsertPoint())
1269 return;
1270 llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
1271 for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) {
1272 auto IRef = C->varlist_begin();
1273 for (const Expr *IInit : C->private_copies()) {
1274 const auto *OrigVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IRef)->getDecl());
1275 if (EmittedAsPrivate.insert(V: OrigVD->getCanonicalDecl()).second) {
1276 const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: IInit)->getDecl());
1277 EmitDecl(D: *VD);
1278 // Emit private VarDecl with copy init.
1279 bool IsRegistered =
1280 PrivateScope.addPrivate(LocalVD: OrigVD, Addr: GetAddrOfLocalVar(VD));
1281 assert(IsRegistered && "private var already registered as private");
1282 // Silence the warning about unused variable.
1283 (void)IsRegistered;
1284 }
1285 ++IRef;
1286 }
1287 }
1288}
1289
/// Emit copying of 'copyin' threadprivate variables from the master thread's
/// copies into the current thread's copies, guarded so the master thread
/// itself skips the copies.
///
/// \returns true if any copyin code was emitted; the caller is then expected
/// to emit the trailing barrier shown in the pseudocode below.
bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
  if (!HaveInsertPoint())
    return false;
  // threadprivate_var1 = master_threadprivate_var1;
  // operator=(threadprivate_var2, master_threadprivate_var2);
  // ...
  // __kmpc_barrier(&loc, global_tid);
  llvm::DenseSet<const VarDecl *> CopiedVars;
  llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
  for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (const Expr *AssignOp : C->assignment_ops()) {
      const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IRef)->getDecl());
      QualType Type = VD->getType();
      // Copy each variable at most once, even if listed in several clauses.
      if (CopiedVars.insert(V: VD->getCanonicalDecl()).second) {
        // Get the address of the master variable. If we are emitting code with
        // TLS support, the address is passed from the master as field in the
        // captured declaration.
        Address MasterAddr = Address::invalid();
        if (getLangOpts().OpenMPUseTLS &&
            getContext().getTargetInfo().isTLSSupported()) {
          assert(CapturedStmtInfo->lookup(VD) &&
                 "Copyin threadprivates should have been captured!");
          DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD), true,
                          (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
          MasterAddr = EmitLValue(E: &DRE).getAddress();
          // Drop the mapping so the reference below resolves to the
          // thread-local copy rather than the captured master address.
          LocalDeclMap.erase(Val: VD);
        } else {
          MasterAddr =
              Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(D: VD)
                                          : CGM.GetAddrOfGlobal(GD: VD),
                      CGM.getTypes().ConvertTypeForMem(T: VD->getType()),
                      getContext().getDeclAlign(D: VD));
        }
        // Get the address of the threadprivate variable.
        Address PrivateAddr = EmitLValue(E: *IRef).getAddress();
        // On the first copied variable, emit the master-thread check that
        // guards all the copies.
        if (CopiedVars.size() == 1) {
          // At first check if current thread is a master thread. If it is, no
          // need to copy data.
          CopyBegin = createBasicBlock(name: "copyin.not.master");
          CopyEnd = createBasicBlock(name: "copyin.not.master.end");
          // TODO: Avoid ptrtoint conversion.
          auto *MasterAddrInt = Builder.CreatePtrToInt(
              V: MasterAddr.emitRawPointer(CGF&: *this), DestTy: CGM.IntPtrTy);
          auto *PrivateAddrInt = Builder.CreatePtrToInt(
              V: PrivateAddr.emitRawPointer(CGF&: *this), DestTy: CGM.IntPtrTy);
          // On the master thread the private address equals the master
          // address, so the copies are skipped.
          Builder.CreateCondBr(
              Cond: Builder.CreateICmpNE(LHS: MasterAddrInt, RHS: PrivateAddrInt), True: CopyBegin,
              False: CopyEnd);
          EmitBlock(BB: CopyBegin);
        }
        const auto *SrcVD =
            cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *ISrcRef)->getDecl());
        const auto *DestVD =
            cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IDestRef)->getDecl());
        EmitOMPCopy(OriginalType: Type, DestAddr: PrivateAddr, SrcAddr: MasterAddr, DestVD, SrcVD, Copy: AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
  }
  if (CopyEnd) {
    // Exit out of copying procedure for non-master thread.
    EmitBlock(BB: CopyEnd, /*IsFinished=*/true);
    return true;
  }
  return false;
}
1361
/// Emit initialization for 'lastprivate' clause variables of \p D: register
/// the original variables' addresses (for the final copy-back) and emit the
/// private copies in \p PrivateScope.
///
/// \returns true if \p D has at least one 'lastprivate' clause, so the
/// caller knows a final update is needed.
bool CodeGenFunction::EmitOMPLastprivateClauseInit(
    const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool HasAtLeastOneLastprivate = false;
  OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S: D);
  // Simd loop control variables are privatized by the simd codegen itself,
  // so they must not get another private copy here.
  llvm::DenseSet<const VarDecl *> SIMDLCVs;
  if (isOpenMPSimdDirective(DKind: EKind)) {
    const auto *LoopDirective = cast<OMPLoopDirective>(Val: &D);
    for (const Expr *C : LoopDirective->counters()) {
      SIMDLCVs.insert(
          V: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: C)->getDecl())->getCanonicalDecl());
    }
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    HasAtLeastOneLastprivate = true;
    if (isOpenMPTaskLoopDirective(DKind: EKind) && !getLangOpts().OpenMPSimd)
      break;
    const auto *IRef = C->varlist_begin();
    const auto *IDestRef = C->destination_exprs().begin();
    for (const Expr *IInit : C->private_copies()) {
      // Keep the address of the original variable for future update at the end
      // of the loop.
      const auto *OrigVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IRef)->getDecl());
      // Taskloops do not require additional initialization, it is done in
      // runtime support library.
      if (AlreadyEmittedVars.insert(V: OrigVD->getCanonicalDecl()).second) {
        const auto *DestVD =
            cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IDestRef)->getDecl());
        DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
                        /*RefersToEnclosingVariableOrCapture=*/
                        CapturedStmtInfo->lookup(VD: OrigVD) != nullptr,
                        (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
        // Map the destination pseudo-variable to the original address so the
        // final copy-back in EmitOMPLastprivateClauseFinal can find it.
        PrivateScope.addPrivate(LocalVD: DestVD, Addr: EmitLValue(E: &DRE).getAddress());
        // Check if the variable is also a firstprivate: in this case IInit is
        // not generated. Initialization of this variable will happen in codegen
        // for 'firstprivate' clause.
        if (IInit && !SIMDLCVs.count(V: OrigVD->getCanonicalDecl())) {
          const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: IInit)->getDecl());
          Address VDAddr = Address::invalid();
          if (C->getKind() == OMPC_LASTPRIVATE_conditional) {
            // Conditional lastprivates use runtime-provided storage instead
            // of an ordinary local alloca.
            VDAddr = CGM.getOpenMPRuntime().emitLastprivateConditionalInit(
                CGF&: *this, VD: OrigVD);
            setAddrOfLocalVar(VD, Addr: VDAddr);
          } else {
            // Emit private VarDecl with copy init.
            EmitDecl(D: *VD);
            VDAddr = GetAddrOfLocalVar(VD);
          }
          bool IsRegistered = PrivateScope.addPrivate(LocalVD: OrigVD, Addr: VDAddr);
          assert(IsRegistered &&
                 "lastprivate var already registered as private");
          (void)IsRegistered;
        }
      }
      ++IRef;
      ++IDestRef;
    }
  }
  return HasAtLeastOneLastprivate;
}
1424
/// Emit the final copy-back of 'lastprivate' clause variables: on the thread
/// that executed the (sequentially) last iteration, copy each private copy
/// back to the original variable.
///
/// \param NoFinals If true, loop counters are not updated with their final
///        expressions before copy-back (they are excluded instead).
/// \param IsLastIterCond Runtime condition selecting the last iteration; if
///        null, the copies are emitted unconditionally.
void CodeGenFunction::EmitOMPLastprivateClauseFinal(
    const OMPExecutableDirective &D, bool NoFinals,
    llvm::Value *IsLastIterCond) {
  if (!HaveInsertPoint())
    return;
  // Emit following code:
  // if (<IsLastIterCond>) {
  //   orig_var1 = private_orig_var1;
  //   ...
  //   orig_varn = private_orig_varn;
  // }
  llvm::BasicBlock *ThenBB = nullptr;
  llvm::BasicBlock *DoneBB = nullptr;
  if (IsLastIterCond) {
    // Emit implicit barrier if at least one lastprivate conditional is found
    // and this is not a simd mode.
    if (!getLangOpts().OpenMPSimd &&
        llvm::any_of(Range: D.getClausesOfKind<OMPLastprivateClause>(),
                     P: [](const OMPLastprivateClause *C) {
                       return C->getKind() == OMPC_LASTPRIVATE_conditional;
                     })) {
      CGM.getOpenMPRuntime().emitBarrierCall(CGF&: *this, Loc: D.getBeginLoc(),
                                             Kind: OMPD_unknown,
                                             /*EmitChecks=*/false,
                                             /*ForceSimpleCall=*/true);
    }
    ThenBB = createBasicBlock(name: ".omp.lastprivate.then");
    DoneBB = createBasicBlock(name: ".omp.lastprivate.done");
    Builder.CreateCondBr(Cond: IsLastIterCond, True: ThenBB, False: DoneBB);
    EmitBlock(BB: ThenBB);
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  // Map loop counters to their "final value" expressions so counters that
  // are lastprivate get updated before the copy-back.
  llvm::DenseMap<const VarDecl *, const Expr *> LoopCountersAndUpdates;
  if (const auto *LoopDirective = dyn_cast<OMPLoopDirective>(Val: &D)) {
    auto IC = LoopDirective->counters().begin();
    for (const Expr *F : LoopDirective->finals()) {
      const auto *D =
          cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IC)->getDecl())->getCanonicalDecl();
      if (NoFinals)
        AlreadyEmittedVars.insert(V: D);
      else
        LoopCountersAndUpdates[D] = F;
      ++IC;
    }
  }
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (const Expr *AssignOp : C->assignment_ops()) {
      const auto *PrivateVD =
          cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IRef)->getDecl());
      QualType Type = PrivateVD->getType();
      const auto *CanonicalVD = PrivateVD->getCanonicalDecl();
      // Copy each variable back at most once, even if listed in several
      // clauses.
      if (AlreadyEmittedVars.insert(V: CanonicalVD).second) {
        // If lastprivate variable is a loop control variable for loop-based
        // directive, update its value before copyin back to original
        // variable.
        if (const Expr *FinalExpr = LoopCountersAndUpdates.lookup(Val: CanonicalVD))
          EmitIgnoredExpr(E: FinalExpr);
        const auto *SrcVD =
            cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *ISrcRef)->getDecl());
        const auto *DestVD =
            cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IDestRef)->getDecl());
        // Get the address of the private variable.
        Address PrivateAddr = GetAddrOfLocalVar(VD: PrivateVD);
        // Reference-typed privates are copied through the referenced object.
        if (const auto *RefTy = PrivateVD->getType()->getAs<ReferenceType>())
          PrivateAddr = Address(
              Builder.CreateLoad(Addr: PrivateAddr),
              CGM.getTypes().ConvertTypeForMem(T: RefTy->getPointeeType()),
              CGM.getNaturalTypeAlignment(T: RefTy->getPointeeType()));
        // Store the last value to the private copy in the last iteration.
        if (C->getKind() == OMPC_LASTPRIVATE_conditional)
          CGM.getOpenMPRuntime().emitLastprivateConditionalFinalUpdate(
              CGF&: *this, PrivLVal: MakeAddrLValue(Addr: PrivateAddr, T: (*IRef)->getType()), VD: PrivateVD,
              Loc: (*IRef)->getExprLoc());
        // Get the address of the original variable.
        Address OriginalAddr = GetAddrOfLocalVar(VD: DestVD);
        EmitOMPCopy(OriginalType: Type, DestAddr: OriginalAddr, SrcAddr: PrivateAddr, DestVD, SrcVD, Copy: AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
    if (const Expr *PostUpdate = C->getPostUpdateExpr())
      EmitIgnoredExpr(E: PostUpdate);
  }
  if (IsLastIterCond)
    EmitBlock(BB: DoneBB, /*IsFinished=*/true);
}
1515
/// Emits initialization of private copies for the reduction clauses of
/// directive \p D: allocates each private copy, runs its reduction
/// initializer, registers it in \p PrivateScope, and — for reductions with
/// the 'task' modifier — materializes the task reduction descriptor variable.
/// \param ForInscan When true, process only 'inscan'-modified reduction
///        clauses; when false, skip them (they are handled in a separate
///        pass).
void CodeGenFunction::EmitOMPReductionClauseInit(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope, bool ForInscan) {
  if (!HaveInsertPoint())
    return;
  // Flattened per-variable lists gathered across all matching reduction
  // clauses; indices into these arrays stay in sync below.
  SmallVector<const Expr *, 4> Shareds;
  SmallVector<const Expr *, 4> Privates;
  SmallVector<const Expr *, 4> ReductionOps;
  SmallVector<const Expr *, 4> LHSs;
  SmallVector<const Expr *, 4> RHSs;
  OMPTaskDataTy Data;
  SmallVector<const Expr *, 4> TaskLHSs;
  SmallVector<const Expr *, 4> TaskRHSs;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    // Handle only the requested group in this pass: inscan clauses when
    // ForInscan is set, all other clauses otherwise.
    if (ForInscan != (C->getModifier() == OMPC_REDUCTION_inscan))
      continue;
    Shareds.append(in_start: C->varlist_begin(), in_end: C->varlist_end());
    Privates.append(in_start: C->privates().begin(), in_end: C->privates().end());
    ReductionOps.append(in_start: C->reduction_ops().begin(), in_end: C->reduction_ops().end());
    LHSs.append(in_start: C->lhs_exprs().begin(), in_end: C->lhs_exprs().end());
    RHSs.append(in_start: C->rhs_exprs().begin(), in_end: C->rhs_exprs().end());
    if (C->getModifier() == OMPC_REDUCTION_task) {
      // 'task' reductions additionally need their data recorded so the
      // runtime task reduction descriptor can be emitted below.
      Data.ReductionVars.append(in_start: C->privates().begin(), in_end: C->privates().end());
      Data.ReductionOrigs.append(in_start: C->varlist_begin(), in_end: C->varlist_end());
      Data.ReductionCopies.append(in_start: C->privates().begin(), in_end: C->privates().end());
      Data.ReductionOps.append(in_start: C->reduction_ops().begin(),
                               in_end: C->reduction_ops().end());
      TaskLHSs.append(in_start: C->lhs_exprs().begin(), in_end: C->lhs_exprs().end());
      TaskRHSs.append(in_start: C->rhs_exprs().begin(), in_end: C->rhs_exprs().end());
    }
  }
  ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps);
  unsigned Count = 0;
  auto *ILHS = LHSs.begin();
  auto *IRHS = RHSs.begin();
  auto *IPriv = Privates.begin();
  for (const Expr *IRef : Shareds) {
    const auto *PrivateVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IPriv)->getDecl());
    // Emit private VarDecl with reduction init.
    RedCG.emitSharedOrigLValue(CGF&: *this, N: Count);
    RedCG.emitAggregateType(CGF&: *this, N: Count);
    AutoVarEmission Emission = EmitAutoVarAlloca(var: *PrivateVD);
    // Initialize the private copy; the default-init callback falls back to
    // the regular automatic-variable initialization.
    RedCG.emitInitialization(CGF&: *this, N: Count, PrivateAddr: Emission.getAllocatedAddress(),
                             SharedAddr: RedCG.getSharedLValue(N: Count).getAddress(),
                             DefaultInit: [&Emission](CodeGenFunction &CGF) {
                               CGF.EmitAutoVarInit(emission: Emission);
                               return true;
                             });
    EmitAutoVarCleanups(emission: Emission);
    Address BaseAddr = RedCG.adjustPrivateAddress(
        CGF&: *this, N: Count, PrivateAddr: Emission.getAllocatedAddress());
    bool IsRegistered =
        PrivateScope.addPrivate(LocalVD: RedCG.getBaseDecl(N: Count), Addr: BaseAddr);
    assert(IsRegistered && "private var already registered as private");
    // Silence the warning about unused variable.
    (void)IsRegistered;

    const auto *LHSVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *ILHS)->getDecl());
    const auto *RHSVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IRHS)->getDecl());
    QualType Type = PrivateVD->getType();
    bool isaOMPArraySectionExpr = isa<ArraySectionExpr>(Val: IRef);
    // The LHS/RHS helper variables are bound differently depending on
    // whether the reduction item is an array section, an array subscript,
    // or a plain (possibly array-typed) variable.
    if (isaOMPArraySectionExpr && Type->isVariablyModifiedType()) {
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      PrivateScope.addPrivate(LocalVD: LHSVD, Addr: RedCG.getSharedLValue(N: Count).getAddress());
      PrivateScope.addPrivate(LocalVD: RHSVD, Addr: GetAddrOfLocalVar(VD: PrivateVD));
    } else if ((isaOMPArraySectionExpr && Type->isScalarType()) ||
               isa<ArraySubscriptExpr>(Val: IRef)) {
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      PrivateScope.addPrivate(LocalVD: LHSVD, Addr: RedCG.getSharedLValue(N: Count).getAddress());
      PrivateScope.addPrivate(LocalVD: RHSVD,
                              Addr: GetAddrOfLocalVar(VD: PrivateVD).withElementType(
                                  ElemTy: ConvertTypeForMem(T: RHSVD->getType())));
    } else {
      QualType Type = PrivateVD->getType();
      bool IsArray = getContext().getAsArrayType(T: Type) != nullptr;
      Address OriginalAddr = RedCG.getSharedLValue(N: Count).getAddress();
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      if (IsArray) {
        OriginalAddr =
            OriginalAddr.withElementType(ElemTy: ConvertTypeForMem(T: LHSVD->getType()));
      }
      PrivateScope.addPrivate(LocalVD: LHSVD, Addr: OriginalAddr);
      PrivateScope.addPrivate(
          LocalVD: RHSVD, Addr: IsArray ? GetAddrOfLocalVar(VD: PrivateVD).withElementType(
                               ElemTy: ConvertTypeForMem(T: RHSVD->getType()))
                         : GetAddrOfLocalVar(VD: PrivateVD));
    }
    ++ILHS;
    ++IRHS;
    ++IPriv;
    ++Count;
  }
  if (!Data.ReductionVars.empty()) {
    // At least one clause used the 'task' modifier: emit the runtime task
    // reduction init call and store the returned descriptor into the
    // directive's task-reduction reference variable.
    OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S: D);
    Data.IsReductionWithTaskMod = true;
    Data.IsWorksharingReduction = isOpenMPWorksharingDirective(DKind: EKind);
    llvm::Value *ReductionDesc = CGM.getOpenMPRuntime().emitTaskReductionInit(
        CGF&: *this, Loc: D.getBeginLoc(), LHSExprs: TaskLHSs, RHSExprs: TaskRHSs, Data);
    const Expr *TaskRedRef = nullptr;
    // Only the directive kinds listed below carry a task-reduction
    // reference expression; anything else is a frontend invariant violation.
    switch (EKind) {
    case OMPD_parallel:
      TaskRedRef = cast<OMPParallelDirective>(Val: D).getTaskReductionRefExpr();
      break;
    case OMPD_for:
      TaskRedRef = cast<OMPForDirective>(Val: D).getTaskReductionRefExpr();
      break;
    case OMPD_sections:
      TaskRedRef = cast<OMPSectionsDirective>(Val: D).getTaskReductionRefExpr();
      break;
    case OMPD_parallel_for:
      TaskRedRef = cast<OMPParallelForDirective>(Val: D).getTaskReductionRefExpr();
      break;
    case OMPD_parallel_master:
      TaskRedRef =
          cast<OMPParallelMasterDirective>(Val: D).getTaskReductionRefExpr();
      break;
    case OMPD_parallel_sections:
      TaskRedRef =
          cast<OMPParallelSectionsDirective>(Val: D).getTaskReductionRefExpr();
      break;
    case OMPD_target_parallel:
      TaskRedRef =
          cast<OMPTargetParallelDirective>(Val: D).getTaskReductionRefExpr();
      break;
    case OMPD_target_parallel_for:
      TaskRedRef =
          cast<OMPTargetParallelForDirective>(Val: D).getTaskReductionRefExpr();
      break;
    case OMPD_distribute_parallel_for:
      TaskRedRef =
          cast<OMPDistributeParallelForDirective>(Val: D).getTaskReductionRefExpr();
      break;
    case OMPD_teams_distribute_parallel_for:
      TaskRedRef = cast<OMPTeamsDistributeParallelForDirective>(Val: D)
                       .getTaskReductionRefExpr();
      break;
    case OMPD_target_teams_distribute_parallel_for:
      TaskRedRef = cast<OMPTargetTeamsDistributeParallelForDirective>(Val: D)
                       .getTaskReductionRefExpr();
      break;
    case OMPD_simd:
    case OMPD_for_simd:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_parallel_for_simd:
    case OMPD_task:
    case OMPD_taskyield:
    case OMPD_error:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_ordered:
    case OMPD_atomic:
    case OMPD_teams:
    case OMPD_target:
    case OMPD_cancellation_point:
    case OMPD_cancel:
    case OMPD_target_data:
    case OMPD_target_enter_data:
    case OMPD_target_exit_data:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_distribute:
    case OMPD_target_update:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_distribute_simd:
    case OMPD_target_parallel_for_simd:
    case OMPD_target_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams_distribute_simd:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_declare_simd:
    case OMPD_requires:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected directive with task reductions.");
    }

    const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: TaskRedRef)->getDecl());
    EmitVarDecl(D: *VD);
    EmitStoreOfScalar(Value: ReductionDesc, Addr: GetAddrOfLocalVar(VD),
                      /*Volatile=*/false, Ty: TaskRedRef->getType());
  }
}
1725
/// Emits the final reduction of the private copies into the original
/// variables for all non-inscan reduction clauses of directive \p D, via the
/// OpenMP runtime. \p ReductionKind selects the reduction flavor (e.g.
/// OMPD_simd requests a simple, runtime-call-free reduction).
void CodeGenFunction::EmitOMPReductionClauseFinal(
    const OMPExecutableDirective &D, const OpenMPDirectiveKind ReductionKind) {
  if (!HaveInsertPoint())
    return;
  // Flattened lists collected over all applicable reduction clauses; the
  // arrays are index-aligned per reduction item.
  llvm::SmallVector<const Expr *, 8> Privates;
  llvm::SmallVector<const Expr *, 8> LHSExprs;
  llvm::SmallVector<const Expr *, 8> RHSExprs;
  llvm::SmallVector<const Expr *, 8> ReductionOps;
  llvm::SmallVector<bool, 8> IsPrivateVarReduction;
  bool HasAtLeastOneReduction = false;
  bool IsReductionWithTaskMod = false;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    // Do not emit for inscan reductions.
    if (C->getModifier() == OMPC_REDUCTION_inscan)
      continue;
    HasAtLeastOneReduction = true;
    Privates.append(in_start: C->privates().begin(), in_end: C->privates().end());
    LHSExprs.append(in_start: C->lhs_exprs().begin(), in_end: C->lhs_exprs().end());
    RHSExprs.append(in_start: C->rhs_exprs().begin(), in_end: C->rhs_exprs().end());
    IsPrivateVarReduction.append(in_start: C->private_var_reduction_flags().begin(),
                                 in_end: C->private_var_reduction_flags().end());
    ReductionOps.append(in_start: C->reduction_ops().begin(), in_end: C->reduction_ops().end());
    IsReductionWithTaskMod =
        IsReductionWithTaskMod || C->getModifier() == OMPC_REDUCTION_task;
  }
  if (HasAtLeastOneReduction) {
    OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S: D);
    // Task-modified reductions need an explicit runtime finalization call
    // before the regular reduction is emitted.
    if (IsReductionWithTaskMod) {
      CGM.getOpenMPRuntime().emitTaskReductionFini(
          CGF&: *this, Loc: D.getBeginLoc(), IsWorksharingReduction: isOpenMPWorksharingDirective(DKind: EKind));
    }
    bool TeamsLoopCanBeParallel = false;
    if (auto *TTLD = dyn_cast<OMPTargetTeamsGenericLoopDirective>(Val: &D))
      TeamsLoopCanBeParallel = TTLD->canBeParallelFor();
    bool WithNowait = D.getSingleClause<OMPNowaitClause>() ||
                      isOpenMPParallelDirective(DKind: EKind) ||
                      TeamsLoopCanBeParallel || ReductionKind == OMPD_simd;
    bool SimpleReduction = ReductionKind == OMPD_simd;
    // Emit nowait reduction if nowait clause is present or directive is a
    // parallel directive (it always has implicit barrier).
    CGM.getOpenMPRuntime().emitReduction(
        CGF&: *this, Loc: D.getEndLoc(), Privates, LHSExprs, RHSExprs, ReductionOps,
        Options: {.WithNowait: WithNowait, .SimpleReduction: SimpleReduction, .IsPrivateVarReduction: IsPrivateVarReduction, .ReductionKind: ReductionKind});
  }
}
1771
1772static void emitPostUpdateForReductionClause(
1773 CodeGenFunction &CGF, const OMPExecutableDirective &D,
1774 const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
1775 if (!CGF.HaveInsertPoint())
1776 return;
1777 llvm::BasicBlock *DoneBB = nullptr;
1778 for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1779 if (const Expr *PostUpdate = C->getPostUpdateExpr()) {
1780 if (!DoneBB) {
1781 if (llvm::Value *Cond = CondGen(CGF)) {
1782 // If the first post-update expression is found, emit conditional
1783 // block if it was requested.
1784 llvm::BasicBlock *ThenBB = CGF.createBasicBlock(name: ".omp.reduction.pu");
1785 DoneBB = CGF.createBasicBlock(name: ".omp.reduction.pu.done");
1786 CGF.Builder.CreateCondBr(Cond, True: ThenBB, False: DoneBB);
1787 CGF.EmitBlock(BB: ThenBB);
1788 }
1789 }
1790 CGF.EmitIgnoredExpr(E: PostUpdate);
1791 }
1792 }
1793 if (DoneBB)
1794 CGF.EmitBlock(BB: DoneBB, /*IsFinished=*/true);
1795}
1796
1797namespace {
1798/// Codegen lambda for appending distribute lower and upper bounds to outlined
1799/// parallel function. This is necessary for combined constructs such as
1800/// 'distribute parallel for'
1801typedef llvm::function_ref<void(CodeGenFunction &,
1802 const OMPExecutableDirective &,
1803 llvm::SmallVectorImpl<llvm::Value *> &)>
1804 CodeGenBoundParametersTy;
1805} // anonymous namespace
1806
1807static void
1808checkForLastprivateConditionalUpdate(CodeGenFunction &CGF,
1809 const OMPExecutableDirective &S) {
1810 if (CGF.getLangOpts().OpenMP < 50)
1811 return;
1812 llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> PrivateDecls;
1813 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
1814 for (const Expr *Ref : C->varlist()) {
1815 if (!Ref->getType()->isScalarType())
1816 continue;
1817 const auto *DRE = dyn_cast<DeclRefExpr>(Val: Ref->IgnoreParenImpCasts());
1818 if (!DRE)
1819 continue;
1820 PrivateDecls.insert(V: cast<VarDecl>(Val: DRE->getDecl()));
1821 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, LHS: Ref);
1822 }
1823 }
1824 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
1825 for (const Expr *Ref : C->varlist()) {
1826 if (!Ref->getType()->isScalarType())
1827 continue;
1828 const auto *DRE = dyn_cast<DeclRefExpr>(Val: Ref->IgnoreParenImpCasts());
1829 if (!DRE)
1830 continue;
1831 PrivateDecls.insert(V: cast<VarDecl>(Val: DRE->getDecl()));
1832 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, LHS: Ref);
1833 }
1834 }
1835 for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
1836 for (const Expr *Ref : C->varlist()) {
1837 if (!Ref->getType()->isScalarType())
1838 continue;
1839 const auto *DRE = dyn_cast<DeclRefExpr>(Val: Ref->IgnoreParenImpCasts());
1840 if (!DRE)
1841 continue;
1842 PrivateDecls.insert(V: cast<VarDecl>(Val: DRE->getDecl()));
1843 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, LHS: Ref);
1844 }
1845 }
1846 // Privates should ne analyzed since they are not captured at all.
1847 // Task reductions may be skipped - tasks are ignored.
1848 // Firstprivates do not return value but may be passed by reference - no need
1849 // to check for updated lastprivate conditional.
1850 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
1851 for (const Expr *Ref : C->varlist()) {
1852 if (!Ref->getType()->isScalarType())
1853 continue;
1854 const auto *DRE = dyn_cast<DeclRefExpr>(Val: Ref->IgnoreParenImpCasts());
1855 if (!DRE)
1856 continue;
1857 PrivateDecls.insert(V: cast<VarDecl>(Val: DRE->getDecl()));
1858 }
1859 }
1860 CGF.CGM.getOpenMPRuntime().checkAndEmitSharedLastprivateConditional(
1861 CGF, D: S, IgnoredDecls: PrivateDecls);
1862}
1863
/// Common emission path for 'parallel' and combined-parallel directives:
/// outlines the parallel region, evaluates num_threads / proc_bind / if
/// clauses, and emits the runtime parallel call. \p CodeGenBoundParameters
/// appends extra captured arguments (e.g. the distribute chunk bounds for
/// combined 'distribute parallel for' constructs).
static void emitCommonOMPParallelDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &S,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    const CodeGenBoundParametersTy &CodeGenBoundParameters) {
  const CapturedStmt *CS = S.getCapturedStmt(RegionKind: OMPD_parallel);
  llvm::Value *NumThreads = nullptr;
  OpenMPNumThreadsClauseModifier Modifier = OMPC_NUMTHREADS_unknown;
  // OpenMP 6.0, 10.4: "If no severity clause is specified then the effect is as
  // if sev-level is fatal."
  OpenMPSeverityClauseKind Severity = OMPC_SEVERITY_fatal;
  clang::Expr *Message = nullptr;
  SourceLocation SeverityLoc = SourceLocation();
  SourceLocation MessageLoc = SourceLocation();

  llvm::Function *OutlinedFn =
      CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
          CGF, D: S, ThreadIDVar: *CS->getCapturedDecl()->param_begin(), InnermostKind,
          CodeGen);

  if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) {
    CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
    NumThreads = CGF.EmitScalarExpr(E: NumThreadsClause->getNumThreads(),
                                    /*IgnoreResultAssign=*/true);
    Modifier = NumThreadsClause->getModifier();
    // Optional message/severity clauses are captured here and forwarded to
    // the runtime together with the num_threads request.
    if (const auto *MessageClause = S.getSingleClause<OMPMessageClause>()) {
      Message = MessageClause->getMessageString();
      MessageLoc = MessageClause->getBeginLoc();
    }
    if (const auto *SeverityClause = S.getSingleClause<OMPSeverityClause>()) {
      Severity = SeverityClause->getSeverityKind();
      SeverityLoc = SeverityClause->getBeginLoc();
    }
    CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
        CGF, NumThreads, Loc: NumThreadsClause->getBeginLoc(), Modifier, Severity,
        SeverityLoc, Message, MessageLoc);
  }
  if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) {
    CodeGenFunction::RunCleanupsScope ProcBindScope(CGF);
    CGF.CGM.getOpenMPRuntime().emitProcBindClause(
        CGF, ProcBind: ProcBindClause->getProcBindKind(), Loc: ProcBindClause->getBeginLoc());
  }
  // Only an 'if' clause that applies to 'parallel' (or has no name modifier)
  // guards the parallel call.
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_parallel) {
      IfCond = C->getCondition();
      break;
    }
  }

  OMPParallelScope Scope(CGF, S);
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  // Combining 'distribute' with 'for' requires sharing each 'distribute' chunk
  // lower and upper bounds with the pragma 'for' chunking mechanism.
  // The following lambda takes care of appending the lower and upper bound
  // parameters when necessary
  CodeGenBoundParameters(CGF, S, CapturedVars);
  CGF.GenerateOpenMPCapturedVars(S: *CS, CapturedVars);
  CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, Loc: S.getBeginLoc(), OutlinedFn,
                                              CapturedVars, IfCond, NumThreads,
                                              NumThreadsModifier: Modifier, Severity, Message);
}
1926
1927static bool isAllocatableDecl(const VarDecl *VD) {
1928 const VarDecl *CVD = VD->getCanonicalDecl();
1929 if (!CVD->hasAttr<OMPAllocateDeclAttr>())
1930 return false;
1931 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
1932 // Use the default allocation.
1933 return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
1934 AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
1935 !AA->getAllocator());
1936}
1937
/// No-op bound-parameters emitter used for directives that do not need to
/// forward 'distribute' loop bounds into the outlined parallel region.
static void emitEmptyBoundParameters(CodeGenFunction &,
                                     const OMPExecutableDirective &,
                                     llvm::SmallVectorImpl<llvm::Value *> &) {}
1941
1942static void emitOMPCopyinClause(CodeGenFunction &CGF,
1943 const OMPExecutableDirective &S) {
1944 bool Copyins = CGF.EmitOMPCopyinClause(D: S);
1945 if (Copyins) {
1946 // Emit implicit barrier to synchronize threads and avoid data races on
1947 // propagation master's thread values of threadprivate variables to local
1948 // instances of that variables of all other implicit threads.
1949 CGF.CGM.getOpenMPRuntime().emitBarrierCall(
1950 CGF, Loc: S.getBeginLoc(), Kind: OMPD_unknown, /*EmitChecks=*/false,
1951 /*ForceSimpleCall=*/true);
1952 }
1953}
1954
/// Returns the address of a local variable that must be allocated through an
/// OpenMP allocator ('omp allocate' with a non-default allocator). Emits the
/// runtime allocation call and pushes an EH/normal cleanup that frees the
/// storage; returns Address::invalid() when the variable is not allocatable
/// this way (caller then uses the regular alloca path).
Address CodeGenFunction::OMPBuilderCBHelpers::getAddressOfLocalVariable(
    CodeGenFunction &CGF, const VarDecl *VD) {
  CodeGenModule &CGM = CGF.CGM;
  auto &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();

  if (!VD)
    return Address::invalid();
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (!isAllocatableDecl(VD: CVD))
    return Address::invalid();
  llvm::Value *Size;
  CharUnits Align = CGM.getContext().getDeclAlign(D: CVD);
  if (CVD->getType()->isVariablyModifiedType()) {
    // VLA: the size is only known at runtime, so round it up to the
    // declared alignment with IR arithmetic.
    Size = CGF.getTypeSize(Ty: CVD->getType());
    // Align the size: ((size + align - 1) / align) * align
    Size = CGF.Builder.CreateNUWAdd(
        LHS: Size, RHS: CGM.getSize(numChars: Align - CharUnits::fromQuantity(Quantity: 1)));
    Size = CGF.Builder.CreateUDiv(LHS: Size, RHS: CGM.getSize(numChars: Align));
    Size = CGF.Builder.CreateNUWMul(LHS: Size, RHS: CGM.getSize(numChars: Align));
  } else {
    // Fixed-size type: the aligned size is a compile-time constant.
    CharUnits Sz = CGM.getContext().getTypeSizeInChars(T: CVD->getType());
    Size = CGM.getSize(numChars: Sz.alignTo(Align));
  }

  const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
  assert(AA->getAllocator() &&
         "Expected allocator expression for non-default allocator.");
  llvm::Value *Allocator = CGF.EmitScalarExpr(E: AA->getAllocator());
  // According to the standard, the original allocator type is a enum (integer).
  // Convert to pointer type, if required.
  if (Allocator->getType()->isIntegerTy())
    Allocator = CGF.Builder.CreateIntToPtr(V: Allocator, DestTy: CGM.VoidPtrTy);
  else if (Allocator->getType()->isPointerTy())
    Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(V: Allocator,
                                                                DestTy: CGM.VoidPtrTy);

  llvm::Value *Addr = OMPBuilder.createOMPAlloc(
      Loc: CGF.Builder, Size, Allocator,
      Name: getNameWithSeparators(Parts: {CVD->getName(), ".void.addr"}, FirstSeparator: ".", Separator: "."));
  // The matching free call is emitted now and registered as a cleanup so the
  // storage is released on both normal and exceptional exits.
  llvm::CallInst *FreeCI =
      OMPBuilder.createOMPFree(Loc: CGF.Builder, Addr, Allocator);

  CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(Kind: NormalAndEHCleanup, A: FreeCI);
  // Cast the raw allocation to a pointer of the variable's type for the
  // returned Address.
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      V: Addr,
      DestTy: CGF.ConvertTypeForMem(T: CGM.getContext().getPointerType(T: CVD->getType())),
      Name: getNameWithSeparators(Parts: {CVD->getName(), ".addr"}, FirstSeparator: ".", Separator: "."));
  return Address(Addr, CGF.ConvertTypeForMem(T: CVD->getType()), Align);
}
2004
2005Address CodeGenFunction::OMPBuilderCBHelpers::getAddrOfThreadPrivate(
2006 CodeGenFunction &CGF, const VarDecl *VD, Address VDAddr,
2007 SourceLocation Loc) {
2008 CodeGenModule &CGM = CGF.CGM;
2009 if (CGM.getLangOpts().OpenMPUseTLS &&
2010 CGM.getContext().getTargetInfo().isTLSSupported())
2011 return VDAddr;
2012
2013 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
2014
2015 llvm::Type *VarTy = VDAddr.getElementType();
2016 llvm::Value *Data =
2017 CGF.Builder.CreatePointerCast(V: VDAddr.emitRawPointer(CGF), DestTy: CGM.Int8PtrTy);
2018 llvm::ConstantInt *Size = CGM.getSize(numChars: CGM.GetTargetTypeStoreSize(Ty: VarTy));
2019 std::string Suffix = getNameWithSeparators(Parts: {"cache", ""});
2020 llvm::Twine CacheName = Twine(CGM.getMangledName(GD: VD)).concat(Suffix);
2021
2022 llvm::CallInst *ThreadPrivateCacheCall =
2023 OMPBuilder.createCachedThreadPrivate(Loc: CGF.Builder, Pointer: Data, Size, Name: CacheName);
2024
2025 return Address(ThreadPrivateCacheCall, CGM.Int8Ty, VDAddr.getAlignment());
2026}
2027
2028std::string CodeGenFunction::OMPBuilderCBHelpers::getNameWithSeparators(
2029 ArrayRef<StringRef> Parts, StringRef FirstSeparator, StringRef Separator) {
2030 SmallString<128> Buffer;
2031 llvm::raw_svector_ostream OS(Buffer);
2032 StringRef Sep = FirstSeparator;
2033 for (StringRef Part : Parts) {
2034 OS << Sep << Part;
2035 Sep = Separator;
2036 }
2037 return OS.str().str();
2038}
2039
2040void CodeGenFunction::OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
2041 CodeGenFunction &CGF, const Stmt *RegionBodyStmt, InsertPointTy AllocaIP,
2042 InsertPointTy CodeGenIP, Twine RegionName) {
2043 CGBuilderTy &Builder = CGF.Builder;
2044 Builder.restoreIP(IP: CodeGenIP);
2045 llvm::BasicBlock *FiniBB = splitBBWithSuffix(Builder, /*CreateBranch=*/false,
2046 Suffix: "." + RegionName + ".after");
2047
2048 {
2049 OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(CGF, AllocaIP, *FiniBB);
2050 CGF.EmitStmt(S: RegionBodyStmt);
2051 }
2052
2053 if (Builder.saveIP().isSet())
2054 Builder.CreateBr(Dest: FiniBB);
2055}
2056
2057void CodeGenFunction::OMPBuilderCBHelpers::EmitOMPOutlinedRegionBody(
2058 CodeGenFunction &CGF, const Stmt *RegionBodyStmt, InsertPointTy AllocaIP,
2059 InsertPointTy CodeGenIP, Twine RegionName) {
2060 CGBuilderTy &Builder = CGF.Builder;
2061 Builder.restoreIP(IP: CodeGenIP);
2062 llvm::BasicBlock *FiniBB = splitBBWithSuffix(Builder, /*CreateBranch=*/false,
2063 Suffix: "." + RegionName + ".after");
2064
2065 {
2066 OMPBuilderCBHelpers::OutlinedRegionBodyRAII IRB(CGF, AllocaIP, *FiniBB);
2067 CGF.EmitStmt(S: RegionBodyStmt);
2068 }
2069
2070 if (Builder.saveIP().isSet())
2071 Builder.CreateBr(Dest: FiniBB);
2072}
2073
/// Emits a '#pragma omp parallel' directive. Uses the OpenMPIRBuilder path
/// when -fopenmp-enable-irbuilder is active; otherwise falls back to the
/// classic CGOpenMPRuntime emission with full clause privatization support.
void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
    // Check if we have any if clause associated with the directive.
    llvm::Value *IfCond = nullptr;
    if (const auto *C = S.getSingleClause<OMPIfClause>())
      IfCond = EmitScalarExpr(E: C->getCondition(),
                              /*IgnoreResultAssign=*/true);

    llvm::Value *NumThreads = nullptr;
    if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>())
      NumThreads = EmitScalarExpr(E: NumThreadsClause->getNumThreads(),
                                  /*IgnoreResultAssign=*/true);

    ProcBindKind ProcBind = OMP_PROC_BIND_default;
    if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>())
      ProcBind = ProcBindClause->getProcBindKind();

    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;

    // The cleanup callback that finalizes all variables at the given location,
    // thus calls destructors etc.
    auto FiniCB = [this](InsertPointTy IP) {
      OMPBuilderCBHelpers::FinalizeOMPRegion(CGF&: *this, IP);
      return llvm::Error::success();
    };

    // Privatization callback that performs appropriate action for
    // shared/private/firstprivate/lastprivate/copyin/... variables.
    //
    // TODO: This defaults to shared right now.
    auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
                     llvm::Value &, llvm::Value &Val, llvm::Value *&ReplVal) {
      // The next line is appropriate only for variables (Val) with the
      // data-sharing attribute "shared".
      ReplVal = &Val;

      return CodeGenIP;
    };

    const CapturedStmt *CS = S.getCapturedStmt(RegionKind: OMPD_parallel);
    const Stmt *ParallelRegionBodyStmt = CS->getCapturedStmt();

    // Body callback: emit the captured statement as the outlined parallel
    // region body.
    auto BodyGenCB = [&, this](InsertPointTy AllocaIP,
                               InsertPointTy CodeGenIP) {
      OMPBuilderCBHelpers::EmitOMPOutlinedRegionBody(
          CGF&: *this, RegionBodyStmt: ParallelRegionBodyStmt, AllocaIP, CodeGenIP, RegionName: "parallel");
      return llvm::Error::success();
    };

    CGCapturedStmtInfo CGSI(*CS, CR_OpenMP);
    CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI);
    llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
        AllocaInsertPt->getParent(), AllocaInsertPt->getIterator());
    llvm::OpenMPIRBuilder::InsertPointTy AfterIP = cantFail(
        ValOrErr: OMPBuilder.createParallel(Loc: Builder, AllocaIP, BodyGenCB, PrivCB, FiniCB,
                                  IfCondition: IfCond, NumThreads, ProcBind, IsCancellable: S.hasCancel()));
    Builder.restoreIP(IP: AfterIP);
    return;
  }

  // Emit parallel region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    // Privatize copyin/firstprivate/private/reduction variables before
    // emitting the region body; finalize reductions afterwards.
    OMPPrivateScope PrivateScope(CGF);
    emitOMPCopyinClause(CGF, S);
    (void)CGF.EmitOMPFirstprivateClause(D: S, PrivateScope);
    CGF.EmitOMPPrivateClause(D: S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(D: S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.EmitStmt(S: S.getCapturedStmt(RegionKind: OMPD_parallel)->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_parallel);
  };
  {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
    emitCommonOMPParallelDirective(CGF&: *this, S, InnermostKind: OMPD_parallel, CodeGen,
                                   CodeGenBoundParameters: emitEmptyBoundParameters);
    emitPostUpdateForReductionClause(CGF&: *this, D: S,
                                     CondGen: [](CodeGenFunction &) { return nullptr; });
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(CGF&: *this, S);
}
2158
2159void CodeGenFunction::EmitOMPMetaDirective(const OMPMetaDirective &S) {
2160 EmitStmt(S: S.getIfStmt());
2161}
2162
namespace {
/// RAII to handle scopes for loop transformation directives.
///
/// For loop-based and canonical-loop-sequence transformation directives this
/// opens an OMPLoopScope plus a captured-statement info scope for the
/// lifetime of the object; for any other statement it is a no-op.
/// NOTE(review): members are managed with raw new/delete; the destructor
/// tears them down in reverse construction order (CapInfoRAII, CGSI, Scope).
/// unique_ptr members declared in the same order would preserve that order —
/// left as-is here to avoid touching the include block.
class OMPTransformDirectiveScopeRAII {
  OMPLoopScope *Scope = nullptr;
  CodeGenFunction::CGCapturedStmtInfo *CGSI = nullptr;
  CodeGenFunction::CGCapturedStmtRAII *CapInfoRAII = nullptr;

  OMPTransformDirectiveScopeRAII(const OMPTransformDirectiveScopeRAII &) =
      delete;
  OMPTransformDirectiveScopeRAII &
  operator=(const OMPTransformDirectiveScopeRAII &) = delete;

public:
  OMPTransformDirectiveScopeRAII(CodeGenFunction &CGF, const Stmt *S) {
    if (const auto *Dir = dyn_cast<OMPLoopBasedDirective>(Val: S)) {
      Scope = new OMPLoopScope(CGF, *Dir);
      CGSI = new CodeGenFunction::CGCapturedStmtInfo(CR_OpenMP);
      CapInfoRAII = new CodeGenFunction::CGCapturedStmtRAII(CGF, CGSI);
    } else if (const auto *Dir =
                   dyn_cast<OMPCanonicalLoopSequenceTransformationDirective>(
                       Val: S)) {
      // For simplicity we reuse the loop scope similarly to what we do with
      // OMPCanonicalLoopNestTransformationDirective do by being a subclass
      // of OMPLoopBasedDirective.
      Scope = new OMPLoopScope(CGF, *Dir);
      CGSI = new CodeGenFunction::CGCapturedStmtInfo(CR_OpenMP);
      CapInfoRAII = new CodeGenFunction::CGCapturedStmtRAII(CGF, CGSI);
    }
  }
  ~OMPTransformDirectiveScopeRAII() {
    // No-op when the statement was not a transformation directive.
    if (!Scope)
      return;
    delete CapInfoRAII;
    delete CGSI;
    delete Scope;
  }
};
} // namespace
2201
/// Recursively emits the body of a collapsed loop nest: walks from statement
/// \p S through up to \p MaxLevel nested loops (skipping containers,
/// unwrapping transformed/canonical loops), emitting intervening statements
/// of imperfectly nested loops along the way, and finally emits the
/// innermost loop body.
static void emitBody(CodeGenFunction &CGF, const Stmt *S, const Stmt *NextLoop,
                     int MaxLevel, int Level = 0) {
  assert(Level < MaxLevel && "Too deep lookup during loop body codegen.");
  const Stmt *SimplifiedS = S->IgnoreContainers();
  if (const auto *CS = dyn_cast<CompoundStmt>(Val: SimplifiedS)) {
    PrettyStackTraceLoc CrashInfo(
        CGF.getContext().getSourceManager(), CS->getLBracLoc(),
        "LLVM IR generation of compound statement ('{}')");

    // Keep track of the current cleanup stack depth, including debug scopes.
    CodeGenFunction::LexicalScope Scope(CGF, S->getSourceRange());
    // Recurse into each child at the same nesting level; only the child that
    // matches NextLoop descends further.
    for (const Stmt *CurStmt : CS->body())
      emitBody(CGF, S: CurStmt, NextLoop, MaxLevel, Level);
    return;
  }
  if (SimplifiedS == NextLoop) {
    // Unwrap loop-transformation directives and canonical-loop wrappers to
    // reach the underlying for / range-for statement.
    if (auto *Dir = dyn_cast<OMPLoopTransformationDirective>(Val: SimplifiedS))
      SimplifiedS = Dir->getTransformedStmt();
    if (const auto *CanonLoop = dyn_cast<OMPCanonicalLoop>(Val: SimplifiedS))
      SimplifiedS = CanonLoop->getLoopStmt();
    if (const auto *For = dyn_cast<ForStmt>(Val: SimplifiedS)) {
      S = For->getBody();
    } else {
      assert(isa<CXXForRangeStmt>(SimplifiedS) &&
             "Expected canonical for loop or range-based for loop.");
      const auto *CXXFor = cast<CXXForRangeStmt>(Val: SimplifiedS);
      CGF.EmitStmt(S: CXXFor->getLoopVarStmt());
      S = CXXFor->getBody();
    }
    if (Level + 1 < MaxLevel) {
      // Not yet at the innermost collapsed loop: locate the next inner loop
      // and keep descending.
      NextLoop = OMPLoopDirective::tryToFindNextInnerLoop(
          CurStmt: S, /*TryImperfectlyNestedLoops=*/true);
      emitBody(CGF, S, NextLoop, MaxLevel, Level: Level + 1);
      return;
    }
  }
  // Innermost level reached (or a non-loop sibling statement): emit as-is.
  CGF.EmitStmt(S);
}
2240
/// Emit one iteration of the body of loop directive \p D: update the loop
/// counters and linear variables from the iteration variable, verify
/// non-rectangular bounds, set up inscan-reduction dispatch blocks when
/// needed, and emit the (possibly collapsed) loop body itself.
void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
                                      JumpDest LoopExit) {
  RunCleanupsScope BodyScope(*this);
  // Update counters values on current iteration.
  for (const Expr *UE : D.updates())
    EmitIgnoredExpr(E: UE);
  // Update the linear variables.
  // In distribute directives only loop counters may be marked as linear, no
  // need to generate the code for them.
  OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S: D);
  if (!isOpenMPDistributeDirective(DKind: EKind)) {
    for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
      for (const Expr *UE : C->updates())
        EmitIgnoredExpr(E: UE);
    }
  }

  // On a continue in the body, jump to the end.
  JumpDest Continue = getJumpDestInCurrentScope(Name: "omp.body.continue");
  BreakContinueStack.push_back(Elt: BreakContinue(D, LoopExit, Continue));
  for (const Expr *E : D.finals_conditions()) {
    if (!E)
      continue;
    // Check that loop counter in non-rectangular nest fits into the iteration
    // space.
    llvm::BasicBlock *NextBB = createBasicBlock(name: "omp.body.next");
    EmitBranchOnBoolExpr(Cond: E, TrueBlock: NextBB, FalseBlock: Continue.getBlock(),
                         TrueCount: getProfileCount(S: D.getBody()));
    EmitBlock(BB: NextBB);
  }

  OMPPrivateScope InscanScope(*this);
  EmitOMPReductionClauseInit(D, PrivateScope&: InscanScope, /*ForInscan=*/true);
  bool IsInscanRegion = InscanScope.Privatize();
  if (IsInscanRegion) {
    // Need to remember the block before and after scan directive
    // to dispatch them correctly depending on the clause used in
    // this directive, inclusive or exclusive. For inclusive scan the natural
    // order of the blocks is used, for exclusive clause the blocks must be
    // executed in reverse order.
    OMPBeforeScanBlock = createBasicBlock(name: "omp.before.scan.bb");
    OMPAfterScanBlock = createBasicBlock(name: "omp.after.scan.bb");
    // No need to allocate inscan exit block, in simd mode it is selected in the
    // codegen for the scan directive.
    if (EKind != OMPD_simd && !getLangOpts().OpenMPSimd)
      OMPScanExitBlock = createBasicBlock(name: "omp.exit.inscan.bb");
    OMPScanDispatch = createBasicBlock(name: "omp.inscan.dispatch");
    EmitBranch(Block: OMPScanDispatch);
    EmitBlock(BB: OMPBeforeScanBlock);
  }

  // Emit loop variables for C++ range loops.
  const Stmt *Body =
      D.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers();
  // Emit loop body.
  emitBody(CGF&: *this, S: Body,
           NextLoop: OMPLoopBasedDirective::tryToFindNextInnerLoop(
               CurStmt: Body, /*TryImperfectlyNestedLoops=*/true),
           MaxLevel: D.getLoopsNumber());

  // Jump to the dispatcher at the end of the loop body.
  if (IsInscanRegion)
    EmitBranch(Block: OMPScanExitBlock);

  // The end (updates/cleanups).
  EmitBlock(BB: Continue.getBlock());
  BreakContinueStack.pop_back();
}
2309
2310using EmittedClosureTy = std::pair<llvm::Function *, llvm::Value *>;
2311
/// Emit a captured statement and return the function as well as its captured
/// closure context.
///
/// The capture struct is initialized in \p ParentCGF, while the outlined
/// function is generated in a fresh CodeGenFunction so the parent's insertion
/// point is left undisturbed.
static EmittedClosureTy emitCapturedStmtFunc(CodeGenFunction &ParentCGF,
                                             const CapturedStmt *S) {
  LValue CapStruct = ParentCGF.InitCapturedStruct(S: *S);
  CodeGenFunction CGF(ParentCGF.CGM, /*suppressNewContext=*/true);
  std::unique_ptr<CodeGenFunction::CGCapturedStmtInfo> CSI =
      std::make_unique<CodeGenFunction::CGCapturedStmtInfo>(args: *S);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, CSI.get());
  llvm::Function *F = CGF.GenerateCapturedStmtFunction(S: *S);

  // Pair the outlined function with the parent-side address of the capture
  // struct; this pointer is appended as the closure context on calls.
  return {F, CapStruct.getPointer(CGF&: ParentCGF)};
}
2325
2326/// Emit a call to a previously captured closure.
2327static llvm::CallInst *
2328emitCapturedStmtCall(CodeGenFunction &ParentCGF, EmittedClosureTy Cap,
2329 llvm::ArrayRef<llvm::Value *> Args) {
2330 // Append the closure context to the argument.
2331 SmallVector<llvm::Value *> EffectiveArgs;
2332 EffectiveArgs.reserve(N: Args.size() + 1);
2333 llvm::append_range(C&: EffectiveArgs, R&: Args);
2334 EffectiveArgs.push_back(Elt: Cap.second);
2335
2336 return ParentCGF.Builder.CreateCall(Callee: Cap.first, Args: EffectiveArgs);
2337}
2338
/// Emit \p Depth canonical loops nested in \p S via the OpenMPIRBuilder path
/// and return the CanonicalLoopInfo for the outermost loop.
llvm::CanonicalLoopInfo *
CodeGenFunction::EmitOMPCollapsedCanonicalLoopNest(const Stmt *S, int Depth) {
  assert(Depth == 1 && "Nested loops with OpenMPIRBuilder not yet implemented");

  // The caller is processing the loop-associated directive processing the \p
  // Depth loops nested in \p S. Put the previous pending loop-associated
  // directive to the stack. If the current loop-associated directive is a loop
  // transformation directive, it will push its generated loops onto the stack
  // such that together with the loops left here they form the combined loop
  // nest for the parent loop-associated directive.
  int ParentExpectedOMPLoopDepth = ExpectedOMPLoopDepth;
  ExpectedOMPLoopDepth = Depth;

  // Emitting the loop statement(s) pushes their CanonicalLoopInfo onto
  // OMPLoopNestStack (see EmitOMPCanonicalLoop).
  EmitStmt(S);
  assert(OMPLoopNestStack.size() >= (size_t)Depth && "Found too few loops");

  // The last added loop is the outermost one.
  llvm::CanonicalLoopInfo *Result = OMPLoopNestStack.back();

  // Pop the \p Depth loops requested by the call from that stack and restore
  // the previous context.
  OMPLoopNestStack.pop_back_n(NumItems: Depth);
  ExpectedOMPLoopDepth = ParentExpectedOMPLoopDepth;

  return Result;
}
2365
/// Emit an OMPCanonicalLoop node. With the OpenMPIRBuilder enabled, the loop
/// is emitted through OpenMPIRBuilder::createCanonicalLoop using the
/// distance/loop-variable helper functions attached to the AST node; the
/// resulting CanonicalLoopInfo is pushed onto OMPLoopNestStack for consuming
/// parent directives. Otherwise the syntactic loop is emitted as plain code.
void CodeGenFunction::EmitOMPCanonicalLoop(const OMPCanonicalLoop *S) {
  const Stmt *SyntacticalLoop = S->getLoopStmt();
  if (!getLangOpts().OpenMPIRBuilder) {
    // Ignore if OpenMPIRBuilder is not enabled.
    EmitStmt(S: SyntacticalLoop);
    return;
  }

  LexicalScope ForScope(*this, S->getSourceRange());

  // Emit init statements. The Distance/LoopVar funcs may reference variable
  // declarations they contain.
  const Stmt *BodyStmt;
  if (const auto *For = dyn_cast<ForStmt>(Val: SyntacticalLoop)) {
    if (const Stmt *InitStmt = For->getInit())
      EmitStmt(S: InitStmt);
    BodyStmt = For->getBody();
  } else if (const auto *RangeFor =
                 dyn_cast<CXXForRangeStmt>(Val: SyntacticalLoop)) {
    // For range-based for loops, emit the range/begin/end/loop-variable
    // declarations in source order before the loop itself.
    if (const DeclStmt *RangeStmt = RangeFor->getRangeStmt())
      EmitStmt(S: RangeStmt);
    if (const DeclStmt *BeginStmt = RangeFor->getBeginStmt())
      EmitStmt(S: BeginStmt);
    if (const DeclStmt *EndStmt = RangeFor->getEndStmt())
      EmitStmt(S: EndStmt);
    if (const DeclStmt *LoopVarStmt = RangeFor->getLoopVarStmt())
      EmitStmt(S: LoopVarStmt);
    BodyStmt = RangeFor->getBody();
  } else
    llvm_unreachable("Expected for-stmt or range-based for-stmt");

  // Emit closure for later use. By-value captures will be captured here.
  const CapturedStmt *DistanceFunc = S->getDistanceFunc();
  EmittedClosureTy DistanceClosure = emitCapturedStmtFunc(ParentCGF&: *this, S: DistanceFunc);
  const CapturedStmt *LoopVarFunc = S->getLoopVarFunc();
  EmittedClosureTy LoopVarClosure = emitCapturedStmtFunc(ParentCGF&: *this, S: LoopVarFunc);

  // Call the distance function to get the number of iterations of the loop to
  // come.
  QualType LogicalTy = DistanceFunc->getCapturedDecl()
                           ->getParam(i: 0)
                           ->getType()
                           .getNonReferenceType();
  RawAddress CountAddr = CreateMemTemp(T: LogicalTy, Name: ".count.addr");
  // The distance function writes the trip count through its first parameter.
  emitCapturedStmtCall(ParentCGF&: *this, Cap: DistanceClosure, Args: {CountAddr.getPointer()});
  llvm::Value *DistVal = Builder.CreateLoad(Addr: CountAddr, Name: ".count");

  // Emit the loop structure.
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  auto BodyGen = [&, this](llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP,
                           llvm::Value *IndVar) {
    Builder.restoreIP(IP: CodeGenIP);

    // Emit the loop body: Convert the logical iteration number to the loop
    // variable and emit the body.
    const DeclRefExpr *LoopVarRef = S->getLoopVarRef();
    LValue LCVal = EmitLValue(E: LoopVarRef);
    Address LoopVarAddress = LCVal.getAddress();
    emitCapturedStmtCall(ParentCGF&: *this, Cap: LoopVarClosure,
                         Args: {LoopVarAddress.emitRawPointer(CGF&: *this), IndVar});

    RunCleanupsScope BodyScope(*this);
    EmitStmt(S: BodyStmt);
    return llvm::Error::success();
  };

  llvm::CanonicalLoopInfo *CL =
      cantFail(ValOrErr: OMPBuilder.createCanonicalLoop(Loc: Builder, BodyGenCB: BodyGen, TripCount: DistVal));

  // Finish up the loop.
  Builder.restoreIP(IP: CL->getAfterIP());
  ForScope.ForceCleanup();

  // Remember the CanonicalLoopInfo for parent AST nodes consuming it.
  OMPLoopNestStack.push_back(Elt: CL);
}
2442
/// Emit the inner loop skeleton of an OpenMP construct:
///   cond-block -> body (BodyGen) -> inc-block (IncExpr, PostIncGen) -> cond
/// including loop metadata from any attributes on the associated statement,
/// a cleanup staging block when \p RequiresCleanup, and profile counters.
void CodeGenFunction::EmitOMPInnerLoop(
    const OMPExecutableDirective &S, bool RequiresCleanup, const Expr *LoopCond,
    const Expr *IncExpr,
    const llvm::function_ref<void(CodeGenFunction &)> BodyGen,
    const llvm::function_ref<void(CodeGenFunction &)> PostIncGen) {
  auto LoopExit = getJumpDestInCurrentScope(Name: "omp.inner.for.end");

  // Start the loop with a block that tests the condition.
  auto CondBlock = createBasicBlock(name: "omp.inner.for.cond");
  EmitBlock(BB: CondBlock);
  const SourceRange R = S.getSourceRange();

  // If attributes are attached, push to the basic block with them.
  const auto &OMPED = cast<OMPExecutableDirective>(Val: S);
  const CapturedStmt *ICS = OMPED.getInnermostCapturedStmt();
  const Stmt *SS = ICS->getCapturedStmt();
  const AttributedStmt *AS = dyn_cast_or_null<AttributedStmt>(Val: SS);
  OMPLoopNestStack.clear();
  if (AS)
    LoopStack.push(Header: CondBlock, Ctx&: CGM.getContext(), CGOpts: CGM.getCodeGenOpts(),
                   Attrs: AS->getAttrs(), StartLoc: SourceLocToDebugLoc(Location: R.getBegin()),
                   EndLoc: SourceLocToDebugLoc(Location: R.getEnd()));
  else
    LoopStack.push(Header: CondBlock, StartLoc: SourceLocToDebugLoc(Location: R.getBegin()),
                   EndLoc: SourceLocToDebugLoc(Location: R.getEnd()));

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  llvm::BasicBlock *ExitBlock = LoopExit.getBlock();
  if (RequiresCleanup)
    ExitBlock = createBasicBlock(name: "omp.inner.for.cond.cleanup");

  llvm::BasicBlock *LoopBody = createBasicBlock(name: "omp.inner.for.body");

  // Emit condition.
  EmitBranchOnBoolExpr(Cond: LoopCond, TrueBlock: LoopBody, FalseBlock: ExitBlock, TrueCount: getProfileCount(S: &S));
  if (ExitBlock != LoopExit.getBlock()) {
    // Route the exit through the cleanup block so pending cleanups run.
    EmitBlock(BB: ExitBlock);
    EmitBranchThroughCleanup(Dest: LoopExit);
  }

  EmitBlock(BB: LoopBody);
  incrementProfileCounter(S: &S);

  // Create a block for the increment.
  JumpDest Continue = getJumpDestInCurrentScope(Name: "omp.inner.for.inc");
  BreakContinueStack.push_back(Elt: BreakContinue(S, LoopExit, Continue));

  BodyGen(*this);

  // Emit "IV = IV + 1" and a back-edge to the condition block.
  EmitBlock(BB: Continue.getBlock());
  EmitIgnoredExpr(E: IncExpr);
  PostIncGen(*this);
  BreakContinueStack.pop_back();
  EmitBranch(Block: CondBlock);
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(BB: LoopExit.getBlock());
}
2503
/// Emit initializers for variables in 'linear' clauses of \p D, plus the
/// pre-calculated linear steps where the step is not a constant.
///
/// \returns true if at least one linear variable was emitted.
bool CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) {
  if (!HaveInsertPoint())
    return false;
  // Emit inits for the linear variables.
  bool HasLinears = false;
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    for (const Expr *Init : C->inits()) {
      HasLinears = true;
      const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: Init)->getDecl());
      if (const auto *Ref =
              dyn_cast<DeclRefExpr>(Val: VD->getInit()->IgnoreImpCasts())) {
        // The init refers to the original variable: allocate the private copy
        // and initialize it from the original through a fresh DeclRefExpr so
        // captured variables are resolved correctly.
        AutoVarEmission Emission = EmitAutoVarAlloca(var: *VD);
        const auto *OrigVD = cast<VarDecl>(Val: Ref->getDecl());
        DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
                        CapturedStmtInfo->lookup(VD: OrigVD) != nullptr,
                        VD->getInit()->getType(), VK_LValue,
                        VD->getInit()->getExprLoc());
        EmitExprAsInit(
            init: &DRE, D: VD,
            lvalue: MakeAddrLValue(Addr: Emission.getAllocatedAddress(), T: VD->getType()),
            /*capturedByInit=*/false);
        EmitAutoVarCleanups(emission: Emission);
      } else {
        EmitVarDecl(D: *VD);
      }
    }
    // Emit the linear steps for the linear clauses.
    // If a step is not constant, it is pre-calculated before the loop.
    if (const auto *CS = cast_or_null<BinaryOperator>(Val: C->getCalcStep()))
      if (const auto *SaveRef = cast<DeclRefExpr>(Val: CS->getLHS())) {
        EmitVarDecl(D: *cast<VarDecl>(Val: SaveRef->getDecl()));
        // Emit calculation of the linear step.
        EmitIgnoredExpr(E: CS);
      }
  }
  return HasLinears;
}
2541
/// Emit final value copies for 'linear' clause variables of \p D and any
/// post-update expressions. \p CondGen may supply a guard condition; if it
/// returns a value, the final copies are emitted under a conditional block.
void CodeGenFunction::EmitOMPLinearClauseFinal(
    const OMPLoopDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
  if (!HaveInsertPoint())
    return;
  llvm::BasicBlock *DoneBB = nullptr;
  // Emit the final values of the linear variables.
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    auto IC = C->varlist_begin();
    for (const Expr *F : C->finals()) {
      if (!DoneBB) {
        if (llvm::Value *Cond = CondGen(*this)) {
          // If the first post-update expression is found, emit conditional
          // block if it was requested.
          llvm::BasicBlock *ThenBB = createBasicBlock(name: ".omp.linear.pu");
          DoneBB = createBasicBlock(name: ".omp.linear.pu.done");
          Builder.CreateCondBr(Cond, True: ThenBB, False: DoneBB);
          EmitBlock(BB: ThenBB);
        }
      }
      // Privatize the original variable to its own address so the final
      // expression F stores into the original storage.
      const auto *OrigVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IC)->getDecl());
      DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
                      CapturedStmtInfo->lookup(VD: OrigVD) != nullptr,
                      (*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
      Address OrigAddr = EmitLValue(E: &DRE).getAddress();
      CodeGenFunction::OMPPrivateScope VarScope(*this);
      VarScope.addPrivate(LocalVD: OrigVD, Addr: OrigAddr);
      (void)VarScope.Privatize();
      EmitIgnoredExpr(E: F);
      ++IC;
    }
    if (const Expr *PostUpdate = C->getPostUpdateExpr())
      EmitIgnoredExpr(E: PostUpdate);
  }
  if (DoneBB)
    EmitBlock(BB: DoneBB, /*IsFinished=*/true);
}
2579
/// Emit alignment assumptions for the pointers listed in 'aligned' clauses of
/// \p D. If a clause has no explicit alignment, the target's default SIMD
/// alignment for the pointee type is used.
static void emitAlignedClause(CodeGenFunction &CGF,
                              const OMPExecutableDirective &D) {
  if (!CGF.HaveInsertPoint())
    return;
  for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) {
    llvm::APInt ClauseAlignment(64, 0);
    if (const Expr *AlignmentExpr = Clause->getAlignment()) {
      // Sema guarantees the alignment expression is an integer constant.
      auto *AlignmentCI =
          cast<llvm::ConstantInt>(Val: CGF.EmitScalarExpr(E: AlignmentExpr));
      ClauseAlignment = AlignmentCI->getValue();
    }
    for (const Expr *E : Clause->varlist()) {
      llvm::APInt Alignment(ClauseAlignment);
      if (Alignment == 0) {
        // OpenMP [2.8.1, Description]
        // If no optional parameter is specified, implementation-defined default
        // alignments for SIMD instructions on the target platforms are assumed.
        Alignment =
            CGF.getContext()
                .toCharUnitsFromBits(BitSize: CGF.getContext().getOpenMPDefaultSimdAlign(
                    T: E->getType()->getPointeeType()))
                .getQuantity();
      }
      assert((Alignment == 0 || Alignment.isPowerOf2()) &&
             "alignment is not power of 2");
      if (Alignment != 0) {
        llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
        CGF.emitAlignmentAssumption(
            PtrValue, E, /*No second loc needed*/ AssumptionLoc: SourceLocation(),
            Alignment: llvm::ConstantInt::get(Context&: CGF.getLLVMContext(), V: Alignment));
      }
    }
  }
}
2614
/// Emit private copies of the loop counters of \p S into \p LoopScope,
/// including the extra counters required by ordered(n) clauses.
void CodeGenFunction::EmitOMPPrivateLoopCounters(
    const OMPLoopDirective &S, CodeGenFunction::OMPPrivateScope &LoopScope) {
  if (!HaveInsertPoint())
    return;
  auto I = S.private_counters().begin();
  for (const Expr *E : S.counters()) {
    const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl());
    const auto *PrivateVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *I)->getDecl());
    // Emit var without initialization.
    AutoVarEmission VarEmission = EmitAutoVarAlloca(var: *PrivateVD);
    EmitAutoVarCleanups(emission: VarEmission);
    // Remove the temporary mapping so PrivateVD can be re-bound below.
    LocalDeclMap.erase(Val: PrivateVD);
    (void)LoopScope.addPrivate(LocalVD: VD, Addr: VarEmission.getAllocatedAddress());
    if (LocalDeclMap.count(Val: VD) || CapturedStmtInfo->lookup(VD) ||
        VD->hasGlobalStorage()) {
      // The original counter is addressable here: bind the private counter
      // decl to the original variable's address.
      DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD),
                      LocalDeclMap.count(Val: VD) || CapturedStmtInfo->lookup(VD),
                      E->getType(), VK_LValue, E->getExprLoc());
      (void)LoopScope.addPrivate(LocalVD: PrivateVD, Addr: EmitLValue(E: &DRE).getAddress());
    } else {
      (void)LoopScope.addPrivate(LocalVD: PrivateVD, Addr: VarEmission.getAllocatedAddress());
    }
    ++I;
  }
  // Privatize extra loop counters used in loops for ordered(n) clauses.
  for (const auto *C : S.getClausesOfKind<OMPOrderedClause>()) {
    if (!C->getNumForLoops())
      continue;
    for (unsigned I = S.getLoopsNumber(), E = C->getLoopNumIterations().size();
         I < E; ++I) {
      const auto *DRE = cast<DeclRefExpr>(Val: C->getLoopCounter(NumLoop: I));
      const auto *VD = cast<VarDecl>(Val: DRE->getDecl());
      // Override only those variables that can be captured to avoid re-emission
      // of the variables declared within the loops.
      if (DRE->refersToEnclosingVariableOrCapture()) {
        (void)LoopScope.addPrivate(
            LocalVD: VD, Addr: CreateMemTemp(T: DRE->getType(), Name: VD->getName()));
      }
    }
  }
}
2656
/// Emit the pre-condition check of loop directive \p S: branch to
/// \p TrueBlock if the loop executes at least once, else to \p FalseBlock.
/// Temporarily privatizes the loop counters (and dependent counters for
/// non-rectangular nests) so \p Cond can be evaluated.
static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S,
                        const Expr *Cond, llvm::BasicBlock *TrueBlock,
                        llvm::BasicBlock *FalseBlock, uint64_t TrueCount) {
  if (!CGF.HaveInsertPoint())
    return;
  {
    // Scoped so the counter privatization is undone before the branch.
    CodeGenFunction::OMPPrivateScope PreCondScope(CGF);
    CGF.EmitOMPPrivateLoopCounters(S, LoopScope&: PreCondScope);
    (void)PreCondScope.Privatize();
    // Get initial values of real counters.
    for (const Expr *I : S.inits()) {
      CGF.EmitIgnoredExpr(E: I);
    }
  }
  // Create temp loop control variables with their init values to support
  // non-rectangular loops.
  CodeGenFunction::OMPMapVars PreCondVars;
  for (const Expr *E : S.dependent_counters()) {
    if (!E)
      continue;
    assert(!E->getType().getNonReferenceType()->isRecordType() &&
           "dependent counter must not be an iterator.");
    const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl());
    Address CounterAddr =
        CGF.CreateMemTemp(T: VD->getType().getNonReferenceType());
    (void)PreCondVars.setVarAddr(CGF, LocalVD: VD, TempAddr: CounterAddr);
  }
  (void)PreCondVars.apply(CGF);
  for (const Expr *E : S.dependent_inits()) {
    if (!E)
      continue;
    CGF.EmitIgnoredExpr(E);
  }
  // Check that loop is executed at least one time.
  CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount);
  PreCondVars.restore(CGF);
}
2694
/// Register private copies for 'linear' clause variables of \p D in
/// \p PrivateScope. For simd directives, linear variables that are also loop
/// counters are emitted but not registered (the counters are privatized
/// separately by EmitOMPPrivateLoopCounters).
void CodeGenFunction::EmitOMPLinearClause(
    const OMPLoopDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  // Collect canonical decls of the loop counters for simd directives so they
  // can be excluded from privatization below.
  llvm::DenseSet<const VarDecl *> SIMDLCVs;
  OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S: D);
  if (isOpenMPSimdDirective(DKind: EKind)) {
    const auto *LoopDirective = cast<OMPLoopDirective>(Val: &D);
    for (const Expr *C : LoopDirective->counters()) {
      SIMDLCVs.insert(
          V: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: C)->getDecl())->getCanonicalDecl());
    }
  }
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    auto CurPrivate = C->privates().begin();
    for (const Expr *E : C->varlist()) {
      const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl());
      const auto *PrivateVD =
          cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *CurPrivate)->getDecl());
      if (!SIMDLCVs.count(V: VD->getCanonicalDecl())) {
        // Emit private VarDecl with copy init.
        EmitVarDecl(D: *PrivateVD);
        bool IsRegistered =
            PrivateScope.addPrivate(LocalVD: VD, Addr: GetAddrOfLocalVar(VD: PrivateVD));
        assert(IsRegistered && "linear var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      } else {
        EmitVarDecl(D: *PrivateVD);
      }
      ++CurPrivate;
    }
  }
}
2729
/// Apply 'simdlen'/'safelen' clauses of \p D to the loop metadata: set the
/// vectorize width, and disable parallel memory-access annotations when a
/// 'safelen' clause constrains loop-carried dependences.
static void emitSimdlenSafelenClause(CodeGenFunction &CGF,
                                     const OMPExecutableDirective &D) {
  if (!CGF.HaveInsertPoint())
    return;
  if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) {
    RValue Len = CGF.EmitAnyExpr(E: C->getSimdlen(), aggSlot: AggValueSlot::ignored(),
                                 /*ignoreResult=*/true);
    auto *Val = cast<llvm::ConstantInt>(Val: Len.getScalarVal());
    CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
    // In presence of finite 'safelen', it may be unsafe to mark all
    // the memory instructions parallel, because loop-carried
    // dependences of 'safelen' iterations are possible.
    // (Here only the presence of a safelen clause matters; its value is
    // handled by the branch below when simdlen is absent.)
    CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>());
  } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) {
    // safelen without simdlen: use safelen as the vectorize width.
    RValue Len = CGF.EmitAnyExpr(E: C->getSafelen(), aggSlot: AggValueSlot::ignored(),
                                 /*ignoreResult=*/true);
    auto *Val = cast<llvm::ConstantInt>(Val: Len.getScalarVal());
    CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
    // In presence of finite 'safelen', it may be unsafe to mark all
    // the memory instructions parallel, because loop-carried
    // dependences of 'safelen' iterations are possible.
    CGF.LoopStack.setParallel(/*Enable=*/false);
  }
}
2754
2755// Check for the presence of an `OMPOrderedDirective`,
2756// i.e., `ordered` in `#pragma omp ordered simd`.
2757//
2758// Consider the following source code:
2759// ```
2760// __attribute__((noinline)) void omp_simd_loop(float X[ARRAY_SIZE][ARRAY_SIZE])
2761// {
2762// for (int r = 1; r < ARRAY_SIZE; ++r) {
2763// for (int c = 1; c < ARRAY_SIZE; ++c) {
2764// #pragma omp simd
2765// for (int k = 2; k < ARRAY_SIZE; ++k) {
2766// #pragma omp ordered simd
2767// X[r][k] = X[r][k - 2] + sinf((float)(r / c));
2768// }
2769// }
2770// }
2771// }
2772// ```
2773//
2774// Suppose we are in `CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective
2775// &D)`. By examining `D.dump()` we have the following AST containing
2776// `OMPOrderedDirective`:
2777//
2778// ```
2779// OMPSimdDirective 0x1c32950
2780// `-CapturedStmt 0x1c32028
2781// |-CapturedDecl 0x1c310e8
2782// | |-ForStmt 0x1c31e30
2783// | | |-DeclStmt 0x1c31298
2784// | | | `-VarDecl 0x1c31208 used k 'int' cinit
2785// | | | `-IntegerLiteral 0x1c31278 'int' 2
2786// | | |-<<<NULL>>>
2787// | | |-BinaryOperator 0x1c31308 'int' '<'
2788// | | | |-ImplicitCastExpr 0x1c312f0 'int' <LValueToRValue>
2789// | | | | `-DeclRefExpr 0x1c312b0 'int' lvalue Var 0x1c31208 'k' 'int'
2790// | | | `-IntegerLiteral 0x1c312d0 'int' 256
2791// | | |-UnaryOperator 0x1c31348 'int' prefix '++'
2792// | | | `-DeclRefExpr 0x1c31328 'int' lvalue Var 0x1c31208 'k' 'int'
2793// | | `-CompoundStmt 0x1c31e18
2794// | | `-OMPOrderedDirective 0x1c31dd8
2795// | | |-OMPSimdClause 0x1c31380
2796// | | `-CapturedStmt 0x1c31cd0
2797// ```
2798//
2799// Note the presence of `OMPOrderedDirective` above:
2800// It's (transitively) nested in a `CapturedStmt` representing the pragma
2801// annotated compound statement. Thus, we need to consider this nesting and
2802// include checking the `getCapturedStmt` in this case.
2803static bool hasOrderedDirective(const Stmt *S) {
2804 if (isa<OMPOrderedDirective>(Val: S))
2805 return true;
2806
2807 if (const auto *CS = dyn_cast<CapturedStmt>(Val: S))
2808 return hasOrderedDirective(S: CS->getCapturedStmt());
2809
2810 for (const Stmt *Child : S->children()) {
2811 if (Child && hasOrderedDirective(S: Child))
2812 return true;
2813 }
2814
2815 return false;
2816}
2817
2818static void applyConservativeSimdOrderedDirective(const Stmt &AssociatedStmt,
2819 LoopInfoStack &LoopStack) {
2820 // Check for the presence of an `OMPOrderedDirective`
2821 // i.e., `ordered` in `#pragma omp ordered simd`
2822 bool HasOrderedDirective = hasOrderedDirective(S: &AssociatedStmt);
2823 // If present then conservatively disable loop vectorization
2824 // analogously to how `emitSimdlenSafelenClause` does.
2825 if (HasOrderedDirective)
2826 LoopStack.setParallel(/*Enable=*/false);
2827}
2828
/// Initialize loop metadata for a simd directive \p D: enable vectorization,
/// then apply ordered/simdlen/safelen/order(concurrent) and inscan-reduction
/// constraints that may disable parallel access annotations.
void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D) {
  // Walk clauses and process safelen/lastprivate.
  LoopStack.setParallel(/*Enable=*/true);
  LoopStack.setVectorizeEnable();
  // An `ordered simd` directive inside the associated statement forces
  // conservative (non-parallel) annotations.
  const Stmt *AssociatedStmt = D.getAssociatedStmt();
  applyConservativeSimdOrderedDirective(AssociatedStmt: *AssociatedStmt, LoopStack);
  emitSimdlenSafelenClause(CGF&: *this, D);
  // order(concurrent) re-enables parallel annotations.
  if (const auto *C = D.getSingleClause<OMPOrderClause>())
    if (C->getKind() == OMPC_ORDER_concurrent)
      LoopStack.setParallel(/*Enable=*/true);
  OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S: D);
  if ((EKind == OMPD_simd ||
       (getLangOpts().OpenMPSimd && isOpenMPSimdDirective(DKind: EKind))) &&
      llvm::any_of(Range: D.getClausesOfKind<OMPReductionClause>(),
                   P: [](const OMPReductionClause *C) {
                     return C->getModifier() == OMPC_REDUCTION_inscan;
                   }))
    // Disable parallel access in case of prefix sum.
    LoopStack.setParallel(/*Enable=*/false);
}
2849
/// Emit final values of the loop counters of simd directive \p D back into
/// the original variables (only for counters visible outside the construct).
/// \p CondGen may supply a guard condition; if it returns a value, the final
/// copies are emitted under a conditional block.
void CodeGenFunction::EmitOMPSimdFinal(
    const OMPLoopDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
  if (!HaveInsertPoint())
    return;
  llvm::BasicBlock *DoneBB = nullptr;
  auto IC = D.counters().begin();
  auto IPC = D.private_counters().begin();
  for (const Expr *F : D.finals()) {
    const auto *OrigVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: (*IC))->getDecl());
    const auto *PrivateVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: (*IPC))->getDecl());
    const auto *CED = dyn_cast<OMPCapturedExprDecl>(Val: OrigVD);
    // Only counters that are observable after the loop (local, captured,
    // global, or captured-expression decls) need their final value stored.
    if (LocalDeclMap.count(Val: OrigVD) || CapturedStmtInfo->lookup(VD: OrigVD) ||
        OrigVD->hasGlobalStorage() || CED) {
      if (!DoneBB) {
        if (llvm::Value *Cond = CondGen(*this)) {
          // If the first post-update expression is found, emit conditional
          // block if it was requested.
          llvm::BasicBlock *ThenBB = createBasicBlock(name: ".omp.final.then");
          DoneBB = createBasicBlock(name: ".omp.final.done");
          Builder.CreateCondBr(Cond, True: ThenBB, False: DoneBB);
          EmitBlock(BB: ThenBB);
        }
      }
      Address OrigAddr = Address::invalid();
      if (CED) {
        OrigAddr = EmitLValue(E: CED->getInit()->IgnoreImpCasts()).getAddress();
      } else {
        DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(PrivateVD),
                        /*RefersToEnclosingVariableOrCapture=*/false,
                        (*IPC)->getType(), VK_LValue, (*IPC)->getExprLoc());
        OrigAddr = EmitLValue(E: &DRE).getAddress();
      }
      // Redirect OrigVD to OrigAddr so the final expression F writes there.
      OMPPrivateScope VarScope(*this);
      VarScope.addPrivate(LocalVD: OrigVD, Addr: OrigAddr);
      (void)VarScope.Privatize();
      EmitIgnoredExpr(E: F);
    }
    ++IC;
    ++IPC;
  }
  if (DoneBB)
    EmitBlock(BB: DoneBB, /*IsFinished=*/true);
}
2894
2895static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF,
2896 const OMPLoopDirective &S,
2897 CodeGenFunction::JumpDest LoopExit) {
2898 CGF.EmitOMPLoopBody(D: S, LoopExit);
2899 CGF.EmitStopPoint(S: &S);
2900}
2901
2902/// Emit a helper variable and return corresponding lvalue.
2903static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
2904 const DeclRefExpr *Helper) {
2905 auto VDecl = cast<VarDecl>(Val: Helper->getDecl());
2906 CGF.EmitVarDecl(D: *VDecl);
2907 return CGF.EmitLValue(E: Helper);
2908}
2909
/// Emit the common simd loop structure: run \p SimdInitGen then
/// \p BodyCodeGen. For simd directives with an applicable if-clause
/// (OpenMP >= 5.0), emit both a vectorized ("then") and a non-vectorized
/// ("else") version selected at runtime by the condition.
static void emitCommonSimdLoop(CodeGenFunction &CGF, const OMPLoopDirective &S,
                               const RegionCodeGenTy &SimdInitGen,
                               const RegionCodeGenTy &BodyCodeGen) {
  // "Then" branch: nontemporal handling + simd loop metadata, then the body.
  auto &&ThenGen = [&S, &SimdInitGen, &BodyCodeGen](CodeGenFunction &CGF,
                                                    PrePostActionTy &) {
    CGOpenMPRuntime::NontemporalDeclsRAII NontemporalsRegion(CGF.CGM, S);
    CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
    SimdInitGen(CGF);

    BodyCodeGen(CGF);
  };
  // "Else" branch: same body but with vectorization disabled.
  auto &&ElseGen = [&BodyCodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
    CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
    CGF.LoopStack.setVectorizeEnable(/*Enable=*/false);

    BodyCodeGen(CGF);
  };
  const Expr *IfCond = nullptr;
  OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S);
  if (isOpenMPSimdDirective(DKind: EKind)) {
    // Pick the first if-clause with no modifier or the 'simd' modifier.
    for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
      if (CGF.getLangOpts().OpenMP >= 50 &&
          (C->getNameModifier() == OMPD_unknown ||
           C->getNameModifier() == OMPD_simd)) {
        IfCond = C->getCondition();
        break;
      }
    }
  }
  if (IfCond) {
    CGF.CGM.getOpenMPRuntime().emitIfClause(CGF, Cond: IfCond, ThenGen, ElseGen);
  } else {
    // No runtime condition: emit only the vectorized version.
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}
2946
2947static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S,
2948 PrePostActionTy &Action) {
2949 Action.Enter(CGF);
2950 OMPLoopScope PreInitScope(CGF, S);
2951 // if (PreCond) {
2952 // for (IV in 0..LastIteration) BODY;
2953 // <Final counter/linear vars updates>;
2954 // }
2955
2956 // The presence of lower/upper bound variable depends on the actual directive
2957 // kind in the AST node. The variables must be emitted because some of the
2958 // expressions associated with the loop will use them.
2959 OpenMPDirectiveKind DKind = S.getDirectiveKind();
2960 if (isOpenMPDistributeDirective(DKind) ||
2961 isOpenMPWorksharingDirective(DKind) || isOpenMPTaskLoopDirective(DKind) ||
2962 isOpenMPGenericLoopDirective(DKind)) {
2963 (void)EmitOMPHelperVar(CGF, Helper: cast<DeclRefExpr>(Val: S.getLowerBoundVariable()));
2964 (void)EmitOMPHelperVar(CGF, Helper: cast<DeclRefExpr>(Val: S.getUpperBoundVariable()));
2965 }
2966
2967 OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S);
2968 // Emit: if (PreCond) - begin.
2969 // If the condition constant folds and can be elided, avoid emitting the
2970 // whole loop.
2971 bool CondConstant;
2972 llvm::BasicBlock *ContBlock = nullptr;
2973 if (CGF.ConstantFoldsToSimpleInteger(Cond: S.getPreCond(), Result&: CondConstant)) {
2974 if (!CondConstant)
2975 return;
2976 } else {
2977 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock(name: "simd.if.then");
2978 ContBlock = CGF.createBasicBlock(name: "simd.if.end");
2979 emitPreCond(CGF, S, Cond: S.getPreCond(), TrueBlock: ThenBlock, FalseBlock: ContBlock,
2980 TrueCount: CGF.getProfileCount(S: &S));
2981 CGF.EmitBlock(BB: ThenBlock);
2982 CGF.incrementProfileCounter(S: &S);
2983 }
2984
2985 // Emit the loop iteration variable.
2986 const Expr *IVExpr = S.getIterationVariable();
2987 const auto *IVDecl = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: IVExpr)->getDecl());
2988 CGF.EmitVarDecl(D: *IVDecl);
2989 CGF.EmitIgnoredExpr(E: S.getInit());
2990
2991 // Emit the iterations count variable.
2992 // If it is not a variable, Sema decided to calculate iterations count on
2993 // each iteration (e.g., it is foldable into a constant).
2994 if (const auto *LIExpr = dyn_cast<DeclRefExpr>(Val: S.getLastIteration())) {
2995 CGF.EmitVarDecl(D: *cast<VarDecl>(Val: LIExpr->getDecl()));
2996 // Emit calculation of the iterations count.
2997 CGF.EmitIgnoredExpr(E: S.getCalcLastIteration());
2998 }
2999
3000 emitAlignedClause(CGF, D: S);
3001 (void)CGF.EmitOMPLinearClauseInit(D: S);
3002 {
3003 CodeGenFunction::OMPPrivateScope LoopScope(CGF);
3004 CGF.EmitOMPPrivateClause(D: S, PrivateScope&: LoopScope);
3005 CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
3006 CGF.EmitOMPLinearClause(D: S, PrivateScope&: LoopScope);
3007 CGF.EmitOMPReductionClauseInit(D: S, PrivateScope&: LoopScope);
3008 CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(
3009 CGF, S, CGF.EmitLValue(E: S.getIterationVariable()));
3010 bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(D: S, PrivateScope&: LoopScope);
3011 (void)LoopScope.Privatize();
3012 if (isOpenMPTargetExecutionDirective(DKind: EKind))
3013 CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, D: S);
3014
3015 emitCommonSimdLoop(
3016 CGF, S,
3017 SimdInitGen: [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3018 CGF.EmitOMPSimdInit(D: S);
3019 },
3020 BodyCodeGen: [&S, &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
3021 CGF.EmitOMPInnerLoop(
3022 S, RequiresCleanup: LoopScope.requiresCleanups(), LoopCond: S.getCond(), IncExpr: S.getInc(),
3023 BodyGen: [&S](CodeGenFunction &CGF) {
3024 emitOMPLoopBodyWithStopPoint(CGF, S,
3025 LoopExit: CodeGenFunction::JumpDest());
3026 },
3027 PostIncGen: [](CodeGenFunction &) {});
3028 });
3029 CGF.EmitOMPSimdFinal(D: S, CondGen: [](CodeGenFunction &) { return nullptr; });
3030 // Emit final copy of the lastprivate variables at the end of loops.
3031 if (HasLastprivateClause)
3032 CGF.EmitOMPLastprivateClauseFinal(D: S, /*NoFinals=*/true);
3033 CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_simd);
3034 emitPostUpdateForReductionClause(CGF, D: S,
3035 CondGen: [](CodeGenFunction &) { return nullptr; });
3036 LoopScope.restoreMap();
3037 CGF.EmitOMPLinearClauseFinal(D: S, CondGen: [](CodeGenFunction &) { return nullptr; });
3038 }
3039 // Emit: if (PreCond) - end.
3040 if (ContBlock) {
3041 CGF.EmitBranch(Block: ContBlock);
3042 CGF.EmitBlock(BB: ContBlock, IsFinished: true);
3043 }
3044}
3045
3046// Pass OMPLoopDirective (instead of OMPSimdDirective) to make this function
3047// available for "loop bind(thread)", which maps to "simd".
3048static bool isSimdSupportedByOpenMPIRBuilder(const OMPLoopDirective &S) {
3049 // Check for unsupported clauses
3050 for (OMPClause *C : S.clauses()) {
3051 // Currently only order, simdlen and safelen clauses are supported
3052 if (!(isa<OMPSimdlenClause>(Val: C) || isa<OMPSafelenClause>(Val: C) ||
3053 isa<OMPOrderClause>(Val: C) || isa<OMPAlignedClause>(Val: C)))
3054 return false;
3055 }
3056
3057 // Check if we have a statement with the ordered directive.
3058 // Visit the statement hierarchy to find a compound statement
3059 // with a ordered directive in it.
3060 if (const auto *CanonLoop = dyn_cast<OMPCanonicalLoop>(Val: S.getRawStmt())) {
3061 if (const Stmt *SyntacticalLoop = CanonLoop->getLoopStmt()) {
3062 for (const Stmt *SubStmt : SyntacticalLoop->children()) {
3063 if (!SubStmt)
3064 continue;
3065 if (const CompoundStmt *CS = dyn_cast<CompoundStmt>(Val: SubStmt)) {
3066 for (const Stmt *CSSubStmt : CS->children()) {
3067 if (!CSSubStmt)
3068 continue;
3069 if (isa<OMPOrderedDirective>(Val: CSSubStmt)) {
3070 return false;
3071 }
3072 }
3073 }
3074 }
3075 }
3076 }
3077 return true;
3078}
3079
3080static llvm::MapVector<llvm::Value *, llvm::Value *>
3081GetAlignedMapping(const OMPLoopDirective &S, CodeGenFunction &CGF) {
3082 llvm::MapVector<llvm::Value *, llvm::Value *> AlignedVars;
3083 for (const auto *Clause : S.getClausesOfKind<OMPAlignedClause>()) {
3084 llvm::APInt ClauseAlignment(64, 0);
3085 if (const Expr *AlignmentExpr = Clause->getAlignment()) {
3086 auto *AlignmentCI =
3087 cast<llvm::ConstantInt>(Val: CGF.EmitScalarExpr(E: AlignmentExpr));
3088 ClauseAlignment = AlignmentCI->getValue();
3089 }
3090 for (const Expr *E : Clause->varlist()) {
3091 llvm::APInt Alignment(ClauseAlignment);
3092 if (Alignment == 0) {
3093 // OpenMP [2.8.1, Description]
3094 // If no optional parameter is specified, implementation-defined default
3095 // alignments for SIMD instructions on the target platforms are assumed.
3096 Alignment =
3097 CGF.getContext()
3098 .toCharUnitsFromBits(BitSize: CGF.getContext().getOpenMPDefaultSimdAlign(
3099 T: E->getType()->getPointeeType()))
3100 .getQuantity();
3101 }
3102 assert((Alignment == 0 || Alignment.isPowerOf2()) &&
3103 "alignment is not power of 2");
3104 llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
3105 AlignedVars[PtrValue] = CGF.Builder.getInt64(C: Alignment.getSExtValue());
3106 }
3107 }
3108 return AlignedVars;
3109}
3110
3111// Pass OMPLoopDirective (instead of OMPSimdDirective) to make this function
3112// available for "loop bind(thread)", which maps to "simd".
3113static void emitOMPSimdDirective(const OMPLoopDirective &S,
3114 CodeGenFunction &CGF, CodeGenModule &CGM) {
3115 bool UseOMPIRBuilder =
3116 CGM.getLangOpts().OpenMPIRBuilder && isSimdSupportedByOpenMPIRBuilder(S);
3117 if (UseOMPIRBuilder) {
3118 auto &&CodeGenIRBuilder = [&S, &CGM, UseOMPIRBuilder](CodeGenFunction &CGF,
3119 PrePostActionTy &) {
3120 // Use the OpenMPIRBuilder if enabled.
3121 if (UseOMPIRBuilder) {
3122 llvm::MapVector<llvm::Value *, llvm::Value *> AlignedVars =
3123 GetAlignedMapping(S, CGF);
3124 // Emit the associated statement and get its loop representation.
3125 const Stmt *Inner = S.getRawStmt();
3126 llvm::CanonicalLoopInfo *CLI =
3127 CGF.EmitOMPCollapsedCanonicalLoopNest(S: Inner, Depth: 1);
3128
3129 llvm::OpenMPIRBuilder &OMPBuilder =
3130 CGM.getOpenMPRuntime().getOMPBuilder();
3131 // Add SIMD specific metadata
3132 llvm::ConstantInt *Simdlen = nullptr;
3133 if (const auto *C = S.getSingleClause<OMPSimdlenClause>()) {
3134 RValue Len = CGF.EmitAnyExpr(E: C->getSimdlen(), aggSlot: AggValueSlot::ignored(),
3135 /*ignoreResult=*/true);
3136 auto *Val = cast<llvm::ConstantInt>(Val: Len.getScalarVal());
3137 Simdlen = Val;
3138 }
3139 llvm::ConstantInt *Safelen = nullptr;
3140 if (const auto *C = S.getSingleClause<OMPSafelenClause>()) {
3141 RValue Len = CGF.EmitAnyExpr(E: C->getSafelen(), aggSlot: AggValueSlot::ignored(),
3142 /*ignoreResult=*/true);
3143 auto *Val = cast<llvm::ConstantInt>(Val: Len.getScalarVal());
3144 Safelen = Val;
3145 }
3146 llvm::omp::OrderKind Order = llvm::omp::OrderKind::OMP_ORDER_unknown;
3147 if (const auto *C = S.getSingleClause<OMPOrderClause>()) {
3148 if (C->getKind() == OpenMPOrderClauseKind::OMPC_ORDER_concurrent) {
3149 Order = llvm::omp::OrderKind::OMP_ORDER_concurrent;
3150 }
3151 }
3152 // Add simd metadata to the collapsed loop. Do not generate
3153 // another loop for if clause. Support for if clause is done earlier.
3154 OMPBuilder.applySimd(Loop: CLI, AlignedVars,
3155 /*IfCond*/ nullptr, Order, Simdlen, Safelen);
3156 return;
3157 }
3158 };
3159 {
3160 auto LPCRegion =
3161 CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF, S);
3162 OMPLexicalScope Scope(CGF, S, OMPD_unknown);
3163 CGM.getOpenMPRuntime().emitInlinedDirective(CGF, InnermostKind: OMPD_simd,
3164 CodeGen: CodeGenIRBuilder);
3165 }
3166 return;
3167 }
3168
3169 CodeGenFunction::ParentLoopDirectiveForScanRegion ScanRegion(CGF, S);
3170 CGF.OMPFirstScanLoop = true;
3171 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3172 emitOMPSimdRegion(CGF, S, Action);
3173 };
3174 {
3175 auto LPCRegion =
3176 CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF, S);
3177 OMPLexicalScope Scope(CGF, S, OMPD_unknown);
3178 CGM.getOpenMPRuntime().emitInlinedDirective(CGF, InnermostKind: OMPD_simd, CodeGen);
3179 }
3180 // Check for outer lastprivate conditional update.
3181 checkForLastprivateConditionalUpdate(CGF, S);
3182}
3183
/// Emit '#pragma omp simd' by delegating to the file-local helper shared with
/// "loop bind(thread)" codegen.
void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
  emitOMPSimdDirective(S, CGF&: *this, CGM);
}
3187
/// Emit '#pragma omp tile'. The directive carries a pre-computed transformed
/// statement; codegen only needs to emit it under a transform scope.
void CodeGenFunction::EmitOMPTileDirective(const OMPTileDirective &S) {
  // Emit the de-sugared statement.
  OMPTransformDirectiveScopeRAII TileScope(*this, &S);
  EmitStmt(S: S.getTransformedStmt());
}
3193
/// Emit '#pragma omp stripe' by emitting its pre-computed transformed
/// statement under a transform scope.
void CodeGenFunction::EmitOMPStripeDirective(const OMPStripeDirective &S) {
  // Emit the de-sugared statement.
  OMPTransformDirectiveScopeRAII StripeScope(*this, &S);
  EmitStmt(S: S.getTransformedStmt());
}
3199
/// Emit '#pragma omp reverse' by emitting its pre-computed transformed
/// statement under a transform scope.
void CodeGenFunction::EmitOMPReverseDirective(const OMPReverseDirective &S) {
  // Emit the de-sugared statement.
  OMPTransformDirectiveScopeRAII ReverseScope(*this, &S);
  EmitStmt(S: S.getTransformedStmt());
}
3205
/// Emit '#pragma omp interchange' by emitting its pre-computed transformed
/// statement under a transform scope.
void CodeGenFunction::EmitOMPInterchangeDirective(
    const OMPInterchangeDirective &S) {
  // Emit the de-sugared statement.
  OMPTransformDirectiveScopeRAII InterchangeScope(*this, &S);
  EmitStmt(S: S.getTransformedStmt());
}
3212
/// Emit '#pragma omp fuse' by emitting its pre-computed transformed statement
/// under a transform scope.
void CodeGenFunction::EmitOMPFuseDirective(const OMPFuseDirective &S) {
  // Emit the de-sugared statement.
  OMPTransformDirectiveScopeRAII FuseScope(*this, &S);
  EmitStmt(S: S.getTransformedStmt());
}
3218
/// Emit '#pragma omp unroll'.
///
/// Two strategies: with the OpenMPIRBuilder the loop nest is consumed here
/// and unrolled through the builder's unrollLoop{Full,Partial,Heuristic}
/// entry points; otherwise only unroll metadata is set on the loop stack and
/// the associated statement is emitted unchanged.
void CodeGenFunction::EmitOMPUnrollDirective(const OMPUnrollDirective &S) {
  bool UseOMPIRBuilder = CGM.getLangOpts().OpenMPIRBuilder;

  if (UseOMPIRBuilder) {
    auto DL = SourceLocToDebugLoc(Location: S.getBeginLoc());
    const Stmt *Inner = S.getRawStmt();

    // Consume nested loop. Clear the entire remaining loop stack because a
    // fully unrolled loop is non-transformable. For partial unrolling the
    // generated outer loop is pushed back to the stack.
    llvm::CanonicalLoopInfo *CLI = EmitOMPCollapsedCanonicalLoopNest(S: Inner, Depth: 1);
    OMPLoopNestStack.clear();

    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();

    // An enclosing loop-associated construct still expects a loop to be on
    // the stack; in that case partial unrolling must hand back the generated
    // outer loop.
    bool NeedsUnrolledCLI = ExpectedOMPLoopDepth >= 1;
    llvm::CanonicalLoopInfo *UnrolledCLI = nullptr;

    if (S.hasClausesOfKind<OMPFullClause>()) {
      assert(ExpectedOMPLoopDepth == 0);
      OMPBuilder.unrollLoopFull(DL, Loop: CLI);
    } else if (auto *PartialClause = S.getSingleClause<OMPPartialClause>()) {
      // Factor stays 0 when the partial clause has no factor expression.
      uint64_t Factor = 0;
      if (Expr *FactorExpr = PartialClause->getFactor()) {
        Factor = FactorExpr->EvaluateKnownConstInt(Ctx: getContext()).getZExtValue();
        assert(Factor >= 1 && "Only positive factors are valid");
      }
      OMPBuilder.unrollLoopPartial(DL, Loop: CLI, Factor,
                                   UnrolledCLI: NeedsUnrolledCLI ? &UnrolledCLI : nullptr);
    } else {
      // No full/partial clause: leave the decision to the heuristic.
      OMPBuilder.unrollLoopHeuristic(DL, Loop: CLI);
    }

    assert((!NeedsUnrolledCLI || UnrolledCLI) &&
           "NeedsUnrolledCLI implies UnrolledCLI to be set");
    if (UnrolledCLI)
      OMPLoopNestStack.push_back(Elt: UnrolledCLI);

    return;
  }

  // This function is only called if the unrolled loop is not consumed by any
  // other loop-associated construct. Such a loop-associated construct will have
  // used the transformed AST.

  // Set the unroll metadata for the next emitted loop.
  LoopStack.setUnrollState(LoopAttributes::Enable);

  if (S.hasClausesOfKind<OMPFullClause>()) {
    LoopStack.setUnrollState(LoopAttributes::Full);
  } else if (auto *PartialClause = S.getSingleClause<OMPPartialClause>()) {
    if (Expr *FactorExpr = PartialClause->getFactor()) {
      uint64_t Factor =
          FactorExpr->EvaluateKnownConstInt(Ctx: getContext()).getZExtValue();
      assert(Factor >= 1 && "Only positive factors are valid");
      LoopStack.setUnrollCount(Factor);
    }
  }

  EmitStmt(S: S.getAssociatedStmt());
}
3280
/// Emit the outer "dispatch" loop that repeatedly obtains a chunk of
/// iterations and runs the inner loop over it:
///
///   omp.dispatch.cond: more chunks available?  -> body : end
///   omp.dispatch.body: <inner loop over the current chunk>
///   omp.dispatch.inc:  advance LB/UB (static schedules) and loop back
///   omp.dispatch.end:  static-fini for static schedules
///
/// \param DynamicOrOrdered if true, chunk bounds come from the runtime
///        (RT.emitForNext); otherwise LB/UB are advanced locally via
///        LoopArgs.NextLB/NextUB.
/// \param IsMonotonic suppresses parallel-access loop metadata for monotonic
///        schedules (unless an order(concurrent) clause overrides it).
/// \param CodeGenLoop callback that emits the inner-loop body.
/// \param CodeGenOrdered callback run after each inner-loop increment (the
///        per-iteration runtime call for ordered loops).
void CodeGenFunction::EmitOMPOuterLoop(
    bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S,
    CodeGenFunction::OMPPrivateScope &LoopScope,
    const CodeGenFunction::OMPLoopArguments &LoopArgs,
    const CodeGenFunction::CodeGenLoopTy &CodeGenLoop,
    const CodeGenFunction::CodeGenOrderedTy &CodeGenOrdered) {
  CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();

  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(T: IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

  JumpDest LoopExit = getJumpDestInCurrentScope(Name: "omp.dispatch.end");

  // Start the loop with a block that tests the condition.
  llvm::BasicBlock *CondBlock = createBasicBlock(name: "omp.dispatch.cond");
  EmitBlock(BB: CondBlock);
  const SourceRange R = S.getSourceRange();
  OMPLoopNestStack.clear();
  LoopStack.push(Header: CondBlock, StartLoc: SourceLocToDebugLoc(Location: R.getBegin()),
                 EndLoc: SourceLocToDebugLoc(Location: R.getEnd()));

  llvm::Value *BoolCondVal = nullptr;
  if (!DynamicOrOrdered) {
    // UB = min(UB, GlobalUB) or
    // UB = min(UB, PrevUB) for combined loop sharing constructs (e.g.
    // 'distribute parallel for')
    EmitIgnoredExpr(E: LoopArgs.EUB);
    // IV = LB
    EmitIgnoredExpr(E: LoopArgs.Init);
    // IV < UB
    BoolCondVal = EvaluateExprAsBool(E: LoopArgs.Cond);
  } else {
    // Ask the runtime for the next chunk; its return value decides whether
    // another trip through the dispatch loop is needed.
    BoolCondVal =
        RT.emitForNext(CGF&: *this, Loc: S.getBeginLoc(), IVSize, IVSigned, IL: LoopArgs.IL,
                       LB: LoopArgs.LB, UB: LoopArgs.UB, ST: LoopArgs.ST);
  }

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  llvm::BasicBlock *ExitBlock = LoopExit.getBlock();
  if (LoopScope.requiresCleanups())
    ExitBlock = createBasicBlock(name: "omp.dispatch.cleanup");

  llvm::BasicBlock *LoopBody = createBasicBlock(name: "omp.dispatch.body");
  Builder.CreateCondBr(Cond: BoolCondVal, True: LoopBody, False: ExitBlock);
  if (ExitBlock != LoopExit.getBlock()) {
    EmitBlock(BB: ExitBlock);
    EmitBranchThroughCleanup(Dest: LoopExit);
  }
  EmitBlock(BB: LoopBody);

  // Emit "IV = LB" (in case of static schedule, we have already calculated new
  // LB for loop condition and emitted it above).
  if (DynamicOrOrdered)
    EmitIgnoredExpr(E: LoopArgs.Init);

  // Create a block for the increment.
  JumpDest Continue = getJumpDestInCurrentScope(Name: "omp.dispatch.inc");
  BreakContinueStack.push_back(Elt: BreakContinue(S, LoopExit, Continue));

  OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S);
  emitCommonSimdLoop(
      CGF&: *this, S,
      SimdInitGen: [&S, IsMonotonic, EKind](CodeGenFunction &CGF, PrePostActionTy &) {
        // Generate !llvm.loop.parallel metadata for loads and stores for loops
        // with dynamic/guided scheduling and without ordered clause.
        if (!isOpenMPSimdDirective(DKind: EKind)) {
          CGF.LoopStack.setParallel(!IsMonotonic);
          if (const auto *C = S.getSingleClause<OMPOrderClause>())
            if (C->getKind() == OMPC_ORDER_concurrent)
              CGF.LoopStack.setParallel(/*Enable=*/true);
        } else {
          CGF.EmitOMPSimdInit(D: S);
        }
      },
      BodyCodeGen: [&S, &LoopArgs, LoopExit, &CodeGenLoop, IVSize, IVSigned, &CodeGenOrdered,
       &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
        SourceLocation Loc = S.getBeginLoc();
        // when 'distribute' is not combined with a 'for':
        // while (idx <= UB) { BODY; ++idx; }
        // when 'distribute' is combined with a 'for'
        // (e.g. 'distribute parallel for')
        // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
        CGF.EmitOMPInnerLoop(
            S, RequiresCleanup: LoopScope.requiresCleanups(), LoopCond: LoopArgs.Cond, IncExpr: LoopArgs.IncExpr,
            BodyGen: [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
              CodeGenLoop(CGF, S, LoopExit);
            },
            PostIncGen: [IVSize, IVSigned, Loc, &CodeGenOrdered](CodeGenFunction &CGF) {
              CodeGenOrdered(CGF, Loc, IVSize, IVSigned);
            });
      });

  EmitBlock(BB: Continue.getBlock());
  BreakContinueStack.pop_back();
  if (!DynamicOrOrdered) {
    // Emit "LB = LB + Stride", "UB = UB + Stride".
    EmitIgnoredExpr(E: LoopArgs.NextLB);
    EmitIgnoredExpr(E: LoopArgs.NextUB);
  }

  EmitBranch(Block: CondBlock);
  OMPLoopNestStack.clear();
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(BB: LoopExit.getBlock());

  // Tell the runtime we are done.
  auto &&CodeGen = [DynamicOrOrdered, &S, &LoopArgs](CodeGenFunction &CGF) {
    if (!DynamicOrOrdered)
      CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, Loc: S.getEndLoc(),
                                                     DKind: LoopArgs.DKind);
  };
  OMPCancelStack.emitExit(CGF&: *this, Kind: EKind, CodeGen);
}
3397
/// Initialize the runtime for a worksharing loop that needs an outer dispatch
/// loop (dynamic/guided/auto/runtime schedules, ordered loops, or chunked
/// static schedules) and emit that loop via EmitOMPOuterLoop.
void CodeGenFunction::EmitOMPForOuterLoop(
    const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic,
    const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered,
    const OMPLoopArguments &LoopArgs,
    const CodeGenDispatchBoundsTy &CGDispatchBounds) {
  CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();

  // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime).
  const bool DynamicOrOrdered = Ordered || RT.isDynamic(ScheduleKind: ScheduleKind.Schedule);

  assert((Ordered || !RT.isStaticNonchunked(ScheduleKind.Schedule,
                                            LoopArgs.Chunk != nullptr)) &&
         "static non-chunked schedule does not need outer loop");

  // Emit outer loop.
  //
  // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
  // When schedule(dynamic,chunk_size) is specified, the iterations are
  // distributed to threads in the team in chunks as the threads request them.
  // Each thread executes a chunk of iterations, then requests another chunk,
  // until no chunks remain to be distributed. Each chunk contains chunk_size
  // iterations, except for the last chunk to be distributed, which may have
  // fewer iterations. When no chunk_size is specified, it defaults to 1.
  //
  // When schedule(guided,chunk_size) is specified, the iterations are assigned
  // to threads in the team in chunks as the executing threads request them.
  // Each thread executes a chunk of iterations, then requests another chunk,
  // until no chunks remain to be assigned. For a chunk_size of 1, the size of
  // each chunk is proportional to the number of unassigned iterations divided
  // by the number of threads in the team, decreasing to 1. For a chunk_size
  // with value k (greater than 1), the size of each chunk is determined in the
  // same way, with the restriction that the chunks do not contain fewer than k
  // iterations (except for the last chunk to be assigned, which may have fewer
  // than k iterations).
  //
  // When schedule(auto) is specified, the decision regarding scheduling is
  // delegated to the compiler and/or runtime system. The programmer gives the
  // implementation the freedom to choose any possible mapping of iterations to
  // threads in the team.
  //
  // When schedule(runtime) is specified, the decision regarding scheduling is
  // deferred until run time, and the schedule and chunk size are taken from the
  // run-sched-var ICV. If the ICV is set to auto, the schedule is
  // implementation defined
  //
  // __kmpc_dispatch_init();
  // while(__kmpc_dispatch_next(&LB, &UB)) {
  //   idx = LB;
  //   while (idx <= UB) { BODY; ++idx;
  //   __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only.
  //   } // inner loop
  // }
  // __kmpc_dispatch_deinit();
  //
  // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
  // When schedule(static, chunk_size) is specified, iterations are divided into
  // chunks of size chunk_size, and the chunks are assigned to the threads in
  // the team in a round-robin fashion in the order of the thread number.
  //
  // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
  //   while (idx <= UB) { BODY; ++idx; } // inner loop
  //   LB = LB + ST;
  //   UB = UB + ST;
  // }
  //

  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(T: IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

  if (DynamicOrOrdered) {
    // Dispatch schedules take the unnormalized bounds produced by the
    // directive-specific callback.
    const std::pair<llvm::Value *, llvm::Value *> DispatchBounds =
        CGDispatchBounds(*this, S, LoopArgs.LB, LoopArgs.UB);
    llvm::Value *LBVal = DispatchBounds.first;
    llvm::Value *UBVal = DispatchBounds.second;
    CGOpenMPRuntime::DispatchRTInput DipatchRTInputValues = {LBVal, UBVal,
                                                             LoopArgs.Chunk};
    RT.emitForDispatchInit(CGF&: *this, Loc: S.getBeginLoc(), ScheduleKind, IVSize,
                           IVSigned, Ordered, DispatchValues: DipatchRTInputValues);
  } else {
    CGOpenMPRuntime::StaticRTInput StaticInit(
        IVSize, IVSigned, Ordered, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB,
        LoopArgs.ST, LoopArgs.Chunk);
    OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S);
    RT.emitForStaticInit(CGF&: *this, Loc: S.getBeginLoc(), DKind: EKind, ScheduleKind,
                         Values: StaticInit);
  }

  // Ordered loops notify the runtime after every iteration; all other loops
  // need no per-iteration callback.
  auto &&CodeGenOrdered = [Ordered](CodeGenFunction &CGF, SourceLocation Loc,
                                    const unsigned IVSize,
                                    const bool IVSigned) {
    if (Ordered) {
      CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(CGF, Loc, IVSize,
                                                            IVSigned);
    }
  };

  OMPLoopArguments OuterLoopArgs(LoopArgs.LB, LoopArgs.UB, LoopArgs.ST,
                                 LoopArgs.IL, LoopArgs.Chunk, LoopArgs.EUB);
  OuterLoopArgs.IncExpr = S.getInc();
  OuterLoopArgs.Init = S.getInit();
  OuterLoopArgs.Cond = S.getCond();
  OuterLoopArgs.NextLB = S.getNextLowerBound();
  OuterLoopArgs.NextUB = S.getNextUpperBound();
  OuterLoopArgs.DKind = LoopArgs.DKind;
  EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, LoopArgs: OuterLoopArgs,
                   CodeGenLoop: emitOMPLoopBodyWithStopPoint, CodeGenOrdered);
  if (DynamicOrOrdered) {
    RT.emitForDispatchDeinit(CGF&: *this, Loc: S.getBeginLoc());
  }
}
3509
/// No-op 'ordered' callback for EmitOMPOuterLoop, used by
/// EmitOMPDistributeOuterLoop where no per-iteration runtime call is needed.
static void emitEmptyOrdered(CodeGenFunction &, SourceLocation Loc,
                             const unsigned IVSize, const bool IVSigned) {}
3512
3513void CodeGenFunction::EmitOMPDistributeOuterLoop(
3514 OpenMPDistScheduleClauseKind ScheduleKind, const OMPLoopDirective &S,
3515 OMPPrivateScope &LoopScope, const OMPLoopArguments &LoopArgs,
3516 const CodeGenLoopTy &CodeGenLoopContent) {
3517
3518 CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
3519
3520 // Emit outer loop.
3521 // Same behavior as a OMPForOuterLoop, except that schedule cannot be
3522 // dynamic
3523 //
3524
3525 const Expr *IVExpr = S.getIterationVariable();
3526 const unsigned IVSize = getContext().getTypeSize(T: IVExpr->getType());
3527 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
3528 OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S);
3529
3530 CGOpenMPRuntime::StaticRTInput StaticInit(
3531 IVSize, IVSigned, /* Ordered = */ false, LoopArgs.IL, LoopArgs.LB,
3532 LoopArgs.UB, LoopArgs.ST, LoopArgs.Chunk);
3533 RT.emitDistributeStaticInit(CGF&: *this, Loc: S.getBeginLoc(), SchedKind: ScheduleKind, Values: StaticInit);
3534
3535 // for combined 'distribute' and 'for' the increment expression of distribute
3536 // is stored in DistInc. For 'distribute' alone, it is in Inc.
3537 Expr *IncExpr;
3538 if (isOpenMPLoopBoundSharingDirective(Kind: EKind))
3539 IncExpr = S.getDistInc();
3540 else
3541 IncExpr = S.getInc();
3542
3543 // this routine is shared by 'omp distribute parallel for' and
3544 // 'omp distribute': select the right EUB expression depending on the
3545 // directive
3546 OMPLoopArguments OuterLoopArgs;
3547 OuterLoopArgs.LB = LoopArgs.LB;
3548 OuterLoopArgs.UB = LoopArgs.UB;
3549 OuterLoopArgs.ST = LoopArgs.ST;
3550 OuterLoopArgs.IL = LoopArgs.IL;
3551 OuterLoopArgs.Chunk = LoopArgs.Chunk;
3552 OuterLoopArgs.EUB = isOpenMPLoopBoundSharingDirective(Kind: EKind)
3553 ? S.getCombinedEnsureUpperBound()
3554 : S.getEnsureUpperBound();
3555 OuterLoopArgs.IncExpr = IncExpr;
3556 OuterLoopArgs.Init = isOpenMPLoopBoundSharingDirective(Kind: EKind)
3557 ? S.getCombinedInit()
3558 : S.getInit();
3559 OuterLoopArgs.Cond = isOpenMPLoopBoundSharingDirective(Kind: EKind)
3560 ? S.getCombinedCond()
3561 : S.getCond();
3562 OuterLoopArgs.NextLB = isOpenMPLoopBoundSharingDirective(Kind: EKind)
3563 ? S.getCombinedNextLowerBound()
3564 : S.getNextLowerBound();
3565 OuterLoopArgs.NextUB = isOpenMPLoopBoundSharingDirective(Kind: EKind)
3566 ? S.getCombinedNextUpperBound()
3567 : S.getNextUpperBound();
3568 OuterLoopArgs.DKind = OMPD_distribute;
3569
3570 EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, S,
3571 LoopScope, LoopArgs: OuterLoopArgs, CodeGenLoop: CodeGenLoopContent,
3572 CodeGenOrdered: emitEmptyOrdered);
3573}
3574
3575static std::pair<LValue, LValue>
3576emitDistributeParallelForInnerBounds(CodeGenFunction &CGF,
3577 const OMPExecutableDirective &S) {
3578 const OMPLoopDirective &LS = cast<OMPLoopDirective>(Val: S);
3579 LValue LB =
3580 EmitOMPHelperVar(CGF, Helper: cast<DeclRefExpr>(Val: LS.getLowerBoundVariable()));
3581 LValue UB =
3582 EmitOMPHelperVar(CGF, Helper: cast<DeclRefExpr>(Val: LS.getUpperBoundVariable()));
3583
3584 // When composing 'distribute' with 'for' (e.g. as in 'distribute
3585 // parallel for') we need to use the 'distribute'
3586 // chunk lower and upper bounds rather than the whole loop iteration
3587 // space. These are parameters to the outlined function for 'parallel'
3588 // and we copy the bounds of the previous schedule into the
3589 // the current ones.
3590 LValue PrevLB = CGF.EmitLValue(E: LS.getPrevLowerBoundVariable());
3591 LValue PrevUB = CGF.EmitLValue(E: LS.getPrevUpperBoundVariable());
3592 llvm::Value *PrevLBVal = CGF.EmitLoadOfScalar(
3593 lvalue: PrevLB, Loc: LS.getPrevLowerBoundVariable()->getExprLoc());
3594 PrevLBVal = CGF.EmitScalarConversion(
3595 Src: PrevLBVal, SrcTy: LS.getPrevLowerBoundVariable()->getType(),
3596 DstTy: LS.getIterationVariable()->getType(),
3597 Loc: LS.getPrevLowerBoundVariable()->getExprLoc());
3598 llvm::Value *PrevUBVal = CGF.EmitLoadOfScalar(
3599 lvalue: PrevUB, Loc: LS.getPrevUpperBoundVariable()->getExprLoc());
3600 PrevUBVal = CGF.EmitScalarConversion(
3601 Src: PrevUBVal, SrcTy: LS.getPrevUpperBoundVariable()->getType(),
3602 DstTy: LS.getIterationVariable()->getType(),
3603 Loc: LS.getPrevUpperBoundVariable()->getExprLoc());
3604
3605 CGF.EmitStoreOfScalar(value: PrevLBVal, lvalue: LB);
3606 CGF.EmitStoreOfScalar(value: PrevUBVal, lvalue: UB);
3607
3608 return {LB, UB};
3609}
3610
3611/// if the 'for' loop has a dispatch schedule (e.g. dynamic, guided) then
3612/// we need to use the LB and UB expressions generated by the worksharing
3613/// code generation support, whereas in non combined situations we would
3614/// just emit 0 and the LastIteration expression
3615/// This function is necessary due to the difference of the LB and UB
3616/// types for the RT emission routines for 'for_static_init' and
3617/// 'for_dispatch_init'
3618static std::pair<llvm::Value *, llvm::Value *>
3619emitDistributeParallelForDispatchBounds(CodeGenFunction &CGF,
3620 const OMPExecutableDirective &S,
3621 Address LB, Address UB) {
3622 const OMPLoopDirective &LS = cast<OMPLoopDirective>(Val: S);
3623 const Expr *IVExpr = LS.getIterationVariable();
3624 // when implementing a dynamic schedule for a 'for' combined with a
3625 // 'distribute' (e.g. 'distribute parallel for'), the 'for' loop
3626 // is not normalized as each team only executes its own assigned
3627 // distribute chunk
3628 QualType IteratorTy = IVExpr->getType();
3629 llvm::Value *LBVal =
3630 CGF.EmitLoadOfScalar(Addr: LB, /*Volatile=*/false, Ty: IteratorTy, Loc: S.getBeginLoc());
3631 llvm::Value *UBVal =
3632 CGF.EmitLoadOfScalar(Addr: UB, /*Volatile=*/false, Ty: IteratorTy, Loc: S.getBeginLoc());
3633 return {LBVal, UBVal};
3634}
3635
3636static void emitDistributeParallelForDistributeInnerBoundParams(
3637 CodeGenFunction &CGF, const OMPExecutableDirective &S,
3638 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars) {
3639 const auto &Dir = cast<OMPLoopDirective>(Val: S);
3640 LValue LB =
3641 CGF.EmitLValue(E: cast<DeclRefExpr>(Val: Dir.getCombinedLowerBoundVariable()));
3642 llvm::Value *LBCast = CGF.Builder.CreateIntCast(
3643 V: CGF.Builder.CreateLoad(Addr: LB.getAddress()), DestTy: CGF.SizeTy, /*isSigned=*/false);
3644 CapturedVars.push_back(Elt: LBCast);
3645 LValue UB =
3646 CGF.EmitLValue(E: cast<DeclRefExpr>(Val: Dir.getCombinedUpperBoundVariable()));
3647
3648 llvm::Value *UBCast = CGF.Builder.CreateIntCast(
3649 V: CGF.Builder.CreateLoad(Addr: UB.getAddress()), DestTy: CGF.SizeTy, /*isSigned=*/false);
3650 CapturedVars.push_back(Elt: UBCast);
3651}
3652
/// Emit the inner 'parallel for[ simd]' of a combined 'distribute parallel
/// for[ simd]': outline a parallel region whose worksharing loop runs over
/// the current distribute chunk (bounds supplied by the
/// emitDistributeParallelFor* callbacks). \p LoopExit is part of the
/// CodeGenLoopTy signature and is unused here.
static void
emitInnerParallelForWhenCombined(CodeGenFunction &CGF,
                                 const OMPLoopDirective &S,
                                 CodeGenFunction::JumpDest LoopExit) {
  OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S);
  auto &&CGInlinedWorksharingLoop = [&S, EKind](CodeGenFunction &CGF,
                                                PrePostActionTy &Action) {
    Action.Enter(CGF);
    // Pick up the cancel flag from whichever combined directive this is;
    // simd variants never set it.
    bool HasCancel = false;
    if (!isOpenMPSimdDirective(DKind: EKind)) {
      if (const auto *D = dyn_cast<OMPTeamsDistributeParallelForDirective>(Val: &S))
        HasCancel = D->hasCancel();
      else if (const auto *D = dyn_cast<OMPDistributeParallelForDirective>(Val: &S))
        HasCancel = D->hasCancel();
      else if (const auto *D =
                   dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(Val: &S))
        HasCancel = D->hasCancel();
    }
    CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, EKind, HasCancel);
    CGF.EmitOMPWorksharingLoop(S, EUB: S.getPrevEnsureUpperBound(),
                               CodeGenLoopBounds: emitDistributeParallelForInnerBounds,
                               CGDispatchBounds: emitDistributeParallelForDispatchBounds);
  };

  emitCommonOMPParallelDirective(
      CGF, S, InnermostKind: isOpenMPSimdDirective(DKind: EKind) ? OMPD_for_simd : OMPD_for,
      CodeGen: CGInlinedWorksharingLoop,
      CodeGenBoundParameters: emitDistributeParallelForDistributeInnerBoundParams);
}
3682
/// Emit '#pragma omp distribute parallel for': the 'distribute' loop is
/// emitted here with DistInc as its increment; the nested 'parallel for'
/// is produced by emitInnerParallelForWhenCombined.
void CodeGenFunction::EmitOMPDistributeParallelForDirective(
    const OMPDistributeParallelForDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitInnerParallelForWhenCombined,
                              IncExpr: S.getDistInc());
  };
  OMPLexicalScope Scope(*this, S, OMPD_parallel);
  CGM.getOpenMPRuntime().emitInlinedDirective(CGF&: *this, InnermostKind: OMPD_distribute, CodeGen);
}
3692
/// Emit '#pragma omp distribute parallel for simd'; identical structure to
/// the non-simd variant, with the simd handling folded into the inner
/// worksharing-loop codegen.
void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective(
    const OMPDistributeParallelForSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitInnerParallelForWhenCombined,
                              IncExpr: S.getDistInc());
  };
  OMPLexicalScope Scope(*this, S, OMPD_parallel);
  CGM.getOpenMPRuntime().emitInlinedDirective(CGF&: *this, InnermostKind: OMPD_distribute, CodeGen);
}
3702
/// Emit '#pragma omp distribute simd' as an inlined simd region wrapping the
/// distribute loop.
void CodeGenFunction::EmitOMPDistributeSimdDirective(
    const OMPDistributeSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitOMPLoopBodyWithStopPoint, IncExpr: S.getInc());
  };
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  CGM.getOpenMPRuntime().emitInlinedDirective(CGF&: *this, InnermostKind: OMPD_simd, CodeGen);
}
3711
/// Emit the device-side outlined function for 'target simd', registered as an
/// offload entry under \p ParentName.
void CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName, const OMPTargetSimdDirective &S) {
  // Emit SPMD target parallel for region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitOMPSimdRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      D: S, ParentName, OutlinedFn&: Fn, OutlinedFnID&: Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}
3725
3726void CodeGenFunction::EmitOMPTargetSimdDirective(
3727 const OMPTargetSimdDirective &S) {
3728 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3729 emitOMPSimdRegion(CGF, S, Action);
3730 };
3731 emitCommonOMPTargetDirective(CGF&: *this, S, CodeGen);
3732}
3733
namespace {
/// Bundles a 'schedule' clause kind together with its two optional
/// modifiers.
struct ScheduleKindModifiersTy {
  OpenMPScheduleClauseKind Kind;
  OpenMPScheduleClauseModifier M1;
  OpenMPScheduleClauseModifier M2;
  ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind,
                          OpenMPScheduleClauseModifier M1,
                          OpenMPScheduleClauseModifier M2)
      : Kind(Kind), M1(M1), M2(M2) {}
};
} // namespace
3745
/// Emits code for an OpenMP worksharing loop. \p CodeGenLoopBounds produces
/// the lower/upper bound helper lvalues; \p CGDispatchBounds is forwarded to
/// the outer-loop emission used for non-static (dispatch) schedules.
/// Returns true if the directive carried a lastprivate clause (so the caller
/// may need a barrier for the final copies).
bool CodeGenFunction::EmitOMPWorksharingLoop(
    const OMPLoopDirective &S, Expr *EUB,
    const CodeGenLoopBoundsTy &CodeGenLoopBounds,
    const CodeGenDispatchBoundsTy &CGDispatchBounds) {
  // Emit the loop iteration variable.
  const auto *IVExpr = cast<DeclRefExpr>(Val: S.getIterationVariable());
  const auto *IVDecl = cast<VarDecl>(Val: IVExpr->getDecl());
  EmitVarDecl(D: *IVDecl);

  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate iterations count on each
  // iteration (e.g., it is foldable into a constant).
  if (const auto *LIExpr = dyn_cast<DeclRefExpr>(Val: S.getLastIteration())) {
    EmitVarDecl(D: *cast<VarDecl>(Val: LIExpr->getDecl()));
    // Emit calculation of the iterations count.
    EmitIgnoredExpr(E: S.getCalcLastIteration());
  }

  CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();

  // Assigned inside the precondition block below; only read after that block
  // has run (the early return above it exits before any use).
  bool HasLastprivateClause;
  // Check pre-condition.
  {
    OMPLoopScope PreInitScope(*this, S);
    // Skip the entire loop if we don't meet the precondition.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    if (ConstantFoldsToSimpleInteger(Cond: S.getPreCond(), Result&: CondConstant)) {
      if (!CondConstant)
        return false;
    } else {
      llvm::BasicBlock *ThenBlock = createBasicBlock(name: "omp.precond.then");
      ContBlock = createBasicBlock(name: "omp.precond.end");
      emitPreCond(CGF&: *this, S, Cond: S.getPreCond(), TrueBlock: ThenBlock, FalseBlock: ContBlock,
                  TrueCount: getProfileCount(S: &S));
      EmitBlock(BB: ThenBlock);
      incrementProfileCounter(S: &S);
    }

    RunCleanupsScope DoacrossCleanupScope(*this);
    bool Ordered = false;
    if (const auto *OrderedClause = S.getSingleClause<OMPOrderedClause>()) {
      if (OrderedClause->getNumForLoops())
        RT.emitDoacrossInit(CGF&: *this, D: S, NumIterations: OrderedClause->getLoopNumIterations());
      else
        Ordered = true;
    }

    emitAlignedClause(CGF&: *this, D: S);
    bool HasLinears = EmitOMPLinearClauseInit(D: S);
    // Emit helper vars inits.

    std::pair<LValue, LValue> Bounds = CodeGenLoopBounds(*this, S);
    LValue LB = Bounds.first;
    LValue UB = Bounds.second;
    LValue ST =
        EmitOMPHelperVar(CGF&: *this, Helper: cast<DeclRefExpr>(Val: S.getStrideVariable()));
    LValue IL =
        EmitOMPHelperVar(CGF&: *this, Helper: cast<DeclRefExpr>(Val: S.getIsLastIterVariable()));

    // Emit 'then' code.
    {
      OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S);
      OMPPrivateScope LoopScope(*this);
      if (EmitOMPFirstprivateClause(D: S, PrivateScope&: LoopScope) || HasLinears) {
        // Emit implicit barrier to synchronize threads and avoid data races on
        // initialization of firstprivate variables and post-update of
        // lastprivate variables.
        CGM.getOpenMPRuntime().emitBarrierCall(
            CGF&: *this, Loc: S.getBeginLoc(), Kind: OMPD_unknown, /*EmitChecks=*/false,
            /*ForceSimpleCall=*/true);
      }
      EmitOMPPrivateClause(D: S, PrivateScope&: LoopScope);
      CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(
          *this, S, EmitLValue(E: S.getIterationVariable()));
      HasLastprivateClause = EmitOMPLastprivateClauseInit(D: S, PrivateScope&: LoopScope);
      EmitOMPReductionClauseInit(D: S, PrivateScope&: LoopScope);
      EmitOMPPrivateLoopCounters(S, LoopScope);
      EmitOMPLinearClause(D: S, PrivateScope&: LoopScope);
      (void)LoopScope.Privatize();
      if (isOpenMPTargetExecutionDirective(DKind: EKind))
        CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF&: *this, D: S);

      // Detect the loop schedule kind and chunk.
      const Expr *ChunkExpr = nullptr;
      OpenMPScheduleTy ScheduleKind;
      if (const auto *C = S.getSingleClause<OMPScheduleClause>()) {
        ScheduleKind.Schedule = C->getScheduleKind();
        ScheduleKind.M1 = C->getFirstScheduleModifier();
        ScheduleKind.M2 = C->getSecondScheduleModifier();
        ChunkExpr = C->getChunkSize();
      } else {
        // Default behaviour for schedule clause.
        CGM.getOpenMPRuntime().getDefaultScheduleAndChunk(
            CGF&: *this, S, ScheduleKind&: ScheduleKind.Schedule, ChunkExpr);
      }
      bool HasChunkSizeOne = false;
      llvm::Value *Chunk = nullptr;
      if (ChunkExpr) {
        Chunk = EmitScalarExpr(E: ChunkExpr);
        Chunk = EmitScalarConversion(Src: Chunk, SrcTy: ChunkExpr->getType(),
                                     DstTy: S.getIterationVariable()->getType(),
                                     Loc: S.getBeginLoc());
        // A compile-time chunk of 1 enables the simpler chunked-static
        // lowering below.
        Expr::EvalResult Result;
        if (ChunkExpr->EvaluateAsInt(Result, Ctx: getContext())) {
          llvm::APSInt EvaluatedChunk = Result.Val.getInt();
          HasChunkSizeOne = (EvaluatedChunk.getLimitedValue() == 1);
        }
      }
      const unsigned IVSize = getContext().getTypeSize(T: IVExpr->getType());
      const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
      // OpenMP 4.5, 2.7.1 Loop Construct, Description.
      // If the static schedule kind is specified or if the ordered clause is
      // specified, and if no monotonic modifier is specified, the effect will
      // be as if the monotonic modifier was specified.
      bool StaticChunkedOne =
          RT.isStaticChunked(ScheduleKind: ScheduleKind.Schedule,
                             /* Chunked */ Chunk != nullptr) &&
          HasChunkSizeOne && isOpenMPLoopBoundSharingDirective(Kind: EKind);
      bool IsMonotonic =
          Ordered ||
          (ScheduleKind.Schedule == OMPC_SCHEDULE_static &&
           !(ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_nonmonotonic ||
             ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_nonmonotonic)) ||
          ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic ||
          ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic;
      if ((RT.isStaticNonchunked(ScheduleKind: ScheduleKind.Schedule,
                                 /* Chunked */ Chunk != nullptr) ||
           StaticChunkedOne) &&
          !Ordered) {
        JumpDest LoopExit =
            getJumpDestInCurrentScope(Target: createBasicBlock(name: "omp.loop.exit"));
        emitCommonSimdLoop(
            CGF&: *this, S,
            SimdInitGen: [&S, EKind](CodeGenFunction &CGF, PrePostActionTy &) {
              if (isOpenMPSimdDirective(DKind: EKind)) {
                CGF.EmitOMPSimdInit(D: S);
              } else if (const auto *C = S.getSingleClause<OMPOrderClause>()) {
                if (C->getKind() == OMPC_ORDER_concurrent)
                  CGF.LoopStack.setParallel(/*Enable=*/true);
              }
            },
            BodyCodeGen: [IVSize, IVSigned, Ordered, IL, LB, UB, ST, StaticChunkedOne, Chunk,
             &S, ScheduleKind, LoopExit, EKind,
             &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
              // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
              // When no chunk_size is specified, the iteration space is divided
              // into chunks that are approximately equal in size, and at most
              // one chunk is distributed to each thread. Note that the size of
              // the chunks is unspecified in this case.
              CGOpenMPRuntime::StaticRTInput StaticInit(
                  IVSize, IVSigned, Ordered, IL.getAddress(), LB.getAddress(),
                  UB.getAddress(), ST.getAddress(),
                  StaticChunkedOne ? Chunk : nullptr);
              CGF.CGM.getOpenMPRuntime().emitForStaticInit(
                  CGF, Loc: S.getBeginLoc(), DKind: EKind, ScheduleKind, Values: StaticInit);
              // UB = min(UB, GlobalUB);
              if (!StaticChunkedOne)
                CGF.EmitIgnoredExpr(E: S.getEnsureUpperBound());
              // IV = LB;
              CGF.EmitIgnoredExpr(E: S.getInit());
              // For unchunked static schedule generate:
              //
              // while (idx <= UB) {
              //   BODY;
              //   ++idx;
              // }
              //
              // For static schedule with chunk one:
              //
              // while (IV <= PrevUB) {
              //   BODY;
              //   IV += ST;
              // }
              CGF.EmitOMPInnerLoop(
                  S, RequiresCleanup: LoopScope.requiresCleanups(),
                  LoopCond: StaticChunkedOne ? S.getCombinedParForInDistCond()
                                   : S.getCond(),
                  IncExpr: StaticChunkedOne ? S.getDistInc() : S.getInc(),
                  BodyGen: [&S, LoopExit](CodeGenFunction &CGF) {
                    emitOMPLoopBodyWithStopPoint(CGF, S, LoopExit);
                  },
                  PostIncGen: [](CodeGenFunction &) {});
            });
        EmitBlock(BB: LoopExit.getBlock());
        // Tell the runtime we are done.
        auto &&CodeGen = [&S](CodeGenFunction &CGF) {
          CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, Loc: S.getEndLoc(),
                                                         DKind: OMPD_for);
        };
        OMPCancelStack.emitExit(CGF&: *this, Kind: EKind, CodeGen);
      } else {
        // Emit the outer loop, which requests its work chunk [LB..UB] from
        // runtime and runs the inner loop to process it.
        OMPLoopArguments LoopArguments(LB.getAddress(), UB.getAddress(),
                                       ST.getAddress(), IL.getAddress(), Chunk,
                                       EUB);
        LoopArguments.DKind = OMPD_for;
        EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered,
                            LoopArgs: LoopArguments, CGDispatchBounds);
      }
      if (isOpenMPSimdDirective(DKind: EKind)) {
        EmitOMPSimdFinal(D: S, CondGen: [IL, &S](CodeGenFunction &CGF) {
          return CGF.Builder.CreateIsNotNull(
              Arg: CGF.EmitLoadOfScalar(lvalue: IL, Loc: S.getBeginLoc()));
        });
      }
      EmitOMPReductionClauseFinal(
          D: S, /*ReductionKind=*/isOpenMPSimdDirective(DKind: EKind)
                 ? /*Parallel and Simd*/ OMPD_parallel_for_simd
                 : /*Parallel only*/ OMPD_parallel);
      // Emit post-update of the reduction variables if IsLastIter != 0.
      emitPostUpdateForReductionClause(
          CGF&: *this, D: S, CondGen: [IL, &S](CodeGenFunction &CGF) {
            return CGF.Builder.CreateIsNotNull(
                Arg: CGF.EmitLoadOfScalar(lvalue: IL, Loc: S.getBeginLoc()));
          });
      // Emit final copy of the lastprivate variables if IsLastIter != 0.
      if (HasLastprivateClause)
        EmitOMPLastprivateClauseFinal(
            D: S, NoFinals: isOpenMPSimdDirective(DKind: EKind),
            IsLastIterCond: Builder.CreateIsNotNull(Arg: EmitLoadOfScalar(lvalue: IL, Loc: S.getBeginLoc())));
      LoopScope.restoreMap();
      EmitOMPLinearClauseFinal(D: S, CondGen: [IL, &S](CodeGenFunction &CGF) {
        return CGF.Builder.CreateIsNotNull(
            Arg: CGF.EmitLoadOfScalar(lvalue: IL, Loc: S.getBeginLoc()));
      });
    }
    DoacrossCleanupScope.ForceCleanup();
    // We're now done with the loop, so jump to the continuation block.
    if (ContBlock) {
      EmitBranch(Block: ContBlock);
      EmitBlock(BB: ContBlock, /*IsFinished=*/true);
    }
  }
  return HasLastprivateClause;
}
3985
3986/// The following two functions generate expressions for the loop lower
3987/// and upper bounds in case of static and dynamic (dispatch) schedule
3988/// of the associated 'for' or 'distribute' loop.
3989static std::pair<LValue, LValue>
3990emitForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
3991 const auto &LS = cast<OMPLoopDirective>(Val: S);
3992 LValue LB =
3993 EmitOMPHelperVar(CGF, Helper: cast<DeclRefExpr>(Val: LS.getLowerBoundVariable()));
3994 LValue UB =
3995 EmitOMPHelperVar(CGF, Helper: cast<DeclRefExpr>(Val: LS.getUpperBoundVariable()));
3996 return {LB, UB};
3997}
3998
3999/// When dealing with dispatch schedules (e.g. dynamic, guided) we do not
4000/// consider the lower and upper bound expressions generated by the
4001/// worksharing loop support, but we use 0 and the iteration space size as
4002/// constants
4003static std::pair<llvm::Value *, llvm::Value *>
4004emitDispatchForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S,
4005 Address LB, Address UB) {
4006 const auto &LS = cast<OMPLoopDirective>(Val: S);
4007 const Expr *IVExpr = LS.getIterationVariable();
4008 const unsigned IVSize = CGF.getContext().getTypeSize(T: IVExpr->getType());
4009 llvm::Value *LBVal = CGF.Builder.getIntN(N: IVSize, C: 0);
4010 llvm::Value *UBVal = CGF.EmitScalarExpr(E: LS.getLastIteration());
4011 return {LBVal, UBVal};
4012}
4013
/// Emits internal temp array declarations for the directive with inscan
/// reductions.
/// The code is the following:
/// \code
/// size num_iters = <num_iters>;
/// <type> buffer[num_iters];
/// \endcode
static void emitScanBasedDirectiveDecls(
    CodeGenFunction &CGF, const OMPLoopDirective &S,
    llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen) {
  llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast(
      V: NumIteratorsGen(CGF), DestTy: CGF.SizeTy, /*isSigned=*/false);
  SmallVector<const Expr *, 4> Shareds;
  SmallVector<const Expr *, 4> Privates;
  SmallVector<const Expr *, 4> ReductionOps;
  SmallVector<const Expr *, 4> CopyArrayTemps;
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
    assert(C->getModifier() == OMPC_REDUCTION_inscan &&
           "Only inscan reductions are expected.");
    Shareds.append(in_start: C->varlist_begin(), in_end: C->varlist_end());
    Privates.append(in_start: C->privates().begin(), in_end: C->privates().end());
    ReductionOps.append(in_start: C->reduction_ops().begin(), in_end: C->reduction_ops().end());
    CopyArrayTemps.append(in_start: C->copy_array_temps().begin(),
                          in_end: C->copy_array_temps().end());
  }
  {
    // Emit buffers for each reduction variables.
    // ReductionCodeGen is required to emit correctly the code for array
    // reductions.
    ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps);
    unsigned Count = 0;
    // ITA walks the per-reduction temp array declarations in lock step with
    // the Privates list.
    auto *ITA = CopyArrayTemps.begin();
    for (const Expr *IRef : Privates) {
      const auto *PrivateVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: IRef)->getDecl());
      // Emit variably modified arrays, used for arrays/array sections
      // reductions.
      if (PrivateVD->getType()->isVariablyModifiedType()) {
        RedCG.emitSharedOrigLValue(CGF, N: Count);
        RedCG.emitAggregateType(CGF, N: Count);
      }
      // Bind the VLA size expression of the buffer to the iteration count so
      // the temporary array is declared with the right dimension.
      CodeGenFunction::OpaqueValueMapping DimMapping(
          CGF,
          cast<OpaqueValueExpr>(
              Val: cast<VariableArrayType>(Val: (*ITA)->getType()->getAsArrayTypeUnsafe())
                  ->getSizeExpr()),
          RValue::get(V: OMPScanNumIterations));
      // Emit temp buffer.
      CGF.EmitVarDecl(D: *cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *ITA)->getDecl()));
      ++ITA;
      ++Count;
    }
  }
}
4067
/// Copies final inscan reductions values to the original variables.
/// The code is the following:
/// \code
/// <orig_var> = buffer[num_iters-1];
/// \endcode
static void emitScanBasedDirectiveFinals(
    CodeGenFunction &CGF, const OMPLoopDirective &S,
    llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen) {
  llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast(
      V: NumIteratorsGen(CGF), DestTy: CGF.SizeTy, /*isSigned=*/false);
  SmallVector<const Expr *, 4> Shareds;
  SmallVector<const Expr *, 4> LHSs;
  SmallVector<const Expr *, 4> RHSs;
  SmallVector<const Expr *, 4> Privates;
  SmallVector<const Expr *, 4> CopyOps;
  SmallVector<const Expr *, 4> CopyArrayElems;
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
    assert(C->getModifier() == OMPC_REDUCTION_inscan &&
           "Only inscan reductions are expected.");
    Shareds.append(in_start: C->varlist_begin(), in_end: C->varlist_end());
    LHSs.append(in_start: C->lhs_exprs().begin(), in_end: C->lhs_exprs().end());
    RHSs.append(in_start: C->rhs_exprs().begin(), in_end: C->rhs_exprs().end());
    Privates.append(in_start: C->privates().begin(), in_end: C->privates().end());
    CopyOps.append(in_start: C->copy_ops().begin(), in_end: C->copy_ops().end());
    CopyArrayElems.append(in_start: C->copy_array_elems().begin(),
                          in_end: C->copy_array_elems().end());
  }
  // Create temp var and copy LHS value to this temp value.
  // LHS = TMP[LastIter];
  // OMPLast is the index of the last iteration: num_iters - 1.
  llvm::Value *OMPLast = CGF.Builder.CreateNSWSub(
      LHS: OMPScanNumIterations,
      RHS: llvm::ConstantInt::get(Ty: CGF.SizeTy, V: 1, /*isSigned=*/IsSigned: false));
  // For each reduction variable, copy buffer[num_iters-1] back to the
  // original (shared) variable using the clause's copy operation.
  for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
    const Expr *PrivateExpr = Privates[I];
    const Expr *OrigExpr = Shareds[I];
    const Expr *CopyArrayElem = CopyArrayElems[I];
    // Bind the array subscript's opaque index to OMPLast while emitting the
    // element lvalue.
    CodeGenFunction::OpaqueValueMapping IdxMapping(
        CGF,
        cast<OpaqueValueExpr>(
            Val: cast<ArraySubscriptExpr>(Val: CopyArrayElem)->getIdx()),
        RValue::get(V: OMPLast));
    LValue DestLVal = CGF.EmitLValue(E: OrigExpr);
    LValue SrcLVal = CGF.EmitLValue(E: CopyArrayElem);
    CGF.EmitOMPCopy(
        OriginalType: PrivateExpr->getType(), DestAddr: DestLVal.getAddress(), SrcAddr: SrcLVal.getAddress(),
        DestVD: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: LHSs[I])->getDecl()),
        SrcVD: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: RHSs[I])->getDecl()), Copy: CopyOps[I]);
  }
}
4117
/// Emits the code for the directive with inscan reductions.
/// The code is the following:
/// \code
/// #pragma omp ...
/// for (i: 0..<num_iters>) {
///   <input phase>;
///   buffer[i] = red;
/// }
/// #pragma omp master // in parallel region
/// for (int k = 0; k != ceil(log2(num_iters)); ++k)
/// for (size cnt = last_iter; cnt >= pow(2, k); --cnt)
///   buffer[cnt] op= buffer[cnt-pow(2,k)];
/// #pragma omp barrier // in parallel region
/// #pragma omp ...
/// for (0..<num_iters>) {
///   red = InclusiveScan ? buffer[i] : buffer[i-1];
///   <scan phase>;
/// }
/// \endcode
static void emitScanBasedDirective(
    CodeGenFunction &CGF, const OMPLoopDirective &S,
    llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen,
    llvm::function_ref<void(CodeGenFunction &)> FirstGen,
    llvm::function_ref<void(CodeGenFunction &)> SecondGen) {
  llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast(
      V: NumIteratorsGen(CGF), DestTy: CGF.SizeTy, /*isSigned=*/false);
  SmallVector<const Expr *, 4> Privates;
  SmallVector<const Expr *, 4> ReductionOps;
  SmallVector<const Expr *, 4> LHSs;
  SmallVector<const Expr *, 4> RHSs;
  SmallVector<const Expr *, 4> CopyArrayElems;
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
    assert(C->getModifier() == OMPC_REDUCTION_inscan &&
           "Only inscan reductions are expected.");
    Privates.append(in_start: C->privates().begin(), in_end: C->privates().end());
    ReductionOps.append(in_start: C->reduction_ops().begin(), in_end: C->reduction_ops().end());
    LHSs.append(in_start: C->lhs_exprs().begin(), in_end: C->lhs_exprs().end());
    RHSs.append(in_start: C->rhs_exprs().begin(), in_end: C->rhs_exprs().end());
    CopyArrayElems.append(in_start: C->copy_array_elems().begin(),
                          in_end: C->copy_array_elems().end());
  }
  CodeGenFunction::ParentLoopDirectiveForScanRegion ScanRegion(CGF, S);
  {
    // Emit loop with input phase:
    // #pragma omp ...
    // for (i: 0..<num_iters>) {
    //   <input phase>;
    //   buffer[i] = red;
    // }
    CGF.OMPFirstScanLoop = true;
    CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
    FirstGen(CGF);
  }
  // #pragma omp barrier // in parallel region
  auto &&CodeGen = [&S, OMPScanNumIterations, &LHSs, &RHSs, &CopyArrayElems,
                    &ReductionOps,
                    &Privates](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    // Emit prefix reduction:
    // #pragma omp master // in parallel region
    // for (int k = 0; k <= ceil(log2(n)); ++k)
    llvm::BasicBlock *InputBB = CGF.Builder.GetInsertBlock();
    llvm::BasicBlock *LoopBB = CGF.createBasicBlock(name: "omp.outer.log.scan.body");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock(name: "omp.outer.log.scan.exit");
    // Number of outer iterations = ceil(log2(num_iters)), computed via the
    // llvm.log2/llvm.ceil intrinsics on a double and truncated back to int.
    llvm::Function *F =
        CGF.CGM.getIntrinsic(IID: llvm::Intrinsic::log2, Tys: CGF.DoubleTy);
    llvm::Value *Arg =
        CGF.Builder.CreateUIToFP(V: OMPScanNumIterations, DestTy: CGF.DoubleTy);
    llvm::Value *LogVal = CGF.EmitNounwindRuntimeCall(callee: F, args: Arg);
    F = CGF.CGM.getIntrinsic(IID: llvm::Intrinsic::ceil, Tys: CGF.DoubleTy);
    LogVal = CGF.EmitNounwindRuntimeCall(callee: F, args: LogVal);
    LogVal = CGF.Builder.CreateFPToUI(V: LogVal, DestTy: CGF.IntTy);
    llvm::Value *NMin1 = CGF.Builder.CreateNUWSub(
        LHS: OMPScanNumIterations, RHS: llvm::ConstantInt::get(Ty: CGF.SizeTy, V: 1));
    auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, TemporaryLocation: S.getBeginLoc());
    CGF.EmitBlock(BB: LoopBB);
    auto *Counter = CGF.Builder.CreatePHI(Ty: CGF.IntTy, NumReservedValues: 2);
    // size pow2k = 1;
    auto *Pow2K = CGF.Builder.CreatePHI(Ty: CGF.SizeTy, NumReservedValues: 2);
    Counter->addIncoming(V: llvm::ConstantInt::get(Ty: CGF.IntTy, V: 0), BB: InputBB);
    Pow2K->addIncoming(V: llvm::ConstantInt::get(Ty: CGF.SizeTy, V: 1), BB: InputBB);
    // for (size i = n - 1; i >= 2 ^ k; --i)
    //   tmp[i] op= tmp[i-pow2k];
    llvm::BasicBlock *InnerLoopBB =
        CGF.createBasicBlock(name: "omp.inner.log.scan.body");
    llvm::BasicBlock *InnerExitBB =
        CGF.createBasicBlock(name: "omp.inner.log.scan.exit");
    llvm::Value *CmpI = CGF.Builder.CreateICmpUGE(LHS: NMin1, RHS: Pow2K);
    CGF.Builder.CreateCondBr(Cond: CmpI, True: InnerLoopBB, False: InnerExitBB);
    CGF.EmitBlock(BB: InnerLoopBB);
    auto *IVal = CGF.Builder.CreatePHI(Ty: CGF.SizeTy, NumReservedValues: 2);
    IVal->addIncoming(V: NMin1, BB: LoopBB);
    {
      CodeGenFunction::OMPPrivateScope PrivScope(CGF);
      auto *ILHS = LHSs.begin();
      auto *IRHS = RHSs.begin();
      for (const Expr *CopyArrayElem : CopyArrayElems) {
        const auto *LHSVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *ILHS)->getDecl());
        const auto *RHSVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IRHS)->getDecl());
        // LHS is buffer[i]: map the subscript's opaque index to IVal while
        // computing the element address.
        Address LHSAddr = Address::invalid();
        {
          CodeGenFunction::OpaqueValueMapping IdxMapping(
              CGF,
              cast<OpaqueValueExpr>(
                  Val: cast<ArraySubscriptExpr>(Val: CopyArrayElem)->getIdx()),
              RValue::get(V: IVal));
          LHSAddr = CGF.EmitLValue(E: CopyArrayElem).getAddress();
        }
        PrivScope.addPrivate(LocalVD: LHSVD, Addr: LHSAddr);
        // RHS is buffer[i - pow2k]: same element expression with the shifted
        // index bound instead.
        Address RHSAddr = Address::invalid();
        {
          llvm::Value *OffsetIVal = CGF.Builder.CreateNUWSub(LHS: IVal, RHS: Pow2K);
          CodeGenFunction::OpaqueValueMapping IdxMapping(
              CGF,
              cast<OpaqueValueExpr>(
                  Val: cast<ArraySubscriptExpr>(Val: CopyArrayElem)->getIdx()),
              RValue::get(V: OffsetIVal));
          RHSAddr = CGF.EmitLValue(E: CopyArrayElem).getAddress();
        }
        PrivScope.addPrivate(LocalVD: RHSVD, Addr: RHSAddr);
        ++ILHS;
        ++IRHS;
      }
      PrivScope.Privatize();
      CGF.CGM.getOpenMPRuntime().emitReduction(
          CGF, Loc: S.getEndLoc(), Privates, LHSExprs: LHSs, RHSExprs: RHSs, ReductionOps,
          Options: {/*WithNowait=*/true, /*SimpleReduction=*/true,
           /*IsPrivateVarReduction*/ {}, .ReductionKind: OMPD_unknown});
    }
    llvm::Value *NextIVal =
        CGF.Builder.CreateNUWSub(LHS: IVal, RHS: llvm::ConstantInt::get(Ty: CGF.SizeTy, V: 1));
    IVal->addIncoming(V: NextIVal, BB: CGF.Builder.GetInsertBlock());
    CmpI = CGF.Builder.CreateICmpUGE(LHS: NextIVal, RHS: Pow2K);
    CGF.Builder.CreateCondBr(Cond: CmpI, True: InnerLoopBB, False: InnerExitBB);
    CGF.EmitBlock(BB: InnerExitBB);
    llvm::Value *Next =
        CGF.Builder.CreateNUWAdd(LHS: Counter, RHS: llvm::ConstantInt::get(Ty: CGF.IntTy, V: 1));
    Counter->addIncoming(V: Next, BB: CGF.Builder.GetInsertBlock());
    // pow2k <<= 1;
    llvm::Value *NextPow2K =
        CGF.Builder.CreateShl(LHS: Pow2K, RHS: 1, Name: "", /*HasNUW=*/true);
    Pow2K->addIncoming(V: NextPow2K, BB: CGF.Builder.GetInsertBlock());
    llvm::Value *Cmp = CGF.Builder.CreateICmpNE(LHS: Next, RHS: LogVal);
    CGF.Builder.CreateCondBr(Cond: Cmp, True: LoopBB, False: ExitBB);
    auto DL1 = ApplyDebugLocation::CreateDefaultArtificial(CGF, TemporaryLocation: S.getEndLoc());
    CGF.EmitBlock(BB: ExitBB);
  };
  OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S);
  if (isOpenMPParallelDirective(DKind: EKind)) {
    // Inside a parallel region the reduction step runs on the master thread
    // only, followed by a barrier before the scan-phase loop.
    CGF.CGM.getOpenMPRuntime().emitMasterRegion(CGF, MasterOpGen: CodeGen, Loc: S.getBeginLoc());
    CGF.CGM.getOpenMPRuntime().emitBarrierCall(
        CGF, Loc: S.getBeginLoc(), Kind: OMPD_unknown, /*EmitChecks=*/false,
        /*ForceSimpleCall=*/true);
  } else {
    RegionCodeGenTy RCG(CodeGen);
    RCG(CGF);
  }

  CGF.OMPFirstScanLoop = false;
  SecondGen(CGF);
}
4279
/// Emits a worksharing-loop directive body. If the directive carries an
/// 'inscan' reduction the loop is emitted twice (input phase, then scan
/// phase) through emitScanBasedDirective; otherwise a single worksharing
/// loop is emitted. Returns true if a lastprivate clause was present.
static bool emitWorksharingDirective(CodeGenFunction &CGF,
                                     const OMPLoopDirective &S,
                                     bool HasCancel) {
  bool HasLastprivates;
  OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S);
  if (llvm::any_of(Range: S.getClausesOfKind<OMPReductionClause>(),
                   P: [](const OMPReductionClause *C) {
                     return C->getModifier() == OMPC_REDUCTION_inscan;
                   })) {
    const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
      CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
      OMPLoopScope LoopScope(CGF, S);
      return CGF.EmitScalarExpr(E: S.getNumIterations());
    };
    // First pass: input phase; its lastprivate result is discarded.
    const auto &&FirstGen = [&S, HasCancel, EKind](CodeGenFunction &CGF) {
      CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, EKind, HasCancel);
      (void)CGF.EmitOMPWorksharingLoop(S, EUB: S.getEnsureUpperBound(),
                                       CodeGenLoopBounds: emitForLoopBounds,
                                       CGDispatchBounds: emitDispatchForLoopBounds);
      // Emit an implicit barrier at the end.
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, Loc: S.getBeginLoc(),
                                                 Kind: OMPD_for);
    };
    // Second pass: scan phase; this one determines HasLastprivates.
    const auto &&SecondGen = [&S, HasCancel, EKind,
                              &HasLastprivates](CodeGenFunction &CGF) {
      CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, EKind, HasCancel);
      HasLastprivates = CGF.EmitOMPWorksharingLoop(S, EUB: S.getEnsureUpperBound(),
                                                   CodeGenLoopBounds: emitForLoopBounds,
                                                   CGDispatchBounds: emitDispatchForLoopBounds);
    };
    if (!isOpenMPParallelDirective(DKind: EKind))
      emitScanBasedDirectiveDecls(CGF, S, NumIteratorsGen);
    emitScanBasedDirective(CGF, S, NumIteratorsGen, FirstGen, SecondGen);
    if (!isOpenMPParallelDirective(DKind: EKind))
      emitScanBasedDirectiveFinals(CGF, S, NumIteratorsGen);
  } else {
    CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, EKind, HasCancel);
    HasLastprivates = CGF.EmitOMPWorksharingLoop(S, EUB: S.getEnsureUpperBound(),
                                                 CodeGenLoopBounds: emitForLoopBounds,
                                                 CGDispatchBounds: emitDispatchForLoopBounds);
  }
  return HasLastprivates;
}
4323
4324// Pass OMPLoopDirective (instead of OMPForDirective) to make this check
4325// available for "loop bind(parallel)", which maps to "for".
4326static bool isForSupportedByOpenMPIRBuilder(const OMPLoopDirective &S,
4327 bool HasCancel) {
4328 if (HasCancel)
4329 return false;
4330 for (OMPClause *C : S.clauses()) {
4331 if (isa<OMPNowaitClause, OMPBindClause>(Val: C))
4332 continue;
4333
4334 if (auto *SC = dyn_cast<OMPScheduleClause>(Val: C)) {
4335 if (SC->getFirstScheduleModifier() != OMPC_SCHEDULE_MODIFIER_unknown)
4336 return false;
4337 if (SC->getSecondScheduleModifier() != OMPC_SCHEDULE_MODIFIER_unknown)
4338 return false;
4339 switch (SC->getScheduleKind()) {
4340 case OMPC_SCHEDULE_auto:
4341 case OMPC_SCHEDULE_dynamic:
4342 case OMPC_SCHEDULE_runtime:
4343 case OMPC_SCHEDULE_guided:
4344 case OMPC_SCHEDULE_static:
4345 continue;
4346 case OMPC_SCHEDULE_unknown:
4347 return false;
4348 }
4349 }
4350
4351 return false;
4352 }
4353
4354 return true;
4355}
4356
4357static llvm::omp::ScheduleKind
4358convertClauseKindToSchedKind(OpenMPScheduleClauseKind ScheduleClauseKind) {
4359 switch (ScheduleClauseKind) {
4360 case OMPC_SCHEDULE_unknown:
4361 return llvm::omp::OMP_SCHEDULE_Default;
4362 case OMPC_SCHEDULE_auto:
4363 return llvm::omp::OMP_SCHEDULE_Auto;
4364 case OMPC_SCHEDULE_dynamic:
4365 return llvm::omp::OMP_SCHEDULE_Dynamic;
4366 case OMPC_SCHEDULE_guided:
4367 return llvm::omp::OMP_SCHEDULE_Guided;
4368 case OMPC_SCHEDULE_runtime:
4369 return llvm::omp::OMP_SCHEDULE_Runtime;
4370 case OMPC_SCHEDULE_static:
4371 return llvm::omp::OMP_SCHEDULE_Static;
4372 }
4373 llvm_unreachable("Unhandled schedule kind");
4374}
4375
// Pass OMPLoopDirective (instead of OMPForDirective) to make this function
// available for "loop bind(parallel)", which maps to "for".
// Chooses between the OpenMPIRBuilder lowering (when enabled and the clause
// set is representable) and the classic CodeGen path.
static void emitOMPForDirective(const OMPLoopDirective &S, CodeGenFunction &CGF,
                                CodeGenModule &CGM, bool HasCancel) {
  bool HasLastprivates = false;
  bool UseOMPIRBuilder = CGM.getLangOpts().OpenMPIRBuilder &&
                         isForSupportedByOpenMPIRBuilder(S, HasCancel);
  auto &&CodeGen = [&S, &CGM, HasCancel, &HasLastprivates,
                    UseOMPIRBuilder](CodeGenFunction &CGF, PrePostActionTy &) {
    // Use the OpenMPIRBuilder if enabled.
    if (UseOMPIRBuilder) {
      bool NeedsBarrier = !S.getSingleClause<OMPNowaitClause>();

      llvm::omp::ScheduleKind SchedKind = llvm::omp::OMP_SCHEDULE_Default;
      llvm::Value *ChunkSize = nullptr;
      if (auto *SchedClause = S.getSingleClause<OMPScheduleClause>()) {
        SchedKind =
            convertClauseKindToSchedKind(ScheduleClauseKind: SchedClause->getScheduleKind());
        if (const Expr *ChunkSizeExpr = SchedClause->getChunkSize())
          ChunkSize = CGF.EmitScalarExpr(E: ChunkSizeExpr);
      }

      // Emit the associated statement and get its loop representation.
      const Stmt *Inner = S.getRawStmt();
      llvm::CanonicalLoopInfo *CLI =
          CGF.EmitOMPCollapsedCanonicalLoopNest(S: Inner, Depth: 1);

      llvm::OpenMPIRBuilder &OMPBuilder =
          CGM.getOpenMPRuntime().getOMPBuilder();
      llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
          CGF.AllocaInsertPt->getParent(), CGF.AllocaInsertPt->getIterator());
      cantFail(ValOrErr: OMPBuilder.applyWorkshareLoop(
          DL: CGF.Builder.getCurrentDebugLocation(), CLI, AllocaIP, NeedsBarrier,
          SchedKind, ChunkSize, /*HasSimdModifier=*/false,
          /*HasMonotonicModifier=*/false, /*HasNonmonotonicModifier=*/false,
          /*HasOrderedClause=*/false));
      return;
    }

    HasLastprivates = emitWorksharingDirective(CGF, S, HasCancel);
  };
  {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF, S);
    OMPLexicalScope Scope(CGF, S, OMPD_unknown);
    CGM.getOpenMPRuntime().emitInlinedDirective(CGF, InnermostKind: OMPD_for, CodeGen,
                                                HasCancel);
  }

  // The IRBuilder path emits its own barrier inside applyWorkshareLoop.
  if (!UseOMPIRBuilder) {
    // Emit an implicit barrier at the end.
    if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates)
      CGM.getOpenMPRuntime().emitBarrierCall(CGF, Loc: S.getBeginLoc(), Kind: OMPD_for);
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(CGF, S);
}
4433
4434void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
4435 return emitOMPForDirective(S, CGF&: *this, CGM, HasCancel: S.hasCancel());
4436}
4437
/// Emit the '#pragma omp for simd' directive: a worksharing loop with SIMD
/// semantics, emitted inline into the current function.
void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) {
  bool HasLastprivates = false;
  // Region body generator; records whether any lastprivate clause was emitted
  // so the barrier decision below can account for it.
  auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
                                          PrePostActionTy &) {
    HasLastprivates = emitWorksharingDirective(CGF, S, /*HasCancel=*/false);
  };
  {
    // Lastprivate-conditional tracking is disabled while the region is
    // emitted inline; the scopes below must be destroyed before the barrier.
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
    OMPLexicalScope Scope(*this, S, OMPD_unknown);
    CGM.getOpenMPRuntime().emitInlinedDirective(CGF&: *this, InnermostKind: OMPD_simd, CodeGen);
  }

  // Emit an implicit barrier at the end, unless 'nowait' was given and there
  // are no lastprivates to synchronize.
  if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates)
    CGM.getOpenMPRuntime().emitBarrierCall(CGF&: *this, Loc: S.getBeginLoc(), Kind: OMPD_for);
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(CGF&: *this, S);
}
4457
/// Create a stack temporary of type \p Ty named \p Name, wrapped as an
/// LValue; if \p Init is non-null it is stored as the initial value.
static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty,
                                const Twine &Name,
                                llvm::Value *Init = nullptr) {
  LValue LVal = CGF.MakeAddrLValue(Addr: CGF.CreateMemTemp(T: Ty, Name), T: Ty);
  if (Init)
    CGF.EmitStoreThroughLValue(Src: RValue::get(V: Init), Dst: LVal, /*isInit*/ true);
  return LVal;
}
4466
/// Shared lowering for 'sections'-based directives: the list of sections is
/// turned into a statically-scheduled loop over section indices, with a
/// switch on the loop IV dispatching to each section body.
void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
  const Stmt *CapturedStmt = S.getInnermostCapturedStmt()->getCapturedStmt();
  // CS is null when the associated statement is not a compound statement; in
  // that case there is exactly one implicit section.
  const auto *CS = dyn_cast<CompoundStmt>(Val: CapturedStmt);
  bool HasLastprivates = false;
  OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S);
  auto &&CodeGen = [&S, CapturedStmt, CS, EKind,
                    &HasLastprivates](CodeGenFunction &CGF, PrePostActionTy &) {
    const ASTContext &C = CGF.getContext();
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    // Emit helper vars inits: lower bound, upper bound, stride, is-last flag.
    LValue LB = createSectionLVal(CGF, Ty: KmpInt32Ty, Name: ".omp.sections.lb.",
                                  Init: CGF.Builder.getInt32(C: 0));
    // Global UB is <number of sections> - 1, or 0 for a single implicit
    // section.
    llvm::ConstantInt *GlobalUBVal = CS != nullptr
                                         ? CGF.Builder.getInt32(C: CS->size() - 1)
                                         : CGF.Builder.getInt32(C: 0);
    LValue UB =
        createSectionLVal(CGF, Ty: KmpInt32Ty, Name: ".omp.sections.ub.", Init: GlobalUBVal);
    LValue ST = createSectionLVal(CGF, Ty: KmpInt32Ty, Name: ".omp.sections.st.",
                                  Init: CGF.Builder.getInt32(C: 1));
    LValue IL = createSectionLVal(CGF, Ty: KmpInt32Ty, Name: ".omp.sections.il.",
                                  Init: CGF.Builder.getInt32(C: 0));
    // Loop counter.
    LValue IV = createSectionLVal(CGF, Ty: KmpInt32Ty, Name: ".omp.sections.iv.");
    // Opaque expressions let us build AST-level Cond/Inc expressions over the
    // helper lvalues above.
    OpaqueValueExpr IVRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue);
    CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV);
    OpaqueValueExpr UBRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue);
    CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB);
    // Generate condition for loop: IV <= UB.
    BinaryOperator *Cond = BinaryOperator::Create(
        C, lhs: &IVRefExpr, rhs: &UBRefExpr, opc: BO_LE, ResTy: C.BoolTy, VK: VK_PRValue, OK: OK_Ordinary,
        opLoc: S.getBeginLoc(), FPFeatures: FPOptionsOverride());
    // Increment for loop counter: ++IV.
    UnaryOperator *Inc = UnaryOperator::Create(
        C, input: &IVRefExpr, opc: UO_PreInc, type: KmpInt32Ty, VK: VK_PRValue, OK: OK_Ordinary,
        l: S.getBeginLoc(), CanOverflow: true, FPFeatures: FPOptionsOverride());
    auto &&BodyGen = [CapturedStmt, CS, &S, &IV](CodeGenFunction &CGF) {
      // Iterate through all sections and emit a switch construct:
      // switch (IV) {
      //   case 0:
      //     <SectionStmt[0]>;
      //     break;
      // ...
      //   case <NumSection> - 1:
      //     <SectionStmt[<NumSection> - 1]>;
      //     break;
      // }
      // .omp.sections.exit:
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(name: ".omp.sections.exit");
      llvm::SwitchInst *SwitchStmt =
          CGF.Builder.CreateSwitch(V: CGF.EmitLoadOfScalar(lvalue: IV, Loc: S.getBeginLoc()),
                                   Dest: ExitBB, NumCases: CS == nullptr ? 1 : CS->size());
      if (CS) {
        // One case per child statement of the compound statement.
        unsigned CaseNumber = 0;
        for (const Stmt *SubStmt : CS->children()) {
          auto CaseBB = CGF.createBasicBlock(name: ".omp.sections.case");
          CGF.EmitBlock(BB: CaseBB);
          SwitchStmt->addCase(OnVal: CGF.Builder.getInt32(C: CaseNumber), Dest: CaseBB);
          CGF.EmitStmt(S: SubStmt);
          CGF.EmitBranch(Block: ExitBB);
          ++CaseNumber;
        }
      } else {
        // Single implicit section: one case for the whole captured statement.
        llvm::BasicBlock *CaseBB = CGF.createBasicBlock(name: ".omp.sections.case");
        CGF.EmitBlock(BB: CaseBB);
        SwitchStmt->addCase(OnVal: CGF.Builder.getInt32(C: 0), Dest: CaseBB);
        CGF.EmitStmt(S: CapturedStmt);
        CGF.EmitBranch(Block: ExitBB);
      }
      CGF.EmitBlock(BB: ExitBB, /*IsFinished=*/true);
    };

    CodeGenFunction::OMPPrivateScope LoopScope(CGF);
    if (CGF.EmitOMPFirstprivateClause(D: S, PrivateScope&: LoopScope)) {
      // Emit implicit barrier to synchronize threads and avoid data races on
      // initialization of firstprivate variables and post-update of lastprivate
      // variables.
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(
          CGF, Loc: S.getBeginLoc(), Kind: OMPD_unknown, /*EmitChecks=*/false,
          /*ForceSimpleCall=*/true);
    }
    CGF.EmitOMPPrivateClause(D: S, PrivateScope&: LoopScope);
    CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(CGF, S, IV);
    HasLastprivates = CGF.EmitOMPLastprivateClauseInit(D: S, PrivateScope&: LoopScope);
    CGF.EmitOMPReductionClauseInit(D: S, PrivateScope&: LoopScope);
    (void)LoopScope.Privatize();
    if (isOpenMPTargetExecutionDirective(DKind: EKind))
      CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, D: S);

    // Emit static non-chunked loop.
    OpenMPScheduleTy ScheduleKind;
    ScheduleKind.Schedule = OMPC_SCHEDULE_static;
    CGOpenMPRuntime::StaticRTInput StaticInit(
        /*IVSize=*/32, /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(),
        LB.getAddress(), UB.getAddress(), ST.getAddress());
    CGF.CGM.getOpenMPRuntime().emitForStaticInit(CGF, Loc: S.getBeginLoc(), DKind: EKind,
                                                 ScheduleKind, Values: StaticInit);
    // UB = min(UB, GlobalUB); clamp the runtime-assigned chunk upper bound.
    llvm::Value *UBVal = CGF.EmitLoadOfScalar(lvalue: UB, Loc: S.getBeginLoc());
    llvm::Value *MinUBGlobalUB = CGF.Builder.CreateSelect(
        C: CGF.Builder.CreateICmpSLT(LHS: UBVal, RHS: GlobalUBVal), True: UBVal, False: GlobalUBVal);
    CGF.EmitStoreOfScalar(value: MinUBGlobalUB, lvalue: UB);
    // IV = LB;
    CGF.EmitStoreOfScalar(value: CGF.EmitLoadOfScalar(lvalue: LB, Loc: S.getBeginLoc()), lvalue: IV);
    // while (idx <= UB) { BODY; ++idx; }
    CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, LoopCond: Cond, IncExpr: Inc, BodyGen,
                         PostIncGen: [](CodeGenFunction &) {});
    // Tell the runtime we are done.
    auto &&CodeGen = [&S](CodeGenFunction &CGF) {
      CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, Loc: S.getEndLoc(),
                                                     DKind: OMPD_sections);
    };
    CGF.OMPCancelStack.emitExit(CGF, Kind: EKind, CodeGen);
    CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_parallel);
    // Emit post-update of the reduction variables if IsLastIter != 0.
    emitPostUpdateForReductionClause(CGF, D: S, CondGen: [IL, &S](CodeGenFunction &CGF) {
      return CGF.Builder.CreateIsNotNull(
          Arg: CGF.EmitLoadOfScalar(lvalue: IL, Loc: S.getBeginLoc()));
    });

    // Emit final copy of the lastprivate variables if IsLastIter != 0.
    if (HasLastprivates)
      CGF.EmitOMPLastprivateClauseFinal(
          D: S, /*NoFinals=*/false,
          IsLastIterCond: CGF.Builder.CreateIsNotNull(
              Arg: CGF.EmitLoadOfScalar(lvalue: IL, Loc: S.getBeginLoc())));
  };

  // Cancellation is only possible for 'sections' / 'parallel sections'.
  bool HasCancel = false;
  if (auto *OSD = dyn_cast<OMPSectionsDirective>(Val: &S))
    HasCancel = OSD->hasCancel();
  else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(Val: &S))
    HasCancel = OPSD->hasCancel();
  OMPCancelStackRAII CancelRegion(*this, EKind, HasCancel);
  CGM.getOpenMPRuntime().emitInlinedDirective(CGF&: *this, InnermostKind: OMPD_sections, CodeGen,
                                              HasCancel);
  // Emit barrier for lastprivates only if 'sections' directive has 'nowait'
  // clause. Otherwise the barrier will be generated by the codegen for the
  // directive.
  if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) {
    // Emit implicit barrier to synchronize threads and avoid data races on
    // initialization of firstprivate variables.
    CGM.getOpenMPRuntime().emitBarrierCall(CGF&: *this, Loc: S.getBeginLoc(),
                                           Kind: OMPD_unknown);
  }
}
4613
/// Emit the '#pragma omp scope' directive: a structured block with its own
/// private/firstprivate/reduction data environment, followed by an implicit
/// barrier unless 'nowait' is present.
void CodeGenFunction::EmitOMPScopeDirective(const OMPScopeDirective &S) {
  {
    // Emit code for 'scope' region
    auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      OMPPrivateScope PrivateScope(CGF);
      (void)CGF.EmitOMPFirstprivateClause(D: S, PrivateScope);
      CGF.EmitOMPPrivateClause(D: S, PrivateScope);
      CGF.EmitOMPReductionClauseInit(D: S, PrivateScope);
      (void)PrivateScope.Privatize();
      CGF.EmitStmt(S: S.getInnermostCapturedStmt()->getCapturedStmt());
      CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_parallel);
    };
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
    OMPLexicalScope Scope(*this, S, OMPD_unknown);
    CGM.getOpenMPRuntime().emitInlinedDirective(CGF&: *this, InnermostKind: OMPD_scope, CodeGen);
  }
  // Emit an implicit barrier at the end.
  if (!S.getSingleClause<OMPNowaitClause>()) {
    CGM.getOpenMPRuntime().emitBarrierCall(CGF&: *this, Loc: S.getBeginLoc(), Kind: OMPD_scope);
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(CGF&: *this, S);
}
4639
/// Emit the '#pragma omp sections' directive. With -fopenmp-enable-irbuilder
/// the lowering is delegated to OpenMPIRBuilder::createSections; otherwise
/// the classic EmitSections loop-based lowering is used.
void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
    using BodyGenCallbackTy = llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy;

    auto FiniCB = [](InsertPointTy IP) {
      // Don't FinalizeOMPRegion because this is done inside of OMPIRBuilder for
      // sections.
      return llvm::Error::success();
    };

    const CapturedStmt *ICS = S.getInnermostCapturedStmt();
    const Stmt *CapturedStmt = S.getInnermostCapturedStmt()->getCapturedStmt();
    // Null CS means a single non-compound statement, i.e. one implicit
    // section.
    const auto *CS = dyn_cast<CompoundStmt>(Val: CapturedStmt);
    llvm::SmallVector<BodyGenCallbackTy, 4> SectionCBVector;
    if (CS) {
      // One body-generation callback per section statement.
      for (const Stmt *SubStmt : CS->children()) {
        auto SectionCB = [this, SubStmt](InsertPointTy AllocaIP,
                                         InsertPointTy CodeGenIP) {
          OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
              CGF&: *this, RegionBodyStmt: SubStmt, AllocaIP, CodeGenIP, RegionName: "section");
          return llvm::Error::success();
        };
        SectionCBVector.push_back(Elt: SectionCB);
      }
    } else {
      auto SectionCB = [this, CapturedStmt](InsertPointTy AllocaIP,
                                            InsertPointTy CodeGenIP) {
        OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
            CGF&: *this, RegionBodyStmt: CapturedStmt, AllocaIP, CodeGenIP, RegionName: "section");
        return llvm::Error::success();
      };
      SectionCBVector.push_back(Elt: SectionCB);
    }

    // Privatization callback that performs appropriate action for
    // shared/private/firstprivate/lastprivate/copyin/... variables.
    //
    // TODO: This defaults to shared right now.
    auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
                     llvm::Value &, llvm::Value &Val, llvm::Value *&ReplVal) {
      // The next line is appropriate only for variables (Val) with the
      // data-sharing attribute "shared".
      ReplVal = &Val;

      return CodeGenIP;
    };

    CGCapturedStmtInfo CGSI(*ICS, CR_OpenMP);
    CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI);
    llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
        AllocaInsertPt->getParent(), AllocaInsertPt->getIterator());
    llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
        cantFail(ValOrErr: OMPBuilder.createSections(
            Loc: Builder, AllocaIP, SectionCBs: SectionCBVector, PrivCB, FiniCB, IsCancellable: S.hasCancel(),
            IsNowait: S.getSingleClause<OMPNowaitClause>()));
    Builder.restoreIP(IP: AfterIP);
    return;
  }
  {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
    OMPLexicalScope Scope(*this, S, OMPD_unknown);
    EmitSections(S);
  }
  // Emit an implicit barrier at the end.
  if (!S.getSingleClause<OMPNowaitClause>()) {
    CGM.getOpenMPRuntime().emitBarrierCall(CGF&: *this, Loc: S.getBeginLoc(),
                                           Kind: OMPD_sections);
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(CGF&: *this, S);
}
4714
/// Emit the '#pragma omp section' directive. Under the OpenMPIRBuilder path
/// the body is handed to createSection; otherwise the associated statement is
/// emitted directly (the enclosing 'sections' lowering provides dispatch).
void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;

    const Stmt *SectionRegionBodyStmt = S.getAssociatedStmt();
    auto FiniCB = [this](InsertPointTy IP) {
      OMPBuilderCBHelpers::FinalizeOMPRegion(CGF&: *this, IP);
      return llvm::Error::success();
    };

    auto BodyGenCB = [SectionRegionBodyStmt, this](InsertPointTy AllocaIP,
                                                   InsertPointTy CodeGenIP) {
      OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
          CGF&: *this, RegionBodyStmt: SectionRegionBodyStmt, AllocaIP, CodeGenIP, RegionName: "section");
      return llvm::Error::success();
    };

    LexicalScope Scope(*this, S.getSourceRange());
    EmitStopPoint(S: &S);
    llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
        cantFail(ValOrErr: OMPBuilder.createSection(Loc: Builder, BodyGenCB, FiniCB));
    Builder.restoreIP(IP: AfterIP);

    return;
  }
  LexicalScope Scope(*this, S.getSourceRange());
  EmitStopPoint(S: &S);
  EmitStmt(S: S.getAssociatedStmt());
}
4745
4746void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
4747 llvm::SmallVector<const Expr *, 8> CopyprivateVars;
4748 llvm::SmallVector<const Expr *, 8> DestExprs;
4749 llvm::SmallVector<const Expr *, 8> SrcExprs;
4750 llvm::SmallVector<const Expr *, 8> AssignmentOps;
4751 // Check if there are any 'copyprivate' clauses associated with this
4752 // 'single' construct.
4753 // Build a list of copyprivate variables along with helper expressions
4754 // (<source>, <destination>, <destination>=<source> expressions)
4755 for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) {
4756 CopyprivateVars.append(in_start: C->varlist_begin(), in_end: C->varlist_end());
4757 DestExprs.append(in_start: C->destination_exprs().begin(),
4758 in_end: C->destination_exprs().end());
4759 SrcExprs.append(in_start: C->source_exprs().begin(), in_end: C->source_exprs().end());
4760 AssignmentOps.append(in_start: C->assignment_ops().begin(),
4761 in_end: C->assignment_ops().end());
4762 }
4763 // Emit code for 'single' region along with 'copyprivate' clauses
4764 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4765 Action.Enter(CGF);
4766 OMPPrivateScope SingleScope(CGF);
4767 (void)CGF.EmitOMPFirstprivateClause(D: S, PrivateScope&: SingleScope);
4768 CGF.EmitOMPPrivateClause(D: S, PrivateScope&: SingleScope);
4769 (void)SingleScope.Privatize();
4770 CGF.EmitStmt(S: S.getInnermostCapturedStmt()->getCapturedStmt());
4771 };
4772 {
4773 auto LPCRegion =
4774 CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
4775 OMPLexicalScope Scope(*this, S, OMPD_unknown);
4776 CGM.getOpenMPRuntime().emitSingleRegion(CGF&: *this, SingleOpGen: CodeGen, Loc: S.getBeginLoc(),
4777 CopyprivateVars, DestExprs,
4778 SrcExprs, AssignmentOps);
4779 }
4780 // Emit an implicit barrier at the end (to avoid data race on firstprivate
4781 // init or if no 'nowait' clause was specified and no 'copyprivate' clause).
4782 if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) {
4783 CGM.getOpenMPRuntime().emitBarrierCall(
4784 CGF&: *this, Loc: S.getBeginLoc(),
4785 Kind: S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single);
4786 }
4787 // Check for outer lastprivate conditional update.
4788 checkForLastprivateConditionalUpdate(CGF&: *this, S);
4789}
4790
4791static void emitMaster(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
4792 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4793 Action.Enter(CGF);
4794 CGF.EmitStmt(S: S.getRawStmt());
4795 };
4796 CGF.CGM.getOpenMPRuntime().emitMasterRegion(CGF, MasterOpGen: CodeGen, Loc: S.getBeginLoc());
4797}
4798
/// Emit the '#pragma omp master' directive, using OpenMPIRBuilder's
/// createMaster when enabled and the classic runtime-call lowering otherwise.
void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;

    const Stmt *MasterRegionBodyStmt = S.getAssociatedStmt();

    auto FiniCB = [this](InsertPointTy IP) {
      OMPBuilderCBHelpers::FinalizeOMPRegion(CGF&: *this, IP);
      return llvm::Error::success();
    };

    auto BodyGenCB = [MasterRegionBodyStmt, this](InsertPointTy AllocaIP,
                                                  InsertPointTy CodeGenIP) {
      OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
          CGF&: *this, RegionBodyStmt: MasterRegionBodyStmt, AllocaIP, CodeGenIP, RegionName: "master");
      return llvm::Error::success();
    };

    LexicalScope Scope(*this, S.getSourceRange());
    EmitStopPoint(S: &S);
    llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
        cantFail(ValOrErr: OMPBuilder.createMaster(Loc: Builder, BodyGenCB, FiniCB));
    Builder.restoreIP(IP: AfterIP);

    return;
  }
  LexicalScope Scope(*this, S.getSourceRange());
  EmitStopPoint(S: &S);
  emitMaster(CGF&: *this, S);
}
4830
4831static void emitMasked(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
4832 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4833 Action.Enter(CGF);
4834 CGF.EmitStmt(S: S.getRawStmt());
4835 };
4836 Expr *Filter = nullptr;
4837 if (const auto *FilterClause = S.getSingleClause<OMPFilterClause>())
4838 Filter = FilterClause->getThreadID();
4839 CGF.CGM.getOpenMPRuntime().emitMaskedRegion(CGF, MaskedOpGen: CodeGen, Loc: S.getBeginLoc(),
4840 Filter);
4841}
4842
/// Emit the '#pragma omp masked' directive, using OpenMPIRBuilder's
/// createMasked when enabled and the classic runtime-call lowering otherwise.
void CodeGenFunction::EmitOMPMaskedDirective(const OMPMaskedDirective &S) {
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;

    const Stmt *MaskedRegionBodyStmt = S.getAssociatedStmt();
    const Expr *Filter = nullptr;
    if (const auto *FilterClause = S.getSingleClause<OMPFilterClause>())
      Filter = FilterClause->getThreadID();
    // Without a 'filter' clause the filter thread id defaults to 0.
    llvm::Value *FilterVal = Filter
                                 ? EmitScalarExpr(E: Filter, IgnoreResultAssign: CGM.Int32Ty)
                                 : llvm::ConstantInt::get(Ty: CGM.Int32Ty, /*V=*/0);

    auto FiniCB = [this](InsertPointTy IP) {
      OMPBuilderCBHelpers::FinalizeOMPRegion(CGF&: *this, IP);
      return llvm::Error::success();
    };

    auto BodyGenCB = [MaskedRegionBodyStmt, this](InsertPointTy AllocaIP,
                                                  InsertPointTy CodeGenIP) {
      OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
          CGF&: *this, RegionBodyStmt: MaskedRegionBodyStmt, AllocaIP, CodeGenIP, RegionName: "masked");
      return llvm::Error::success();
    };

    LexicalScope Scope(*this, S.getSourceRange());
    EmitStopPoint(S: &S);
    llvm::OpenMPIRBuilder::InsertPointTy AfterIP = cantFail(
        ValOrErr: OMPBuilder.createMasked(Loc: Builder, BodyGenCB, FiniCB, Filter: FilterVal));
    Builder.restoreIP(IP: AfterIP);

    return;
  }
  LexicalScope Scope(*this, S.getSourceRange());
  EmitStopPoint(S: &S);
  emitMasked(CGF&: *this, S);
}
4880
/// Emit the '#pragma omp critical' directive: mutual exclusion keyed by the
/// directive's name, with an optional 'hint' clause forwarded to the runtime.
void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;

    const Stmt *CriticalRegionBodyStmt = S.getAssociatedStmt();
    const Expr *Hint = nullptr;
    if (const auto *HintClause = S.getSingleClause<OMPHintClause>())
      Hint = HintClause->getHint();

    // TODO: This is slightly different from what's currently being done in
    // clang. Fix the Int32Ty to IntPtrTy (pointer width size) when everything
    // about typing is final.
    llvm::Value *HintInst = nullptr;
    if (Hint)
      HintInst =
          Builder.CreateIntCast(V: EmitScalarExpr(E: Hint), DestTy: CGM.Int32Ty, isSigned: false);

    auto FiniCB = [this](InsertPointTy IP) {
      OMPBuilderCBHelpers::FinalizeOMPRegion(CGF&: *this, IP);
      return llvm::Error::success();
    };

    auto BodyGenCB = [CriticalRegionBodyStmt, this](InsertPointTy AllocaIP,
                                                    InsertPointTy CodeGenIP) {
      OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
          CGF&: *this, RegionBodyStmt: CriticalRegionBodyStmt, AllocaIP, CodeGenIP, RegionName: "critical");
      return llvm::Error::success();
    };

    LexicalScope Scope(*this, S.getSourceRange());
    EmitStopPoint(S: &S);
    llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
        cantFail(ValOrErr: OMPBuilder.createCritical(Loc: Builder, BodyGenCB, FiniCB,
                                          CriticalName: S.getDirectiveName().getAsString(),
                                          HintInst));
    Builder.restoreIP(IP: AfterIP);

    return;
  }

  // Classic lowering: delegate to the runtime's critical-region protocol.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CGF.EmitStmt(S: S.getAssociatedStmt());
  };
  const Expr *Hint = nullptr;
  if (const auto *HintClause = S.getSingleClause<OMPHintClause>())
    Hint = HintClause->getHint();
  LexicalScope Scope(*this, S.getSourceRange());
  EmitStopPoint(S: &S);
  CGM.getOpenMPRuntime().emitCriticalRegion(CGF&: *this,
                                            CriticalName: S.getDirectiveName().getAsString(),
                                            CriticalOpGen: CodeGen, Loc: S.getBeginLoc(), Hint);
}
4935
/// Emit the combined '#pragma omp parallel for' directive.
void CodeGenFunction::EmitOMPParallelForDirective(
    const OMPParallelForDirective &S) {
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'for' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    emitOMPCopyinClause(CGF, S);
    (void)emitWorksharingDirective(CGF, S, HasCancel: S.hasCancel());
  };
  {
    // Computes the loop's iteration count in a throwaway local-decl scope;
    // used by the inscan-reduction pre/post passes below.
    const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
      CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
      CGCapturedStmtInfo CGSI(CR_OpenMP);
      CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGSI);
      OMPLoopScope LoopScope(CGF, S);
      return CGF.EmitScalarExpr(E: S.getNumIterations());
    };
    bool IsInscan = llvm::any_of(Range: S.getClausesOfKind<OMPReductionClause>(),
                                 P: [](const OMPReductionClause *C) {
                                   return C->getModifier() == OMPC_REDUCTION_inscan;
                                 });
    if (IsInscan)
      emitScanBasedDirectiveDecls(CGF&: *this, S, NumIteratorsGen);
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
    emitCommonOMPParallelDirective(CGF&: *this, S, InnermostKind: OMPD_for, CodeGen,
                                   CodeGenBoundParameters: emitEmptyBoundParameters);
    if (IsInscan)
      emitScanBasedDirectiveFinals(CGF&: *this, S, NumIteratorsGen);
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(CGF&: *this, S);
}
4969
/// Emit the combined '#pragma omp parallel for simd' directive. Mirrors
/// EmitOMPParallelForDirective but with OMPD_for_simd and no cancellation.
void CodeGenFunction::EmitOMPParallelForSimdDirective(
    const OMPParallelForSimdDirective &S) {
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'for' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    emitOMPCopyinClause(CGF, S);
    (void)emitWorksharingDirective(CGF, S, /*HasCancel=*/false);
  };
  {
    // Computes the loop's iteration count; used by the inscan-reduction
    // pre/post passes below.
    const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
      CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
      CGCapturedStmtInfo CGSI(CR_OpenMP);
      CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGSI);
      OMPLoopScope LoopScope(CGF, S);
      return CGF.EmitScalarExpr(E: S.getNumIterations());
    };
    bool IsInscan = llvm::any_of(Range: S.getClausesOfKind<OMPReductionClause>(),
                                 P: [](const OMPReductionClause *C) {
                                   return C->getModifier() == OMPC_REDUCTION_inscan;
                                 });
    if (IsInscan)
      emitScanBasedDirectiveDecls(CGF&: *this, S, NumIteratorsGen);
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
    emitCommonOMPParallelDirective(CGF&: *this, S, InnermostKind: OMPD_for_simd, CodeGen,
                                   CodeGenBoundParameters: emitEmptyBoundParameters);
    if (IsInscan)
      emitScanBasedDirectiveFinals(CGF&: *this, S, NumIteratorsGen);
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(CGF&: *this, S);
}
5003
/// Emit the combined '#pragma omp parallel master' directive.
void CodeGenFunction::EmitOMPParallelMasterDirective(
    const OMPParallelMasterDirective &S) {
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'master' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    // Set up the parallel region's data environment before entering the
    // nested master region.
    OMPPrivateScope PrivateScope(CGF);
    emitOMPCopyinClause(CGF, S);
    (void)CGF.EmitOMPFirstprivateClause(D: S, PrivateScope);
    CGF.EmitOMPPrivateClause(D: S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(D: S, PrivateScope);
    (void)PrivateScope.Privatize();
    emitMaster(CGF, S);
    CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_parallel);
  };
  {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
    emitCommonOMPParallelDirective(CGF&: *this, S, InnermostKind: OMPD_master, CodeGen,
                                   CodeGenBoundParameters: emitEmptyBoundParameters);
    emitPostUpdateForReductionClause(CGF&: *this, D: S,
                                     CondGen: [](CodeGenFunction &) { return nullptr; });
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(CGF&: *this, S);
}
5030
/// Emit the combined '#pragma omp parallel masked' directive. Mirrors
/// EmitOMPParallelMasterDirective with a masked inner region.
void CodeGenFunction::EmitOMPParallelMaskedDirective(
    const OMPParallelMaskedDirective &S) {
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'masked' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    // Set up the parallel region's data environment before entering the
    // nested masked region.
    OMPPrivateScope PrivateScope(CGF);
    emitOMPCopyinClause(CGF, S);
    (void)CGF.EmitOMPFirstprivateClause(D: S, PrivateScope);
    CGF.EmitOMPPrivateClause(D: S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(D: S, PrivateScope);
    (void)PrivateScope.Privatize();
    emitMasked(CGF, S);
    CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_parallel);
  };
  {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
    emitCommonOMPParallelDirective(CGF&: *this, S, InnermostKind: OMPD_masked, CodeGen,
                                   CodeGenBoundParameters: emitEmptyBoundParameters);
    emitPostUpdateForReductionClause(CGF&: *this, D: S,
                                     CondGen: [](CodeGenFunction &) { return nullptr; });
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(CGF&: *this, S);
}
5057
/// Emit the combined '#pragma omp parallel sections' directive.
void CodeGenFunction::EmitOMPParallelSectionsDirective(
    const OMPParallelSectionsDirective &S) {
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'sections' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    emitOMPCopyinClause(CGF, S);
    CGF.EmitSections(S);
  };
  {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
    emitCommonOMPParallelDirective(CGF&: *this, S, InnermostKind: OMPD_sections, CodeGen,
                                   CodeGenBoundParameters: emitEmptyBoundParameters);
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(CGF&: *this, S);
}
5076
namespace {
/// Collects the local variables declared directly in the context of an untied
/// task that must be privatized; declarations nested inside other OpenMP
/// directives, captured statements, lambdas, and blocks are deliberately
/// skipped (they have their own contexts).
class CheckVarsEscapingUntiedTaskDeclContext final
    : public ConstStmtVisitor<CheckVarsEscapingUntiedTaskDeclContext> {
  llvm::SmallVector<const VarDecl *, 4> PrivateDecls;

public:
  explicit CheckVarsEscapingUntiedTaskDeclContext() = default;
  ~CheckVarsEscapingUntiedTaskDeclContext() = default;
  void VisitDeclStmt(const DeclStmt *S) {
    if (!S)
      return;
    // Need to privatize only local vars, static locals can be processed as is.
    for (const Decl *D : S->decls()) {
      if (const auto *VD = dyn_cast_or_null<VarDecl>(Val: D))
        if (VD->hasLocalStorage())
          PrivateDecls.push_back(Elt: VD);
    }
  }
  // Do not descend into nested contexts; their declarations are not part of
  // this task's declaration context.
  void VisitOMPExecutableDirective(const OMPExecutableDirective *) {}
  void VisitCapturedStmt(const CapturedStmt *) {}
  void VisitLambdaExpr(const LambdaExpr *) {}
  void VisitBlockExpr(const BlockExpr *) {}
  void VisitStmt(const Stmt *S) {
    if (!S)
      return;
    for (const Stmt *Child : S->children())
      if (Child)
        Visit(S: Child);
  }

  /// Returns the list of collected local variable declarations.
  ArrayRef<const VarDecl *> getPrivateDecls() const { return PrivateDecls; }
};
} // anonymous namespace
5112
/// Translate the 'depend' clauses of \p S into \p Data.Dependences, folding
/// all 'omp_all_memory' dependences into a single OMPC_DEPEND_outallmemory
/// entry and dropping 'out'/'inout' dependences it subsumes.
static void buildDependences(const OMPExecutableDirective &S,
                             OMPTaskDataTy &Data) {

  // First look for 'omp_all_memory' and add this first.
  bool OmpAllMemory = false;
  if (llvm::any_of(
          Range: S.getClausesOfKind<OMPDependClause>(), P: [](const OMPDependClause *C) {
            return C->getDependencyKind() == OMPC_DEPEND_outallmemory ||
                   C->getDependencyKind() == OMPC_DEPEND_inoutallmemory;
          })) {
    OmpAllMemory = true;
    // Since both OMPC_DEPEND_outallmemory and OMPC_DEPEND_inoutallmemory are
    // equivalent to the runtime, always use OMPC_DEPEND_outallmemory to
    // simplify.
    OMPTaskDataTy::DependData &DD =
        Data.Dependences.emplace_back(Args: OMPC_DEPEND_outallmemory,
                                      /*IteratorExpr=*/Args: nullptr);
    // Add a nullptr Expr to simplify the codegen in emitDependData.
    DD.DepExprs.push_back(Elt: nullptr);
  }
  // Add remaining dependences skipping any 'out' or 'inout' if they are
  // overridden by 'omp_all_memory'.
  for (const auto *C : S.getClausesOfKind<OMPDependClause>()) {
    OpenMPDependClauseKind Kind = C->getDependencyKind();
    if (Kind == OMPC_DEPEND_outallmemory || Kind == OMPC_DEPEND_inoutallmemory)
      continue;
    if (OmpAllMemory && (Kind == OMPC_DEPEND_out || Kind == OMPC_DEPEND_inout))
      continue;
    OMPTaskDataTy::DependData &DD =
        Data.Dependences.emplace_back(Args: C->getDependencyKind(), Args: C->getModifier());
    DD.DepExprs.append(in_start: C->varlist_begin(), in_end: C->varlist_end());
  }
}
5146
// Common lowering for all task-generating constructs ('task', 'taskloop'
// variants, target tasks): collects clause data into \p Data, outlines the
// task body via \p BodyGen, and finally invokes \p TaskGen to emit the
// runtime call that actually creates/schedules the task.
void CodeGenFunction::EmitOMPTaskBasedDirective(
    const OMPExecutableDirective &S, const OpenMPDirectiveKind CapturedRegion,
    const RegionCodeGenTy &BodyGen, const TaskGenTy &TaskGen,
    OMPTaskDataTy &Data) {
  // Emit outlined function for task construct.
  const CapturedStmt *CS = S.getCapturedStmt(RegionKind: CapturedRegion);
  // Captured task functions carry a fixed parameter layout: the thread id
  // comes first and the part id second (see comment below); the fifth
  // parameter is presumably the task descriptor (kmp_task_t) — TODO confirm
  // against the captured decl's creation site.
  auto I = CS->getCapturedDecl()->param_begin();
  auto PartId = std::next(x: I);
  auto TaskT = std::next(x: I, n: 4);
  // Check if the task is final
  if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) {
    // If the condition constant folds and can be elided, try to avoid emitting
    // the condition and the dead arm of the if/else.
    const Expr *Cond = Clause->getCondition();
    bool CondConstant;
    if (ConstantFoldsToSimpleInteger(Cond, Result&: CondConstant))
      Data.Final.setInt(CondConstant);
    else
      Data.Final.setPointer(EvaluateExprAsBool(E: Cond));
  } else {
    // By default the task is not final.
    Data.Final.setInt(/*IntVal=*/false);
  }
  // Check if the task has 'priority' clause.
  if (const auto *Clause = S.getSingleClause<OMPPriorityClause>()) {
    const Expr *Prio = Clause->getPriority();
    Data.Priority.setInt(/*IntVal=*/true);
    // The runtime expects a 32-bit signed priority; convert the clause
    // expression accordingly.
    Data.Priority.setPointer(EmitScalarConversion(
        Src: EmitScalarExpr(E: Prio), SrcTy: Prio->getType(),
        DstTy: getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1),
        Loc: Prio->getExprLoc()));
  }
  // The first function argument for tasks is a thread id, the second one is a
  // part id (0 for tied tasks, >=0 for untied task).
  llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
  // Get list of private variables. EmittedAsPrivate de-duplicates variables
  // that appear in more than one clause entry.
  for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
    auto IRef = C->varlist_begin();
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IRef)->getDecl());
      if (EmittedAsPrivate.insert(V: OrigVD->getCanonicalDecl()).second) {
        Data.PrivateVars.push_back(Elt: *IRef);
        Data.PrivateCopies.push_back(Elt: IInit);
      }
      ++IRef;
    }
  }
  EmittedAsPrivate.clear();
  // Get list of firstprivate variables.
  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto IElemInitRef = C->inits().begin();
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IRef)->getDecl());
      if (EmittedAsPrivate.insert(V: OrigVD->getCanonicalDecl()).second) {
        Data.FirstprivateVars.push_back(Elt: *IRef);
        Data.FirstprivateCopies.push_back(Elt: IInit);
        Data.FirstprivateInits.push_back(Elt: *IElemInitRef);
      }
      ++IRef;
      ++IElemInitRef;
    }
  }
  // Get list of lastprivate variables (for taskloops).
  llvm::MapVector<const VarDecl *, const DeclRefExpr *> LastprivateDstsOrigs;
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto ID = C->destination_exprs().begin();
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IRef)->getDecl());
      if (EmittedAsPrivate.insert(V: OrigVD->getCanonicalDecl()).second) {
        Data.LastprivateVars.push_back(Elt: *IRef);
        Data.LastprivateCopies.push_back(Elt: IInit);
      }
      // Note: destination mappings are recorded even for duplicates.
      LastprivateDstsOrigs.insert(
          KV: std::make_pair(x: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *ID)->getDecl()),
                          y: cast<DeclRefExpr>(Val: *IRef)));
      ++IRef;
      ++ID;
    }
  }
  // Collect reduction clause operands; LHSs/RHSs feed the task reduction
  // initializer below.
  SmallVector<const Expr *, 4> LHSs;
  SmallVector<const Expr *, 4> RHSs;
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
    Data.ReductionVars.append(in_start: C->varlist_begin(), in_end: C->varlist_end());
    Data.ReductionOrigs.append(in_start: C->varlist_begin(), in_end: C->varlist_end());
    Data.ReductionCopies.append(in_start: C->privates().begin(), in_end: C->privates().end());
    Data.ReductionOps.append(in_start: C->reduction_ops().begin(),
                             in_end: C->reduction_ops().end());
    LHSs.append(in_start: C->lhs_exprs().begin(), in_end: C->lhs_exprs().end());
    RHSs.append(in_start: C->rhs_exprs().begin(), in_end: C->rhs_exprs().end());
  }
  Data.Reductions = CGM.getOpenMPRuntime().emitTaskReductionInit(
      CGF&: *this, Loc: S.getBeginLoc(), LHSExprs: LHSs, RHSExprs: RHSs, Data);
  // Build list of dependences.
  buildDependences(S, Data);
  // Get list of local vars for untied tasks. Locals that escape an untied
  // task's suspension points must live in task-allocated storage.
  if (!Data.Tied) {
    CheckVarsEscapingUntiedTaskDeclContext Checker;
    Checker.Visit(S: S.getInnermostCapturedStmt()->getCapturedStmt());
    Data.PrivateLocals.append(in_start: Checker.getPrivateDecls().begin(),
                              in_end: Checker.getPrivateDecls().end());
  }
  // Body-generation callback run inside the outlined task function: rebinds
  // all privatized variables to the task's private storage, then emits the
  // user body via BodyGen.
  auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs,
                    CapturedRegion](CodeGenFunction &CGF,
                                    PrePostActionTy &Action) {
    // For untied-task locals: maps each decl to (pointer slot, actual data
    // address).
    llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
                    std::pair<Address, Address>>
        UntiedLocalVars;
    // Set proper addresses for generated private copies.
    OMPPrivateScope Scope(CGF);
    // Generate debug info for variables present in shared clause.
    if (auto *DI = CGF.getDebugInfo()) {
      llvm::SmallDenseMap<const VarDecl *, FieldDecl *> CaptureFields =
          CGF.CapturedStmtInfo->getCaptureFields();
      llvm::Value *ContextValue = CGF.CapturedStmtInfo->getContextValue();
      if (CaptureFields.size() && ContextValue) {
        unsigned CharWidth = CGF.getContext().getCharWidth();
        // The shared variables are packed together as members of structure.
        // So the address of each shared variable can be computed by adding
        // offset of it (within record) to the base address of record. For each
        // shared variable, debug intrinsic llvm.dbg.declare is generated with
        // appropriate expressions (DIExpression).
        // Ex:
        //   %12 = load %struct.anon*, %struct.anon** %__context.addr.i
        //   call void @llvm.dbg.declare(metadata %struct.anon* %12,
        //             metadata !svar1,
        //             metadata !DIExpression(DW_OP_deref))
        //   call void @llvm.dbg.declare(metadata %struct.anon* %12,
        //             metadata !svar2,
        //             metadata !DIExpression(DW_OP_plus_uconst, 8, DW_OP_deref))
        for (auto It = CaptureFields.begin(); It != CaptureFields.end(); ++It) {
          const VarDecl *SharedVar = It->first;
          RecordDecl *CaptureRecord = It->second->getParent();
          const ASTRecordLayout &Layout =
              CGF.getContext().getASTRecordLayout(D: CaptureRecord);
          // Byte offset of this field within the capture record.
          unsigned Offset =
              Layout.getFieldOffset(FieldNo: It->second->getFieldIndex()) / CharWidth;
          if (CGF.CGM.getCodeGenOpts().hasReducedDebugInfo())
            (void)DI->EmitDeclareOfAutoVariable(Decl: SharedVar, AI: ContextValue,
                                                Builder&: CGF.Builder, UsePointerValue: false);
          // Get the call dbg.declare instruction we just created and update
          // its DIExpression to add offset to base address.
          auto UpdateExpr = [](llvm::LLVMContext &Ctx, auto *Declare,
                               unsigned Offset) {
            SmallVector<uint64_t, 8> Ops;
            // Add offset to the base address if non zero.
            if (Offset) {
              Ops.push_back(Elt: llvm::dwarf::DW_OP_plus_uconst);
              Ops.push_back(Elt: Offset);
            }
            Ops.push_back(Elt: llvm::dwarf::DW_OP_deref);
            Declare->setExpression(llvm::DIExpression::get(Context&: Ctx, Elements: Ops));
          };
          llvm::Instruction &Last = CGF.Builder.GetInsertBlock()->back();
          if (auto DDI = dyn_cast<llvm::DbgVariableIntrinsic>(Val: &Last))
            UpdateExpr(DDI->getContext(), DDI, Offset);
          // If we're emitting using the new debug info format into a block
          // without a terminator, the record will be "trailing".
          assert(!Last.isTerminator() && "unexpected terminator");
          if (auto *Marker =
                  CGF.Builder.GetInsertBlock()->getTrailingDbgRecords()) {
            // Only the most recent trailing record needs updating; break
            // after the first (i.e. last-emitted) one.
            for (llvm::DbgVariableRecord &DVR : llvm::reverse(
                     C: llvm::filterDbgVars(R: Marker->getDbgRecordRange()))) {
              UpdateExpr(Last.getContext(), &DVR, Offset);
              break;
            }
          }
        }
      }
    }
    llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> FirstprivatePtrs;
    if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() ||
        !Data.LastprivateVars.empty() || !Data.PrivateLocals.empty()) {
      enum { PrivatesParam = 2, CopyFnParam = 3 };
      // Load the task's "copy" helper and the privates block pointer from
      // the outlined function's parameters.
      llvm::Value *CopyFn = CGF.Builder.CreateLoad(
          Addr: CGF.GetAddrOfLocalVar(VD: CS->getCapturedDecl()->getParam(i: CopyFnParam)));
      llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(Addr: CGF.GetAddrOfLocalVar(
          VD: CS->getCapturedDecl()->getParam(i: PrivatesParam)));
      // Map privates.
      llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
      llvm::SmallVector<llvm::Value *, 16> CallArgs;
      llvm::SmallVector<llvm::Type *, 4> ParamTypes;
      CallArgs.push_back(Elt: PrivatesPtr);
      ParamTypes.push_back(Elt: PrivatesPtr->getType());
      for (const Expr *E : Data.PrivateVars) {
        const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl());
        RawAddress PrivatePtr = CGF.CreateMemTemp(
            T: CGF.getContext().getPointerType(T: E->getType()), Name: ".priv.ptr.addr");
        PrivatePtrs.emplace_back(Args&: VD, Args&: PrivatePtr);
        CallArgs.push_back(Elt: PrivatePtr.getPointer());
        ParamTypes.push_back(Elt: PrivatePtr.getType());
      }
      for (const Expr *E : Data.FirstprivateVars) {
        const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl());
        RawAddress PrivatePtr =
            CGF.CreateMemTemp(T: CGF.getContext().getPointerType(T: E->getType()),
                              Name: ".firstpriv.ptr.addr");
        PrivatePtrs.emplace_back(Args&: VD, Args&: PrivatePtr);
        FirstprivatePtrs.emplace_back(Args&: VD, Args&: PrivatePtr);
        CallArgs.push_back(Elt: PrivatePtr.getPointer());
        ParamTypes.push_back(Elt: PrivatePtr.getType());
      }
      for (const Expr *E : Data.LastprivateVars) {
        const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl());
        RawAddress PrivatePtr =
            CGF.CreateMemTemp(T: CGF.getContext().getPointerType(T: E->getType()),
                              Name: ".lastpriv.ptr.addr");
        PrivatePtrs.emplace_back(Args&: VD, Args&: PrivatePtr);
        CallArgs.push_back(Elt: PrivatePtr.getPointer());
        ParamTypes.push_back(Elt: PrivatePtr.getType());
      }
      for (const VarDecl *VD : Data.PrivateLocals) {
        QualType Ty = VD->getType().getNonReferenceType();
        if (VD->getType()->isLValueReferenceType())
          Ty = CGF.getContext().getPointerType(T: Ty);
        if (isAllocatableDecl(VD))
          Ty = CGF.getContext().getPointerType(T: Ty);
        RawAddress PrivatePtr = CGF.CreateMemTemp(
            T: CGF.getContext().getPointerType(T: Ty), Name: ".local.ptr.addr");
        auto Result = UntiedLocalVars.insert(
            KV: std::make_pair(x&: VD, y: std::make_pair(x&: PrivatePtr, y: Address::invalid())));
        // If key exists update in place.
        if (Result.second == false)
          *Result.first = std::make_pair(
              x&: VD, y: std::make_pair(x&: PrivatePtr, y: Address::invalid()));
        CallArgs.push_back(Elt: PrivatePtr.getPointer());
        ParamTypes.push_back(Elt: PrivatePtr.getType());
      }
      // Call the copy helper to fill in the per-variable pointer slots.
      auto *CopyFnTy = llvm::FunctionType::get(Result: CGF.Builder.getVoidTy(),
                                               Params: ParamTypes, /*isVarArg=*/false);
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(
          CGF, Loc: S.getBeginLoc(), OutlinedFn: {CopyFnTy, CopyFn}, Args: CallArgs);
      // Lastprivate destinations are remapped to the original (shared)
      // variables so final values can be copied out.
      for (const auto &Pair : LastprivateDstsOrigs) {
        const auto *OrigVD = cast<VarDecl>(Val: Pair.second->getDecl());
        DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(OrigVD),
                        /*RefersToEnclosingVariableOrCapture=*/
                        CGF.CapturedStmtInfo->lookup(VD: OrigVD) != nullptr,
                        Pair.second->getType(), VK_LValue,
                        Pair.second->getExprLoc());
        Scope.addPrivate(LocalVD: Pair.first, Addr: CGF.EmitLValue(E: &DRE).getAddress());
      }
      // Redirect each privatized variable to the address the copy helper
      // stored into its pointer slot.
      for (const auto &Pair : PrivatePtrs) {
        Address Replacement = Address(
            CGF.Builder.CreateLoad(Addr: Pair.second),
            CGF.ConvertTypeForMem(T: Pair.first->getType().getNonReferenceType()),
            CGF.getContext().getDeclAlign(D: Pair.first));
        Scope.addPrivate(LocalVD: Pair.first, Addr: Replacement);
        if (auto *DI = CGF.getDebugInfo())
          if (CGF.CGM.getCodeGenOpts().hasReducedDebugInfo())
            (void)DI->EmitDeclareOfAutoVariable(
                Decl: Pair.first, AI: Pair.second.getBasePointer(), Builder&: CGF.Builder,
                /*UsePointerValue*/ true);
      }
      // Adjust mapping for internal locals by mapping actual memory instead of
      // a pointer to this memory.
      for (auto &Pair : UntiedLocalVars) {
        QualType VDType = Pair.first->getType().getNonReferenceType();
        if (Pair.first->getType()->isLValueReferenceType())
          VDType = CGF.getContext().getPointerType(T: VDType);
        if (isAllocatableDecl(VD: Pair.first)) {
          // Allocatable decls need an extra level of indirection.
          llvm::Value *Ptr = CGF.Builder.CreateLoad(Addr: Pair.second.first);
          Address Replacement(
              Ptr,
              CGF.ConvertTypeForMem(T: CGF.getContext().getPointerType(T: VDType)),
              CGF.getPointerAlign());
          Pair.second.first = Replacement;
          Ptr = CGF.Builder.CreateLoad(Addr: Replacement);
          Replacement = Address(Ptr, CGF.ConvertTypeForMem(T: VDType),
                                CGF.getContext().getDeclAlign(D: Pair.first));
          Pair.second.second = Replacement;
        } else {
          llvm::Value *Ptr = CGF.Builder.CreateLoad(Addr: Pair.second.first);
          Address Replacement(Ptr, CGF.ConvertTypeForMem(T: VDType),
                              CGF.getContext().getDeclAlign(D: Pair.first));
          Pair.second.first = Replacement;
        }
      }
    }
    // Task reductions: remap the already-copied firstprivates, then resolve
    // each reduction item's private storage through the runtime.
    if (Data.Reductions) {
      OMPPrivateScope FirstprivateScope(CGF);
      for (const auto &Pair : FirstprivatePtrs) {
        Address Replacement(
            CGF.Builder.CreateLoad(Addr: Pair.second),
            CGF.ConvertTypeForMem(T: Pair.first->getType().getNonReferenceType()),
            CGF.getContext().getDeclAlign(D: Pair.first));
        FirstprivateScope.addPrivate(LocalVD: Pair.first, Addr: Replacement);
      }
      (void)FirstprivateScope.Privatize();
      OMPLexicalScope LexScope(CGF, S, CapturedRegion);
      ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionVars,
                             Data.ReductionCopies, Data.ReductionOps);
      // NOTE(review): parameter index 9 is assumed to hold the reductions
      // pointer in this captured decl layout — confirm against the captured
      // decl's builder.
      llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad(
          Addr: CGF.GetAddrOfLocalVar(VD: CS->getCapturedDecl()->getParam(i: 9)));
      for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) {
        RedCG.emitSharedOrigLValue(CGF, N: Cnt);
        RedCG.emitAggregateType(CGF, N: Cnt);
        // FIXME: This must removed once the runtime library is fixed.
        // Emit required threadprivate variables for
        // initializer/combiner/finalizer.
        CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, Loc: S.getBeginLoc(),
                                                           RCG&: RedCG, N: Cnt);
        Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
            CGF, Loc: S.getBeginLoc(), ReductionsPtr, SharedLVal: RedCG.getSharedLValue(N: Cnt));
        // The runtime returns a void*; cast it to the reduction item's type.
        Replacement = Address(
            CGF.EmitScalarConversion(Src: Replacement.emitRawPointer(CGF),
                                     SrcTy: CGF.getContext().VoidPtrTy,
                                     DstTy: CGF.getContext().getPointerType(
                                         T: Data.ReductionCopies[Cnt]->getType()),
                                     Loc: Data.ReductionCopies[Cnt]->getExprLoc()),
            CGF.ConvertTypeForMem(T: Data.ReductionCopies[Cnt]->getType()),
            Replacement.getAlignment());
        Replacement = RedCG.adjustPrivateAddress(CGF, N: Cnt, PrivateAddr: Replacement);
        Scope.addPrivate(LocalVD: RedCG.getBaseDecl(N: Cnt), Addr: Replacement);
      }
    }
    // Privatize all private variables except for in_reduction items.
    (void)Scope.Privatize();
    SmallVector<const Expr *, 4> InRedVars;
    SmallVector<const Expr *, 4> InRedPrivs;
    SmallVector<const Expr *, 4> InRedOps;
    SmallVector<const Expr *, 4> TaskgroupDescriptors;
    for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) {
      auto IPriv = C->privates().begin();
      auto IRed = C->reduction_ops().begin();
      auto ITD = C->taskgroup_descriptors().begin();
      for (const Expr *Ref : C->varlist()) {
        InRedVars.emplace_back(Args&: Ref);
        InRedPrivs.emplace_back(Args: *IPriv);
        InRedOps.emplace_back(Args: *IRed);
        TaskgroupDescriptors.emplace_back(Args: *ITD);
        std::advance(i&: IPriv, n: 1);
        std::advance(i&: IRed, n: 1);
        std::advance(i&: ITD, n: 1);
      }
    }
    // Privatize in_reduction items here, because taskgroup descriptors must be
    // privatized earlier.
    OMPPrivateScope InRedScope(CGF);
    if (!InRedVars.empty()) {
      ReductionCodeGen RedCG(InRedVars, InRedVars, InRedPrivs, InRedOps);
      for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) {
        RedCG.emitSharedOrigLValue(CGF, N: Cnt);
        RedCG.emitAggregateType(CGF, N: Cnt);
        // The taskgroup descriptor variable is always implicit firstprivate and
        // privatized already during processing of the firstprivates.
        // FIXME: This must removed once the runtime library is fixed.
        // Emit required threadprivate variables for
        // initializer/combiner/finalizer.
        CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, Loc: S.getBeginLoc(),
                                                           RCG&: RedCG, N: Cnt);
        llvm::Value *ReductionsPtr;
        if (const Expr *TRExpr = TaskgroupDescriptors[Cnt]) {
          ReductionsPtr = CGF.EmitLoadOfScalar(lvalue: CGF.EmitLValue(E: TRExpr),
                                               Loc: TRExpr->getExprLoc());
        } else {
          // No descriptor available: pass a null reductions pointer.
          ReductionsPtr = llvm::ConstantPointerNull::get(T: CGF.VoidPtrTy);
        }
        Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
            CGF, Loc: S.getBeginLoc(), ReductionsPtr, SharedLVal: RedCG.getSharedLValue(N: Cnt));
        Replacement = Address(
            CGF.EmitScalarConversion(
                Src: Replacement.emitRawPointer(CGF), SrcTy: CGF.getContext().VoidPtrTy,
                DstTy: CGF.getContext().getPointerType(T: InRedPrivs[Cnt]->getType()),
                Loc: InRedPrivs[Cnt]->getExprLoc()),
            CGF.ConvertTypeForMem(T: InRedPrivs[Cnt]->getType()),
            Replacement.getAlignment());
        Replacement = RedCG.adjustPrivateAddress(CGF, N: Cnt, PrivateAddr: Replacement);
        InRedScope.addPrivate(LocalVD: RedCG.getBaseDecl(N: Cnt), Addr: Replacement);
      }
    }
    (void)InRedScope.Privatize();

    CGOpenMPRuntime::UntiedTaskLocalDeclsRAII LocalVarsScope(CGF,
                                                             UntiedLocalVars);
    Action.Enter(CGF);
    BodyGen(CGF);
  };
  OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S);
  llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
      D: S, ThreadIDVar: *I, PartIDVar: *PartId, TaskTVar: *TaskT, InnermostKind: EKind, CodeGen, Tied: Data.Tied, NumberOfParts&: Data.NumberOfParts);
  OMPLexicalScope Scope(*this, S, std::nullopt,
                        !isOpenMPParallelDirective(DKind: EKind) &&
                            !isOpenMPSimdDirective(DKind: EKind));
  // Delegate the actual runtime task creation to the caller's callback.
  TaskGen(*this, OutlinedFn, Data);
}
5533
// Creates an implicit parameter of type \p Ty together with a private copy
// and an element-typed init expression, registers the triple in \p Data as a
// firstprivate, and returns the "original" declaration. Used below to make
// the target offload argument arrays firstprivate on the wrapping task.
static ImplicitParamDecl *
createImplicitFirstprivateForType(ASTContext &C, OMPTaskDataTy &Data,
                                  QualType Ty, CapturedDecl *CD,
                                  SourceLocation Loc) {
  // The "original" variable the firstprivate refers to.
  auto *OrigVD = ImplicitParamDecl::Create(C, DC: CD, IdLoc: Loc, /*Id=*/nullptr, T: Ty,
                                           ParamKind: ImplicitParamKind::Other);
  auto *OrigRef = DeclRefExpr::Create(
      Context: C, QualifierLoc: NestedNameSpecifierLoc(), TemplateKWLoc: SourceLocation(), D: OrigVD,
      /*RefersToEnclosingVariableOrCapture=*/false, NameLoc: Loc, T: Ty, VK: VK_LValue);
  // The task-local private copy.
  auto *PrivateVD = ImplicitParamDecl::Create(C, DC: CD, IdLoc: Loc, /*Id=*/nullptr, T: Ty,
                                              ParamKind: ImplicitParamKind::Other);
  auto *PrivateRef = DeclRefExpr::Create(
      Context: C, QualifierLoc: NestedNameSpecifierLoc(), TemplateKWLoc: SourceLocation(), D: PrivateVD,
      /*RefersToEnclosingVariableOrCapture=*/false, NameLoc: Loc, T: Ty, VK: VK_LValue);
  // The init expression uses the base element type (Ty may be an array).
  QualType ElemType = C.getBaseElementType(QT: Ty);
  auto *InitVD = ImplicitParamDecl::Create(C, DC: CD, IdLoc: Loc, /*Id=*/nullptr, T: ElemType,
                                           ParamKind: ImplicitParamKind::Other);
  auto *InitRef = DeclRefExpr::Create(
      Context: C, QualifierLoc: NestedNameSpecifierLoc(), TemplateKWLoc: SourceLocation(), D: InitVD,
      /*RefersToEnclosingVariableOrCapture=*/false, NameLoc: Loc, T: ElemType, VK: VK_LValue);
  // The private copy is copy-initialized from the init reference.
  PrivateVD->setInitStyle(VarDecl::CInit);
  PrivateVD->setInit(ImplicitCastExpr::Create(Context: C, T: ElemType, Kind: CK_LValueToRValue,
                                              Operand: InitRef, /*BasePath=*/nullptr,
                                              Cat: VK_PRValue, FPO: FPOptionsOverride()));
  // Register the triple: the three vectors are kept in lock-step.
  Data.FirstprivateVars.emplace_back(Args&: OrigRef);
  Data.FirstprivateCopies.emplace_back(Args&: PrivateRef);
  Data.FirstprivateInits.emplace_back(Args&: InitRef);
  return OrigVD;
}
5563
// Lowers the implicit task wrapping a target region (e.g. 'target' with
// 'depend'/'nowait'): the offload argument arrays in \p InputInfo (base
// pointers, pointers, sizes, and optionally mappers) are privatized as
// implicit firstprivates so they remain valid when the task executes, then
// the runtime task call is emitted.
void CodeGenFunction::EmitOMPTargetTaskBasedDirective(
    const OMPExecutableDirective &S, const RegionCodeGenTy &BodyGen,
    OMPTargetDataInfo &InputInfo) {
  // Emit outlined function for task construct.
  const CapturedStmt *CS = S.getCapturedStmt(RegionKind: OMPD_task);
  Address CapturedStruct = GenerateCapturedStmtArgument(S: *CS);
  CanQualType SharedsTy =
      getContext().getCanonicalTagType(TD: CS->getCapturedRecordDecl());
  auto I = CS->getCapturedDecl()->param_begin();
  auto PartId = std::next(x: I);
  auto TaskT = std::next(x: I, n: 4);
  OMPTaskDataTy Data;
  // The task is not final.
  Data.Final.setInt(/*IntVal=*/false);
  // Get list of firstprivate variables.
  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto IElemInitRef = C->inits().begin();
    for (auto *IInit : C->private_copies()) {
      Data.FirstprivateVars.push_back(Elt: *IRef);
      Data.FirstprivateCopies.push_back(Elt: IInit);
      Data.FirstprivateInits.push_back(Elt: *IElemInitRef);
      ++IRef;
      ++IElemInitRef;
    }
  }
  // Collect in_reduction clause operands.
  SmallVector<const Expr *, 4> LHSs;
  SmallVector<const Expr *, 4> RHSs;
  for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) {
    Data.ReductionVars.append(in_start: C->varlist_begin(), in_end: C->varlist_end());
    Data.ReductionOrigs.append(in_start: C->varlist_begin(), in_end: C->varlist_end());
    Data.ReductionCopies.append(in_start: C->privates().begin(), in_end: C->privates().end());
    Data.ReductionOps.append(in_start: C->reduction_ops().begin(),
                             in_end: C->reduction_ops().end());
    LHSs.append(in_start: C->lhs_exprs().begin(), in_end: C->lhs_exprs().end());
    RHSs.append(in_start: C->rhs_exprs().begin(), in_end: C->rhs_exprs().end());
  }
  OMPPrivateScope TargetScope(*this);
  // Implicit firstprivate decls for the offload arrays: base pointers,
  // pointers, sizes, and (optionally) mappers.
  VarDecl *BPVD = nullptr;
  VarDecl *PVD = nullptr;
  VarDecl *SVD = nullptr;
  VarDecl *MVD = nullptr;
  if (InputInfo.NumberOfTargetItems > 0) {
    auto *CD = CapturedDecl::Create(
        C&: getContext(), DC: getContext().getTranslationUnitDecl(), /*NumParams=*/0);
    llvm::APInt ArrSize(/*numBits=*/32, InputInfo.NumberOfTargetItems);
    // void*[N] for the base-pointer, pointer and mapper arrays.
    QualType BaseAndPointerAndMapperType = getContext().getConstantArrayType(
        EltTy: getContext().VoidPtrTy, ArySize: ArrSize, SizeExpr: nullptr, ASM: ArraySizeModifier::Normal,
        /*IndexTypeQuals=*/0);
    BPVD = createImplicitFirstprivateForType(
        C&: getContext(), Data, Ty: BaseAndPointerAndMapperType, CD, Loc: S.getBeginLoc());
    PVD = createImplicitFirstprivateForType(
        C&: getContext(), Data, Ty: BaseAndPointerAndMapperType, CD, Loc: S.getBeginLoc());
    // int64_t[N] for the sizes array.
    QualType SizesType = getContext().getConstantArrayType(
        EltTy: getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1),
        ArySize: ArrSize, SizeExpr: nullptr, ASM: ArraySizeModifier::Normal,
        /*IndexTypeQuals=*/0);
    SVD = createImplicitFirstprivateForType(C&: getContext(), Data, Ty: SizesType, CD,
                                            Loc: S.getBeginLoc());
    TargetScope.addPrivate(LocalVD: BPVD, Addr: InputInfo.BasePointersArray);
    TargetScope.addPrivate(LocalVD: PVD, Addr: InputInfo.PointersArray);
    TargetScope.addPrivate(LocalVD: SVD, Addr: InputInfo.SizesArray);
    // If there is no user-defined mapper, the mapper array will be nullptr. In
    // this case, we don't need to privatize it.
    if (!isa_and_nonnull<llvm::ConstantPointerNull>(
            Val: InputInfo.MappersArray.emitRawPointer(CGF&: *this))) {
      MVD = createImplicitFirstprivateForType(
          C&: getContext(), Data, Ty: BaseAndPointerAndMapperType, CD, Loc: S.getBeginLoc());
      TargetScope.addPrivate(LocalVD: MVD, Addr: InputInfo.MappersArray);
    }
  }
  (void)TargetScope.Privatize();
  buildDependences(S, Data);
  OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S);
  // Body-generation callback run inside the outlined task function: remaps
  // firstprivates and the privatized offload arrays, then emits the body.
  auto &&CodeGen = [&Data, &S, CS, &BodyGen, BPVD, PVD, SVD, MVD, EKind,
                    &InputInfo](CodeGenFunction &CGF, PrePostActionTy &Action) {
    // Set proper addresses for generated private copies.
    OMPPrivateScope Scope(CGF);
    if (!Data.FirstprivateVars.empty()) {
      enum { PrivatesParam = 2, CopyFnParam = 3 };
      // Load the task's "copy" helper and the privates block pointer.
      llvm::Value *CopyFn = CGF.Builder.CreateLoad(
          Addr: CGF.GetAddrOfLocalVar(VD: CS->getCapturedDecl()->getParam(i: CopyFnParam)));
      llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(Addr: CGF.GetAddrOfLocalVar(
          VD: CS->getCapturedDecl()->getParam(i: PrivatesParam)));
      // Map privates.
      llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
      llvm::SmallVector<llvm::Value *, 16> CallArgs;
      llvm::SmallVector<llvm::Type *, 4> ParamTypes;
      CallArgs.push_back(Elt: PrivatesPtr);
      ParamTypes.push_back(Elt: PrivatesPtr->getType());
      for (const Expr *E : Data.FirstprivateVars) {
        const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl());
        RawAddress PrivatePtr =
            CGF.CreateMemTemp(T: CGF.getContext().getPointerType(T: E->getType()),
                              Name: ".firstpriv.ptr.addr");
        PrivatePtrs.emplace_back(Args&: VD, Args&: PrivatePtr);
        CallArgs.push_back(Elt: PrivatePtr.getPointer());
        ParamTypes.push_back(Elt: PrivatePtr.getType());
      }
      auto *CopyFnTy = llvm::FunctionType::get(Result: CGF.Builder.getVoidTy(),
                                               Params: ParamTypes, /*isVarArg=*/false);
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(
          CGF, Loc: S.getBeginLoc(), OutlinedFn: {CopyFnTy, CopyFn}, Args: CallArgs);
      // Redirect each firstprivate to the address the copy helper stored.
      for (const auto &Pair : PrivatePtrs) {
        Address Replacement(
            CGF.Builder.CreateLoad(Addr: Pair.second),
            CGF.ConvertTypeForMem(T: Pair.first->getType().getNonReferenceType()),
            CGF.getContext().getDeclAlign(D: Pair.first));
        Scope.addPrivate(LocalVD: Pair.first, Addr: Replacement);
      }
    }
    CGF.processInReduction(S, Data, CGF, CS, Scope);
    if (InputInfo.NumberOfTargetItems > 0) {
      // Rewrite InputInfo to point at the task-private copies of the arrays.
      InputInfo.BasePointersArray = CGF.Builder.CreateConstArrayGEP(
          Addr: CGF.GetAddrOfLocalVar(VD: BPVD), /*Index=*/0);
      InputInfo.PointersArray = CGF.Builder.CreateConstArrayGEP(
          Addr: CGF.GetAddrOfLocalVar(VD: PVD), /*Index=*/0);
      InputInfo.SizesArray = CGF.Builder.CreateConstArrayGEP(
          Addr: CGF.GetAddrOfLocalVar(VD: SVD), /*Index=*/0);
      // If MVD is nullptr, the mapper array is not privatized
      if (MVD)
        InputInfo.MappersArray = CGF.Builder.CreateConstArrayGEP(
            Addr: CGF.GetAddrOfLocalVar(VD: MVD), /*Index=*/0);
    }

    Action.Enter(CGF);
    OMPLexicalScope LexScope(CGF, S, OMPD_task, /*EmitPreInitStmt=*/false);
    auto *TL = S.getSingleClause<OMPThreadLimitClause>();
    if (CGF.CGM.getLangOpts().OpenMP >= 51 &&
        needsTaskBasedThreadLimit(DKind: EKind) && TL) {
      // Emit __kmpc_set_thread_limit() to set the thread_limit for the task
      // enclosing this target region. This will indirectly set the thread_limit
      // for every applicable construct within target region.
      CGF.CGM.getOpenMPRuntime().emitThreadLimitClause(
          CGF, ThreadLimit: TL->getThreadLimit().front(), Loc: S.getBeginLoc());
    }
    BodyGen(CGF);
  };
  llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
      D: S, ThreadIDVar: *I, PartIDVar: *PartId, TaskTVar: *TaskT, InnermostKind: EKind, CodeGen, /*Tied=*/true,
      NumberOfParts&: Data.NumberOfParts);
  // The 'nowait' clause is encoded as a constant if-condition for the task
  // call: 1 when present, 0 otherwise.
  llvm::APInt TrueOrFalse(32, S.hasClausesOfKind<OMPNowaitClause>() ? 1 : 0);
  IntegerLiteral IfCond(getContext(), TrueOrFalse,
                        getContext().getIntTypeForBitwidth(DestWidth: 32, /*Signed=*/0),
                        SourceLocation());
  CGM.getOpenMPRuntime().emitTaskCall(CGF&: *this, Loc: S.getBeginLoc(), D: S, TaskFunction: OutlinedFn,
                                      SharedsTy, Shareds: CapturedStruct, IfCond: &IfCond, Data);
}
5712
// Remaps task reduction and in_reduction items to the private addresses
// provided by the runtime's task-reduction bookkeeping, privatizing them in
// \p Scope (reductions) and a local scope (in_reduction items). Mirrors the
// reduction handling inside EmitOMPTaskBasedDirective's CodeGen lambda.
void CodeGenFunction::processInReduction(const OMPExecutableDirective &S,
                                         OMPTaskDataTy &Data,
                                         CodeGenFunction &CGF,
                                         const CapturedStmt *CS,
                                         OMPPrivateScope &Scope) {
  OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S);
  if (Data.Reductions) {
    OpenMPDirectiveKind CapturedRegion = EKind;
    OMPLexicalScope LexScope(CGF, S, CapturedRegion);
    ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionVars,
                           Data.ReductionCopies, Data.ReductionOps);
    // NOTE(review): parameter index 4 is assumed to hold the reductions
    // pointer for this captured decl layout — confirm against its builder.
    llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad(
        Addr: CGF.GetAddrOfLocalVar(VD: CS->getCapturedDecl()->getParam(i: 4)));
    for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) {
      RedCG.emitSharedOrigLValue(CGF, N: Cnt);
      RedCG.emitAggregateType(CGF, N: Cnt);
      // FIXME: This must removed once the runtime library is fixed.
      // Emit required threadprivate variables for
      // initializer/combiner/finalizer.
      CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, Loc: S.getBeginLoc(),
                                                         RCG&: RedCG, N: Cnt);
      Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
          CGF, Loc: S.getBeginLoc(), ReductionsPtr, SharedLVal: RedCG.getSharedLValue(N: Cnt));
      // The runtime returns a void*; cast it to the reduction item's type.
      Replacement = Address(
          CGF.EmitScalarConversion(Src: Replacement.emitRawPointer(CGF),
                                   SrcTy: CGF.getContext().VoidPtrTy,
                                   DstTy: CGF.getContext().getPointerType(
                                       T: Data.ReductionCopies[Cnt]->getType()),
                                   Loc: Data.ReductionCopies[Cnt]->getExprLoc()),
          CGF.ConvertTypeForMem(T: Data.ReductionCopies[Cnt]->getType()),
          Replacement.getAlignment());
      Replacement = RedCG.adjustPrivateAddress(CGF, N: Cnt, PrivateAddr: Replacement);
      Scope.addPrivate(LocalVD: RedCG.getBaseDecl(N: Cnt), Addr: Replacement);
    }
  }
  (void)Scope.Privatize();
  // Collect in_reduction clause operands; the four vectors stay in lock-step.
  SmallVector<const Expr *, 4> InRedVars;
  SmallVector<const Expr *, 4> InRedPrivs;
  SmallVector<const Expr *, 4> InRedOps;
  SmallVector<const Expr *, 4> TaskgroupDescriptors;
  for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) {
    auto IPriv = C->privates().begin();
    auto IRed = C->reduction_ops().begin();
    auto ITD = C->taskgroup_descriptors().begin();
    for (const Expr *Ref : C->varlist()) {
      InRedVars.emplace_back(Args&: Ref);
      InRedPrivs.emplace_back(Args: *IPriv);
      InRedOps.emplace_back(Args: *IRed);
      TaskgroupDescriptors.emplace_back(Args: *ITD);
      std::advance(i&: IPriv, n: 1);
      std::advance(i&: IRed, n: 1);
      std::advance(i&: ITD, n: 1);
    }
  }
  OMPPrivateScope InRedScope(CGF);
  if (!InRedVars.empty()) {
    ReductionCodeGen RedCG(InRedVars, InRedVars, InRedPrivs, InRedOps);
    for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) {
      RedCG.emitSharedOrigLValue(CGF, N: Cnt);
      RedCG.emitAggregateType(CGF, N: Cnt);
      // FIXME: This must removed once the runtime library is fixed.
      // Emit required threadprivate variables for
      // initializer/combiner/finalizer.
      CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, Loc: S.getBeginLoc(),
                                                         RCG&: RedCG, N: Cnt);
      llvm::Value *ReductionsPtr;
      if (const Expr *TRExpr = TaskgroupDescriptors[Cnt]) {
        ReductionsPtr =
            CGF.EmitLoadOfScalar(lvalue: CGF.EmitLValue(E: TRExpr), Loc: TRExpr->getExprLoc());
      } else {
        // No taskgroup descriptor: pass a null reductions pointer.
        ReductionsPtr = llvm::ConstantPointerNull::get(T: CGF.VoidPtrTy);
      }
      Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
          CGF, Loc: S.getBeginLoc(), ReductionsPtr, SharedLVal: RedCG.getSharedLValue(N: Cnt));
      Replacement = Address(
          CGF.EmitScalarConversion(
              Src: Replacement.emitRawPointer(CGF), SrcTy: CGF.getContext().VoidPtrTy,
              DstTy: CGF.getContext().getPointerType(T: InRedPrivs[Cnt]->getType()),
              Loc: InRedPrivs[Cnt]->getExprLoc()),
          CGF.ConvertTypeForMem(T: InRedPrivs[Cnt]->getType()),
          Replacement.getAlignment());
      Replacement = RedCG.adjustPrivateAddress(CGF, N: Cnt, PrivateAddr: Replacement);
      InRedScope.addPrivate(LocalVD: RedCG.getBaseDecl(N: Cnt), Addr: Replacement);
    }
  }
  (void)InRedScope.Privatize();
}
5800
// Lowers '#pragma omp task': determines the effective 'if' condition and
// tied-ness, then delegates to EmitOMPTaskBasedDirective with callbacks that
// emit the captured body and the runtime task-creation call.
void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
  // Emit outlined function for task construct.
  const CapturedStmt *CS = S.getCapturedStmt(RegionKind: OMPD_task);
  Address CapturedStruct = GenerateCapturedStmtArgument(S: *CS);
  CanQualType SharedsTy =
      getContext().getCanonicalTagType(TD: CS->getCapturedRecordDecl());
  // Pick the first 'if' clause that applies to 'task' (either unmodified or
  // with the 'task' name modifier).
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_task) {
      IfCond = C->getCondition();
      break;
    }
  }

  OMPTaskDataTy Data;
  // Check if we should emit tied or untied task.
  Data.Tied = !S.getSingleClause<OMPUntiedClause>();
  // Body callback: just emit the captured statement.
  auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitStmt(S: CS->getCapturedStmt());
  };
  // Task-generation callback: emits the runtime call once the outlined
  // function is available.
  auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
                    IfCond](CodeGenFunction &CGF, llvm::Function *OutlinedFn,
                            const OMPTaskDataTy &Data) {
    CGF.CGM.getOpenMPRuntime().emitTaskCall(CGF, Loc: S.getBeginLoc(), D: S, TaskFunction: OutlinedFn,
                                            SharedsTy, Shareds: CapturedStruct, IfCond,
                                            Data);
  };
  // Disable lastprivate-conditional tracking for the duration of the task.
  auto LPCRegion =
      CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
  EmitOMPTaskBasedDirective(S, CapturedRegion: OMPD_task, BodyGen, TaskGen, Data);
}
5833
5834void CodeGenFunction::EmitOMPTaskyieldDirective(
5835 const OMPTaskyieldDirective &S) {
5836 CGM.getOpenMPRuntime().emitTaskyieldCall(CGF&: *this, Loc: S.getBeginLoc());
5837}
5838
5839void CodeGenFunction::EmitOMPErrorDirective(const OMPErrorDirective &S) {
5840 const OMPMessageClause *MC = S.getSingleClause<OMPMessageClause>();
5841 Expr *ME = MC ? MC->getMessageString() : nullptr;
5842 const OMPSeverityClause *SC = S.getSingleClause<OMPSeverityClause>();
5843 bool IsFatal = false;
5844 if (!SC || SC->getSeverityKind() == OMPC_SEVERITY_fatal)
5845 IsFatal = true;
5846 CGM.getOpenMPRuntime().emitErrorCall(CGF&: *this, Loc: S.getBeginLoc(), ME, IsFatal);
5847}
5848
5849void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) {
5850 CGM.getOpenMPRuntime().emitBarrierCall(CGF&: *this, Loc: S.getBeginLoc(), Kind: OMPD_barrier);
5851}
5852
5853void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) {
5854 OMPTaskDataTy Data;
5855 // Build list of dependences
5856 buildDependences(S, Data);
5857 Data.HasNowaitClause = S.hasClausesOfKind<OMPNowaitClause>();
5858 CGM.getOpenMPRuntime().emitTaskwaitCall(CGF&: *this, Loc: S.getBeginLoc(), Data);
5859}
5860
5861static bool isSupportedByOpenMPIRBuilder(const OMPTaskgroupDirective &T) {
5862 return T.clauses().empty();
5863}
5864
// Emit code for '#pragma omp taskgroup'. Uses the OpenMPIRBuilder when it is
// enabled and the directive is clause-free; otherwise falls back to the
// CGOpenMPRuntime taskgroup region, initializing task_reduction data first
// when present.
void CodeGenFunction::EmitOMPTaskgroupDirective(
    const OMPTaskgroupDirective &S) {
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  if (CGM.getLangOpts().OpenMPIRBuilder && isSupportedByOpenMPIRBuilder(T: S)) {
    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
    InsertPointTy AllocaIP(AllocaInsertPt->getParent(),
                           AllocaInsertPt->getIterator());

    // Body callback: emit the captured taskgroup body at the supplied
    // code-generation insertion point.
    auto BodyGenCB = [&, this](InsertPointTy AllocaIP,
                               InsertPointTy CodeGenIP) {
      Builder.restoreIP(IP: CodeGenIP);
      EmitStmt(S: S.getInnermostCapturedStmt()->getCapturedStmt());
      return llvm::Error::success();
    };
    // Make sure CapturedStmtInfo is set while the body is emitted.
    CodeGenFunction::CGCapturedStmtInfo CapStmtInfo;
    if (!CapturedStmtInfo)
      CapturedStmtInfo = &CapStmtInfo;
    llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
        cantFail(ValOrErr: OMPBuilder.createTaskgroup(Loc: Builder, AllocaIP, BodyGenCB));
    Builder.restoreIP(IP: AfterIP);
    return;
  }
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    // If the taskgroup has task_reduction clauses, gather their data, emit
    // the runtime reduction-init call, and store the resulting descriptor
    // in the compiler-generated variable referenced by getReductionRef().
    if (const Expr *E = S.getReductionRef()) {
      SmallVector<const Expr *, 4> LHSs;
      SmallVector<const Expr *, 4> RHSs;
      OMPTaskDataTy Data;
      for (const auto *C : S.getClausesOfKind<OMPTaskReductionClause>()) {
        Data.ReductionVars.append(in_start: C->varlist_begin(), in_end: C->varlist_end());
        Data.ReductionOrigs.append(in_start: C->varlist_begin(), in_end: C->varlist_end());
        Data.ReductionCopies.append(in_start: C->privates().begin(), in_end: C->privates().end());
        Data.ReductionOps.append(in_start: C->reduction_ops().begin(),
                                 in_end: C->reduction_ops().end());
        LHSs.append(in_start: C->lhs_exprs().begin(), in_end: C->lhs_exprs().end());
        RHSs.append(in_start: C->rhs_exprs().begin(), in_end: C->rhs_exprs().end());
      }
      llvm::Value *ReductionDesc =
          CGF.CGM.getOpenMPRuntime().emitTaskReductionInit(CGF, Loc: S.getBeginLoc(),
                                                           LHSExprs: LHSs, RHSExprs: RHSs, Data);
      const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl());
      CGF.EmitVarDecl(D: *VD);
      CGF.EmitStoreOfScalar(Value: ReductionDesc, Addr: CGF.GetAddrOfLocalVar(VD),
                            /*Volatile=*/false, Ty: E->getType());
    }
    CGF.EmitStmt(S: S.getInnermostCapturedStmt()->getCapturedStmt());
  };
  CGM.getOpenMPRuntime().emitTaskgroupRegion(CGF&: *this, TaskgroupOpGen: CodeGen, Loc: S.getBeginLoc());
}
5915
5916void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
5917 llvm::AtomicOrdering AO = S.getSingleClause<OMPFlushClause>()
5918 ? llvm::AtomicOrdering::NotAtomic
5919 : llvm::AtomicOrdering::AcquireRelease;
5920 CGM.getOpenMPRuntime().emitFlush(
5921 CGF&: *this,
5922 Vars: [&S]() -> ArrayRef<const Expr *> {
5923 if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>())
5924 return llvm::ArrayRef(FlushClause->varlist_begin(),
5925 FlushClause->varlist_end());
5926 return {};
5927 }(),
5928 Loc: S.getBeginLoc(), AO);
5929}
5930
5931void CodeGenFunction::EmitOMPDepobjDirective(const OMPDepobjDirective &S) {
5932 const auto *DO = S.getSingleClause<OMPDepobjClause>();
5933 LValue DOLVal = EmitLValue(E: DO->getDepobj());
5934 if (const auto *DC = S.getSingleClause<OMPDependClause>()) {
5935 // Build list and emit dependences
5936 OMPTaskDataTy Data;
5937 buildDependences(S, Data);
5938 for (auto &Dep : Data.Dependences) {
5939 Address DepAddr = CGM.getOpenMPRuntime().emitDepobjDependClause(
5940 CGF&: *this, Dependencies: Dep, Loc: DC->getBeginLoc());
5941 EmitStoreOfScalar(value: DepAddr.emitRawPointer(CGF&: *this), lvalue: DOLVal);
5942 }
5943 return;
5944 }
5945 if (const auto *DC = S.getSingleClause<OMPDestroyClause>()) {
5946 CGM.getOpenMPRuntime().emitDestroyClause(CGF&: *this, DepobjLVal: DOLVal, Loc: DC->getBeginLoc());
5947 return;
5948 }
5949 if (const auto *UC = S.getSingleClause<OMPUpdateClause>()) {
5950 CGM.getOpenMPRuntime().emitUpdateClause(
5951 CGF&: *this, DepobjLVal: DOLVal, NewDepKind: UC->getDependencyKind(), Loc: UC->getBeginLoc());
5952 return;
5953 }
5954}
5955
// Emit code for '#pragma omp scan'. The directive splits the body of the
// enclosing loop directive (recorded in OMPParentLoopDirectiveForScan) into
// an input phase and a scan phase for each variable in an 'inscan'
// reduction. The simd-only path inlines the reduction between the two
// phases; the general path copies between the original variables and the
// compiler-generated copy arrays using the blocks set up by the enclosing
// loop emission (OMPScanDispatch/OMPBeforeScanBlock/OMPAfterScanBlock/...).
void CodeGenFunction::EmitOMPScanDirective(const OMPScanDirective &S) {
  // Nothing to do if there is no enclosing loop directive with inscan
  // reductions.
  if (!OMPParentLoopDirectiveForScan)
    return;
  const OMPExecutableDirective &ParentDir = *OMPParentLoopDirectiveForScan;
  bool IsInclusive = S.hasClausesOfKind<OMPInclusiveClause>();
  // Gather the inscan reduction data from the parent directive's reduction
  // clauses.
  SmallVector<const Expr *, 4> Shareds;
  SmallVector<const Expr *, 4> Privates;
  SmallVector<const Expr *, 4> LHSs;
  SmallVector<const Expr *, 4> RHSs;
  SmallVector<const Expr *, 4> ReductionOps;
  SmallVector<const Expr *, 4> CopyOps;
  SmallVector<const Expr *, 4> CopyArrayTemps;
  SmallVector<const Expr *, 4> CopyArrayElems;
  for (const auto *C : ParentDir.getClausesOfKind<OMPReductionClause>()) {
    if (C->getModifier() != OMPC_REDUCTION_inscan)
      continue;
    Shareds.append(in_start: C->varlist_begin(), in_end: C->varlist_end());
    Privates.append(in_start: C->privates().begin(), in_end: C->privates().end());
    LHSs.append(in_start: C->lhs_exprs().begin(), in_end: C->lhs_exprs().end());
    RHSs.append(in_start: C->rhs_exprs().begin(), in_end: C->rhs_exprs().end());
    ReductionOps.append(in_start: C->reduction_ops().begin(), in_end: C->reduction_ops().end());
    CopyOps.append(in_start: C->copy_ops().begin(), in_end: C->copy_ops().end());
    CopyArrayTemps.append(in_start: C->copy_array_temps().begin(),
                          in_end: C->copy_array_temps().end());
    CopyArrayElems.append(in_start: C->copy_array_elems().begin(),
                          in_end: C->copy_array_elems().end());
  }
  if (ParentDir.getDirectiveKind() == OMPD_simd ||
      (getLangOpts().OpenMPSimd &&
       isOpenMPSimdDirective(DKind: ParentDir.getDirectiveKind()))) {
    // For simd directive and simd-based directives in simd only mode, use the
    // following codegen:
    // int x = 0;
    // #pragma omp simd reduction(inscan, +: x)
    // for (..) {
    //   <first part>
    //   #pragma omp scan inclusive(x)
    //   <second part>
    // }
    // is transformed to:
    // int x = 0;
    // for (..) {
    //   int x_priv = 0;
    //   <first part>
    //   x = x_priv + x;
    //   x_priv = x;
    //   <second part>
    // }
    // and
    // int x = 0;
    // #pragma omp simd reduction(inscan, +: x)
    // for (..) {
    //   <first part>
    //   #pragma omp scan exclusive(x)
    //   <second part>
    // }
    // to
    // int x = 0;
    // for (..) {
    //   int x_priv = 0;
    //   <second part>
    //   int temp = x;
    //   x = x_priv + x;
    //   x_priv = temp;
    //   <first part>
    // }
    llvm::BasicBlock *OMPScanReduce = createBasicBlock(name: "omp.inscan.reduce");
    EmitBranch(Block: IsInclusive
                   ? OMPScanReduce
                   : BreakContinueStack.back().ContinueBlock.getBlock());
    EmitBlock(BB: OMPScanDispatch);
    {
      // New scope for correct construction/destruction of temp variables for
      // exclusive scan.
      LexicalScope Scope(*this, S.getSourceRange());
      EmitBranch(Block: IsInclusive ? OMPBeforeScanBlock : OMPAfterScanBlock);
      EmitBlock(BB: OMPScanReduce);
      if (!IsInclusive) {
        // Create temp var and copy LHS value to this temp value.
        // TMP = LHS;
        for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
          const Expr *PrivateExpr = Privates[I];
          const Expr *TempExpr = CopyArrayTemps[I];
          EmitAutoVarDecl(
              D: *cast<VarDecl>(Val: cast<DeclRefExpr>(Val: TempExpr)->getDecl()));
          LValue DestLVal = EmitLValue(E: TempExpr);
          LValue SrcLVal = EmitLValue(E: LHSs[I]);
          EmitOMPCopy(OriginalType: PrivateExpr->getType(), DestAddr: DestLVal.getAddress(),
                      SrcAddr: SrcLVal.getAddress(),
                      DestVD: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: LHSs[I])->getDecl()),
                      SrcVD: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: RHSs[I])->getDecl()),
                      Copy: CopyOps[I]);
        }
      }
      // Inline the reduction between the two phases (x = x_priv + x above).
      CGM.getOpenMPRuntime().emitReduction(
          CGF&: *this, Loc: ParentDir.getEndLoc(), Privates, LHSExprs: LHSs, RHSExprs: RHSs, ReductionOps,
          Options: {/*WithNowait=*/true, /*SimpleReduction=*/true,
           /*IsPrivateVarReduction*/ {}, .ReductionKind: OMPD_simd});
      // Copy the scanned value back into the private copy (x_priv = x, or
      // x_priv = temp for the exclusive case).
      for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
        const Expr *PrivateExpr = Privates[I];
        LValue DestLVal;
        LValue SrcLVal;
        if (IsInclusive) {
          DestLVal = EmitLValue(E: RHSs[I]);
          SrcLVal = EmitLValue(E: LHSs[I]);
        } else {
          const Expr *TempExpr = CopyArrayTemps[I];
          DestLVal = EmitLValue(E: RHSs[I]);
          SrcLVal = EmitLValue(E: TempExpr);
        }
        EmitOMPCopy(
            OriginalType: PrivateExpr->getType(), DestAddr: DestLVal.getAddress(), SrcAddr: SrcLVal.getAddress(),
            DestVD: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: LHSs[I])->getDecl()),
            SrcVD: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: RHSs[I])->getDecl()), Copy: CopyOps[I]);
      }
    }
    EmitBranch(Block: IsInclusive ? OMPAfterScanBlock : OMPBeforeScanBlock);
    OMPScanExitBlock = IsInclusive
                           ? BreakContinueStack.back().ContinueBlock.getBlock()
                           : OMPScanReduce;
    EmitBlock(BB: OMPAfterScanBlock);
    return;
  }
  // General (non-simd-only) path: the enclosing loop is emitted twice; the
  // first pass records per-iteration values into the copy arrays, the second
  // pass reads the scanned values back.
  if (!IsInclusive) {
    EmitBranch(Block: BreakContinueStack.back().ContinueBlock.getBlock());
    EmitBlock(BB: OMPScanExitBlock);
  }
  if (OMPFirstScanLoop) {
    // Emit buffer[i] = red; at the end of the input phase.
    const auto *IVExpr = cast<OMPLoopDirective>(Val: ParentDir)
                             .getIterationVariable()
                             ->IgnoreParenImpCasts();
    LValue IdxLVal = EmitLValue(E: IVExpr);
    llvm::Value *IdxVal = EmitLoadOfScalar(lvalue: IdxLVal, Loc: IVExpr->getExprLoc());
    IdxVal = Builder.CreateIntCast(V: IdxVal, DestTy: SizeTy, /*isSigned=*/false);
    for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
      const Expr *PrivateExpr = Privates[I];
      const Expr *OrigExpr = Shareds[I];
      const Expr *CopyArrayElem = CopyArrayElems[I];
      // Bind the array subscript's opaque index to the current iteration.
      OpaqueValueMapping IdxMapping(
          *this,
          cast<OpaqueValueExpr>(
              Val: cast<ArraySubscriptExpr>(Val: CopyArrayElem)->getIdx()),
          RValue::get(V: IdxVal));
      LValue DestLVal = EmitLValue(E: CopyArrayElem);
      LValue SrcLVal = EmitLValue(E: OrigExpr);
      EmitOMPCopy(
          OriginalType: PrivateExpr->getType(), DestAddr: DestLVal.getAddress(), SrcAddr: SrcLVal.getAddress(),
          DestVD: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: LHSs[I])->getDecl()),
          SrcVD: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: RHSs[I])->getDecl()), Copy: CopyOps[I]);
    }
  }
  EmitBranch(Block: BreakContinueStack.back().ContinueBlock.getBlock());
  if (IsInclusive) {
    EmitBlock(BB: OMPScanExitBlock);
    EmitBranch(Block: BreakContinueStack.back().ContinueBlock.getBlock());
  }
  EmitBlock(BB: OMPScanDispatch);
  if (!OMPFirstScanLoop) {
    // Emit red = buffer[i]; at the entrance to the scan phase.
    const auto *IVExpr = cast<OMPLoopDirective>(Val: ParentDir)
                             .getIterationVariable()
                             ->IgnoreParenImpCasts();
    LValue IdxLVal = EmitLValue(E: IVExpr);
    llvm::Value *IdxVal = EmitLoadOfScalar(lvalue: IdxLVal, Loc: IVExpr->getExprLoc());
    IdxVal = Builder.CreateIntCast(V: IdxVal, DestTy: SizeTy, /*isSigned=*/false);
    llvm::BasicBlock *ExclusiveExitBB = nullptr;
    if (!IsInclusive) {
      // For an exclusive scan, iteration 0 reads nothing: skip the copy when
      // the index is zero.
      llvm::BasicBlock *ContBB = createBasicBlock(name: "omp.exclusive.dec");
      ExclusiveExitBB = createBasicBlock(name: "omp.exclusive.copy.exit");
      llvm::Value *Cmp = Builder.CreateIsNull(Arg: IdxVal);
      Builder.CreateCondBr(Cond: Cmp, True: ExclusiveExitBB, False: ContBB);
      EmitBlock(BB: ContBB);
      // Use idx - 1 iteration for exclusive scan.
      IdxVal = Builder.CreateNUWSub(LHS: IdxVal, RHS: llvm::ConstantInt::get(Ty: SizeTy, V: 1));
    }
    for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
      const Expr *PrivateExpr = Privates[I];
      const Expr *OrigExpr = Shareds[I];
      const Expr *CopyArrayElem = CopyArrayElems[I];
      OpaqueValueMapping IdxMapping(
          *this,
          cast<OpaqueValueExpr>(
              Val: cast<ArraySubscriptExpr>(Val: CopyArrayElem)->getIdx()),
          RValue::get(V: IdxVal));
      LValue SrcLVal = EmitLValue(E: CopyArrayElem);
      LValue DestLVal = EmitLValue(E: OrigExpr);
      EmitOMPCopy(
          OriginalType: PrivateExpr->getType(), DestAddr: DestLVal.getAddress(), SrcAddr: SrcLVal.getAddress(),
          DestVD: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: LHSs[I])->getDecl()),
          SrcVD: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: RHSs[I])->getDecl()), Copy: CopyOps[I]);
    }
    if (!IsInclusive) {
      EmitBlock(BB: ExclusiveExitBB);
    }
  }
  EmitBranch(Block: (OMPFirstScanLoop == IsInclusive) ? OMPBeforeScanBlock
                                                  : OMPAfterScanBlock);
  EmitBlock(BB: OMPAfterScanBlock);
}
6156
// Emit the worksharing loop for a 'distribute' directive (also reached from
// combined distribute-based directives). Sets up the iteration variable and
// helper bound/stride/is-last variables, privatizes clause variables, then
// emits either the static(-chunked) inner loop or the dynamic outer loop via
// EmitOMPDistributeOuterLoop, followed by simd/reduction/lastprivate finals.
// CodeGenLoop emits the loop body (or the rest of a combined pragma) and
// IncExpr is the inner-loop increment expression.
void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,
                                            const CodeGenLoopTy &CodeGenLoop,
                                            Expr *IncExpr) {
  // Emit the loop iteration variable.
  const auto *IVExpr = cast<DeclRefExpr>(Val: S.getIterationVariable());
  const auto *IVDecl = cast<VarDecl>(Val: IVExpr->getDecl());
  EmitVarDecl(D: *IVDecl);

  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate iterations count on each
  // iteration (e.g., it is foldable into a constant).
  if (const auto *LIExpr = dyn_cast<DeclRefExpr>(Val: S.getLastIteration())) {
    EmitVarDecl(D: *cast<VarDecl>(Val: LIExpr->getDecl()));
    // Emit calculation of the iterations count.
    EmitIgnoredExpr(E: S.getCalcLastIteration());
  }

  CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();

  bool HasLastprivateClause = false;
  // Check pre-condition.
  {
    OMPLoopScope PreInitScope(*this, S);
    // Skip the entire loop if we don't meet the precondition.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    if (ConstantFoldsToSimpleInteger(Cond: S.getPreCond(), Result&: CondConstant)) {
      if (!CondConstant)
        return;
    } else {
      llvm::BasicBlock *ThenBlock = createBasicBlock(name: "omp.precond.then");
      ContBlock = createBasicBlock(name: "omp.precond.end");
      emitPreCond(CGF&: *this, S, Cond: S.getPreCond(), TrueBlock: ThenBlock, FalseBlock: ContBlock,
                  TrueCount: getProfileCount(S: &S));
      EmitBlock(BB: ThenBlock);
      incrementProfileCounter(S: &S);
    }

    emitAlignedClause(CGF&: *this, D: S);
    // Emit 'then' code.
    {
      // Emit helper vars inits.
      // For directives that share loop bounds with an inner worksharing loop
      // (e.g. 'distribute parallel for'), use the combined bound variables.
      LValue LB = EmitOMPHelperVar(
          CGF&: *this, Helper: cast<DeclRefExpr>(
                     Val: (isOpenMPLoopBoundSharingDirective(Kind: S.getDirectiveKind())
                             ? S.getCombinedLowerBoundVariable()
                             : S.getLowerBoundVariable())));
      LValue UB = EmitOMPHelperVar(
          CGF&: *this, Helper: cast<DeclRefExpr>(
                     Val: (isOpenMPLoopBoundSharingDirective(Kind: S.getDirectiveKind())
                             ? S.getCombinedUpperBoundVariable()
                             : S.getUpperBoundVariable())));
      LValue ST =
          EmitOMPHelperVar(CGF&: *this, Helper: cast<DeclRefExpr>(Val: S.getStrideVariable()));
      LValue IL =
          EmitOMPHelperVar(CGF&: *this, Helper: cast<DeclRefExpr>(Val: S.getIsLastIterVariable()));

      OMPPrivateScope LoopScope(*this);
      if (EmitOMPFirstprivateClause(D: S, PrivateScope&: LoopScope)) {
        // Emit implicit barrier to synchronize threads and avoid data races
        // on initialization of firstprivate variables and post-update of
        // lastprivate variables.
        CGM.getOpenMPRuntime().emitBarrierCall(
            CGF&: *this, Loc: S.getBeginLoc(), Kind: OMPD_unknown, /*EmitChecks=*/false,
            /*ForceSimpleCall=*/true);
      }
      EmitOMPPrivateClause(D: S, PrivateScope&: LoopScope);
      if (isOpenMPSimdDirective(DKind: S.getDirectiveKind()) &&
          !isOpenMPParallelDirective(DKind: S.getDirectiveKind()) &&
          !isOpenMPTeamsDirective(DKind: S.getDirectiveKind()))
        EmitOMPReductionClauseInit(D: S, PrivateScope&: LoopScope);
      HasLastprivateClause = EmitOMPLastprivateClauseInit(D: S, PrivateScope&: LoopScope);
      EmitOMPPrivateLoopCounters(S, LoopScope);
      (void)LoopScope.Privatize();
      if (isOpenMPTargetExecutionDirective(DKind: S.getDirectiveKind()))
        CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF&: *this, D: S);

      // Detect the distribute schedule kind and chunk.
      llvm::Value *Chunk = nullptr;
      OpenMPDistScheduleClauseKind ScheduleKind = OMPC_DIST_SCHEDULE_unknown;
      if (const auto *C = S.getSingleClause<OMPDistScheduleClause>()) {
        ScheduleKind = C->getDistScheduleKind();
        if (const Expr *Ch = C->getChunkSize()) {
          // Chunk expression is evaluated once and converted to the iteration
          // variable's type.
          Chunk = EmitScalarExpr(E: Ch);
          Chunk = EmitScalarConversion(Src: Chunk, SrcTy: Ch->getType(),
                                       DstTy: S.getIterationVariable()->getType(),
                                       Loc: S.getBeginLoc());
        }
      } else {
        // Default behaviour for dist_schedule clause.
        CGM.getOpenMPRuntime().getDefaultDistScheduleAndChunk(
            CGF&: *this, S, ScheduleKind, Chunk);
      }
      const unsigned IVSize = getContext().getTypeSize(T: IVExpr->getType());
      const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

      // OpenMP [2.10.8, distribute Construct, Description]
      // If dist_schedule is specified, kind must be static. If specified,
      // iterations are divided into chunks of size chunk_size, chunks are
      // assigned to the teams of the league in a round-robin fashion in the
      // order of the team number. When no chunk_size is specified, the
      // iteration space is divided into chunks that are approximately equal
      // in size, and at most one chunk is distributed to each team of the
      // league. The size of the chunks is unspecified in this case.
      bool StaticChunked =
          RT.isStaticChunked(ScheduleKind, /* Chunked */ Chunk != nullptr) &&
          isOpenMPLoopBoundSharingDirective(Kind: S.getDirectiveKind());
      if (RT.isStaticNonchunked(ScheduleKind,
                                /* Chunked */ Chunk != nullptr) ||
          StaticChunked) {
        CGOpenMPRuntime::StaticRTInput StaticInit(
            IVSize, IVSigned, /* Ordered = */ false, IL.getAddress(),
            LB.getAddress(), UB.getAddress(), ST.getAddress(),
            StaticChunked ? Chunk : nullptr);
        RT.emitDistributeStaticInit(CGF&: *this, Loc: S.getBeginLoc(), SchedKind: ScheduleKind,
                                    Values: StaticInit);
        JumpDest LoopExit =
            getJumpDestInCurrentScope(Target: createBasicBlock(name: "omp.loop.exit"));
        // UB = min(UB, GlobalUB);
        EmitIgnoredExpr(E: isOpenMPLoopBoundSharingDirective(Kind: S.getDirectiveKind())
                            ? S.getCombinedEnsureUpperBound()
                            : S.getEnsureUpperBound());
        // IV = LB;
        EmitIgnoredExpr(E: isOpenMPLoopBoundSharingDirective(Kind: S.getDirectiveKind())
                            ? S.getCombinedInit()
                            : S.getInit());

        const Expr *Cond =
            isOpenMPLoopBoundSharingDirective(Kind: S.getDirectiveKind())
                ? S.getCombinedCond()
                : S.getCond();

        if (StaticChunked)
          Cond = S.getCombinedDistCond();

        // For static unchunked schedules generate:
        //
        //  1. For distribute alone, codegen
        //    while (idx <= UB) {
        //      BODY;
        //      ++idx;
        //    }
        //
        //  2. When combined with 'for' (e.g. as in 'distribute parallel for')
        //    while (idx <= UB) {
        //      <CodeGen rest of pragma>(LB, UB);
        //      idx += ST;
        //    }
        //
        // For static chunk one schedule generate:
        //
        // while (IV <= GlobalUB) {
        //   <CodeGen rest of pragma>(LB, UB);
        //   LB += ST;
        //   UB += ST;
        //   UB = min(UB, GlobalUB);
        //   IV = LB;
        // }
        //
        emitCommonSimdLoop(
            CGF&: *this, S,
            SimdInitGen: [&S](CodeGenFunction &CGF, PrePostActionTy &) {
              if (isOpenMPSimdDirective(DKind: S.getDirectiveKind()))
                CGF.EmitOMPSimdInit(D: S);
            },
            BodyCodeGen: [&S, &LoopScope, Cond, IncExpr, LoopExit, &CodeGenLoop,
             StaticChunked](CodeGenFunction &CGF, PrePostActionTy &) {
              CGF.EmitOMPInnerLoop(
                  S, RequiresCleanup: LoopScope.requiresCleanups(), LoopCond: Cond, IncExpr,
                  BodyGen: [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
                    CodeGenLoop(CGF, S, LoopExit);
                  },
                  PostIncGen: [&S, StaticChunked](CodeGenFunction &CGF) {
                    if (StaticChunked) {
                      CGF.EmitIgnoredExpr(E: S.getCombinedNextLowerBound());
                      CGF.EmitIgnoredExpr(E: S.getCombinedNextUpperBound());
                      CGF.EmitIgnoredExpr(E: S.getCombinedEnsureUpperBound());
                      CGF.EmitIgnoredExpr(E: S.getCombinedInit());
                    }
                  });
            });
        EmitBlock(BB: LoopExit.getBlock());
        // Tell the runtime we are done.
        RT.emitForStaticFinish(CGF&: *this, Loc: S.getEndLoc(), DKind: OMPD_distribute);
      } else {
        // Emit the outer loop, which requests its work chunk [LB..UB] from
        // runtime and runs the inner loop to process it.
        const OMPLoopArguments LoopArguments = {
            LB.getAddress(), UB.getAddress(), ST.getAddress(), IL.getAddress(),
            Chunk};
        EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, LoopArgs: LoopArguments,
                                   CodeGenLoopContent: CodeGenLoop);
      }
      if (isOpenMPSimdDirective(DKind: S.getDirectiveKind())) {
        EmitOMPSimdFinal(D: S, CondGen: [IL, &S](CodeGenFunction &CGF) {
          return CGF.Builder.CreateIsNotNull(
              Arg: CGF.EmitLoadOfScalar(lvalue: IL, Loc: S.getBeginLoc()));
        });
      }
      if (isOpenMPSimdDirective(DKind: S.getDirectiveKind()) &&
          !isOpenMPParallelDirective(DKind: S.getDirectiveKind()) &&
          !isOpenMPTeamsDirective(DKind: S.getDirectiveKind())) {
        EmitOMPReductionClauseFinal(D: S, ReductionKind: OMPD_simd);
        // Emit post-update of the reduction variables if IsLastIter != 0.
        emitPostUpdateForReductionClause(
            CGF&: *this, D: S, CondGen: [IL, &S](CodeGenFunction &CGF) {
              return CGF.Builder.CreateIsNotNull(
                  Arg: CGF.EmitLoadOfScalar(lvalue: IL, Loc: S.getBeginLoc()));
            });
      }
      // Emit final copy of the lastprivate variables if IsLastIter != 0.
      if (HasLastprivateClause) {
        EmitOMPLastprivateClauseFinal(
            D: S, /*NoFinals=*/false,
            IsLastIterCond: Builder.CreateIsNotNull(Arg: EmitLoadOfScalar(lvalue: IL, Loc: S.getBeginLoc())));
      }
    }

    // We're now done with the loop, so jump to the continuation block.
    if (ContBlock) {
      EmitBranch(Block: ContBlock);
      EmitBlock(BB: ContBlock, IsFinished: true);
    }
  }
}
6385
6386// Pass OMPLoopDirective (instead of OMPDistributeDirective) to make this
6387// function available for "loop bind(teams)", which maps to "distribute".
6388static void emitOMPDistributeDirective(const OMPLoopDirective &S,
6389 CodeGenFunction &CGF,
6390 CodeGenModule &CGM) {
6391 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
6392 CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitOMPLoopBodyWithStopPoint, IncExpr: S.getInc());
6393 };
6394 OMPLexicalScope Scope(CGF, S, OMPD_unknown);
6395 CGM.getOpenMPRuntime().emitInlinedDirective(CGF, InnermostKind: OMPD_distribute, CodeGen);
6396}
6397
6398void CodeGenFunction::EmitOMPDistributeDirective(
6399 const OMPDistributeDirective &S) {
6400 emitOMPDistributeDirective(S, CGF&: *this, CGM);
6401}
6402
6403static llvm::Function *
6404emitOutlinedOrderedFunction(CodeGenModule &CGM, const CapturedStmt *S,
6405 const OMPExecutableDirective &D) {
6406 CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
6407 CodeGenFunction::CGCapturedStmtInfo CapStmtInfo;
6408 CGF.CapturedStmtInfo = &CapStmtInfo;
6409 llvm::Function *Fn = CGF.GenerateOpenMPCapturedStmtFunction(S: *S, D);
6410 Fn->setDoesNotRecurse();
6411 return Fn;
6412}
6413
6414template <typename T>
6415static void emitRestoreIP(CodeGenFunction &CGF, const T *C,
6416 llvm::OpenMPIRBuilder::InsertPointTy AllocaIP,
6417 llvm::OpenMPIRBuilder &OMPBuilder) {
6418
6419 unsigned NumLoops = C->getNumLoops();
6420 QualType Int64Ty = CGF.CGM.getContext().getIntTypeForBitwidth(
6421 /*DestWidth=*/64, /*Signed=*/1);
6422 llvm::SmallVector<llvm::Value *> StoreValues;
6423 for (unsigned I = 0; I < NumLoops; I++) {
6424 const Expr *CounterVal = C->getLoopData(I);
6425 assert(CounterVal);
6426 llvm::Value *StoreValue = CGF.EmitScalarConversion(
6427 Src: CGF.EmitScalarExpr(E: CounterVal), SrcTy: CounterVal->getType(), DstTy: Int64Ty,
6428 Loc: CounterVal->getExprLoc());
6429 StoreValues.emplace_back(Args&: StoreValue);
6430 }
6431 OMPDoacrossKind<T> ODK;
6432 bool IsDependSource = ODK.isSource(C);
6433 CGF.Builder.restoreIP(
6434 IP: OMPBuilder.createOrderedDepend(Loc: CGF.Builder, AllocaIP, NumLoops,
6435 StoreValues, Name: ".cnt.addr", IsDependSource));
6436}
6437
// Emit code for '#pragma omp ordered'. Handles three forms: ordered with
// depend/doacross clauses (standalone, emits doacross wait/post calls),
// ordered simd (body outlined and invoked), and ordered threads / bare
// ordered (body emitted inside a runtime ordered region). Each form has both
// an OpenMPIRBuilder path and a CGOpenMPRuntime path.
void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;

    if (S.hasClausesOfKind<OMPDependClause>() ||
        S.hasClausesOfKind<OMPDoacrossClause>()) {
      // The ordered directive with depend clause.
      assert(!S.hasAssociatedStmt() && "No associated statement must be in "
                                       "ordered depend|doacross construct.");
      InsertPointTy AllocaIP(AllocaInsertPt->getParent(),
                             AllocaInsertPt->getIterator());
      for (const auto *DC : S.getClausesOfKind<OMPDependClause>())
        emitRestoreIP(CGF&: *this, C: DC, AllocaIP, OMPBuilder);
      for (const auto *DC : S.getClausesOfKind<OMPDoacrossClause>())
        emitRestoreIP(CGF&: *this, C: DC, AllocaIP, OMPBuilder);
    } else {
      // The ordered directive with threads or simd clause, or without clause.
      // Without clause, it behaves as if the threads clause is specified.
      const auto *C = S.getSingleClause<OMPSIMDClause>();

      auto FiniCB = [this](InsertPointTy IP) {
        OMPBuilderCBHelpers::FinalizeOMPRegion(CGF&: *this, IP);
        return llvm::Error::success();
      };

      // Body callback: with a 'simd' clause the body is outlined and called;
      // otherwise it is emitted inline.
      auto BodyGenCB = [&S, C, this](InsertPointTy AllocaIP,
                                     InsertPointTy CodeGenIP) {
        Builder.restoreIP(IP: CodeGenIP);

        const CapturedStmt *CS = S.getInnermostCapturedStmt();
        if (C) {
          llvm::BasicBlock *FiniBB = splitBBWithSuffix(
              Builder, /*CreateBranch=*/false, Suffix: ".ordered.after");
          llvm::SmallVector<llvm::Value *, 16> CapturedVars;
          GenerateOpenMPCapturedVars(S: *CS, CapturedVars);
          llvm::Function *OutlinedFn = emitOutlinedOrderedFunction(CGM, S: CS, D: S);
          assert(S.getBeginLoc().isValid() &&
                 "Outlined function call location must be valid.");
          ApplyDebugLocation::CreateDefaultArtificial(CGF&: *this, TemporaryLocation: S.getBeginLoc());
          OMPBuilderCBHelpers::EmitCaptureStmt(CGF&: *this, CodeGenIP, FiniBB&: *FiniBB,
                                               Fn: OutlinedFn, Args: CapturedVars);
        } else {
          OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
              CGF&: *this, RegionBodyStmt: CS->getCapturedStmt(), AllocaIP, CodeGenIP, RegionName: "ordered");
        }
        return llvm::Error::success();
      };

      OMPLexicalScope Scope(*this, S, OMPD_unknown);
      llvm::OpenMPIRBuilder::InsertPointTy AfterIP = cantFail(
          ValOrErr: OMPBuilder.createOrderedThreadsSimd(Loc: Builder, BodyGenCB, FiniCB, IsThreads: !C));
      Builder.restoreIP(IP: AfterIP);
    }
    return;
  }

  // Non-IRBuilder path: standalone depend/doacross forms emit doacross
  // wait/post calls directly.
  if (S.hasClausesOfKind<OMPDependClause>()) {
    assert(!S.hasAssociatedStmt() &&
           "No associated statement must be in ordered depend construct.");
    for (const auto *DC : S.getClausesOfKind<OMPDependClause>())
      CGM.getOpenMPRuntime().emitDoacrossOrdered(CGF&: *this, C: DC);
    return;
  }
  if (S.hasClausesOfKind<OMPDoacrossClause>()) {
    assert(!S.hasAssociatedStmt() &&
           "No associated statement must be in ordered doacross construct.");
    for (const auto *DC : S.getClausesOfKind<OMPDoacrossClause>())
      CGM.getOpenMPRuntime().emitDoacrossOrdered(CGF&: *this, C: DC);
    return;
  }
  const auto *C = S.getSingleClause<OMPSIMDClause>();
  auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF,
                                 PrePostActionTy &Action) {
    const CapturedStmt *CS = S.getInnermostCapturedStmt();
    if (C) {
      // 'simd' clause: outline the body and call it.
      llvm::SmallVector<llvm::Value *, 16> CapturedVars;
      CGF.GenerateOpenMPCapturedVars(S: *CS, CapturedVars);
      llvm::Function *OutlinedFn = emitOutlinedOrderedFunction(CGM, S: CS, D: S);
      CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc: S.getBeginLoc(),
                                                      OutlinedFn, Args: CapturedVars);
    } else {
      Action.Enter(CGF);
      CGF.EmitStmt(S: CS->getCapturedStmt());
    }
  };
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  CGM.getOpenMPRuntime().emitOrderedRegion(CGF&: *this, OrderedOpGen: CodeGen, Loc: S.getBeginLoc(), IsThreads: !C);
}
6527
6528static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val,
6529 QualType SrcType, QualType DestType,
6530 SourceLocation Loc) {
6531 assert(CGF.hasScalarEvaluationKind(DestType) &&
6532 "DestType must have scalar evaluation kind.");
6533 assert(!Val.isAggregate() && "Must be a scalar or complex.");
6534 return Val.isScalar() ? CGF.EmitScalarConversion(Src: Val.getScalarVal(), SrcTy: SrcType,
6535 DstTy: DestType, Loc)
6536 : CGF.EmitComplexToScalarConversion(
6537 Src: Val.getComplexVal(), SrcTy: SrcType, DstTy: DestType, Loc);
6538}
6539
6540static CodeGenFunction::ComplexPairTy
6541convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType,
6542 QualType DestType, SourceLocation Loc) {
6543 assert(CGF.getEvaluationKind(DestType) == TEK_Complex &&
6544 "DestType must have complex evaluation kind.");
6545 CodeGenFunction::ComplexPairTy ComplexVal;
6546 if (Val.isScalar()) {
6547 // Convert the input element to the element type of the complex.
6548 QualType DestElementType =
6549 DestType->castAs<ComplexType>()->getElementType();
6550 llvm::Value *ScalarVal = CGF.EmitScalarConversion(
6551 Src: Val.getScalarVal(), SrcTy: SrcType, DstTy: DestElementType, Loc);
6552 ComplexVal = CodeGenFunction::ComplexPairTy(
6553 ScalarVal, llvm::Constant::getNullValue(Ty: ScalarVal->getType()));
6554 } else {
6555 assert(Val.isComplex() && "Must be a scalar or complex.");
6556 QualType SrcElementType = SrcType->castAs<ComplexType>()->getElementType();
6557 QualType DestElementType =
6558 DestType->castAs<ComplexType>()->getElementType();
6559 ComplexVal.first = CGF.EmitScalarConversion(
6560 Src: Val.getComplexVal().first, SrcTy: SrcElementType, DstTy: DestElementType, Loc);
6561 ComplexVal.second = CGF.EmitScalarConversion(
6562 Src: Val.getComplexVal().second, SrcTy: SrcElementType, DstTy: DestElementType, Loc);
6563 }
6564 return ComplexVal;
6565}
6566
6567static void emitSimpleAtomicStore(CodeGenFunction &CGF, llvm::AtomicOrdering AO,
6568 LValue LVal, RValue RVal) {
6569 if (LVal.isGlobalReg())
6570 CGF.EmitStoreThroughGlobalRegLValue(Src: RVal, Dst: LVal);
6571 else
6572 CGF.EmitAtomicStore(rvalue: RVal, lvalue: LVal, AO, IsVolatile: LVal.isVolatile(), /*isInit=*/false);
6573}
6574
6575static RValue emitSimpleAtomicLoad(CodeGenFunction &CGF,
6576 llvm::AtomicOrdering AO, LValue LVal,
6577 SourceLocation Loc) {
6578 if (LVal.isGlobalReg())
6579 return CGF.EmitLoadOfLValue(V: LVal, Loc);
6580 return CGF.EmitAtomicLoad(
6581 lvalue: LVal, loc: Loc, AO: llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(SuccessOrdering: AO),
6582 IsVolatile: LVal.isVolatile());
6583}
6584
6585void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal,
6586 QualType RValTy, SourceLocation Loc) {
6587 switch (getEvaluationKind(T: LVal.getType())) {
6588 case TEK_Scalar:
6589 EmitStoreThroughLValue(Src: RValue::get(V: convertToScalarValue(
6590 CGF&: *this, Val: RVal, SrcType: RValTy, DestType: LVal.getType(), Loc)),
6591 Dst: LVal);
6592 break;
6593 case TEK_Complex:
6594 EmitStoreOfComplex(
6595 V: convertToComplexValue(CGF&: *this, Val: RVal, SrcType: RValTy, DestType: LVal.getType(), Loc), dest: LVal,
6596 /*isInit=*/false);
6597 break;
6598 case TEK_Aggregate:
6599 llvm_unreachable("Must be a scalar or complex.");
6600 }
6601}
6602
/// Emit 'omp atomic read': atomically load 'x' and store the result into 'v'.
static void emitOMPAtomicReadExpr(CodeGenFunction &CGF, llvm::AtomicOrdering AO,
                                  const Expr *X, const Expr *V,
                                  SourceLocation Loc) {
  // v = x;
  assert(V->isLValue() && "V of 'omp atomic read' is not lvalue");
  assert(X->isLValue() && "X of 'omp atomic read' is not lvalue");
  LValue XLValue = CGF.EmitLValue(E: X);
  LValue VLValue = CGF.EmitLValue(E: V);
  // Atomic load of 'x' (falls back to a plain load for global registers).
  RValue Res = emitSimpleAtomicLoad(CGF, AO, LVal: XLValue, Loc);
  // OpenMP, 2.17.7, atomic Construct
  // If the read or capture clause is specified and the acquire, acq_rel, or
  // seq_cst clause is specified then the strong flush on exit from the atomic
  // operation is also an acquire flush.
  switch (AO) {
  case llvm::AtomicOrdering::Acquire:
  case llvm::AtomicOrdering::AcquireRelease:
  case llvm::AtomicOrdering::SequentiallyConsistent:
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, Vars: {}, Loc,
                                         AO: llvm::AtomicOrdering::Acquire);
    break;
  case llvm::AtomicOrdering::Monotonic:
  case llvm::AtomicOrdering::Release:
    // No exit flush required for relaxed/release reads.
    break;
  case llvm::AtomicOrdering::NotAtomic:
  case llvm::AtomicOrdering::Unordered:
    llvm_unreachable("Unexpected ordering.");
  }
  // Store the loaded value into 'v', converting from x's type if necessary.
  CGF.emitOMPSimpleStore(LVal: VLValue, RVal: Res, RValTy: X->getType().getNonReferenceType(), Loc);
  CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, LHS: V);
}
6633
/// Emit 'omp atomic write': atomically store the value of 'expr' into 'x'.
static void emitOMPAtomicWriteExpr(CodeGenFunction &CGF,
                                   llvm::AtomicOrdering AO, const Expr *X,
                                   const Expr *E, SourceLocation Loc) {
  // x = expr;
  assert(X->isLValue() && "X of 'omp atomic write' is not lvalue");
  // Atomic store (plain store for global registers).
  emitSimpleAtomicStore(CGF, AO, LVal: CGF.EmitLValue(E: X), RVal: CGF.EmitAnyExpr(E));
  CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, LHS: X);
  // OpenMP, 2.17.7, atomic Construct
  // If the write, update, or capture clause is specified and the release,
  // acq_rel, or seq_cst clause is specified then the strong flush on entry to
  // the atomic operation is also a release flush.
  switch (AO) {
  case llvm::AtomicOrdering::Release:
  case llvm::AtomicOrdering::AcquireRelease:
  case llvm::AtomicOrdering::SequentiallyConsistent:
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, Vars: {}, Loc,
                                         AO: llvm::AtomicOrdering::Release);
    break;
  case llvm::AtomicOrdering::Acquire:
  case llvm::AtomicOrdering::Monotonic:
    // No entry flush required for relaxed/acquire writes.
    break;
  case llvm::AtomicOrdering::NotAtomic:
  case llvm::AtomicOrdering::Unordered:
    llvm_unreachable("Unexpected ordering.");
  }
}
6660
/// Try to lower an 'omp atomic update' of lvalue \p X with operand \p Update
/// and operator \p BO to a single atomicrmw instruction.
/// Returns {true, old value of 'x'} on success, or {false, null} when the
/// update cannot be expressed as an atomicrmw (the caller then falls back to
/// a compare-and-swap loop).
static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X,
                                                RValue Update,
                                                BinaryOperatorKind BO,
                                                llvm::AtomicOrdering AO,
                                                bool IsXLHSInRHSPart) {
  ASTContext &Context = CGF.getContext();
  // Allow atomicrmw only if 'x' and 'update' are integer values, lvalue for 'x'
  // expression is simple and atomic is allowed for the given type for the
  // target platform.
  if (BO == BO_Comma || !Update.isScalar() || !X.isSimple() ||
      (!isa<llvm::ConstantInt>(Val: Update.getScalarVal()) &&
       (Update.getScalarVal()->getType() != X.getAddress().getElementType())) ||
      !Context.getTargetInfo().hasBuiltinAtomic(
          AtomicSizeInBits: Context.getTypeSize(T: X.getType()), AlignmentInBits: Context.toBits(CharSize: X.getAlignment())))
    return std::make_pair(x: false, y: RValue::get(V: nullptr));

  // Integer types are always eligible; floating-point types only for add/sub
  // and only when the store size is a power of two.
  auto &&CheckAtomicSupport = [&CGF](llvm::Type *T, BinaryOperatorKind BO) {
    if (T->isIntegerTy())
      return true;

    if (T->isFloatingPointTy() && (BO == BO_Add || BO == BO_Sub))
      return llvm::isPowerOf2_64(Value: CGF.CGM.getDataLayout().getTypeStoreSize(Ty: T));

    return false;
  };

  if (!CheckAtomicSupport(Update.getScalarVal()->getType(), BO) ||
      !CheckAtomicSupport(X.getAddress().getElementType(), BO))
    return std::make_pair(x: false, y: RValue::get(V: nullptr));

  bool IsInteger = X.getAddress().getElementType()->isIntegerTy();
  // Map the source-level binary operator to an atomicrmw opcode, picking the
  // integer or floating-point flavor as appropriate.
  llvm::AtomicRMWInst::BinOp RMWOp;
  switch (BO) {
  case BO_Add:
    RMWOp = IsInteger ? llvm::AtomicRMWInst::Add : llvm::AtomicRMWInst::FAdd;
    break;
  case BO_Sub:
    // Subtraction is not commutative: only 'x = x - expr' maps to atomicrmw
    // sub; 'x = expr - x' must take the cmpxchg fallback.
    if (!IsXLHSInRHSPart)
      return std::make_pair(x: false, y: RValue::get(V: nullptr));
    RMWOp = IsInteger ? llvm::AtomicRMWInst::Sub : llvm::AtomicRMWInst::FSub;
    break;
  case BO_And:
    RMWOp = llvm::AtomicRMWInst::And;
    break;
  case BO_Or:
    RMWOp = llvm::AtomicRMWInst::Or;
    break;
  case BO_Xor:
    RMWOp = llvm::AtomicRMWInst::Xor;
    break;
  case BO_LT:
    // Conditional-update form: '<' selects min or max depending on which side
    // 'x' appears on, with signed/unsigned/float variants.
    if (IsInteger)
      RMWOp = X.getType()->hasSignedIntegerRepresentation()
                  ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min
                                     : llvm::AtomicRMWInst::Max)
                  : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin
                                     : llvm::AtomicRMWInst::UMax);
    else
      RMWOp = IsXLHSInRHSPart ? llvm::AtomicRMWInst::FMin
                              : llvm::AtomicRMWInst::FMax;
    break;
  case BO_GT:
    // Mirror of BO_LT with the min/max direction flipped.
    if (IsInteger)
      RMWOp = X.getType()->hasSignedIntegerRepresentation()
                  ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max
                                     : llvm::AtomicRMWInst::Min)
                  : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax
                                     : llvm::AtomicRMWInst::UMin);
    else
      RMWOp = IsXLHSInRHSPart ? llvm::AtomicRMWInst::FMax
                              : llvm::AtomicRMWInst::FMin;
    break;
  case BO_Assign:
    // Plain assignment becomes an exchange (old value is still returned).
    RMWOp = llvm::AtomicRMWInst::Xchg;
    break;
  case BO_Mul:
  case BO_Div:
  case BO_Rem:
  case BO_Shl:
  case BO_Shr:
  case BO_LAnd:
  case BO_LOr:
    // No atomicrmw equivalent; caller falls back to cmpxchg.
    return std::make_pair(x: false, y: RValue::get(V: nullptr));
  case BO_PtrMemD:
  case BO_PtrMemI:
  case BO_LE:
  case BO_GE:
  case BO_EQ:
  case BO_NE:
  case BO_Cmp:
  case BO_AddAssign:
  case BO_SubAssign:
  case BO_AndAssign:
  case BO_OrAssign:
  case BO_XorAssign:
  case BO_MulAssign:
  case BO_DivAssign:
  case BO_RemAssign:
  case BO_ShlAssign:
  case BO_ShrAssign:
  case BO_Comma:
    // These operators never reach here as atomic update operations.
    llvm_unreachable("Unsupported atomic update operation");
  }
  llvm::Value *UpdateVal = Update.getScalarVal();
  // A constant integer operand may still have the wrong width/type; cast it
  // to the storage type of 'x'.
  if (auto *IC = dyn_cast<llvm::ConstantInt>(Val: UpdateVal)) {
    if (IsInteger)
      UpdateVal = CGF.Builder.CreateIntCast(
          V: IC, DestTy: X.getAddress().getElementType(),
          isSigned: X.getType()->hasSignedIntegerRepresentation());
    else
      UpdateVal = CGF.Builder.CreateCast(Op: llvm::Instruction::CastOps::UIToFP, V: IC,
                                         DestTy: X.getAddress().getElementType());
  }
  llvm::AtomicRMWInst *Res =
      CGF.emitAtomicRMWInst(Op: RMWOp, Addr: X.getAddress(), Val: UpdateVal, Order: AO);
  // atomicrmw yields the previous value of 'x'.
  return std::make_pair(x: true, y: RValue::get(V: Res));
}
6778
/// Emit an atomic update of \p X with operand \p E and operator \p BO.
/// First tries a single atomicrmw; when that is not possible, either performs
/// a plain load/update/store (global-register lvalues) or a compare-and-swap
/// loop, using \p CommonGen to emit the combined update expression.
/// Returns {true, old value} iff an atomicrmw was emitted.
std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
    LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart,
    llvm::AtomicOrdering AO, SourceLocation Loc,
    const llvm::function_ref<RValue(RValue)> CommonGen) {
  // Update expressions are allowed to have the following forms:
  // x binop= expr; -> xrval + expr;
  // x++, ++x -> xrval + 1;
  // x--, --x -> xrval - 1;
  // x = x binop expr; -> xrval binop expr
  // x = expr Op x; - > expr binop xrval;
  auto Res = emitOMPAtomicRMW(CGF&: *this, X, Update: E, BO, AO, IsXLHSInRHSPart);
  if (!Res.first) {
    if (X.isGlobalReg()) {
      // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop
      // 'xrval'.
      EmitStoreThroughLValue(Src: CommonGen(EmitLoadOfLValue(V: X, Loc)), Dst: X);
    } else {
      // Perform compare-and-swap procedure.
      EmitAtomicUpdate(LVal: X, AO, UpdateOp: CommonGen, IsVolatile: X.getType().isVolatileQualified());
    }
  }
  return Res;
}
6802
/// Emit 'omp atomic update': atomically update 'x' using the rewritten update
/// expression \p UE, in which 'x' and 'expr' appear as opaque values.
static void emitOMPAtomicUpdateExpr(CodeGenFunction &CGF,
                                    llvm::AtomicOrdering AO, const Expr *X,
                                    const Expr *E, const Expr *UE,
                                    bool IsXLHSInRHSPart, SourceLocation Loc) {
  assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
         "Update expr in 'atomic update' must be a binary operator.");
  const auto *BOUE = cast<BinaryOperator>(Val: UE->IgnoreImpCasts());
  // Update expressions are allowed to have the following forms:
  // x binop= expr; -> xrval + expr;
  // x++, ++x -> xrval + 1;
  // x--, --x -> xrval - 1;
  // x = x binop expr; -> xrval binop expr
  // x = expr Op x; - > expr binop xrval;
  assert(X->isLValue() && "X of 'omp atomic update' is not lvalue");
  LValue XLValue = CGF.EmitLValue(E: X);
  RValue ExprRValue = CGF.EmitAnyExpr(E);
  // The two sides of UE are opaque values standing for the loaded 'x' and the
  // evaluated 'expr'; which is which depends on the source operand order.
  const auto *LHS = cast<OpaqueValueExpr>(Val: BOUE->getLHS()->IgnoreImpCasts());
  const auto *RHS = cast<OpaqueValueExpr>(Val: BOUE->getRHS()->IgnoreImpCasts());
  const OpaqueValueExpr *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
  const OpaqueValueExpr *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
  // Fallback generator: rebind the opaque values to the concrete rvalues and
  // emit UE (used by the cmpxchg loop when atomicrmw is not possible).
  auto &&Gen = [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) {
    CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
    CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
    return CGF.EmitAnyExpr(E: UE);
  };
  (void)CGF.EmitOMPAtomicSimpleUpdateExpr(
      X: XLValue, E: ExprRValue, BO: BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, CommonGen: Gen);
  CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, LHS: X);
  // OpenMP, 2.17.7, atomic Construct
  // If the write, update, or capture clause is specified and the release,
  // acq_rel, or seq_cst clause is specified then the strong flush on entry to
  // the atomic operation is also a release flush.
  switch (AO) {
  case llvm::AtomicOrdering::Release:
  case llvm::AtomicOrdering::AcquireRelease:
  case llvm::AtomicOrdering::SequentiallyConsistent:
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, Vars: {}, Loc,
                                         AO: llvm::AtomicOrdering::Release);
    break;
  case llvm::AtomicOrdering::Acquire:
  case llvm::AtomicOrdering::Monotonic:
    // No entry flush required.
    break;
  case llvm::AtomicOrdering::NotAtomic:
  case llvm::AtomicOrdering::Unordered:
    llvm_unreachable("Unexpected ordering.");
  }
}
6850
6851static RValue convertToType(CodeGenFunction &CGF, RValue Value,
6852 QualType SourceType, QualType ResType,
6853 SourceLocation Loc) {
6854 switch (CGF.getEvaluationKind(T: ResType)) {
6855 case TEK_Scalar:
6856 return RValue::get(
6857 V: convertToScalarValue(CGF, Val: Value, SrcType: SourceType, DestType: ResType, Loc));
6858 case TEK_Complex: {
6859 auto Res = convertToComplexValue(CGF, Val: Value, SrcType: SourceType, DestType: ResType, Loc);
6860 return RValue::getComplex(V1: Res.first, V2: Res.second);
6861 }
6862 case TEK_Aggregate:
6863 break;
6864 }
6865 llvm_unreachable("Must be a scalar or complex.");
6866}
6867
/// Emit 'omp atomic capture': update 'x' (with update expression \p UE, or a
/// plain write of 'expr' when \p UE is null) and capture into 'v' either the
/// old value of 'x' (postfix form) or the new value (prefix form).
static void emitOMPAtomicCaptureExpr(CodeGenFunction &CGF,
                                     llvm::AtomicOrdering AO,
                                     bool IsPostfixUpdate, const Expr *V,
                                     const Expr *X, const Expr *E,
                                     const Expr *UE, bool IsXLHSInRHSPart,
                                     SourceLocation Loc) {
  assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue");
  assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue");
  // Value that will finally be stored into 'v' (set by one of the paths
  // below).
  RValue NewVVal;
  LValue VLValue = CGF.EmitLValue(E: V);
  LValue XLValue = CGF.EmitLValue(E: X);
  RValue ExprRValue = CGF.EmitAnyExpr(E);
  QualType NewVValType;
  if (UE) {
    // 'x' is updated with some additional value.
    assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
           "Update expr in 'atomic capture' must be a binary operator.");
    const auto *BOUE = cast<BinaryOperator>(Val: UE->IgnoreImpCasts());
    // Update expressions are allowed to have the following forms:
    // x binop= expr; -> xrval + expr;
    // x++, ++x -> xrval + 1;
    // x--, --x -> xrval - 1;
    // x = x binop expr; -> xrval binop expr
    // x = expr Op x; - > expr binop xrval;
    const auto *LHS = cast<OpaqueValueExpr>(Val: BOUE->getLHS()->IgnoreImpCasts());
    const auto *RHS = cast<OpaqueValueExpr>(Val: BOUE->getRHS()->IgnoreImpCasts());
    const OpaqueValueExpr *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
    NewVValType = XRValExpr->getType();
    const OpaqueValueExpr *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
    // Fallback generator used by the cmpxchg loop; also records the captured
    // value: old 'x' for postfix, the update result for prefix.
    auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr,
                  IsPostfixUpdate](RValue XRValue) {
      CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
      CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
      RValue Res = CGF.EmitAnyExpr(E: UE);
      NewVVal = IsPostfixUpdate ? XRValue : Res;
      return Res;
    };
    auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
        X: XLValue, E: ExprRValue, BO: BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, CommonGen: Gen);
    CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, LHS: X);
    if (Res.first) {
      // 'atomicrmw' instruction was generated.
      if (IsPostfixUpdate) {
        // Use old value from 'atomicrmw'.
        NewVVal = Res.second;
      } else {
        // 'atomicrmw' does not provide new value, so evaluate it using old
        // value of 'x'.
        CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
        CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, Res.second);
        NewVVal = CGF.EmitAnyExpr(E: UE);
      }
    }
  } else {
    // 'x' is simply rewritten with some 'expr'.
    NewVValType = X->getType().getNonReferenceType();
    ExprRValue = convertToType(CGF, Value: ExprRValue, SourceType: E->getType(),
                               ResType: X->getType().getNonReferenceType(), Loc);
    // Fallback generator for the cmpxchg loop: the new value is 'expr', the
    // captured value is the old 'x'.
    auto &&Gen = [&NewVVal, ExprRValue](RValue XRValue) {
      NewVVal = XRValue;
      return ExprRValue;
    };
    // Try to perform atomicrmw xchg, otherwise simple exchange.
    auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
        X: XLValue, E: ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO,
        Loc, CommonGen: Gen);
    CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, LHS: X);
    if (Res.first) {
      // 'atomicrmw' instruction was generated.
      NewVVal = IsPostfixUpdate ? Res.second : ExprRValue;
    }
  }
  // Emit post-update store to 'v' of old/new 'x' value.
  CGF.emitOMPSimpleStore(LVal: VLValue, RVal: NewVVal, RValTy: NewVValType, Loc);
  CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, LHS: V);
  // OpenMP 5.1 removes the required flush for capture clause.
  if (CGF.CGM.getLangOpts().OpenMP < 51) {
    // OpenMP, 2.17.7, atomic Construct
    // If the write, update, or capture clause is specified and the release,
    // acq_rel, or seq_cst clause is specified then the strong flush on entry to
    // the atomic operation is also a release flush.
    // If the read or capture clause is specified and the acquire, acq_rel, or
    // seq_cst clause is specified then the strong flush on exit from the atomic
    // operation is also an acquire flush.
    switch (AO) {
    case llvm::AtomicOrdering::Release:
      CGF.CGM.getOpenMPRuntime().emitFlush(CGF, Vars: {}, Loc,
                                           AO: llvm::AtomicOrdering::Release);
      break;
    case llvm::AtomicOrdering::Acquire:
      CGF.CGM.getOpenMPRuntime().emitFlush(CGF, Vars: {}, Loc,
                                           AO: llvm::AtomicOrdering::Acquire);
      break;
    case llvm::AtomicOrdering::AcquireRelease:
    case llvm::AtomicOrdering::SequentiallyConsistent:
      CGF.CGM.getOpenMPRuntime().emitFlush(
          CGF, Vars: {}, Loc, AO: llvm::AtomicOrdering::AcquireRelease);
      break;
    case llvm::AtomicOrdering::Monotonic:
      break;
    case llvm::AtomicOrdering::NotAtomic:
    case llvm::AtomicOrdering::Unordered:
      llvm_unreachable("Unexpected ordering.");
    }
  }
}
6974
/// Emit 'omp atomic compare' via the OpenMPIRBuilder: a conditional update of
/// 'x' (equality compare-exchange, or min/max), optionally capturing results
/// into 'v' and/or 'r', with an optional distinct failure ordering \p FailAO.
static void emitOMPAtomicCompareExpr(
    CodeGenFunction &CGF, llvm::AtomicOrdering AO, llvm::AtomicOrdering FailAO,
    const Expr *X, const Expr *V, const Expr *R, const Expr *E, const Expr *D,
    const Expr *CE, bool IsXBinopExpr, bool IsPostfixUpdate, bool IsFailOnly,
    SourceLocation Loc) {
  llvm::OpenMPIRBuilder &OMPBuilder =
      CGF.CGM.getOpenMPRuntime().getOMPBuilder();

  // Classify the comparison: '==' selects a compare-exchange, '<'/'>' select
  // a min/max update.
  OMPAtomicCompareOp Op;
  assert(isa<BinaryOperator>(CE) && "CE is not a BinaryOperator");
  switch (cast<BinaryOperator>(Val: CE)->getOpcode()) {
  case BO_EQ:
    Op = OMPAtomicCompareOp::EQ;
    break;
  case BO_LT:
    Op = OMPAtomicCompareOp::MIN;
    break;
  case BO_GT:
    Op = OMPAtomicCompareOp::MAX;
    break;
  default:
    llvm_unreachable("unsupported atomic compare binary operator");
  }

  LValue XLVal = CGF.EmitLValue(E: X);
  Address XAddr = XLVal.getAddress();

  // Evaluate E (or D), converting the result to the type of 'x' when they
  // differ, while avoiding a redundant implicit cast already present in the
  // AST.
  auto EmitRValueWithCastIfNeeded = [&CGF, Loc](const Expr *X, const Expr *E) {
    if (X->getType() == E->getType())
      return CGF.EmitScalarExpr(E);
    const Expr *NewE = E->IgnoreImplicitAsWritten();
    llvm::Value *V = CGF.EmitScalarExpr(E: NewE);
    if (NewE->getType() == X->getType())
      return V;
    return CGF.EmitScalarConversion(Src: V, SrcTy: NewE->getType(), DstTy: X->getType(), Loc);
  };

  llvm::Value *EVal = EmitRValueWithCastIfNeeded(X, E);
  llvm::Value *DVal = D ? EmitRValueWithCastIfNeeded(X, D) : nullptr;
  // Constant integer operands may still need a width cast to x's storage
  // type.
  if (auto *CI = dyn_cast<llvm::ConstantInt>(Val: EVal))
    EVal = CGF.Builder.CreateIntCast(
        V: CI, DestTy: XLVal.getAddress().getElementType(),
        isSigned: E->getType()->hasSignedIntegerRepresentation());
  if (DVal)
    if (auto *CI = dyn_cast<llvm::ConstantInt>(Val: DVal))
      DVal = CGF.Builder.CreateIntCast(
          V: CI, DestTy: XLVal.getAddress().getElementType(),
          isSigned: D->getType()->hasSignedIntegerRepresentation());

  // Package 'x', and optionally 'v' and 'r', for the IR builder.
  llvm::OpenMPIRBuilder::AtomicOpValue XOpVal{
      .Var: XAddr.emitRawPointer(CGF), .ElemTy: XAddr.getElementType(),
      .IsSigned: X->getType()->hasSignedIntegerRepresentation(),
      .IsVolatile: X->getType().isVolatileQualified()};
  llvm::OpenMPIRBuilder::AtomicOpValue VOpVal, ROpVal;
  if (V) {
    LValue LV = CGF.EmitLValue(E: V);
    Address Addr = LV.getAddress();
    VOpVal = {.Var: Addr.emitRawPointer(CGF), .ElemTy: Addr.getElementType(),
              .IsSigned: V->getType()->hasSignedIntegerRepresentation(),
              .IsVolatile: V->getType().isVolatileQualified()};
  }
  if (R) {
    LValue LV = CGF.EmitLValue(E: R);
    Address Addr = LV.getAddress();
    ROpVal = {.Var: Addr.emitRawPointer(CGF), .ElemTy: Addr.getElementType(),
              .IsSigned: R->getType()->hasSignedIntegerRepresentation(),
              .IsVolatile: R->getType().isVolatileQualified()};
  }

  if (FailAO == llvm::AtomicOrdering::NotAtomic) {
    // fail clause was not mentioned on the
    // "#pragma omp atomic compare" construct.
    CGF.Builder.restoreIP(IP: OMPBuilder.createAtomicCompare(
        Loc: CGF.Builder, X&: XOpVal, V&: VOpVal, R&: ROpVal, E: EVal, D: DVal, AO, Op, IsXBinopExpr,
        IsPostfixUpdate, IsFailOnly));
  } else
    CGF.Builder.restoreIP(IP: OMPBuilder.createAtomicCompare(
        Loc: CGF.Builder, X&: XOpVal, V&: VOpVal, R&: ROpVal, E: EVal, D: DVal, AO, Op, IsXBinopExpr,
        IsPostfixUpdate, IsFailOnly, Failure: FailAO));
}
7055
7056static void emitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
7057 llvm::AtomicOrdering AO,
7058 llvm::AtomicOrdering FailAO, bool IsPostfixUpdate,
7059 const Expr *X, const Expr *V, const Expr *R,
7060 const Expr *E, const Expr *UE, const Expr *D,
7061 const Expr *CE, bool IsXLHSInRHSPart,
7062 bool IsFailOnly, SourceLocation Loc) {
7063 switch (Kind) {
7064 case OMPC_read:
7065 emitOMPAtomicReadExpr(CGF, AO, X, V, Loc);
7066 break;
7067 case OMPC_write:
7068 emitOMPAtomicWriteExpr(CGF, AO, X, E, Loc);
7069 break;
7070 case OMPC_unknown:
7071 case OMPC_update:
7072 emitOMPAtomicUpdateExpr(CGF, AO, X, E, UE, IsXLHSInRHSPart, Loc);
7073 break;
7074 case OMPC_capture:
7075 emitOMPAtomicCaptureExpr(CGF, AO, IsPostfixUpdate, V, X, E, UE,
7076 IsXLHSInRHSPart, Loc);
7077 break;
7078 case OMPC_compare: {
7079 emitOMPAtomicCompareExpr(CGF, AO, FailAO, X, V, R, E, D, CE,
7080 IsXBinopExpr: IsXLHSInRHSPart, IsPostfixUpdate, IsFailOnly, Loc);
7081 break;
7082 }
7083 default:
7084 llvm_unreachable("Clause is not allowed in 'omp atomic'.");
7085 }
7086}
7087
/// Emit an '#pragma omp atomic' directive: determine the memory ordering from
/// the clauses (or the runtime default), classify the atomic kind, and emit
/// the corresponding atomic operation.
void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
  llvm::AtomicOrdering AO = CGM.getOpenMPRuntime().getDefaultMemoryOrdering();
  // Fail Memory Clause Ordering.
  llvm::AtomicOrdering FailAO = llvm::AtomicOrdering::NotAtomic;
  bool MemOrderingSpecified = false;
  if (S.getSingleClause<OMPSeqCstClause>()) {
    AO = llvm::AtomicOrdering::SequentiallyConsistent;
    MemOrderingSpecified = true;
  } else if (S.getSingleClause<OMPAcqRelClause>()) {
    AO = llvm::AtomicOrdering::AcquireRelease;
    MemOrderingSpecified = true;
  } else if (S.getSingleClause<OMPAcquireClause>()) {
    AO = llvm::AtomicOrdering::Acquire;
    MemOrderingSpecified = true;
  } else if (S.getSingleClause<OMPReleaseClause>()) {
    AO = llvm::AtomicOrdering::Release;
    MemOrderingSpecified = true;
  } else if (S.getSingleClause<OMPRelaxedClause>()) {
    AO = llvm::AtomicOrdering::Monotonic;
    MemOrderingSpecified = true;
  }
  llvm::SmallSet<OpenMPClauseKind, 2> KindsEncountered;
  OpenMPClauseKind Kind = OMPC_unknown;
  for (const OMPClause *C : S.clauses()) {
    // Find first clause (skip seq_cst|acq_rel|aqcuire|release|relaxed clause,
    // if it is first).
    OpenMPClauseKind K = C->getClauseKind();
    // TBD: 'weak' clause support is not implemented; emission of the whole
    // directive is skipped. NOTE(review): confirm that dropping the directive
    // (rather than ignoring the clause) is the intended behavior.
    if (K == OMPC_weak)
      return;
    if (K == OMPC_seq_cst || K == OMPC_acq_rel || K == OMPC_acquire ||
        K == OMPC_release || K == OMPC_relaxed || K == OMPC_hint)
      continue;
    Kind = K;
    KindsEncountered.insert(V: K);
  }
  // We just need to correct Kind here. No need to set a bool saying it is
  // actually compare capture because we can tell from whether V and R are
  // nullptr.
  if (KindsEncountered.contains(V: OMPC_compare) &&
      KindsEncountered.contains(V: OMPC_capture))
    Kind = OMPC_compare;
  if (!MemOrderingSpecified) {
    // No explicit ordering clause: derive the ordering from the default
    // (requires/command-line) ordering, adjusted per atomic kind.
    llvm::AtomicOrdering DefaultOrder =
        CGM.getOpenMPRuntime().getDefaultMemoryOrdering();
    if (DefaultOrder == llvm::AtomicOrdering::Monotonic ||
        DefaultOrder == llvm::AtomicOrdering::SequentiallyConsistent ||
        (DefaultOrder == llvm::AtomicOrdering::AcquireRelease &&
         Kind == OMPC_capture)) {
      AO = DefaultOrder;
    } else if (DefaultOrder == llvm::AtomicOrdering::AcquireRelease) {
      // acq_rel default degrades to release for writes/updates and acquire
      // for reads, since those operations are one-directional.
      if (Kind == OMPC_unknown || Kind == OMPC_update || Kind == OMPC_write) {
        AO = llvm::AtomicOrdering::Release;
      } else if (Kind == OMPC_read) {
        assert(Kind == OMPC_read && "Unexpected atomic kind.");
        AO = llvm::AtomicOrdering::Acquire;
      }
    }
  }

  // Map an optional 'fail(<ordering>)' clause on 'atomic compare' to the
  // cmpxchg failure ordering.
  if (KindsEncountered.contains(V: OMPC_compare) &&
      KindsEncountered.contains(V: OMPC_fail)) {
    Kind = OMPC_compare;
    const auto *FailClause = S.getSingleClause<OMPFailClause>();
    if (FailClause) {
      OpenMPClauseKind FailParameter = FailClause->getFailParameter();
      if (FailParameter == llvm::omp::OMPC_relaxed)
        FailAO = llvm::AtomicOrdering::Monotonic;
      else if (FailParameter == llvm::omp::OMPC_acquire)
        FailAO = llvm::AtomicOrdering::Acquire;
      else if (FailParameter == llvm::omp::OMPC_seq_cst)
        FailAO = llvm::AtomicOrdering::SequentiallyConsistent;
    }
  }

  LexicalScope Scope(*this, S.getSourceRange());
  EmitStopPoint(S: S.getAssociatedStmt());
  emitOMPAtomicExpr(CGF&: *this, Kind, AO, FailAO, IsPostfixUpdate: S.isPostfixUpdate(), X: S.getX(),
                    V: S.getV(), R: S.getR(), E: S.getExpr(), UE: S.getUpdateExpr(),
                    D: S.getD(), CE: S.getCondExpr(), IsXLHSInRHSPart: S.isXLHSInRHSPart(),
                    IsFailOnly: S.isFailOnly(), Loc: S.getBeginLoc());
}
7170
/// Common driver for all 'target' directives: on the device, emit the region
/// inline; on the host, outline the region into a kernel, evaluate the
/// if/device clauses, and emit the offloading call.
static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
                                         const OMPExecutableDirective &S,
                                         const RegionCodeGenTy &CodeGen) {
  assert(isOpenMPTargetExecutionDirective(S.getDirectiveKind()));
  CodeGenModule &CGM = CGF.CGM;

  // On device emit this construct as inlined code.
  if (CGM.getLangOpts().OpenMPIsTargetDevice) {
    OMPLexicalScope Scope(CGF, S, OMPD_target);
    CGM.getOpenMPRuntime().emitInlinedDirective(
        CGF, InnermostKind: OMPD_target, CodeGen: [&S](CodeGenFunction &CGF, PrePostActionTy &) {
          CGF.EmitStmt(S: S.getInnermostCapturedStmt()->getCapturedStmt());
        });
    return;
  }

  auto LPCRegion = CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF, S);
  llvm::Function *Fn = nullptr;
  llvm::Constant *FnID = nullptr;

  const Expr *IfCond = nullptr;
  // Check for the at most one if clause associated with the target region.
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_target) {
      IfCond = C->getCondition();
      break;
    }
  }

  // Check if we have any device clause associated with the directive.
  llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device(
      nullptr, OMPC_DEVICE_unknown);
  if (auto *C = S.getSingleClause<OMPDeviceClause>())
    Device.setPointerAndInt(PtrVal: C->getDevice(), IntVal: C->getModifier());

  // Check if we have an if clause whose conditional always evaluates to false
  // or if we do not have any targets specified. If so the target region is not
  // an offload entry point.
  bool IsOffloadEntry = true;
  if (IfCond) {
    bool Val;
    if (CGF.ConstantFoldsToSimpleInteger(Cond: IfCond, Result&: Val) && !Val)
      IsOffloadEntry = false;
  }
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    IsOffloadEntry = false;

  // With -fopenmp-offload-mandatory a host-only fallback is an error.
  if (CGM.getLangOpts().OpenMPOffloadMandatory && !IsOffloadEntry) {
    CGM.getDiags().Report(DiagID: diag::err_missing_mandatory_offloading);
  }

  assert(CGF.CurFuncDecl && "No parent declaration for target region!");
  StringRef ParentName;
  // In case we have Ctors/Dtors we use the complete type variant to produce
  // the mangling of the device outlined kernel.
  if (const auto *D = dyn_cast<CXXConstructorDecl>(Val: CGF.CurFuncDecl))
    ParentName = CGM.getMangledName(GD: GlobalDecl(D, Ctor_Complete));
  else if (const auto *D = dyn_cast<CXXDestructorDecl>(Val: CGF.CurFuncDecl))
    ParentName = CGM.getMangledName(GD: GlobalDecl(D, Dtor_Complete));
  else
    ParentName =
        CGM.getMangledName(GD: GlobalDecl(cast<FunctionDecl>(Val: CGF.CurFuncDecl)));

  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(D: S, ParentName, OutlinedFn&: Fn, OutlinedFnID&: FnID,
                                                    IsOffloadEntry, CodeGen);
  OMPLexicalScope Scope(CGF, S, OMPD_task);
  // Emits the trip count of the associated loop (used by the runtime to size
  // the kernel launch), or null when not an offload entry.
  auto &&SizeEmitter =
      [IsOffloadEntry](CodeGenFunction &CGF,
                       const OMPLoopDirective &D) -> llvm::Value * {
    if (IsOffloadEntry) {
      // NOTE(review): this constructs and immediately destroys a temporary
      // OMPLoopScope; presumably only the constructor's side effects are
      // needed before emitting the count — confirm a named scope covering the
      // EmitScalarExpr below isn't intended.
      OMPLoopScope(CGF, D);
      // Emit calculation of the iterations count.
      llvm::Value *NumIterations = CGF.EmitScalarExpr(E: D.getNumIterations());
      NumIterations = CGF.Builder.CreateIntCast(V: NumIterations, DestTy: CGF.Int64Ty,
                                                /*isSigned=*/false);
      return NumIterations;
    }
    return nullptr;
  };
  CGM.getOpenMPRuntime().emitTargetCall(CGF, D: S, OutlinedFn: Fn, OutlinedFnID: FnID, IfCond, Device,
                                        SizeEmitter);
}
7255
/// Emit the body of a 'target' region: privatize firstprivate/private
/// variables, then emit the captured statement.
static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S,
                             PrePostActionTy &Action) {
  Action.Enter(CGF);
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  (void)CGF.EmitOMPFirstprivateClause(D: S, PrivateScope);
  CGF.EmitOMPPrivateClause(D: S, PrivateScope);
  (void)PrivateScope.Privatize();
  // Presumably remaps lambda-captured data for the device; see
  // adjustTargetSpecificDataForLambdas for the exact semantics.
  if (isOpenMPTargetExecutionDirective(DKind: S.getDirectiveKind()))
    CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, D: S);

  CGF.EmitStmt(S: S.getCapturedStmt(RegionKind: OMPD_target)->getCapturedStmt());
  CGF.EnsureInsertPoint();
}
7269
7270void CodeGenFunction::EmitOMPTargetDeviceFunction(CodeGenModule &CGM,
7271 StringRef ParentName,
7272 const OMPTargetDirective &S) {
7273 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7274 emitTargetRegion(CGF, S, Action);
7275 };
7276 llvm::Function *Fn;
7277 llvm::Constant *Addr;
7278 // Emit target region as a standalone region.
7279 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
7280 D: S, ParentName, OutlinedFn&: Fn, OutlinedFnID&: Addr, /*IsOffloadEntry=*/true, CodeGen);
7281 assert(Fn && Addr && "Target device function emission failed.");
7282}
7283
7284void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) {
7285 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7286 emitTargetRegion(CGF, S, Action);
7287 };
7288 emitCommonOMPTargetDirective(CGF&: *this, S, CodeGen);
7289}
7290
/// Common driver for 'teams' directives: outline the teams region, emit the
/// num_teams/thread_limit runtime call if present, then emit the teams call
/// with the captured variables.
static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &S,
                                        OpenMPDirectiveKind InnermostKind,
                                        const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = S.getCapturedStmt(RegionKind: OMPD_teams);
  llvm::Function *OutlinedFn =
      CGF.CGM.getOpenMPRuntime().emitTeamsOutlinedFunction(
          CGF, D: S, ThreadIDVar: *CS->getCapturedDecl()->param_begin(), InnermostKind,
          CodeGen);

  // Forward num_teams/thread_limit to the runtime before the teams call.
  const auto *NT = S.getSingleClause<OMPNumTeamsClause>();
  const auto *TL = S.getSingleClause<OMPThreadLimitClause>();
  if (NT || TL) {
    const Expr *NumTeams = NT ? NT->getNumTeams().front() : nullptr;
    const Expr *ThreadLimit = TL ? TL->getThreadLimit().front() : nullptr;

    CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeams, ThreadLimit,
                                                  Loc: S.getBeginLoc());
  }

  OMPTeamsScope Scope(CGF, S);
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  CGF.GenerateOpenMPCapturedVars(S: *CS, CapturedVars);
  CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, D: S, Loc: S.getBeginLoc(), OutlinedFn,
                                           CapturedVars);
}
7317
/// Emit code for a standalone '#pragma omp teams' directive: privatizes
/// firstprivate/private/reduction variables, emits the region body, and
/// finalizes reductions.
void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) {
  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    // Privatization must complete before the body is emitted so references
    // inside the region resolve to the private copies.
    (void)CGF.EmitOMPFirstprivateClause(D: S, PrivateScope);
    CGF.EmitOMPPrivateClause(D: S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(D: S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.EmitStmt(S: S.getCapturedStmt(RegionKind: OMPD_teams)->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(CGF&: *this, S, InnermostKind: OMPD_distribute, CodeGen);
  // Post-update expressions of reduction clauses run after the region.
  emitPostUpdateForReductionClause(CGF&: *this, D: S,
                                   CondGen: [](CodeGenFunction &) { return nullptr; });
}
7334
/// Emit the body of a '#pragma omp target teams' region: the teams part is
/// emitted as a standalone region inside the target region.
static void emitTargetTeamsRegion(CodeGenFunction &CGF, PrePostActionTy &Action,
                                  const OMPTargetTeamsDirective &S) {
  auto *CS = S.getCapturedStmt(RegionKind: OMPD_teams);
  Action.Enter(CGF);
  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    (void)CGF.EmitOMPFirstprivateClause(D: S, PrivateScope);
    CGF.EmitOMPPrivateClause(D: S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(D: S, PrivateScope);
    (void)PrivateScope.Privatize();
    // Lambdas captured into a target region need their captures remapped to
    // device-visible data.
    if (isOpenMPTargetExecutionDirective(DKind: S.getDirectiveKind()))
      CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, D: S);
    CGF.EmitStmt(S: CS->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(CGF, S, InnermostKind: OMPD_teams, CodeGen);
  emitPostUpdateForReductionClause(CGF, D: S,
                                   CondGen: [](CodeGenFunction &) { return nullptr; });
}
7356
7357void CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
7358 CodeGenModule &CGM, StringRef ParentName,
7359 const OMPTargetTeamsDirective &S) {
7360 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7361 emitTargetTeamsRegion(CGF, Action, S);
7362 };
7363 llvm::Function *Fn;
7364 llvm::Constant *Addr;
7365 // Emit target region as a standalone region.
7366 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
7367 D: S, ParentName, OutlinedFn&: Fn, OutlinedFnID&: Addr, /*IsOffloadEntry=*/true, CodeGen);
7368 assert(Fn && Addr && "Target device function emission failed.");
7369}
7370
7371void CodeGenFunction::EmitOMPTargetTeamsDirective(
7372 const OMPTargetTeamsDirective &S) {
7373 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7374 emitTargetTeamsRegion(CGF, Action, S);
7375 };
7376 emitCommonOMPTargetDirective(CGF&: *this, S, CodeGen);
7377}
7378
/// Emit the body of a '#pragma omp target teams distribute' region: the
/// distribute loop is emitted as an inlined directive inside a standalone
/// teams region.
static void
emitTargetTeamsDistributeRegion(CodeGenFunction &CGF, PrePostActionTy &Action,
                                const OMPTargetTeamsDistributeDirective &S) {
  Action.Enter(CGF);
  // Generator for the distribute loop itself.
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitOMPLoopBodyWithStopPoint, IncExpr: S.getInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    // Reduction privates must be set up before the loop body is emitted.
    CGF.EmitOMPReductionClauseInit(D: S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, InnermostKind: OMPD_distribute,
                                                    CodeGen: CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(CGF, S, InnermostKind: OMPD_distribute, CodeGen);
  emitPostUpdateForReductionClause(CGF, D: S,
                                   CondGen: [](CodeGenFunction &) { return nullptr; });
}
7402
7403void CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
7404 CodeGenModule &CGM, StringRef ParentName,
7405 const OMPTargetTeamsDistributeDirective &S) {
7406 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7407 emitTargetTeamsDistributeRegion(CGF, Action, S);
7408 };
7409 llvm::Function *Fn;
7410 llvm::Constant *Addr;
7411 // Emit target region as a standalone region.
7412 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
7413 D: S, ParentName, OutlinedFn&: Fn, OutlinedFnID&: Addr, /*IsOffloadEntry=*/true, CodeGen);
7414 assert(Fn && Addr && "Target device function emission failed.");
7415}
7416
7417void CodeGenFunction::EmitOMPTargetTeamsDistributeDirective(
7418 const OMPTargetTeamsDistributeDirective &S) {
7419 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7420 emitTargetTeamsDistributeRegion(CGF, Action, S);
7421 };
7422 emitCommonOMPTargetDirective(CGF&: *this, S, CodeGen);
7423}
7424
/// Emit the body of a '#pragma omp target teams distribute simd' region.
/// Mirrors emitTargetTeamsDistributeRegion but outlines with the
/// 'distribute simd' innermost kind.
static void emitTargetTeamsDistributeSimdRegion(
    CodeGenFunction &CGF, PrePostActionTy &Action,
    const OMPTargetTeamsDistributeSimdDirective &S) {
  Action.Enter(CGF);
  // Generator for the distribute loop itself.
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitOMPLoopBodyWithStopPoint, IncExpr: S.getInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(D: S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, InnermostKind: OMPD_distribute,
                                                    CodeGen: CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(CGF, S, InnermostKind: OMPD_distribute_simd, CodeGen);
  emitPostUpdateForReductionClause(CGF, D: S,
                                   CondGen: [](CodeGenFunction &) { return nullptr; });
}
7448
7449void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
7450 CodeGenModule &CGM, StringRef ParentName,
7451 const OMPTargetTeamsDistributeSimdDirective &S) {
7452 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7453 emitTargetTeamsDistributeSimdRegion(CGF, Action, S);
7454 };
7455 llvm::Function *Fn;
7456 llvm::Constant *Addr;
7457 // Emit target region as a standalone region.
7458 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
7459 D: S, ParentName, OutlinedFn&: Fn, OutlinedFnID&: Addr, /*IsOffloadEntry=*/true, CodeGen);
7460 assert(Fn && Addr && "Target device function emission failed.");
7461}
7462
7463void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDirective(
7464 const OMPTargetTeamsDistributeSimdDirective &S) {
7465 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7466 emitTargetTeamsDistributeSimdRegion(CGF, Action, S);
7467 };
7468 emitCommonOMPTargetDirective(CGF&: *this, S, CodeGen);
7469}
7470
/// Emit code for '#pragma omp teams distribute': the distribute loop is
/// emitted as an inlined directive inside a standalone teams region.
void CodeGenFunction::EmitOMPTeamsDistributeDirective(
    const OMPTeamsDistributeDirective &S) {

  // Generator for the distribute loop itself.
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitOMPLoopBodyWithStopPoint, IncExpr: S.getInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(D: S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, InnermostKind: OMPD_distribute,
                                                    CodeGen: CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(CGF&: *this, S, InnermostKind: OMPD_distribute, CodeGen);
  // Post-update expressions of reduction clauses run after the region.
  emitPostUpdateForReductionClause(CGF&: *this, D: S,
                                   CondGen: [](CodeGenFunction &) { return nullptr; });
}
7493
/// Emit code for '#pragma omp teams distribute simd'. Note the inlined
/// directive kind here is OMPD_simd (unlike the non-simd variant, which uses
/// OMPD_distribute), while the teams outlining uses OMPD_distribute_simd.
void CodeGenFunction::EmitOMPTeamsDistributeSimdDirective(
    const OMPTeamsDistributeSimdDirective &S) {
  // Generator for the distribute loop itself.
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitOMPLoopBodyWithStopPoint, IncExpr: S.getInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(D: S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, InnermostKind: OMPD_simd,
                                                    CodeGen: CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(CGF&: *this, S, InnermostKind: OMPD_distribute_simd, CodeGen);
  emitPostUpdateForReductionClause(CGF&: *this, D: S,
                                   CondGen: [](CodeGenFunction &) { return nullptr; });
}
7515
/// Emit code for '#pragma omp teams distribute parallel for': the distribute
/// loop emits an inner parallel-for region per chunk, driven by the
/// distribute increment.
void CodeGenFunction::EmitOMPTeamsDistributeParallelForDirective(
    const OMPTeamsDistributeParallelForDirective &S) {
  // Generator for the distribute loop; the inner parallel region is emitted
  // per combined-construct rules.
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitInnerParallelForWhenCombined,
                              IncExpr: S.getDistInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(D: S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, InnermostKind: OMPD_distribute,
                                                    CodeGen: CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(CGF&: *this, S, InnermostKind: OMPD_distribute_parallel_for, CodeGen);
  emitPostUpdateForReductionClause(CGF&: *this, D: S,
                                   CondGen: [](CodeGenFunction &) { return nullptr; });
}
7538
/// Emit code for '#pragma omp teams distribute parallel for simd'. Same shape
/// as the non-simd variant, but the teams outlining uses the
/// distribute_parallel_for_simd innermost kind.
void CodeGenFunction::EmitOMPTeamsDistributeParallelForSimdDirective(
    const OMPTeamsDistributeParallelForSimdDirective &S) {
  // Generator for the distribute loop; the inner parallel region is emitted
  // per combined-construct rules.
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitInnerParallelForWhenCombined,
                              IncExpr: S.getDistInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(D: S, PrivateScope);
    (void)PrivateScope.Privatize();
    // 'cancel' is not permitted in this combined construct.
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
        CGF, InnermostKind: OMPD_distribute, CodeGen: CodeGenDistribute, /*HasCancel=*/false);
    CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(CGF&: *this, S, InnermostKind: OMPD_distribute_parallel_for_simd,
                              CodeGen);
  emitPostUpdateForReductionClause(CGF&: *this, D: S,
                                   CondGen: [](CodeGenFunction &) { return nullptr; });
}
7562
/// Emit code for '#pragma omp interop': lowers init/destroy/use clauses into
/// the corresponding OpenMPIRBuilder runtime calls, forwarding any device,
/// depend, and nowait information.
void CodeGenFunction::EmitOMPInteropDirective(const OMPInteropDirective &S) {
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  // Optional arguments shared by all interop runtime calls below.
  llvm::Value *Device = nullptr;
  llvm::Value *NumDependences = nullptr;
  llvm::Value *DependenceList = nullptr;

  if (const auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = EmitScalarExpr(E: C->getDevice());

  // Build list and emit dependences
  OMPTaskDataTy Data;
  buildDependences(S, Data);
  if (!Data.Dependences.empty()) {
    Address DependenciesArray = Address::invalid();
    std::tie(args&: NumDependences, args&: DependenciesArray) =
        CGM.getOpenMPRuntime().emitDependClause(CGF&: *this, Dependencies: Data.Dependences,
                                                Loc: S.getBeginLoc());
    DependenceList = DependenciesArray.emitRawPointer(CGF&: *this);
  }
  Data.HasNowaitClause = S.hasClausesOfKind<OMPNowaitClause>();

  // Sema guarantees 'nowait' only appears together with an action clause.
  assert(!(Data.HasNowaitClause && !(S.getSingleClause<OMPInitClause>() ||
                                     S.getSingleClause<OMPDestroyClause>() ||
                                     S.getSingleClause<OMPUseClause>())) &&
         "OMPNowaitClause clause is used separately in OMPInteropDirective.");

  auto ItOMPInitClause = S.getClausesOfKind<OMPInitClause>();
  if (!ItOMPInitClause.empty()) {
    // Look at the multiple init clauses
    for (const OMPInitClause *C : ItOMPInitClause) {
      llvm::Value *InteropvarPtr =
          EmitLValue(E: C->getInteropVar()).getPointer(CGF&: *this);
      llvm::omp::OMPInteropType InteropType =
          llvm::omp::OMPInteropType::Unknown;
      // The interop-type is either 'target' or 'targetsync'; anything else
      // would have been rejected by Sema.
      if (C->getIsTarget()) {
        InteropType = llvm::omp::OMPInteropType::Target;
      } else {
        assert(C->getIsTargetSync() &&
               "Expected interop-type target/targetsync");
        InteropType = llvm::omp::OMPInteropType::TargetSync;
      }
      OMPBuilder.createOMPInteropInit(Loc: Builder, InteropVar: InteropvarPtr, InteropType,
                                      Device, NumDependences, DependenceAddress: DependenceList,
                                      HaveNowaitClause: Data.HasNowaitClause);
    }
  }
  auto ItOMPDestroyClause = S.getClausesOfKind<OMPDestroyClause>();
  if (!ItOMPDestroyClause.empty()) {
    // Look at the multiple destroy clauses
    for (const OMPDestroyClause *C : ItOMPDestroyClause) {
      llvm::Value *InteropvarPtr =
          EmitLValue(E: C->getInteropVar()).getPointer(CGF&: *this);
      OMPBuilder.createOMPInteropDestroy(Loc: Builder, InteropVar: InteropvarPtr, Device,
                                         NumDependences, DependenceAddress: DependenceList,
                                         HaveNowaitClause: Data.HasNowaitClause);
    }
  }
  auto ItOMPUseClause = S.getClausesOfKind<OMPUseClause>();
  if (!ItOMPUseClause.empty()) {
    // Look at the multiple use clauses
    for (const OMPUseClause *C : ItOMPUseClause) {
      llvm::Value *InteropvarPtr =
          EmitLValue(E: C->getInteropVar()).getPointer(CGF&: *this);
      OMPBuilder.createOMPInteropUse(Loc: Builder, InteropVar: InteropvarPtr, Device,
                                     NumDependences, DependenceAddress: DependenceList,
                                     HaveNowaitClause: Data.HasNowaitClause);
    }
  }
}
7632
/// Emit the body of a '#pragma omp target teams distribute parallel for'
/// region: a distribute loop with a combined inner parallel-for, wrapped in a
/// standalone teams region.
static void emitTargetTeamsDistributeParallelForRegion(
    CodeGenFunction &CGF, const OMPTargetTeamsDistributeParallelForDirective &S,
    PrePostActionTy &Action) {
  Action.Enter(CGF);
  // Generator for the distribute loop; the inner parallel region is emitted
  // per combined-construct rules.
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitInnerParallelForWhenCombined,
                              IncExpr: S.getDistInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                                 PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(D: S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
        CGF, InnermostKind: OMPD_distribute, CodeGen: CodeGenDistribute, /*HasCancel=*/false);
    CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_teams);
  };

  emitCommonOMPTeamsDirective(CGF, S, InnermostKind: OMPD_distribute_parallel_for,
                              CodeGen: CodeGenTeams);
  emitPostUpdateForReductionClause(CGF, D: S,
                                   CondGen: [](CodeGenFunction &) { return nullptr; });
}
7659
7660void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
7661 CodeGenModule &CGM, StringRef ParentName,
7662 const OMPTargetTeamsDistributeParallelForDirective &S) {
7663 // Emit SPMD target teams distribute parallel for region as a standalone
7664 // region.
7665 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7666 emitTargetTeamsDistributeParallelForRegion(CGF, S, Action);
7667 };
7668 llvm::Function *Fn;
7669 llvm::Constant *Addr;
7670 // Emit target region as a standalone region.
7671 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
7672 D: S, ParentName, OutlinedFn&: Fn, OutlinedFnID&: Addr, /*IsOffloadEntry=*/true, CodeGen);
7673 assert(Fn && Addr && "Target device function emission failed.");
7674}
7675
7676void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDirective(
7677 const OMPTargetTeamsDistributeParallelForDirective &S) {
7678 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7679 emitTargetTeamsDistributeParallelForRegion(CGF, S, Action);
7680 };
7681 emitCommonOMPTargetDirective(CGF&: *this, S, CodeGen);
7682}
7683
/// Emit the body of a '#pragma omp target teams distribute parallel for simd'
/// region. Same structure as the non-simd variant, but the teams outlining
/// uses the distribute_parallel_for_simd innermost kind.
static void emitTargetTeamsDistributeParallelForSimdRegion(
    CodeGenFunction &CGF,
    const OMPTargetTeamsDistributeParallelForSimdDirective &S,
    PrePostActionTy &Action) {
  Action.Enter(CGF);
  // Generator for the distribute loop; the inner parallel region is emitted
  // per combined-construct rules.
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitInnerParallelForWhenCombined,
                              IncExpr: S.getDistInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                                 PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(D: S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
        CGF, InnermostKind: OMPD_distribute, CodeGen: CodeGenDistribute, /*HasCancel=*/false);
    CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_teams);
  };

  emitCommonOMPTeamsDirective(CGF, S, InnermostKind: OMPD_distribute_parallel_for_simd,
                              CodeGen: CodeGenTeams);
  emitPostUpdateForReductionClause(CGF, D: S,
                                   CondGen: [](CodeGenFunction &) { return nullptr; });
}
7711
7712void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
7713 CodeGenModule &CGM, StringRef ParentName,
7714 const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
7715 // Emit SPMD target teams distribute parallel for simd region as a standalone
7716 // region.
7717 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7718 emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action);
7719 };
7720 llvm::Function *Fn;
7721 llvm::Constant *Addr;
7722 // Emit target region as a standalone region.
7723 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
7724 D: S, ParentName, OutlinedFn&: Fn, OutlinedFnID&: Addr, /*IsOffloadEntry=*/true, CodeGen);
7725 assert(Fn && Addr && "Target device function emission failed.");
7726}
7727
7728void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDirective(
7729 const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
7730 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7731 emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action);
7732 };
7733 emitCommonOMPTargetDirective(CGF&: *this, S, CodeGen);
7734}
7735
/// Emit code for '#pragma omp cancellation point': a runtime call that checks
/// whether cancellation has been activated for the given region kind.
void CodeGenFunction::EmitOMPCancellationPointDirective(
    const OMPCancellationPointDirective &S) {
  CGM.getOpenMPRuntime().emitCancellationPointCall(CGF&: *this, Loc: S.getBeginLoc(),
                                                   CancelRegion: S.getCancelRegion());
}
7741
/// Emit code for '#pragma omp cancel'. Uses the OpenMPIRBuilder path for the
/// region kinds it supports; otherwise falls back to the classic runtime
/// emission.
void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) {
  // Pick the first 'if' clause that applies to 'cancel' (unmodified or with
  // the 'cancel' name modifier).
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_cancel) {
      IfCond = C->getCondition();
      break;
    }
  }
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
    // TODO: This check is necessary as we only generate `omp parallel` through
    // the OpenMPIRBuilder for now.
    if (S.getCancelRegion() == OMPD_parallel ||
        S.getCancelRegion() == OMPD_sections ||
        S.getCancelRegion() == OMPD_section) {
      llvm::Value *IfCondition = nullptr;
      if (IfCond)
        IfCondition = EmitScalarExpr(E: IfCond,
                                     /*IgnoreResultAssign=*/true);
      llvm::OpenMPIRBuilder::InsertPointTy AfterIP = cantFail(
          ValOrErr: OMPBuilder.createCancel(Loc: Builder, IfCondition, CanceledDirective: S.getCancelRegion()));
      // Continue emitting after the cancellation barrier block.
      return Builder.restoreIP(IP: AfterIP);
    }
  }

  // Fallback: classic runtime-based cancel emission.
  CGM.getOpenMPRuntime().emitCancelCall(CGF&: *this, Loc: S.getBeginLoc(), IfCond,
                                        CancelRegion: S.getCancelRegion());
}
7771
/// Return the branch target used when \p Kind is cancelled: the function's
/// return block for outlined regions (parallel/task-like constructs, where
/// leaving the outlined function ends the region), or the current cancel
/// exit block for loop/sections constructs.
CodeGenFunction::JumpDest
CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) {
  if (Kind == OMPD_parallel || Kind == OMPD_task ||
      Kind == OMPD_target_parallel || Kind == OMPD_taskloop ||
      Kind == OMPD_master_taskloop || Kind == OMPD_parallel_master_taskloop)
    return ReturnBlock;
  // All remaining cancellable kinds must be loop/sections constructs.
  assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections ||
         Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for ||
         Kind == OMPD_distribute_parallel_for ||
         Kind == OMPD_target_parallel_for ||
         Kind == OMPD_teams_distribute_parallel_for ||
         Kind == OMPD_target_teams_distribute_parallel_for);
  return OMPCancelStack.getExitBlock();
}
7786
/// Privatize the variables of a 'use_device_ptr' clause: each listed variable
/// is remapped to the device address recorded in \p CaptureDeviceAddrMap by
/// the mapping logic.
void CodeGenFunction::EmitOMPUseDevicePtrClause(
    const OMPUseDevicePtrClause &C, OMPPrivateScope &PrivateScope,
    const llvm::DenseMap<const ValueDecl *, llvm::Value *>
        CaptureDeviceAddrMap) {
  // A variable may appear in the clause more than once; privatize it once.
  llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
  for (const Expr *OrigVarIt : C.varlist()) {
    const auto *OrigVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: OrigVarIt)->getDecl());
    if (!Processed.insert(V: OrigVD).second)
      continue;

    // In order to identify the right initializer we need to match the
    // declaration used by the mapping logic. In some cases we may get
    // OMPCapturedExprDecl that refers to the original declaration.
    const ValueDecl *MatchingVD = OrigVD;
    if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(Val: MatchingVD)) {
      // OMPCapturedExprDecl are used to privative fields of the current
      // structure.
      const auto *ME = cast<MemberExpr>(Val: OED->getInit());
      assert(isa<CXXThisExpr>(ME->getBase()->IgnoreImpCasts()) &&
             "Base should be the current struct!");
      MatchingVD = ME->getMemberDecl();
    }

    // If we don't have information about the current list item, move on to
    // the next one.
    auto InitAddrIt = CaptureDeviceAddrMap.find(Val: MatchingVD);
    if (InitAddrIt == CaptureDeviceAddrMap.end())
      continue;

    llvm::Type *Ty = ConvertTypeForMem(T: OrigVD->getType().getNonReferenceType());

    // Return the address of the private variable.
    // NOTE: the alignment used is that of 'void *' since the runtime hands
    // back an opaque pointer value.
    bool IsRegistered = PrivateScope.addPrivate(
        LocalVD: OrigVD,
        Addr: Address(InitAddrIt->second, Ty,
                getContext().getTypeAlignInChars(T: getContext().VoidPtrTy)));
    assert(IsRegistered && "firstprivate var already registered as private");
    // Silence the warning about unused variable.
    (void)IsRegistered;
  }
}
7828
7829static const VarDecl *getBaseDecl(const Expr *Ref) {
7830 const Expr *Base = Ref->IgnoreParenImpCasts();
7831 while (const auto *OASE = dyn_cast<ArraySectionExpr>(Val: Base))
7832 Base = OASE->getBase()->IgnoreParenImpCasts();
7833 while (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Val: Base))
7834 Base = ASE->getBase()->IgnoreParenImpCasts();
7835 return cast<VarDecl>(Val: cast<DeclRefExpr>(Val: Base)->getDecl());
7836}
7837
/// Privatize the variables of a 'use_device_addr' clause, remapping each
/// listed variable to the device address recorded in \p CaptureDeviceAddrMap.
/// Unlike use_device_ptr, list items may be array sections/subscripts, hence
/// getBaseDecl().
void CodeGenFunction::EmitOMPUseDeviceAddrClause(
    const OMPUseDeviceAddrClause &C, OMPPrivateScope &PrivateScope,
    const llvm::DenseMap<const ValueDecl *, llvm::Value *>
        CaptureDeviceAddrMap) {
  // A variable may appear in the clause more than once; privatize it once.
  llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
  for (const Expr *Ref : C.varlist()) {
    const VarDecl *OrigVD = getBaseDecl(Ref);
    if (!Processed.insert(V: OrigVD).second)
      continue;
    // In order to identify the right initializer we need to match the
    // declaration used by the mapping logic. In some cases we may get
    // OMPCapturedExprDecl that refers to the original declaration.
    const ValueDecl *MatchingVD = OrigVD;
    if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(Val: MatchingVD)) {
      // OMPCapturedExprDecl are used to privative fields of the current
      // structure.
      const auto *ME = cast<MemberExpr>(Val: OED->getInit());
      assert(isa<CXXThisExpr>(ME->getBase()) &&
             "Base should be the current struct!");
      MatchingVD = ME->getMemberDecl();
    }

    // If we don't have information about the current list item, move on to
    // the next one.
    auto InitAddrIt = CaptureDeviceAddrMap.find(Val: MatchingVD);
    if (InitAddrIt == CaptureDeviceAddrMap.end())
      continue;

    llvm::Type *Ty = ConvertTypeForMem(T: OrigVD->getType().getNonReferenceType());

    // The runtime hands back an opaque pointer; use 'void *' alignment.
    Address PrivAddr =
        Address(InitAddrIt->second, Ty,
                getContext().getTypeAlignInChars(T: getContext().VoidPtrTy));
    // For declrefs and variable length array need to load the pointer for
    // correct mapping, since the pointer to the data was passed to the runtime.
    if (isa<DeclRefExpr>(Val: Ref->IgnoreParenImpCasts()) ||
        MatchingVD->getType()->isArrayType()) {
      QualType PtrTy = getContext().getPointerType(
          T: OrigVD->getType().getNonReferenceType());
      PrivAddr =
          EmitLoadOfPointer(Ptr: PrivAddr.withElementType(ElemTy: ConvertTypeForMem(T: PtrTy)),
                            PtrTy: PtrTy->castAs<PointerType>());
    }

    (void)PrivateScope.addPrivate(LocalVD: OrigVD, Addr: PrivAddr);
  }
}
7885
// Generate the instructions for '#pragma omp target data' directive.
//
// Emits the begin/end mapping calls around the region and handles
// use_device_ptr/use_device_addr privatization. The runtime codegen can
// toggle privatization on/off through the PrePostActionTy hook below.
void CodeGenFunction::EmitOMPTargetDataDirective(
    const OMPTargetDataDirective &S) {
  // Emit vtable only from host for target data directive.
  if (!CGM.getLangOpts().OpenMPIsTargetDevice)
    CGM.getOpenMPRuntime().registerVTable(D: S);

  CGOpenMPRuntime::TargetDataInfo Info(/*RequiresDevicePointerInfo=*/true,
                                       /*SeparateBeginEndCalls=*/true);

  // Create a pre/post action to signal the privatization of the device pointer.
  // This action can be replaced by the OpenMP runtime code generation to
  // deactivate privatization.
  bool PrivatizeDevicePointers = false;
  class DevicePointerPrivActionTy : public PrePostActionTy {
    bool &PrivatizeDevicePointers;

  public:
    explicit DevicePointerPrivActionTy(bool &PrivatizeDevicePointers)
        : PrivatizeDevicePointers(PrivatizeDevicePointers) {}
    void Enter(CodeGenFunction &CGF) override {
      // Entering the action means the runtime kept privatization active.
      PrivatizeDevicePointers = true;
    }
  };
  DevicePointerPrivActionTy PrivAction(PrivatizeDevicePointers);

  auto &&CodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) {
    // Innermost codegen: just the user region body.
    auto &&InnermostCodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
      CGF.EmitStmt(S: S.getInnermostCapturedStmt()->getCapturedStmt());
    };

    // Codegen that selects whether to generate the privatization code or not.
    auto &&PrivCodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) {
      RegionCodeGenTy RCG(InnermostCodeGen);
      PrivatizeDevicePointers = false;

      // Call the pre-action to change the status of PrivatizeDevicePointers if
      // needed.
      Action.Enter(CGF);

      if (PrivatizeDevicePointers) {
        OMPPrivateScope PrivateScope(CGF);
        // Emit all instances of the use_device_ptr clause.
        for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>())
          CGF.EmitOMPUseDevicePtrClause(C: *C, PrivateScope,
                                        CaptureDeviceAddrMap: Info.CaptureDeviceAddrMap);
        for (const auto *C : S.getClausesOfKind<OMPUseDeviceAddrClause>())
          CGF.EmitOMPUseDeviceAddrClause(C: *C, PrivateScope,
                                         CaptureDeviceAddrMap: Info.CaptureDeviceAddrMap);
        (void)PrivateScope.Privatize();
        RCG(CGF);
      } else {
        // If we don't have target devices, don't bother emitting the data
        // mapping code.
        std::optional<OpenMPDirectiveKind> CaptureRegion;
        if (CGM.getLangOpts().OMPTargetTriples.empty()) {
          // Emit helper decls of the use_device_ptr/use_device_addr clauses.
          for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>())
            for (const Expr *E : C->varlist()) {
              const Decl *D = cast<DeclRefExpr>(Val: E)->getDecl();
              if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(Val: D))
                CGF.EmitVarDecl(D: *OED);
            }
          for (const auto *C : S.getClausesOfKind<OMPUseDeviceAddrClause>())
            for (const Expr *E : C->varlist()) {
              const Decl *D = getBaseDecl(Ref: E);
              if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(Val: D))
                CGF.EmitVarDecl(D: *OED);
            }
        } else {
          CaptureRegion = OMPD_unknown;
        }

        OMPLexicalScope Scope(CGF, S, CaptureRegion);
        RCG(CGF);
      }
    };

    // Forward the provided action to the privatization codegen.
    RegionCodeGenTy PrivRCG(PrivCodeGen);
    PrivRCG.setAction(Action);

    // Notwithstanding the body of the region is emitted as inlined directive,
    // we don't use an inline scope as changes in the references inside the
    // region are expected to be visible outside, so we do not privative them.
    OMPLexicalScope Scope(CGF, S);
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, InnermostKind: OMPD_target_data,
                                                    CodeGen: PrivRCG);
  };

  RegionCodeGenTy RCG(CodeGen);

  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty()) {
    RCG(*this);
    return;
  }

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (const auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (const auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  // Set the action to signal privatization of device pointers.
  RCG.setAction(PrivAction);

  // Emit region code.
  CGM.getOpenMPRuntime().emitTargetDataCalls(CGF&: *this, D: S, IfCond, Device, CodeGen: RCG,
                                             Info);
}
8002
8003void CodeGenFunction::EmitOMPTargetEnterDataDirective(
8004 const OMPTargetEnterDataDirective &S) {
8005 // If we don't have target devices, don't bother emitting the data mapping
8006 // code.
8007 if (CGM.getLangOpts().OMPTargetTriples.empty())
8008 return;
8009
8010 // Check if we have any if clause associated with the directive.
8011 const Expr *IfCond = nullptr;
8012 if (const auto *C = S.getSingleClause<OMPIfClause>())
8013 IfCond = C->getCondition();
8014
8015 // Check if we have any device clause associated with the directive.
8016 const Expr *Device = nullptr;
8017 if (const auto *C = S.getSingleClause<OMPDeviceClause>())
8018 Device = C->getDevice();
8019
8020 OMPLexicalScope Scope(*this, S, OMPD_task);
8021 CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(CGF&: *this, D: S, IfCond, Device);
8022}
8023
8024void CodeGenFunction::EmitOMPTargetExitDataDirective(
8025 const OMPTargetExitDataDirective &S) {
8026 // If we don't have target devices, don't bother emitting the data mapping
8027 // code.
8028 if (CGM.getLangOpts().OMPTargetTriples.empty())
8029 return;
8030
8031 // Check if we have any if clause associated with the directive.
8032 const Expr *IfCond = nullptr;
8033 if (const auto *C = S.getSingleClause<OMPIfClause>())
8034 IfCond = C->getCondition();
8035
8036 // Check if we have any device clause associated with the directive.
8037 const Expr *Device = nullptr;
8038 if (const auto *C = S.getSingleClause<OMPDeviceClause>())
8039 Device = C->getDevice();
8040
8041 OMPLexicalScope Scope(*this, S, OMPD_task);
8042 CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(CGF&: *this, D: S, IfCond, Device);
8043}
8044
// Emit the body of a '#pragma omp target parallel' directive: the captured
// 'parallel' region is emitted through the common parallel-directive helper,
// with firstprivate/private/reduction clauses set up around the body.
static void emitTargetParallelRegion(CodeGenFunction &CGF,
                                     const OMPTargetParallelDirective &S,
                                     PrePostActionTy &Action) {
  // Get the captured statement associated with the 'parallel' region.
  const CapturedStmt *CS = S.getCapturedStmt(RegionKind: OMPD_parallel);
  Action.Enter(CGF);
  auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    // Set up all privatizations before Privatize() activates them at once.
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    (void)CGF.EmitOMPFirstprivateClause(D: S, PrivateScope);
    CGF.EmitOMPPrivateClause(D: S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(D: S, PrivateScope);
    (void)PrivateScope.Privatize();
    if (isOpenMPTargetExecutionDirective(DKind: S.getDirectiveKind()))
      CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, D: S);
    // TODO: Add support for clauses.
    CGF.EmitStmt(S: CS->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_parallel);
  };
  emitCommonOMPParallelDirective(CGF, S, InnermostKind: OMPD_parallel, CodeGen,
                                 CodeGenBoundParameters: emitEmptyBoundParameters);
  // Evaluate any post-update expressions attached to reduction clauses.
  emitPostUpdateForReductionClause(CGF, D: S,
                                   CondGen: [](CodeGenFunction &) { return nullptr; });
}
8069
8070void CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
8071 CodeGenModule &CGM, StringRef ParentName,
8072 const OMPTargetParallelDirective &S) {
8073 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8074 emitTargetParallelRegion(CGF, S, Action);
8075 };
8076 llvm::Function *Fn;
8077 llvm::Constant *Addr;
8078 // Emit target region as a standalone region.
8079 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
8080 D: S, ParentName, OutlinedFn&: Fn, OutlinedFnID&: Addr, /*IsOffloadEntry=*/true, CodeGen);
8081 assert(Fn && Addr && "Target device function emission failed.");
8082}
8083
8084void CodeGenFunction::EmitOMPTargetParallelDirective(
8085 const OMPTargetParallelDirective &S) {
8086 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8087 emitTargetParallelRegion(CGF, S, Action);
8088 };
8089 emitCommonOMPTargetDirective(CGF&: *this, S, CodeGen);
8090}
8091
// Emit the body of a '#pragma omp target parallel for' directive as a
// combined 'parallel' + worksharing 'for' loop.
static void emitTargetParallelForRegion(CodeGenFunction &CGF,
                                        const OMPTargetParallelForDirective &S,
                                        PrePostActionTy &Action) {
  Action.Enter(CGF);
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'for' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    // RAII for the cancellation region of this worksharing loop.
    CodeGenFunction::OMPCancelStackRAII CancelRegion(
        CGF, OMPD_target_parallel_for, S.hasCancel());
    CGF.EmitOMPWorksharingLoop(S, EUB: S.getEnsureUpperBound(), CodeGenLoopBounds: emitForLoopBounds,
                               CGDispatchBounds: emitDispatchForLoopBounds);
  };
  emitCommonOMPParallelDirective(CGF, S, InnermostKind: OMPD_for, CodeGen,
                                 CodeGenBoundParameters: emitEmptyBoundParameters);
}
8108
8109void CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
8110 CodeGenModule &CGM, StringRef ParentName,
8111 const OMPTargetParallelForDirective &S) {
8112 // Emit SPMD target parallel for region as a standalone region.
8113 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8114 emitTargetParallelForRegion(CGF, S, Action);
8115 };
8116 llvm::Function *Fn;
8117 llvm::Constant *Addr;
8118 // Emit target region as a standalone region.
8119 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
8120 D: S, ParentName, OutlinedFn&: Fn, OutlinedFnID&: Addr, /*IsOffloadEntry=*/true, CodeGen);
8121 assert(Fn && Addr && "Target device function emission failed.");
8122}
8123
8124void CodeGenFunction::EmitOMPTargetParallelForDirective(
8125 const OMPTargetParallelForDirective &S) {
8126 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8127 emitTargetParallelForRegion(CGF, S, Action);
8128 };
8129 emitCommonOMPTargetDirective(CGF&: *this, S, CodeGen);
8130}
8131
// Emit the body of a '#pragma omp target parallel for simd' directive as a
// combined 'parallel' + 'for simd' worksharing loop (innermost kind 'simd';
// note there is no cancellation region here, unlike the non-simd variant).
static void
emitTargetParallelForSimdRegion(CodeGenFunction &CGF,
                                const OMPTargetParallelForSimdDirective &S,
                                PrePostActionTy &Action) {
  Action.Enter(CGF);
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'for' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CGF.EmitOMPWorksharingLoop(S, EUB: S.getEnsureUpperBound(), CodeGenLoopBounds: emitForLoopBounds,
                               CGDispatchBounds: emitDispatchForLoopBounds);
  };
  emitCommonOMPParallelDirective(CGF, S, InnermostKind: OMPD_simd, CodeGen,
                                 CodeGenBoundParameters: emitEmptyBoundParameters);
}
8147
8148void CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
8149 CodeGenModule &CGM, StringRef ParentName,
8150 const OMPTargetParallelForSimdDirective &S) {
8151 // Emit SPMD target parallel for region as a standalone region.
8152 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8153 emitTargetParallelForSimdRegion(CGF, S, Action);
8154 };
8155 llvm::Function *Fn;
8156 llvm::Constant *Addr;
8157 // Emit target region as a standalone region.
8158 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
8159 D: S, ParentName, OutlinedFn&: Fn, OutlinedFnID&: Addr, /*IsOffloadEntry=*/true, CodeGen);
8160 assert(Fn && Addr && "Target device function emission failed.");
8161}
8162
8163void CodeGenFunction::EmitOMPTargetParallelForSimdDirective(
8164 const OMPTargetParallelForSimdDirective &S) {
8165 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8166 emitTargetParallelForSimdRegion(CGF, S, Action);
8167 };
8168 emitCommonOMPTargetDirective(CGF&: *this, S, CodeGen);
8169}
8170
/// Map the helper variable referenced by \p Helper onto the address of the
/// outlined function's implicit parameter \p PVD inside \p Privates, so that
/// uses of the helper resolve to the parameter's local storage.
static void mapParam(CodeGenFunction &CGF, const DeclRefExpr *Helper,
                     const ImplicitParamDecl *PVD,
                     CodeGenFunction::OMPPrivateScope &Privates) {
  const auto *VDecl = cast<VarDecl>(Val: Helper->getDecl());
  Privates.addPrivate(LocalVD: VDecl, Addr: CGF.GetAddrOfLocalVar(VD: PVD));
}
8178
8179void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
8180 assert(isOpenMPTaskLoopDirective(S.getDirectiveKind()));
8181 // Emit outlined function for task construct.
8182 const CapturedStmt *CS = S.getCapturedStmt(RegionKind: OMPD_taskloop);
8183 Address CapturedStruct = Address::invalid();
8184 {
8185 OMPLexicalScope Scope(*this, S, OMPD_taskloop, /*EmitPreInitStmt=*/false);
8186 CapturedStruct = GenerateCapturedStmtArgument(S: *CS);
8187 }
8188 CanQualType SharedsTy =
8189 getContext().getCanonicalTagType(TD: CS->getCapturedRecordDecl());
8190 const Expr *IfCond = nullptr;
8191 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
8192 if (C->getNameModifier() == OMPD_unknown ||
8193 C->getNameModifier() == OMPD_taskloop) {
8194 IfCond = C->getCondition();
8195 break;
8196 }
8197 }
8198
8199 OMPTaskDataTy Data;
8200 // Check if taskloop must be emitted without taskgroup.
8201 Data.Nogroup = S.getSingleClause<OMPNogroupClause>();
8202 // TODO: Check if we should emit tied or untied task.
8203 Data.Tied = true;
8204 // Set scheduling for taskloop
8205 if (const auto *Clause = S.getSingleClause<OMPGrainsizeClause>()) {
8206 // grainsize clause
8207 Data.Schedule.setInt(/*IntVal=*/false);
8208 Data.Schedule.setPointer(EmitScalarExpr(E: Clause->getGrainsize()));
8209 Data.HasModifier =
8210 (Clause->getModifier() == OMPC_GRAINSIZE_strict) ? true : false;
8211 } else if (const auto *Clause = S.getSingleClause<OMPNumTasksClause>()) {
8212 // num_tasks clause
8213 Data.Schedule.setInt(/*IntVal=*/true);
8214 Data.Schedule.setPointer(EmitScalarExpr(E: Clause->getNumTasks()));
8215 Data.HasModifier =
8216 (Clause->getModifier() == OMPC_NUMTASKS_strict) ? true : false;
8217 }
8218
8219 auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) {
8220 // if (PreCond) {
8221 // for (IV in 0..LastIteration) BODY;
8222 // <Final counter/linear vars updates>;
8223 // }
8224 //
8225
8226 // Emit: if (PreCond) - begin.
8227 // If the condition constant folds and can be elided, avoid emitting the
8228 // whole loop.
8229 bool CondConstant;
8230 llvm::BasicBlock *ContBlock = nullptr;
8231 OMPLoopScope PreInitScope(CGF, S);
8232 if (CGF.ConstantFoldsToSimpleInteger(Cond: S.getPreCond(), Result&: CondConstant)) {
8233 if (!CondConstant)
8234 return;
8235 } else {
8236 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock(name: "taskloop.if.then");
8237 ContBlock = CGF.createBasicBlock(name: "taskloop.if.end");
8238 emitPreCond(CGF, S, Cond: S.getPreCond(), TrueBlock: ThenBlock, FalseBlock: ContBlock,
8239 TrueCount: CGF.getProfileCount(S: &S));
8240 CGF.EmitBlock(BB: ThenBlock);
8241 CGF.incrementProfileCounter(S: &S);
8242 }
8243
8244 (void)CGF.EmitOMPLinearClauseInit(D: S);
8245
8246 OMPPrivateScope LoopScope(CGF);
8247 // Emit helper vars inits.
8248 enum { LowerBound = 5, UpperBound, Stride, LastIter };
8249 auto *I = CS->getCapturedDecl()->param_begin();
8250 auto *LBP = std::next(x: I, n: LowerBound);
8251 auto *UBP = std::next(x: I, n: UpperBound);
8252 auto *STP = std::next(x: I, n: Stride);
8253 auto *LIP = std::next(x: I, n: LastIter);
8254 mapParam(CGF, Helper: cast<DeclRefExpr>(Val: S.getLowerBoundVariable()), PVD: *LBP,
8255 Privates&: LoopScope);
8256 mapParam(CGF, Helper: cast<DeclRefExpr>(Val: S.getUpperBoundVariable()), PVD: *UBP,
8257 Privates&: LoopScope);
8258 mapParam(CGF, Helper: cast<DeclRefExpr>(Val: S.getStrideVariable()), PVD: *STP, Privates&: LoopScope);
8259 mapParam(CGF, Helper: cast<DeclRefExpr>(Val: S.getIsLastIterVariable()), PVD: *LIP,
8260 Privates&: LoopScope);
8261 CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
8262 CGF.EmitOMPLinearClause(D: S, PrivateScope&: LoopScope);
8263 bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(D: S, PrivateScope&: LoopScope);
8264 (void)LoopScope.Privatize();
8265 // Emit the loop iteration variable.
8266 const Expr *IVExpr = S.getIterationVariable();
8267 const auto *IVDecl = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: IVExpr)->getDecl());
8268 CGF.EmitVarDecl(D: *IVDecl);
8269 CGF.EmitIgnoredExpr(E: S.getInit());
8270
8271 // Emit the iterations count variable.
8272 // If it is not a variable, Sema decided to calculate iterations count on
8273 // each iteration (e.g., it is foldable into a constant).
8274 if (const auto *LIExpr = dyn_cast<DeclRefExpr>(Val: S.getLastIteration())) {
8275 CGF.EmitVarDecl(D: *cast<VarDecl>(Val: LIExpr->getDecl()));
8276 // Emit calculation of the iterations count.
8277 CGF.EmitIgnoredExpr(E: S.getCalcLastIteration());
8278 }
8279
8280 {
8281 OMPLexicalScope Scope(CGF, S, OMPD_taskloop, /*EmitPreInitStmt=*/false);
8282 emitCommonSimdLoop(
8283 CGF, S,
8284 SimdInitGen: [&S](CodeGenFunction &CGF, PrePostActionTy &) {
8285 if (isOpenMPSimdDirective(DKind: S.getDirectiveKind()))
8286 CGF.EmitOMPSimdInit(D: S);
8287 },
8288 BodyCodeGen: [&S, &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
8289 CGF.EmitOMPInnerLoop(
8290 S, RequiresCleanup: LoopScope.requiresCleanups(), LoopCond: S.getCond(), IncExpr: S.getInc(),
8291 BodyGen: [&S](CodeGenFunction &CGF) {
8292 emitOMPLoopBodyWithStopPoint(CGF, S,
8293 LoopExit: CodeGenFunction::JumpDest());
8294 },
8295 PostIncGen: [](CodeGenFunction &) {});
8296 });
8297 }
8298 // Emit: if (PreCond) - end.
8299 if (ContBlock) {
8300 CGF.EmitBranch(Block: ContBlock);
8301 CGF.EmitBlock(BB: ContBlock, IsFinished: true);
8302 }
8303 // Emit final copy of the lastprivate variables if IsLastIter != 0.
8304 if (HasLastprivateClause) {
8305 CGF.EmitOMPLastprivateClauseFinal(
8306 D: S, NoFinals: isOpenMPSimdDirective(DKind: S.getDirectiveKind()),
8307 IsLastIterCond: CGF.Builder.CreateIsNotNull(Arg: CGF.EmitLoadOfScalar(
8308 Addr: CGF.GetAddrOfLocalVar(VD: *LIP), /*Volatile=*/false,
8309 Ty: (*LIP)->getType(), Loc: S.getBeginLoc())));
8310 }
8311 LoopScope.restoreMap();
8312 CGF.EmitOMPLinearClauseFinal(D: S, CondGen: [LIP, &S](CodeGenFunction &CGF) {
8313 return CGF.Builder.CreateIsNotNull(
8314 Arg: CGF.EmitLoadOfScalar(Addr: CGF.GetAddrOfLocalVar(VD: *LIP), /*Volatile=*/false,
8315 Ty: (*LIP)->getType(), Loc: S.getBeginLoc()));
8316 });
8317 };
8318 auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
8319 IfCond](CodeGenFunction &CGF, llvm::Function *OutlinedFn,
8320 const OMPTaskDataTy &Data) {
8321 auto &&CodeGen = [&S, OutlinedFn, SharedsTy, CapturedStruct, IfCond,
8322 &Data](CodeGenFunction &CGF, PrePostActionTy &) {
8323 OMPLoopScope PreInitScope(CGF, S);
8324 CGF.CGM.getOpenMPRuntime().emitTaskLoopCall(CGF, Loc: S.getBeginLoc(), D: S,
8325 TaskFunction: OutlinedFn, SharedsTy,
8326 Shareds: CapturedStruct, IfCond, Data);
8327 };
8328 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, InnermostKind: OMPD_taskloop,
8329 CodeGen);
8330 };
8331 if (Data.Nogroup) {
8332 EmitOMPTaskBasedDirective(S, CapturedRegion: OMPD_taskloop, BodyGen, TaskGen, Data);
8333 } else {
8334 CGM.getOpenMPRuntime().emitTaskgroupRegion(
8335 CGF&: *this,
8336 TaskgroupOpGen: [&S, &BodyGen, &TaskGen, &Data](CodeGenFunction &CGF,
8337 PrePostActionTy &Action) {
8338 Action.Enter(CGF);
8339 CGF.EmitOMPTaskBasedDirective(S, CapturedRegion: OMPD_taskloop, BodyGen, TaskGen,
8340 Data);
8341 },
8342 Loc: S.getBeginLoc());
8343 }
8344}
8345
8346void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) {
8347 auto LPCRegion =
8348 CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
8349 EmitOMPTaskLoopBasedDirective(S);
8350}
8351
8352void CodeGenFunction::EmitOMPTaskLoopSimdDirective(
8353 const OMPTaskLoopSimdDirective &S) {
8354 auto LPCRegion =
8355 CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
8356 OMPLexicalScope Scope(*this, S);
8357 EmitOMPTaskLoopBasedDirective(S);
8358}
8359
8360void CodeGenFunction::EmitOMPMasterTaskLoopDirective(
8361 const OMPMasterTaskLoopDirective &S) {
8362 auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8363 Action.Enter(CGF);
8364 EmitOMPTaskLoopBasedDirective(S);
8365 };
8366 auto LPCRegion =
8367 CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
8368 OMPLexicalScope Scope(*this, S, std::nullopt, /*EmitPreInitStmt=*/false);
8369 CGM.getOpenMPRuntime().emitMasterRegion(CGF&: *this, MasterOpGen: CodeGen, Loc: S.getBeginLoc());
8370}
8371
8372void CodeGenFunction::EmitOMPMaskedTaskLoopDirective(
8373 const OMPMaskedTaskLoopDirective &S) {
8374 auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8375 Action.Enter(CGF);
8376 EmitOMPTaskLoopBasedDirective(S);
8377 };
8378 auto LPCRegion =
8379 CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
8380 OMPLexicalScope Scope(*this, S, std::nullopt, /*EmitPreInitStmt=*/false);
8381 CGM.getOpenMPRuntime().emitMaskedRegion(CGF&: *this, MaskedOpGen: CodeGen, Loc: S.getBeginLoc());
8382}
8383
8384void CodeGenFunction::EmitOMPMasterTaskLoopSimdDirective(
8385 const OMPMasterTaskLoopSimdDirective &S) {
8386 auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8387 Action.Enter(CGF);
8388 EmitOMPTaskLoopBasedDirective(S);
8389 };
8390 auto LPCRegion =
8391 CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
8392 OMPLexicalScope Scope(*this, S);
8393 CGM.getOpenMPRuntime().emitMasterRegion(CGF&: *this, MasterOpGen: CodeGen, Loc: S.getBeginLoc());
8394}
8395
8396void CodeGenFunction::EmitOMPMaskedTaskLoopSimdDirective(
8397 const OMPMaskedTaskLoopSimdDirective &S) {
8398 auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8399 Action.Enter(CGF);
8400 EmitOMPTaskLoopBasedDirective(S);
8401 };
8402 auto LPCRegion =
8403 CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
8404 OMPLexicalScope Scope(*this, S);
8405 CGM.getOpenMPRuntime().emitMaskedRegion(CGF&: *this, MaskedOpGen: CodeGen, Loc: S.getBeginLoc());
8406}
8407
8408void CodeGenFunction::EmitOMPParallelMasterTaskLoopDirective(
8409 const OMPParallelMasterTaskLoopDirective &S) {
8410 auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8411 auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF,
8412 PrePostActionTy &Action) {
8413 Action.Enter(CGF);
8414 CGF.EmitOMPTaskLoopBasedDirective(S);
8415 };
8416 OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false);
8417 CGM.getOpenMPRuntime().emitMasterRegion(CGF, MasterOpGen: TaskLoopCodeGen,
8418 Loc: S.getBeginLoc());
8419 };
8420 auto LPCRegion =
8421 CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
8422 emitCommonOMPParallelDirective(CGF&: *this, S, InnermostKind: OMPD_master_taskloop, CodeGen,
8423 CodeGenBoundParameters: emitEmptyBoundParameters);
8424}
8425
8426void CodeGenFunction::EmitOMPParallelMaskedTaskLoopDirective(
8427 const OMPParallelMaskedTaskLoopDirective &S) {
8428 auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8429 auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF,
8430 PrePostActionTy &Action) {
8431 Action.Enter(CGF);
8432 CGF.EmitOMPTaskLoopBasedDirective(S);
8433 };
8434 OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false);
8435 CGM.getOpenMPRuntime().emitMaskedRegion(CGF, MaskedOpGen: TaskLoopCodeGen,
8436 Loc: S.getBeginLoc());
8437 };
8438 auto LPCRegion =
8439 CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
8440 emitCommonOMPParallelDirective(CGF&: *this, S, InnermostKind: OMPD_masked_taskloop, CodeGen,
8441 CodeGenBoundParameters: emitEmptyBoundParameters);
8442}
8443
8444void CodeGenFunction::EmitOMPParallelMasterTaskLoopSimdDirective(
8445 const OMPParallelMasterTaskLoopSimdDirective &S) {
8446 auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8447 auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF,
8448 PrePostActionTy &Action) {
8449 Action.Enter(CGF);
8450 CGF.EmitOMPTaskLoopBasedDirective(S);
8451 };
8452 OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false);
8453 CGM.getOpenMPRuntime().emitMasterRegion(CGF, MasterOpGen: TaskLoopCodeGen,
8454 Loc: S.getBeginLoc());
8455 };
8456 auto LPCRegion =
8457 CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
8458 emitCommonOMPParallelDirective(CGF&: *this, S, InnermostKind: OMPD_master_taskloop_simd, CodeGen,
8459 CodeGenBoundParameters: emitEmptyBoundParameters);
8460}
8461
8462void CodeGenFunction::EmitOMPParallelMaskedTaskLoopSimdDirective(
8463 const OMPParallelMaskedTaskLoopSimdDirective &S) {
8464 auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8465 auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF,
8466 PrePostActionTy &Action) {
8467 Action.Enter(CGF);
8468 CGF.EmitOMPTaskLoopBasedDirective(S);
8469 };
8470 OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false);
8471 CGM.getOpenMPRuntime().emitMaskedRegion(CGF, MaskedOpGen: TaskLoopCodeGen,
8472 Loc: S.getBeginLoc());
8473 };
8474 auto LPCRegion =
8475 CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
8476 emitCommonOMPParallelDirective(CGF&: *this, S, InnermostKind: OMPD_masked_taskloop_simd, CodeGen,
8477 CodeGenBoundParameters: emitEmptyBoundParameters);
8478}
8479
8480// Generate the instructions for '#pragma omp target update' directive.
8481void CodeGenFunction::EmitOMPTargetUpdateDirective(
8482 const OMPTargetUpdateDirective &S) {
8483 // If we don't have target devices, don't bother emitting the data mapping
8484 // code.
8485 if (CGM.getLangOpts().OMPTargetTriples.empty())
8486 return;
8487
8488 // Check if we have any if clause associated with the directive.
8489 const Expr *IfCond = nullptr;
8490 if (const auto *C = S.getSingleClause<OMPIfClause>())
8491 IfCond = C->getCondition();
8492
8493 // Check if we have any device clause associated with the directive.
8494 const Expr *Device = nullptr;
8495 if (const auto *C = S.getSingleClause<OMPDeviceClause>())
8496 Device = C->getDevice();
8497
8498 OMPLexicalScope Scope(*this, S, OMPD_task);
8499 CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(CGF&: *this, D: S, IfCond, Device);
8500}
8501
void CodeGenFunction::EmitOMPGenericLoopDirective(
    const OMPGenericLoopDirective &S) {
  // Always expect a bind clause on the loop directive. If it wasn't
  // in the source, it should have been added in sema.

  OpenMPBindClauseKind BindKind = OMPC_BIND_unknown;
  if (const auto *C = S.getSingleClause<OMPBindClause>())
    BindKind = C->getBindKind();

  // Lower 'loop' according to its binding: parallel -> 'for',
  // teams -> 'distribute', thread -> 'simd'.
  switch (BindKind) {
  case OMPC_BIND_parallel: // for
    return emitOMPForDirective(S, CGF&: *this, CGM, /*HasCancel=*/false);
  case OMPC_BIND_teams: // distribute
    return emitOMPDistributeDirective(S, CGF&: *this, CGM);
  case OMPC_BIND_thread: // simd
    return emitOMPSimdDirective(S, CGF&: *this, CGM);
  case OMPC_BIND_unknown:
    break;
  }

  // Unimplemented, just inline the underlying statement for now.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    // Emit the loop iteration variable.
    const Stmt *CS =
        cast<CapturedStmt>(Val: S.getAssociatedStmt())->getCapturedStmt();
    const auto *ForS = dyn_cast<ForStmt>(Val: CS);
    // NOTE(review): isa<> requires a non-null operand; this assumes the
    // associated ForStmt always has an init statement here -- confirm.
    if (ForS && !isa<DeclStmt>(Val: ForS->getInit())) {
      // The loop counter is declared outside the init; privatize it so the
      // inlined body references the private copy.
      OMPPrivateScope LoopScope(CGF);
      CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
      (void)LoopScope.Privatize();
      CGF.EmitStmt(S: CS);
      LoopScope.restoreMap();
    } else {
      CGF.EmitStmt(S: CS);
    }
  };
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  CGM.getOpenMPRuntime().emitInlinedDirective(CGF&: *this, InnermostKind: OMPD_loop, CodeGen);
}
8541
// Emit '#pragma omp parallel loop' by lowering it to 'parallel' + 'for'.
void CodeGenFunction::EmitOMPParallelGenericLoopDirective(
    const OMPLoopDirective &S) {
  // Emit combined directive as if its constituent constructs are 'parallel'
  // and 'for'.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    emitOMPCopyinClause(CGF, S);
    (void)emitWorksharingDirective(CGF, S, /*HasCancel=*/false);
  };
  {
    // Lastprivate-conditional handling stays disabled only while the parallel
    // region is emitted (scoped by the braces).
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
    emitCommonOMPParallelDirective(CGF&: *this, S, InnermostKind: OMPD_for, CodeGen,
                                   CodeGenBoundParameters: emitEmptyBoundParameters);
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(CGF&: *this, S);
}
8560
void CodeGenFunction::EmitOMPTeamsGenericLoopDirective(
    const OMPTeamsGenericLoopDirective &S) {
  // To be consistent with current behavior of 'target teams loop', emit
  // 'teams loop' as if its constituent constructs are 'teams' and 'distribute'.
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitOMPLoopBodyWithStopPoint, IncExpr: S.getInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    // Reductions are initialized/finalized at the 'teams' level around the
    // inlined 'distribute' loop.
    OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(D: S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, InnermostKind: OMPD_distribute,
                                                    CodeGen: CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(CGF&: *this, S, InnermostKind: OMPD_distribute, CodeGen);
  emitPostUpdateForReductionClause(CGF&: *this, D: S,
                                   CondGen: [](CodeGenFunction &) { return nullptr; });
}
8584
8585#ifndef NDEBUG
8586static void emitTargetTeamsLoopCodegenStatus(CodeGenFunction &CGF,
8587 std::string StatusMsg,
8588 const OMPExecutableDirective &D) {
8589 bool IsDevice = CGF.CGM.getLangOpts().OpenMPIsTargetDevice;
8590 if (IsDevice)
8591 StatusMsg += ": DEVICE";
8592 else
8593 StatusMsg += ": HOST";
8594 SourceLocation L = D.getBeginLoc();
8595 auto &SM = CGF.getContext().getSourceManager();
8596 PresumedLoc PLoc = SM.getPresumedLoc(L);
8597 const char *FileName = PLoc.isValid() ? PLoc.getFilename() : nullptr;
8598 unsigned LineNo =
8599 PLoc.isValid() ? PLoc.getLine() : SM.getExpansionLineNumber(L);
8600 llvm::dbgs() << StatusMsg << ": " << FileName << ": " << LineNo << "\n";
8601}
8602#endif
8603
// Emit the region of a 'target teams loop' directive lowered as a
// 'distribute parallel for' construct.
static void emitTargetTeamsGenericLoopRegionAsParallel(
    CodeGenFunction &CGF, PrePostActionTy &Action,
    const OMPTargetTeamsGenericLoopDirective &S) {
  Action.Enter(CGF);
  // Emit 'teams loop' as if its constituent constructs are 'distribute,
  // 'parallel, and 'for'.
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitInnerParallelForWhenCombined,
                              IncExpr: S.getDistInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                                 PrePostActionTy &Action) {
    Action.Enter(CGF);
    // Teams-level reductions wrap the inlined 'distribute' loop.
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(D: S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
        CGF, InnermostKind: OMPD_distribute, CodeGen: CodeGenDistribute, /*HasCancel=*/false);
    CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_teams);
  };
  // Debug-only trace of which lowering was chosen (see TTL_CODEGEN_TYPE).
  DEBUG_WITH_TYPE(TTL_CODEGEN_TYPE,
                  emitTargetTeamsLoopCodegenStatus(
                      CGF, TTL_CODEGEN_TYPE " as parallel for", S));
  emitCommonOMPTeamsDirective(CGF, S, InnermostKind: OMPD_distribute_parallel_for,
                              CodeGen: CodeGenTeams);
  emitPostUpdateForReductionClause(CGF, D: S,
                                   CondGen: [](CodeGenFunction &) { return nullptr; });
}
8634
// Emit the region of a 'target teams loop' directive lowered as a plain
// 'distribute' construct (no inner 'parallel for').
static void emitTargetTeamsGenericLoopRegionAsDistribute(
    CodeGenFunction &CGF, PrePostActionTy &Action,
    const OMPTargetTeamsGenericLoopDirective &S) {
  Action.Enter(CGF);
  // Emit 'teams loop' as if its constituent construct is 'distribute'.
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitOMPLoopBodyWithStopPoint, IncExpr: S.getInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    // Teams-level reductions wrap the inlined 'distribute' loop.
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(D: S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
        CGF, InnermostKind: OMPD_distribute, CodeGen: CodeGenDistribute, /*HasCancel=*/false);
    CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_teams);
  };
  // Debug-only trace of which lowering was chosen (see TTL_CODEGEN_TYPE).
  DEBUG_WITH_TYPE(TTL_CODEGEN_TYPE,
                  emitTargetTeamsLoopCodegenStatus(
                      CGF, TTL_CODEGEN_TYPE " as distribute", S));
  emitCommonOMPTeamsDirective(CGF, S, InnermostKind: OMPD_distribute, CodeGen);
  emitPostUpdateForReductionClause(CGF, D: S,
                                   CondGen: [](CodeGenFunction &) { return nullptr; });
}
8662
8663void CodeGenFunction::EmitOMPTargetTeamsGenericLoopDirective(
8664 const OMPTargetTeamsGenericLoopDirective &S) {
8665 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8666 if (S.canBeParallelFor())
8667 emitTargetTeamsGenericLoopRegionAsParallel(CGF, Action, S);
8668 else
8669 emitTargetTeamsGenericLoopRegionAsDistribute(CGF, Action, S);
8670 };
8671 emitCommonOMPTargetDirective(CGF&: *this, S, CodeGen);
8672}
8673
8674void CodeGenFunction::EmitOMPTargetTeamsGenericLoopDeviceFunction(
8675 CodeGenModule &CGM, StringRef ParentName,
8676 const OMPTargetTeamsGenericLoopDirective &S) {
8677 // Emit SPMD target parallel loop region as a standalone region.
8678 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8679 if (S.canBeParallelFor())
8680 emitTargetTeamsGenericLoopRegionAsParallel(CGF, Action, S);
8681 else
8682 emitTargetTeamsGenericLoopRegionAsDistribute(CGF, Action, S);
8683 };
8684 llvm::Function *Fn;
8685 llvm::Constant *Addr;
8686 // Emit target region as a standalone region.
8687 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
8688 D: S, ParentName, OutlinedFn&: Fn, OutlinedFnID&: Addr, /*IsOffloadEntry=*/true, CodeGen);
8689 assert(Fn && Addr &&
8690 "Target device function emission failed for 'target teams loop'.");
8691}
8692
// Emit the region of a 'target parallel loop' directive lowered as a
// 'parallel for' construct.
static void emitTargetParallelGenericLoopRegion(
    CodeGenFunction &CGF, const OMPTargetParallelGenericLoopDirective &S,
    PrePostActionTy &Action) {
  Action.Enter(CGF);
  // Emit as 'parallel for'.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    // RAII for the cancellation region; 'loop' has no cancel support.
    CodeGenFunction::OMPCancelStackRAII CancelRegion(
        CGF, OMPD_target_parallel_loop, /*hasCancel=*/false);
    CGF.EmitOMPWorksharingLoop(S, EUB: S.getEnsureUpperBound(), CodeGenLoopBounds: emitForLoopBounds,
                               CGDispatchBounds: emitDispatchForLoopBounds);
  };
  emitCommonOMPParallelDirective(CGF, S, InnermostKind: OMPD_for, CodeGen,
                                 CodeGenBoundParameters: emitEmptyBoundParameters);
}
8708
8709void CodeGenFunction::EmitOMPTargetParallelGenericLoopDeviceFunction(
8710 CodeGenModule &CGM, StringRef ParentName,
8711 const OMPTargetParallelGenericLoopDirective &S) {
8712 // Emit target parallel loop region as a standalone region.
8713 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8714 emitTargetParallelGenericLoopRegion(CGF, S, Action);
8715 };
8716 llvm::Function *Fn;
8717 llvm::Constant *Addr;
8718 // Emit target region as a standalone region.
8719 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
8720 D: S, ParentName, OutlinedFn&: Fn, OutlinedFnID&: Addr, /*IsOffloadEntry=*/true, CodeGen);
8721 assert(Fn && Addr && "Target device function emission failed.");
8722}
8723
8724/// Emit combined directive 'target parallel loop' as if its constituent
8725/// constructs are 'target', 'parallel', and 'for'.
8726void CodeGenFunction::EmitOMPTargetParallelGenericLoopDirective(
8727 const OMPTargetParallelGenericLoopDirective &S) {
8728 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8729 emitTargetParallelGenericLoopRegion(CGF, S, Action);
8730 };
8731 emitCommonOMPTargetDirective(CGF&: *this, S, CodeGen);
8732}
8733
8734void CodeGenFunction::EmitSimpleOMPExecutableDirective(
8735 const OMPExecutableDirective &D) {
8736 if (const auto *SD = dyn_cast<OMPScanDirective>(Val: &D)) {
8737 EmitOMPScanDirective(S: *SD);
8738 return;
8739 }
8740 if (!D.hasAssociatedStmt() || !D.getAssociatedStmt())
8741 return;
8742 auto &&CodeGen = [&D](CodeGenFunction &CGF, PrePostActionTy &Action) {
8743 OMPPrivateScope GlobalsScope(CGF);
8744 if (isOpenMPTaskingDirective(Kind: D.getDirectiveKind())) {
8745 // Capture global firstprivates to avoid crash.
8746 for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
8747 for (const Expr *Ref : C->varlist()) {
8748 const auto *DRE = cast<DeclRefExpr>(Val: Ref->IgnoreParenImpCasts());
8749 if (!DRE)
8750 continue;
8751 const auto *VD = dyn_cast<VarDecl>(Val: DRE->getDecl());
8752 if (!VD || VD->hasLocalStorage())
8753 continue;
8754 if (!CGF.LocalDeclMap.count(Val: VD)) {
8755 LValue GlobLVal = CGF.EmitLValue(E: Ref);
8756 GlobalsScope.addPrivate(LocalVD: VD, Addr: GlobLVal.getAddress());
8757 }
8758 }
8759 }
8760 }
8761 if (isOpenMPSimdDirective(DKind: D.getDirectiveKind())) {
8762 (void)GlobalsScope.Privatize();
8763 ParentLoopDirectiveForScanRegion ScanRegion(CGF, D);
8764 emitOMPSimdRegion(CGF, S: cast<OMPLoopDirective>(Val: D), Action);
8765 } else {
8766 if (const auto *LD = dyn_cast<OMPLoopDirective>(Val: &D)) {
8767 for (const Expr *E : LD->counters()) {
8768 const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl());
8769 if (!VD->hasLocalStorage() && !CGF.LocalDeclMap.count(Val: VD)) {
8770 LValue GlobLVal = CGF.EmitLValue(E);
8771 GlobalsScope.addPrivate(LocalVD: VD, Addr: GlobLVal.getAddress());
8772 }
8773 if (isa<OMPCapturedExprDecl>(Val: VD)) {
8774 // Emit only those that were not explicitly referenced in clauses.
8775 if (!CGF.LocalDeclMap.count(Val: VD))
8776 CGF.EmitVarDecl(D: *VD);
8777 }
8778 }
8779 for (const auto *C : D.getClausesOfKind<OMPOrderedClause>()) {
8780 if (!C->getNumForLoops())
8781 continue;
8782 for (unsigned I = LD->getLoopsNumber(),
8783 E = C->getLoopNumIterations().size();
8784 I < E; ++I) {
8785 if (const auto *VD = dyn_cast<OMPCapturedExprDecl>(
8786 Val: cast<DeclRefExpr>(Val: C->getLoopCounter(NumLoop: I))->getDecl())) {
8787 // Emit only those that were not explicitly referenced in clauses.
8788 if (!CGF.LocalDeclMap.count(Val: VD))
8789 CGF.EmitVarDecl(D: *VD);
8790 }
8791 }
8792 }
8793 }
8794 (void)GlobalsScope.Privatize();
8795 CGF.EmitStmt(S: D.getInnermostCapturedStmt()->getCapturedStmt());
8796 }
8797 };
8798 if (D.getDirectiveKind() == OMPD_atomic ||
8799 D.getDirectiveKind() == OMPD_critical ||
8800 D.getDirectiveKind() == OMPD_section ||
8801 D.getDirectiveKind() == OMPD_master ||
8802 D.getDirectiveKind() == OMPD_masked ||
8803 D.getDirectiveKind() == OMPD_unroll ||
8804 D.getDirectiveKind() == OMPD_assume) {
8805 EmitStmt(S: D.getAssociatedStmt());
8806 } else {
8807 auto LPCRegion =
8808 CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S: D);
8809 OMPSimdLexicalScope Scope(*this, D);
8810 CGM.getOpenMPRuntime().emitInlinedDirective(
8811 CGF&: *this,
8812 InnermostKind: isOpenMPSimdDirective(DKind: D.getDirectiveKind()) ? OMPD_simd
8813 : D.getDirectiveKind(),
8814 CodeGen);
8815 }
8816 // Check for outer lastprivate conditional update.
8817 checkForLastprivateConditionalUpdate(CGF&: *this, S: D);
8818}
8819
8820void CodeGenFunction::EmitOMPAssumeDirective(const OMPAssumeDirective &S) {
8821 EmitStmt(S: S.getAssociatedStmt());
8822}
8823