1 | //===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This contains code to emit OpenMP nodes as LLVM code. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "CGCleanup.h" |
14 | #include "CGDebugInfo.h" |
15 | #include "CGOpenMPRuntime.h" |
16 | #include "CodeGenFunction.h" |
17 | #include "CodeGenModule.h" |
18 | #include "CodeGenPGO.h" |
19 | #include "TargetInfo.h" |
20 | #include "clang/AST/ASTContext.h" |
21 | #include "clang/AST/Attr.h" |
22 | #include "clang/AST/DeclOpenMP.h" |
23 | #include "clang/AST/OpenMPClause.h" |
24 | #include "clang/AST/Stmt.h" |
25 | #include "clang/AST/StmtOpenMP.h" |
26 | #include "clang/AST/StmtVisitor.h" |
27 | #include "clang/Basic/OpenMPKinds.h" |
28 | #include "clang/Basic/PrettyStackTrace.h" |
29 | #include "clang/Basic/SourceManager.h" |
30 | #include "llvm/ADT/SmallSet.h" |
31 | #include "llvm/BinaryFormat/Dwarf.h" |
32 | #include "llvm/Frontend/OpenMP/OMPConstants.h" |
33 | #include "llvm/Frontend/OpenMP/OMPIRBuilder.h" |
34 | #include "llvm/IR/Constants.h" |
35 | #include "llvm/IR/DebugInfoMetadata.h" |
36 | #include "llvm/IR/Instructions.h" |
37 | #include "llvm/IR/IntrinsicInst.h" |
38 | #include "llvm/IR/Metadata.h" |
39 | #include "llvm/Support/AtomicOrdering.h" |
40 | #include "llvm/Support/Debug.h" |
41 | #include <optional> |
42 | using namespace clang; |
43 | using namespace CodeGen; |
44 | using namespace llvm::omp; |
45 | |
46 | #define TTL_CODEGEN_TYPE "target-teams-loop-codegen" |
47 | |
48 | static const VarDecl *getBaseDecl(const Expr *Ref); |
49 | static OpenMPDirectiveKind |
50 | getEffectiveDirectiveKind(const OMPExecutableDirective &S); |
51 | |
52 | namespace { |
53 | /// Lexical scope for OpenMP executable constructs, that handles correct codegen |
54 | /// for captured expressions. |
55 | class OMPLexicalScope : public CodeGenFunction::LexicalScope { |
56 | void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) { |
57 | for (const auto *C : S.clauses()) { |
58 | if (const auto *CPI = OMPClauseWithPreInit::get(C)) { |
59 | if (const auto *PreInit = |
60 | cast_or_null<DeclStmt>(Val: CPI->getPreInitStmt())) { |
61 | for (const auto *I : PreInit->decls()) { |
62 | if (!I->hasAttr<OMPCaptureNoInitAttr>()) { |
63 | CGF.EmitVarDecl(D: cast<VarDecl>(Val: *I)); |
64 | } else { |
65 | CodeGenFunction::AutoVarEmission Emission = |
66 | CGF.EmitAutoVarAlloca(var: cast<VarDecl>(Val: *I)); |
67 | CGF.EmitAutoVarCleanups(emission: Emission); |
68 | } |
69 | } |
70 | } |
71 | } |
72 | } |
73 | } |
74 | CodeGenFunction::OMPPrivateScope InlinedShareds; |
75 | |
76 | static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) { |
77 | return CGF.LambdaCaptureFields.lookup(Val: VD) || |
78 | (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) || |
79 | (isa_and_nonnull<BlockDecl>(Val: CGF.CurCodeDecl) && |
80 | cast<BlockDecl>(Val: CGF.CurCodeDecl)->capturesVariable(var: VD)); |
81 | } |
82 | |
83 | public: |
84 | OMPLexicalScope( |
85 | CodeGenFunction &CGF, const OMPExecutableDirective &S, |
86 | const std::optional<OpenMPDirectiveKind> CapturedRegion = std::nullopt, |
87 | const bool EmitPreInitStmt = true) |
88 | : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()), |
89 | InlinedShareds(CGF) { |
90 | if (EmitPreInitStmt) |
91 | emitPreInitStmt(CGF, S); |
92 | if (!CapturedRegion) |
93 | return; |
94 | assert(S.hasAssociatedStmt() && |
95 | "Expected associated statement for inlined directive." ); |
96 | const CapturedStmt *CS = S.getCapturedStmt(RegionKind: *CapturedRegion); |
97 | for (const auto &C : CS->captures()) { |
98 | if (C.capturesVariable() || C.capturesVariableByCopy()) { |
99 | auto *VD = C.getCapturedVar(); |
100 | assert(VD == VD->getCanonicalDecl() && |
101 | "Canonical decl must be captured." ); |
102 | DeclRefExpr DRE( |
103 | CGF.getContext(), const_cast<VarDecl *>(VD), |
104 | isCapturedVar(CGF, VD) || (CGF.CapturedStmtInfo && |
105 | InlinedShareds.isGlobalVarCaptured(VD)), |
106 | VD->getType().getNonReferenceType(), VK_LValue, C.getLocation()); |
107 | InlinedShareds.addPrivate(LocalVD: VD, Addr: CGF.EmitLValue(E: &DRE).getAddress()); |
108 | } |
109 | } |
110 | (void)InlinedShareds.Privatize(); |
111 | } |
112 | }; |
113 | |
114 | /// Lexical scope for OpenMP parallel construct, that handles correct codegen |
115 | /// for captured expressions. |
116 | class OMPParallelScope final : public OMPLexicalScope { |
117 | bool EmitPreInitStmt(const OMPExecutableDirective &S) { |
118 | OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S); |
119 | return !(isOpenMPTargetExecutionDirective(DKind: EKind) || |
120 | isOpenMPLoopBoundSharingDirective(Kind: EKind)) && |
121 | isOpenMPParallelDirective(DKind: EKind); |
122 | } |
123 | |
124 | public: |
125 | OMPParallelScope(CodeGenFunction &CGF, const OMPExecutableDirective &S) |
126 | : OMPLexicalScope(CGF, S, /*CapturedRegion=*/std::nullopt, |
127 | EmitPreInitStmt(S)) {} |
128 | }; |
129 | |
130 | /// Lexical scope for OpenMP teams construct, that handles correct codegen |
131 | /// for captured expressions. |
132 | class OMPTeamsScope final : public OMPLexicalScope { |
133 | bool EmitPreInitStmt(const OMPExecutableDirective &S) { |
134 | OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S); |
135 | return !isOpenMPTargetExecutionDirective(DKind: EKind) && |
136 | isOpenMPTeamsDirective(DKind: EKind); |
137 | } |
138 | |
139 | public: |
140 | OMPTeamsScope(CodeGenFunction &CGF, const OMPExecutableDirective &S) |
141 | : OMPLexicalScope(CGF, S, /*CapturedRegion=*/std::nullopt, |
142 | EmitPreInitStmt(S)) {} |
143 | }; |
144 | |
145 | /// Private scope for OpenMP loop-based directives, that supports capturing |
146 | /// of used expression from loop statement. |
147 | class OMPLoopScope : public CodeGenFunction::RunCleanupsScope { |
148 | void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopBasedDirective &S) { |
149 | const Stmt *PreInits; |
150 | CodeGenFunction::OMPMapVars PreCondVars; |
151 | if (auto *LD = dyn_cast<OMPLoopDirective>(Val: &S)) { |
152 | llvm::DenseSet<const VarDecl *> EmittedAsPrivate; |
153 | for (const auto *E : LD->counters()) { |
154 | const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl()); |
155 | EmittedAsPrivate.insert(V: VD->getCanonicalDecl()); |
156 | (void)PreCondVars.setVarAddr( |
157 | CGF, LocalVD: VD, TempAddr: CGF.CreateMemTemp(T: VD->getType().getNonReferenceType())); |
158 | } |
159 | // Mark private vars as undefs. |
160 | for (const auto *C : LD->getClausesOfKind<OMPPrivateClause>()) { |
161 | for (const Expr *IRef : C->varlist()) { |
162 | const auto *OrigVD = |
163 | cast<VarDecl>(Val: cast<DeclRefExpr>(Val: IRef)->getDecl()); |
164 | if (EmittedAsPrivate.insert(V: OrigVD->getCanonicalDecl()).second) { |
165 | QualType OrigVDTy = OrigVD->getType().getNonReferenceType(); |
166 | (void)PreCondVars.setVarAddr( |
167 | CGF, LocalVD: OrigVD, |
168 | TempAddr: Address(llvm::UndefValue::get(T: CGF.ConvertTypeForMem( |
169 | T: CGF.getContext().getPointerType(T: OrigVDTy))), |
170 | CGF.ConvertTypeForMem(T: OrigVDTy), |
171 | CGF.getContext().getDeclAlign(D: OrigVD))); |
172 | } |
173 | } |
174 | } |
175 | (void)PreCondVars.apply(CGF); |
176 | // Emit init, __range and __end variables for C++ range loops. |
177 | (void)OMPLoopBasedDirective::doForAllLoops( |
178 | CurStmt: LD->getInnermostCapturedStmt()->getCapturedStmt(), |
179 | /*TryImperfectlyNestedLoops=*/true, NumLoops: LD->getLoopsNumber(), |
180 | Callback: [&CGF](unsigned Cnt, const Stmt *CurStmt) { |
181 | if (const auto *CXXFor = dyn_cast<CXXForRangeStmt>(Val: CurStmt)) { |
182 | if (const Stmt *Init = CXXFor->getInit()) |
183 | CGF.EmitStmt(S: Init); |
184 | CGF.EmitStmt(S: CXXFor->getRangeStmt()); |
185 | CGF.EmitStmt(S: CXXFor->getEndStmt()); |
186 | } |
187 | return false; |
188 | }); |
189 | PreInits = LD->getPreInits(); |
190 | } else if (const auto *Tile = dyn_cast<OMPTileDirective>(Val: &S)) { |
191 | PreInits = Tile->getPreInits(); |
192 | } else if (const auto *Stripe = dyn_cast<OMPStripeDirective>(Val: &S)) { |
193 | PreInits = Stripe->getPreInits(); |
194 | } else if (const auto *Unroll = dyn_cast<OMPUnrollDirective>(Val: &S)) { |
195 | PreInits = Unroll->getPreInits(); |
196 | } else if (const auto *Reverse = dyn_cast<OMPReverseDirective>(Val: &S)) { |
197 | PreInits = Reverse->getPreInits(); |
198 | } else if (const auto *Interchange = |
199 | dyn_cast<OMPInterchangeDirective>(Val: &S)) { |
200 | PreInits = Interchange->getPreInits(); |
201 | } else { |
202 | llvm_unreachable("Unknown loop-based directive kind." ); |
203 | } |
204 | if (PreInits) { |
205 | // CompoundStmts and DeclStmts are used as lists of PreInit statements and |
206 | // declarations. Since declarations must be visible in the the following |
207 | // that they initialize, unpack the CompoundStmt they are nested in. |
208 | SmallVector<const Stmt *> PreInitStmts; |
209 | if (auto *PreInitCompound = dyn_cast<CompoundStmt>(Val: PreInits)) |
210 | llvm::append_range(C&: PreInitStmts, R: PreInitCompound->body()); |
211 | else |
212 | PreInitStmts.push_back(Elt: PreInits); |
213 | |
214 | for (const Stmt *S : PreInitStmts) { |
215 | // EmitStmt skips any OMPCapturedExprDecls, but needs to be emitted |
216 | // here. |
217 | if (auto *PreInitDecl = dyn_cast<DeclStmt>(Val: S)) { |
218 | for (Decl *I : PreInitDecl->decls()) |
219 | CGF.EmitVarDecl(D: cast<VarDecl>(Val&: *I)); |
220 | continue; |
221 | } |
222 | CGF.EmitStmt(S); |
223 | } |
224 | } |
225 | PreCondVars.restore(CGF); |
226 | } |
227 | |
228 | public: |
229 | OMPLoopScope(CodeGenFunction &CGF, const OMPLoopBasedDirective &S) |
230 | : CodeGenFunction::RunCleanupsScope(CGF) { |
231 | emitPreInitStmt(CGF, S); |
232 | } |
233 | }; |
234 | |
235 | class OMPSimdLexicalScope : public CodeGenFunction::LexicalScope { |
236 | CodeGenFunction::OMPPrivateScope InlinedShareds; |
237 | |
238 | static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) { |
239 | return CGF.LambdaCaptureFields.lookup(Val: VD) || |
240 | (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) || |
241 | (isa_and_nonnull<BlockDecl>(Val: CGF.CurCodeDecl) && |
242 | cast<BlockDecl>(Val: CGF.CurCodeDecl)->capturesVariable(var: VD)); |
243 | } |
244 | |
245 | public: |
246 | OMPSimdLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S) |
247 | : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()), |
248 | InlinedShareds(CGF) { |
249 | for (const auto *C : S.clauses()) { |
250 | if (const auto *CPI = OMPClauseWithPreInit::get(C)) { |
251 | if (const auto *PreInit = |
252 | cast_or_null<DeclStmt>(Val: CPI->getPreInitStmt())) { |
253 | for (const auto *I : PreInit->decls()) { |
254 | if (!I->hasAttr<OMPCaptureNoInitAttr>()) { |
255 | CGF.EmitVarDecl(D: cast<VarDecl>(Val: *I)); |
256 | } else { |
257 | CodeGenFunction::AutoVarEmission Emission = |
258 | CGF.EmitAutoVarAlloca(var: cast<VarDecl>(Val: *I)); |
259 | CGF.EmitAutoVarCleanups(emission: Emission); |
260 | } |
261 | } |
262 | } |
263 | } else if (const auto *UDP = dyn_cast<OMPUseDevicePtrClause>(Val: C)) { |
264 | for (const Expr *E : UDP->varlist()) { |
265 | const Decl *D = cast<DeclRefExpr>(Val: E)->getDecl(); |
266 | if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(Val: D)) |
267 | CGF.EmitVarDecl(D: *OED); |
268 | } |
269 | } else if (const auto *UDP = dyn_cast<OMPUseDeviceAddrClause>(Val: C)) { |
270 | for (const Expr *E : UDP->varlist()) { |
271 | const Decl *D = getBaseDecl(Ref: E); |
272 | if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(Val: D)) |
273 | CGF.EmitVarDecl(D: *OED); |
274 | } |
275 | } |
276 | } |
277 | if (!isOpenMPSimdDirective(DKind: getEffectiveDirectiveKind(S))) |
278 | CGF.EmitOMPPrivateClause(D: S, PrivateScope&: InlinedShareds); |
279 | if (const auto *TG = dyn_cast<OMPTaskgroupDirective>(Val: &S)) { |
280 | if (const Expr *E = TG->getReductionRef()) |
281 | CGF.EmitVarDecl(D: *cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl())); |
282 | } |
283 | // Temp copy arrays for inscan reductions should not be emitted as they are |
284 | // not used in simd only mode. |
285 | llvm::DenseSet<CanonicalDeclPtr<const Decl>> CopyArrayTemps; |
286 | for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) { |
287 | if (C->getModifier() != OMPC_REDUCTION_inscan) |
288 | continue; |
289 | for (const Expr *E : C->copy_array_temps()) |
290 | CopyArrayTemps.insert(V: cast<DeclRefExpr>(Val: E)->getDecl()); |
291 | } |
292 | const auto *CS = cast_or_null<CapturedStmt>(Val: S.getAssociatedStmt()); |
293 | while (CS) { |
294 | for (auto &C : CS->captures()) { |
295 | if (C.capturesVariable() || C.capturesVariableByCopy()) { |
296 | auto *VD = C.getCapturedVar(); |
297 | if (CopyArrayTemps.contains(V: VD)) |
298 | continue; |
299 | assert(VD == VD->getCanonicalDecl() && |
300 | "Canonical decl must be captured." ); |
301 | DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD), |
302 | isCapturedVar(CGF, VD) || |
303 | (CGF.CapturedStmtInfo && |
304 | InlinedShareds.isGlobalVarCaptured(VD)), |
305 | VD->getType().getNonReferenceType(), VK_LValue, |
306 | C.getLocation()); |
307 | InlinedShareds.addPrivate(LocalVD: VD, Addr: CGF.EmitLValue(E: &DRE).getAddress()); |
308 | } |
309 | } |
310 | CS = dyn_cast<CapturedStmt>(Val: CS->getCapturedStmt()); |
311 | } |
312 | (void)InlinedShareds.Privatize(); |
313 | } |
314 | }; |
315 | |
316 | } // namespace |
317 | |
318 | // The loop directive with a bind clause will be mapped to a different |
319 | // directive with corresponding semantics. |
320 | static OpenMPDirectiveKind |
321 | getEffectiveDirectiveKind(const OMPExecutableDirective &S) { |
322 | OpenMPDirectiveKind Kind = S.getDirectiveKind(); |
323 | if (Kind != OMPD_loop) |
324 | return Kind; |
325 | |
326 | OpenMPBindClauseKind BindKind = OMPC_BIND_unknown; |
327 | if (const auto *C = S.getSingleClause<OMPBindClause>()) |
328 | BindKind = C->getBindKind(); |
329 | |
330 | switch (BindKind) { |
331 | case OMPC_BIND_parallel: |
332 | return OMPD_for; |
333 | case OMPC_BIND_teams: |
334 | return OMPD_distribute; |
335 | case OMPC_BIND_thread: |
336 | return OMPD_simd; |
337 | default: |
338 | return OMPD_loop; |
339 | } |
340 | } |
341 | |
342 | static void emitCommonOMPTargetDirective(CodeGenFunction &CGF, |
343 | const OMPExecutableDirective &S, |
344 | const RegionCodeGenTy &CodeGen); |
345 | |
346 | LValue CodeGenFunction::EmitOMPSharedLValue(const Expr *E) { |
347 | if (const auto *OrigDRE = dyn_cast<DeclRefExpr>(Val: E)) { |
348 | if (const auto *OrigVD = dyn_cast<VarDecl>(Val: OrigDRE->getDecl())) { |
349 | OrigVD = OrigVD->getCanonicalDecl(); |
350 | bool IsCaptured = |
351 | LambdaCaptureFields.lookup(Val: OrigVD) || |
352 | (CapturedStmtInfo && CapturedStmtInfo->lookup(VD: OrigVD)) || |
353 | (isa_and_nonnull<BlockDecl>(Val: CurCodeDecl)); |
354 | DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD), IsCaptured, |
355 | OrigDRE->getType(), VK_LValue, OrigDRE->getExprLoc()); |
356 | return EmitLValue(E: &DRE); |
357 | } |
358 | } |
359 | return EmitLValue(E); |
360 | } |
361 | |
362 | llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) { |
363 | ASTContext &C = getContext(); |
364 | llvm::Value *Size = nullptr; |
365 | auto SizeInChars = C.getTypeSizeInChars(T: Ty); |
366 | if (SizeInChars.isZero()) { |
367 | // getTypeSizeInChars() returns 0 for a VLA. |
368 | while (const VariableArrayType *VAT = C.getAsVariableArrayType(T: Ty)) { |
369 | VlaSizePair VlaSize = getVLASize(vla: VAT); |
370 | Ty = VlaSize.Type; |
371 | Size = |
372 | Size ? Builder.CreateNUWMul(LHS: Size, RHS: VlaSize.NumElts) : VlaSize.NumElts; |
373 | } |
374 | SizeInChars = C.getTypeSizeInChars(T: Ty); |
375 | if (SizeInChars.isZero()) |
376 | return llvm::ConstantInt::get(Ty: SizeTy, /*V=*/0); |
377 | return Builder.CreateNUWMul(LHS: Size, RHS: CGM.getSize(numChars: SizeInChars)); |
378 | } |
379 | return CGM.getSize(numChars: SizeInChars); |
380 | } |
381 | |
382 | void CodeGenFunction::GenerateOpenMPCapturedVars( |
383 | const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) { |
384 | const RecordDecl *RD = S.getCapturedRecordDecl(); |
385 | auto CurField = RD->field_begin(); |
386 | auto CurCap = S.captures().begin(); |
387 | for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(), |
388 | E = S.capture_init_end(); |
389 | I != E; ++I, ++CurField, ++CurCap) { |
390 | if (CurField->hasCapturedVLAType()) { |
391 | const VariableArrayType *VAT = CurField->getCapturedVLAType(); |
392 | llvm::Value *Val = VLASizeMap[VAT->getSizeExpr()]; |
393 | CapturedVars.push_back(Elt: Val); |
394 | } else if (CurCap->capturesThis()) { |
395 | CapturedVars.push_back(Elt: CXXThisValue); |
396 | } else if (CurCap->capturesVariableByCopy()) { |
397 | llvm::Value *CV = EmitLoadOfScalar(lvalue: EmitLValue(E: *I), Loc: CurCap->getLocation()); |
398 | |
399 | // If the field is not a pointer, we need to save the actual value |
400 | // and load it as a void pointer. |
401 | if (!CurField->getType()->isAnyPointerType()) { |
402 | ASTContext &Ctx = getContext(); |
403 | Address DstAddr = CreateMemTemp( |
404 | T: Ctx.getUIntPtrType(), |
405 | Name: Twine(CurCap->getCapturedVar()->getName(), ".casted" )); |
406 | LValue DstLV = MakeAddrLValue(Addr: DstAddr, T: Ctx.getUIntPtrType()); |
407 | |
408 | llvm::Value *SrcAddrVal = EmitScalarConversion( |
409 | Src: DstAddr.emitRawPointer(CGF&: *this), |
410 | SrcTy: Ctx.getPointerType(T: Ctx.getUIntPtrType()), |
411 | DstTy: Ctx.getPointerType(T: CurField->getType()), Loc: CurCap->getLocation()); |
412 | LValue SrcLV = |
413 | MakeNaturalAlignAddrLValue(V: SrcAddrVal, T: CurField->getType()); |
414 | |
415 | // Store the value using the source type pointer. |
416 | EmitStoreThroughLValue(Src: RValue::get(V: CV), Dst: SrcLV); |
417 | |
418 | // Load the value using the destination type pointer. |
419 | CV = EmitLoadOfScalar(lvalue: DstLV, Loc: CurCap->getLocation()); |
420 | } |
421 | CapturedVars.push_back(Elt: CV); |
422 | } else { |
423 | assert(CurCap->capturesVariable() && "Expected capture by reference." ); |
424 | CapturedVars.push_back(Elt: EmitLValue(E: *I).getAddress().emitRawPointer(CGF&: *this)); |
425 | } |
426 | } |
427 | } |
428 | |
429 | static Address castValueFromUintptr(CodeGenFunction &CGF, SourceLocation Loc, |
430 | QualType DstType, StringRef Name, |
431 | LValue AddrLV) { |
432 | ASTContext &Ctx = CGF.getContext(); |
433 | |
434 | llvm::Value *CastedPtr = CGF.EmitScalarConversion( |
435 | Src: AddrLV.getAddress().emitRawPointer(CGF), SrcTy: Ctx.getUIntPtrType(), |
436 | DstTy: Ctx.getPointerType(T: DstType), Loc); |
437 | // FIXME: should the pointee type (DstType) be passed? |
438 | Address TmpAddr = |
439 | CGF.MakeNaturalAlignAddrLValue(V: CastedPtr, T: DstType).getAddress(); |
440 | return TmpAddr; |
441 | } |
442 | |
443 | static QualType getCanonicalParamType(ASTContext &C, QualType T) { |
444 | if (T->isLValueReferenceType()) |
445 | return C.getLValueReferenceType( |
446 | T: getCanonicalParamType(C, T: T.getNonReferenceType()), |
447 | /*SpelledAsLValue=*/false); |
448 | if (T->isPointerType()) |
449 | return C.getPointerType(T: getCanonicalParamType(C, T: T->getPointeeType())); |
450 | if (const ArrayType *A = T->getAsArrayTypeUnsafe()) { |
451 | if (const auto *VLA = dyn_cast<VariableArrayType>(Val: A)) |
452 | return getCanonicalParamType(C, T: VLA->getElementType()); |
453 | if (!A->isVariablyModifiedType()) |
454 | return C.getCanonicalType(T); |
455 | } |
456 | return C.getCanonicalParamType(T); |
457 | } |
458 | |
459 | namespace { |
460 | /// Contains required data for proper outlined function codegen. |
461 | struct FunctionOptions { |
462 | /// Captured statement for which the function is generated. |
463 | const CapturedStmt *S = nullptr; |
464 | /// true if cast to/from UIntPtr is required for variables captured by |
465 | /// value. |
466 | const bool UIntPtrCastRequired = true; |
467 | /// true if only casted arguments must be registered as local args or VLA |
468 | /// sizes. |
469 | const bool RegisterCastedArgsOnly = false; |
470 | /// Name of the generated function. |
471 | const StringRef FunctionName; |
472 | /// Location of the non-debug version of the outlined function. |
473 | SourceLocation Loc; |
474 | explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired, |
475 | bool RegisterCastedArgsOnly, StringRef FunctionName, |
476 | SourceLocation Loc) |
477 | : S(S), UIntPtrCastRequired(UIntPtrCastRequired), |
478 | RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly), |
479 | FunctionName(FunctionName), Loc(Loc) {} |
480 | }; |
481 | } // namespace |
482 | |
483 | static llvm::Function *emitOutlinedFunctionPrologue( |
484 | CodeGenFunction &CGF, FunctionArgList &Args, |
485 | llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> |
486 | &LocalAddrs, |
487 | llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> |
488 | &VLASizes, |
489 | llvm::Value *&CXXThisValue, const FunctionOptions &FO) { |
490 | const CapturedDecl *CD = FO.S->getCapturedDecl(); |
491 | const RecordDecl *RD = FO.S->getCapturedRecordDecl(); |
492 | assert(CD->hasBody() && "missing CapturedDecl body" ); |
493 | |
494 | CXXThisValue = nullptr; |
495 | // Build the argument list. |
496 | CodeGenModule &CGM = CGF.CGM; |
497 | ASTContext &Ctx = CGM.getContext(); |
498 | FunctionArgList TargetArgs; |
499 | Args.append(in_start: CD->param_begin(), |
500 | in_end: std::next(x: CD->param_begin(), n: CD->getContextParamPosition())); |
501 | TargetArgs.append( |
502 | in_start: CD->param_begin(), |
503 | in_end: std::next(x: CD->param_begin(), n: CD->getContextParamPosition())); |
504 | auto I = FO.S->captures().begin(); |
505 | FunctionDecl *DebugFunctionDecl = nullptr; |
506 | if (!FO.UIntPtrCastRequired) { |
507 | FunctionProtoType::ExtProtoInfo EPI; |
508 | QualType FunctionTy = Ctx.getFunctionType(ResultTy: Ctx.VoidTy, Args: {}, EPI); |
509 | DebugFunctionDecl = FunctionDecl::Create( |
510 | C&: Ctx, DC: Ctx.getTranslationUnitDecl(), StartLoc: FO.S->getBeginLoc(), |
511 | NLoc: SourceLocation(), N: DeclarationName(), T: FunctionTy, |
512 | TInfo: Ctx.getTrivialTypeSourceInfo(T: FunctionTy), SC: SC_Static, |
513 | /*UsesFPIntrin=*/false, /*isInlineSpecified=*/false, |
514 | /*hasWrittenPrototype=*/false); |
515 | } |
516 | for (const FieldDecl *FD : RD->fields()) { |
517 | QualType ArgType = FD->getType(); |
518 | IdentifierInfo *II = nullptr; |
519 | VarDecl *CapVar = nullptr; |
520 | |
521 | // If this is a capture by copy and the type is not a pointer, the outlined |
522 | // function argument type should be uintptr and the value properly casted to |
523 | // uintptr. This is necessary given that the runtime library is only able to |
524 | // deal with pointers. We can pass in the same way the VLA type sizes to the |
525 | // outlined function. |
526 | if (FO.UIntPtrCastRequired && |
527 | ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) || |
528 | I->capturesVariableArrayType())) |
529 | ArgType = Ctx.getUIntPtrType(); |
530 | |
531 | if (I->capturesVariable() || I->capturesVariableByCopy()) { |
532 | CapVar = I->getCapturedVar(); |
533 | II = CapVar->getIdentifier(); |
534 | } else if (I->capturesThis()) { |
535 | II = &Ctx.Idents.get(Name: "this" ); |
536 | } else { |
537 | assert(I->capturesVariableArrayType()); |
538 | II = &Ctx.Idents.get(Name: "vla" ); |
539 | } |
540 | if (ArgType->isVariablyModifiedType()) |
541 | ArgType = getCanonicalParamType(C&: Ctx, T: ArgType); |
542 | VarDecl *Arg; |
543 | if (CapVar && (CapVar->getTLSKind() != clang::VarDecl::TLS_None)) { |
544 | Arg = ImplicitParamDecl::Create(C&: Ctx, /*DC=*/nullptr, IdLoc: FD->getLocation(), |
545 | Id: II, T: ArgType, |
546 | ParamKind: ImplicitParamKind::ThreadPrivateVar); |
547 | } else if (DebugFunctionDecl && (CapVar || I->capturesThis())) { |
548 | Arg = ParmVarDecl::Create( |
549 | C&: Ctx, DC: DebugFunctionDecl, |
550 | StartLoc: CapVar ? CapVar->getBeginLoc() : FD->getBeginLoc(), |
551 | IdLoc: CapVar ? CapVar->getLocation() : FD->getLocation(), Id: II, T: ArgType, |
552 | /*TInfo=*/nullptr, S: SC_None, /*DefArg=*/nullptr); |
553 | } else { |
554 | Arg = ImplicitParamDecl::Create(C&: Ctx, /*DC=*/nullptr, IdLoc: FD->getLocation(), |
555 | Id: II, T: ArgType, ParamKind: ImplicitParamKind::Other); |
556 | } |
557 | Args.emplace_back(Args&: Arg); |
558 | // Do not cast arguments if we emit function with non-original types. |
559 | TargetArgs.emplace_back( |
560 | Args: FO.UIntPtrCastRequired |
561 | ? Arg |
562 | : CGM.getOpenMPRuntime().translateParameter(FD, NativeParam: Arg)); |
563 | ++I; |
564 | } |
565 | Args.append(in_start: std::next(x: CD->param_begin(), n: CD->getContextParamPosition() + 1), |
566 | in_end: CD->param_end()); |
567 | TargetArgs.append( |
568 | in_start: std::next(x: CD->param_begin(), n: CD->getContextParamPosition() + 1), |
569 | in_end: CD->param_end()); |
570 | |
571 | // Create the function declaration. |
572 | const CGFunctionInfo &FuncInfo = |
573 | CGM.getTypes().arrangeBuiltinFunctionDeclaration(resultType: Ctx.VoidTy, args: TargetArgs); |
574 | llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(Info: FuncInfo); |
575 | |
576 | auto *F = |
577 | llvm::Function::Create(Ty: FuncLLVMTy, Linkage: llvm::GlobalValue::InternalLinkage, |
578 | N: FO.FunctionName, M: &CGM.getModule()); |
579 | CGM.SetInternalFunctionAttributes(GD: CD, F, FI: FuncInfo); |
580 | if (CD->isNothrow()) |
581 | F->setDoesNotThrow(); |
582 | F->setDoesNotRecurse(); |
583 | |
584 | // Always inline the outlined function if optimizations are enabled. |
585 | if (CGM.getCodeGenOpts().OptimizationLevel != 0) { |
586 | F->removeFnAttr(Kind: llvm::Attribute::NoInline); |
587 | F->addFnAttr(Kind: llvm::Attribute::AlwaysInline); |
588 | } |
589 | |
590 | // Generate the function. |
591 | CGF.StartFunction(GD: CD, RetTy: Ctx.VoidTy, Fn: F, FnInfo: FuncInfo, Args: TargetArgs, |
592 | Loc: FO.UIntPtrCastRequired ? FO.Loc : FO.S->getBeginLoc(), |
593 | StartLoc: FO.UIntPtrCastRequired ? FO.Loc |
594 | : CD->getBody()->getBeginLoc()); |
595 | unsigned Cnt = CD->getContextParamPosition(); |
596 | I = FO.S->captures().begin(); |
597 | for (const FieldDecl *FD : RD->fields()) { |
598 | // Do not map arguments if we emit function with non-original types. |
599 | Address LocalAddr(Address::invalid()); |
600 | if (!FO.UIntPtrCastRequired && Args[Cnt] != TargetArgs[Cnt]) { |
601 | LocalAddr = CGM.getOpenMPRuntime().getParameterAddress(CGF, NativeParam: Args[Cnt], |
602 | TargetParam: TargetArgs[Cnt]); |
603 | } else { |
604 | LocalAddr = CGF.GetAddrOfLocalVar(VD: Args[Cnt]); |
605 | } |
606 | // If we are capturing a pointer by copy we don't need to do anything, just |
607 | // use the value that we get from the arguments. |
608 | if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) { |
609 | const VarDecl *CurVD = I->getCapturedVar(); |
610 | if (!FO.RegisterCastedArgsOnly) |
611 | LocalAddrs.insert(KV: {Args[Cnt], {CurVD, LocalAddr}}); |
612 | ++Cnt; |
613 | ++I; |
614 | continue; |
615 | } |
616 | |
617 | LValue ArgLVal = CGF.MakeAddrLValue(Addr: LocalAddr, T: Args[Cnt]->getType(), |
618 | Source: AlignmentSource::Decl); |
619 | if (FD->hasCapturedVLAType()) { |
620 | if (FO.UIntPtrCastRequired) { |
621 | ArgLVal = CGF.MakeAddrLValue( |
622 | Addr: castValueFromUintptr(CGF, Loc: I->getLocation(), DstType: FD->getType(), |
623 | Name: Args[Cnt]->getName(), AddrLV: ArgLVal), |
624 | T: FD->getType(), Source: AlignmentSource::Decl); |
625 | } |
626 | llvm::Value *ExprArg = CGF.EmitLoadOfScalar(lvalue: ArgLVal, Loc: I->getLocation()); |
627 | const VariableArrayType *VAT = FD->getCapturedVLAType(); |
628 | VLASizes.try_emplace(Key: Args[Cnt], Args: VAT->getSizeExpr(), Args&: ExprArg); |
629 | } else if (I->capturesVariable()) { |
630 | const VarDecl *Var = I->getCapturedVar(); |
631 | QualType VarTy = Var->getType(); |
632 | Address ArgAddr = ArgLVal.getAddress(); |
633 | if (ArgLVal.getType()->isLValueReferenceType()) { |
634 | ArgAddr = CGF.EmitLoadOfReference(RefLVal: ArgLVal); |
635 | } else if (!VarTy->isVariablyModifiedType() || !VarTy->isPointerType()) { |
636 | assert(ArgLVal.getType()->isPointerType()); |
637 | ArgAddr = CGF.EmitLoadOfPointer( |
638 | Ptr: ArgAddr, PtrTy: ArgLVal.getType()->castAs<PointerType>()); |
639 | } |
640 | if (!FO.RegisterCastedArgsOnly) { |
641 | LocalAddrs.insert( |
642 | KV: {Args[Cnt], {Var, ArgAddr.withAlignment(NewAlignment: Ctx.getDeclAlign(D: Var))}}); |
643 | } |
644 | } else if (I->capturesVariableByCopy()) { |
645 | assert(!FD->getType()->isAnyPointerType() && |
646 | "Not expecting a captured pointer." ); |
647 | const VarDecl *Var = I->getCapturedVar(); |
648 | LocalAddrs.insert(KV: {Args[Cnt], |
649 | {Var, FO.UIntPtrCastRequired |
650 | ? castValueFromUintptr( |
651 | CGF, Loc: I->getLocation(), DstType: FD->getType(), |
652 | Name: Args[Cnt]->getName(), AddrLV: ArgLVal) |
653 | : ArgLVal.getAddress()}}); |
654 | } else { |
655 | // If 'this' is captured, load it into CXXThisValue. |
656 | assert(I->capturesThis()); |
657 | CXXThisValue = CGF.EmitLoadOfScalar(lvalue: ArgLVal, Loc: I->getLocation()); |
658 | LocalAddrs.insert(KV: {Args[Cnt], {nullptr, ArgLVal.getAddress()}}); |
659 | } |
660 | ++Cnt; |
661 | ++I; |
662 | } |
663 | |
664 | return F; |
665 | } |
666 | |
667 | llvm::Function * |
668 | CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S, |
669 | SourceLocation Loc) { |
670 | assert( |
671 | CapturedStmtInfo && |
672 | "CapturedStmtInfo should be set when generating the captured function" ); |
673 | const CapturedDecl *CD = S.getCapturedDecl(); |
674 | // Build the argument list. |
675 | bool NeedWrapperFunction = |
676 | getDebugInfo() && CGM.getCodeGenOpts().hasReducedDebugInfo(); |
677 | FunctionArgList Args, WrapperArgs; |
678 | llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs, |
679 | WrapperLocalAddrs; |
680 | llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes, |
681 | WrapperVLASizes; |
682 | SmallString<256> Buffer; |
683 | llvm::raw_svector_ostream Out(Buffer); |
684 | Out << CapturedStmtInfo->getHelperName(); |
685 | |
686 | CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true); |
687 | llvm::Function *WrapperF = nullptr; |
688 | if (NeedWrapperFunction) { |
689 | // Emit the final kernel early to allow attributes to be added by the |
690 | // OpenMPI-IR-Builder. |
691 | FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true, |
692 | /*RegisterCastedArgsOnly=*/true, |
693 | CapturedStmtInfo->getHelperName(), Loc); |
694 | WrapperCGF.CapturedStmtInfo = CapturedStmtInfo; |
695 | WrapperF = |
696 | emitOutlinedFunctionPrologue(CGF&: WrapperCGF, Args, LocalAddrs, VLASizes, |
697 | CXXThisValue&: WrapperCGF.CXXThisValue, FO: WrapperFO); |
698 | Out << "_debug__" ; |
699 | } |
700 | FunctionOptions FO(&S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false, |
701 | Out.str(), Loc); |
702 | llvm::Function *F = emitOutlinedFunctionPrologue( |
703 | CGF&: *this, Args&: WrapperArgs, LocalAddrs&: WrapperLocalAddrs, VLASizes&: WrapperVLASizes, CXXThisValue, FO); |
704 | CodeGenFunction::OMPPrivateScope LocalScope(*this); |
705 | for (const auto &LocalAddrPair : WrapperLocalAddrs) { |
706 | if (LocalAddrPair.second.first) { |
707 | LocalScope.addPrivate(LocalVD: LocalAddrPair.second.first, |
708 | Addr: LocalAddrPair.second.second); |
709 | } |
710 | } |
711 | (void)LocalScope.Privatize(); |
712 | for (const auto &VLASizePair : WrapperVLASizes) |
713 | VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second; |
714 | PGO->assignRegionCounters(GD: GlobalDecl(CD), Fn: F); |
715 | CapturedStmtInfo->EmitBody(CGF&: *this, S: CD->getBody()); |
716 | LocalScope.ForceCleanup(); |
717 | FinishFunction(EndLoc: CD->getBodyRBrace()); |
718 | if (!NeedWrapperFunction) |
719 | return F; |
720 | |
721 | // Reverse the order. |
722 | WrapperF->removeFromParent(); |
723 | F->getParent()->getFunctionList().insertAfter(where: F->getIterator(), New: WrapperF); |
724 | |
725 | llvm::SmallVector<llvm::Value *, 4> CallArgs; |
726 | auto *PI = F->arg_begin(); |
727 | for (const auto *Arg : Args) { |
728 | llvm::Value *CallArg; |
729 | auto I = LocalAddrs.find(Key: Arg); |
730 | if (I != LocalAddrs.end()) { |
731 | LValue LV = WrapperCGF.MakeAddrLValue( |
732 | Addr: I->second.second, |
733 | T: I->second.first ? I->second.first->getType() : Arg->getType(), |
734 | Source: AlignmentSource::Decl); |
735 | if (LV.getType()->isAnyComplexType()) |
736 | LV.setAddress(LV.getAddress().withElementType(ElemTy: PI->getType())); |
737 | CallArg = WrapperCGF.EmitLoadOfScalar(lvalue: LV, Loc: S.getBeginLoc()); |
738 | } else { |
739 | auto EI = VLASizes.find(Val: Arg); |
740 | if (EI != VLASizes.end()) { |
741 | CallArg = EI->second.second; |
742 | } else { |
743 | LValue LV = |
744 | WrapperCGF.MakeAddrLValue(Addr: WrapperCGF.GetAddrOfLocalVar(VD: Arg), |
745 | T: Arg->getType(), Source: AlignmentSource::Decl); |
746 | CallArg = WrapperCGF.EmitLoadOfScalar(lvalue: LV, Loc: S.getBeginLoc()); |
747 | } |
748 | } |
749 | CallArgs.emplace_back(Args: WrapperCGF.EmitFromMemory(Value: CallArg, Ty: Arg->getType())); |
750 | ++PI; |
751 | } |
752 | CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF&: WrapperCGF, Loc, OutlinedFn: F, Args: CallArgs); |
753 | WrapperCGF.FinishFunction(); |
754 | return WrapperF; |
755 | } |
756 | |
757 | //===----------------------------------------------------------------------===// |
758 | // OpenMP Directive Emission |
759 | //===----------------------------------------------------------------------===// |
760 | void CodeGenFunction::EmitOMPAggregateAssign( |
761 | Address DestAddr, Address SrcAddr, QualType OriginalType, |
762 | const llvm::function_ref<void(Address, Address)> CopyGen) { |
763 | // Perform element-by-element initialization. |
764 | QualType ElementTy; |
765 | |
766 | // Drill down to the base element type on both arrays. |
767 | const ArrayType *ArrayTy = OriginalType->getAsArrayTypeUnsafe(); |
768 | llvm::Value *NumElements = emitArrayLength(arrayType: ArrayTy, baseType&: ElementTy, addr&: DestAddr); |
769 | SrcAddr = SrcAddr.withElementType(ElemTy: DestAddr.getElementType()); |
770 | |
771 | llvm::Value *SrcBegin = SrcAddr.emitRawPointer(CGF&: *this); |
772 | llvm::Value *DestBegin = DestAddr.emitRawPointer(CGF&: *this); |
773 | // Cast from pointer to array type to pointer to single element. |
774 | llvm::Value *DestEnd = Builder.CreateInBoundsGEP(Ty: DestAddr.getElementType(), |
775 | Ptr: DestBegin, IdxList: NumElements); |
776 | |
777 | // The basic structure here is a while-do loop. |
778 | llvm::BasicBlock *BodyBB = createBasicBlock(name: "omp.arraycpy.body" ); |
779 | llvm::BasicBlock *DoneBB = createBasicBlock(name: "omp.arraycpy.done" ); |
780 | llvm::Value *IsEmpty = |
781 | Builder.CreateICmpEQ(LHS: DestBegin, RHS: DestEnd, Name: "omp.arraycpy.isempty" ); |
782 | Builder.CreateCondBr(Cond: IsEmpty, True: DoneBB, False: BodyBB); |
783 | |
784 | // Enter the loop body, making that address the current address. |
785 | llvm::BasicBlock *EntryBB = Builder.GetInsertBlock(); |
786 | EmitBlock(BB: BodyBB); |
787 | |
788 | CharUnits ElementSize = getContext().getTypeSizeInChars(T: ElementTy); |
789 | |
790 | llvm::PHINode *SrcElementPHI = |
791 | Builder.CreatePHI(Ty: SrcBegin->getType(), NumReservedValues: 2, Name: "omp.arraycpy.srcElementPast" ); |
792 | SrcElementPHI->addIncoming(V: SrcBegin, BB: EntryBB); |
793 | Address SrcElementCurrent = |
794 | Address(SrcElementPHI, SrcAddr.getElementType(), |
795 | SrcAddr.getAlignment().alignmentOfArrayElement(elementSize: ElementSize)); |
796 | |
797 | llvm::PHINode *DestElementPHI = Builder.CreatePHI( |
798 | Ty: DestBegin->getType(), NumReservedValues: 2, Name: "omp.arraycpy.destElementPast" ); |
799 | DestElementPHI->addIncoming(V: DestBegin, BB: EntryBB); |
800 | Address DestElementCurrent = |
801 | Address(DestElementPHI, DestAddr.getElementType(), |
802 | DestAddr.getAlignment().alignmentOfArrayElement(elementSize: ElementSize)); |
803 | |
804 | // Emit copy. |
805 | CopyGen(DestElementCurrent, SrcElementCurrent); |
806 | |
807 | // Shift the address forward by one element. |
808 | llvm::Value *DestElementNext = |
809 | Builder.CreateConstGEP1_32(Ty: DestAddr.getElementType(), Ptr: DestElementPHI, |
810 | /*Idx0=*/1, Name: "omp.arraycpy.dest.element" ); |
811 | llvm::Value *SrcElementNext = |
812 | Builder.CreateConstGEP1_32(Ty: SrcAddr.getElementType(), Ptr: SrcElementPHI, |
813 | /*Idx0=*/1, Name: "omp.arraycpy.src.element" ); |
814 | // Check whether we've reached the end. |
815 | llvm::Value *Done = |
816 | Builder.CreateICmpEQ(LHS: DestElementNext, RHS: DestEnd, Name: "omp.arraycpy.done" ); |
817 | Builder.CreateCondBr(Cond: Done, True: DoneBB, False: BodyBB); |
818 | DestElementPHI->addIncoming(V: DestElementNext, BB: Builder.GetInsertBlock()); |
819 | SrcElementPHI->addIncoming(V: SrcElementNext, BB: Builder.GetInsertBlock()); |
820 | |
821 | // Done. |
822 | EmitBlock(BB: DoneBB, /*IsFinished=*/true); |
823 | } |
824 | |
825 | void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr, |
826 | Address SrcAddr, const VarDecl *DestVD, |
827 | const VarDecl *SrcVD, const Expr *Copy) { |
828 | if (OriginalType->isArrayType()) { |
829 | const auto *BO = dyn_cast<BinaryOperator>(Val: Copy); |
830 | if (BO && BO->getOpcode() == BO_Assign) { |
831 | // Perform simple memcpy for simple copying. |
832 | LValue Dest = MakeAddrLValue(Addr: DestAddr, T: OriginalType); |
833 | LValue Src = MakeAddrLValue(Addr: SrcAddr, T: OriginalType); |
834 | EmitAggregateAssign(Dest, Src, EltTy: OriginalType); |
835 | } else { |
836 | // For arrays with complex element types perform element by element |
837 | // copying. |
838 | EmitOMPAggregateAssign( |
839 | DestAddr, SrcAddr, OriginalType, |
840 | CopyGen: [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) { |
841 | // Working with the single array element, so have to remap |
842 | // destination and source variables to corresponding array |
843 | // elements. |
844 | CodeGenFunction::OMPPrivateScope Remap(*this); |
845 | Remap.addPrivate(LocalVD: DestVD, Addr: DestElement); |
846 | Remap.addPrivate(LocalVD: SrcVD, Addr: SrcElement); |
847 | (void)Remap.Privatize(); |
848 | EmitIgnoredExpr(E: Copy); |
849 | }); |
850 | } |
851 | } else { |
852 | // Remap pseudo source variable to private copy. |
853 | CodeGenFunction::OMPPrivateScope Remap(*this); |
854 | Remap.addPrivate(LocalVD: SrcVD, Addr: SrcAddr); |
855 | Remap.addPrivate(LocalVD: DestVD, Addr: DestAddr); |
856 | (void)Remap.Privatize(); |
857 | // Emit copying of the whole variable. |
858 | EmitIgnoredExpr(E: Copy); |
859 | } |
860 | } |
861 | |
862 | bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D, |
863 | OMPPrivateScope &PrivateScope) { |
864 | if (!HaveInsertPoint()) |
865 | return false; |
866 | OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S: D); |
867 | bool DeviceConstTarget = getLangOpts().OpenMPIsTargetDevice && |
868 | isOpenMPTargetExecutionDirective(DKind: EKind); |
869 | bool FirstprivateIsLastprivate = false; |
870 | llvm::DenseMap<const VarDecl *, OpenMPLastprivateModifier> Lastprivates; |
871 | for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) { |
872 | for (const auto *D : C->varlist()) |
873 | Lastprivates.try_emplace( |
874 | Key: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: D)->getDecl())->getCanonicalDecl(), |
875 | Args: C->getKind()); |
876 | } |
877 | llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate; |
878 | llvm::SmallVector<OpenMPDirectiveKind, 4> CaptureRegions; |
879 | getOpenMPCaptureRegions(CaptureRegions, DKind: EKind); |
880 | // Force emission of the firstprivate copy if the directive does not emit |
881 | // outlined function, like omp for, omp simd, omp distribute etc. |
882 | bool MustEmitFirstprivateCopy = |
883 | CaptureRegions.size() == 1 && CaptureRegions.back() == OMPD_unknown; |
884 | for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) { |
885 | const auto *IRef = C->varlist_begin(); |
886 | const auto *InitsRef = C->inits().begin(); |
887 | for (const Expr *IInit : C->private_copies()) { |
888 | const auto *OrigVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IRef)->getDecl()); |
889 | bool ThisFirstprivateIsLastprivate = |
890 | Lastprivates.count(Val: OrigVD->getCanonicalDecl()) > 0; |
891 | const FieldDecl *FD = CapturedStmtInfo->lookup(VD: OrigVD); |
892 | const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: IInit)->getDecl()); |
893 | if (!MustEmitFirstprivateCopy && !ThisFirstprivateIsLastprivate && FD && |
894 | !FD->getType()->isReferenceType() && |
895 | (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) { |
896 | EmittedAsFirstprivate.insert(V: OrigVD->getCanonicalDecl()); |
897 | ++IRef; |
898 | ++InitsRef; |
899 | continue; |
900 | } |
901 | // Do not emit copy for firstprivate constant variables in target regions, |
902 | // captured by reference. |
903 | if (DeviceConstTarget && OrigVD->getType().isConstant(Ctx: getContext()) && |
904 | FD && FD->getType()->isReferenceType() && |
905 | (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) { |
906 | EmittedAsFirstprivate.insert(V: OrigVD->getCanonicalDecl()); |
907 | ++IRef; |
908 | ++InitsRef; |
909 | continue; |
910 | } |
911 | FirstprivateIsLastprivate = |
912 | FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate; |
913 | if (EmittedAsFirstprivate.insert(V: OrigVD->getCanonicalDecl()).second) { |
914 | const auto *VDInit = |
915 | cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *InitsRef)->getDecl()); |
916 | bool IsRegistered; |
917 | DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD), |
918 | /*RefersToEnclosingVariableOrCapture=*/FD != nullptr, |
919 | (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc()); |
920 | LValue OriginalLVal; |
921 | if (!FD) { |
922 | // Check if the firstprivate variable is just a constant value. |
923 | ConstantEmission CE = tryEmitAsConstant(RefExpr: &DRE); |
924 | if (CE && !CE.isReference()) { |
925 | // Constant value, no need to create a copy. |
926 | ++IRef; |
927 | ++InitsRef; |
928 | continue; |
929 | } |
930 | if (CE && CE.isReference()) { |
931 | OriginalLVal = CE.getReferenceLValue(CGF&: *this, RefExpr: &DRE); |
932 | } else { |
933 | assert(!CE && "Expected non-constant firstprivate." ); |
934 | OriginalLVal = EmitLValue(E: &DRE); |
935 | } |
936 | } else { |
937 | OriginalLVal = EmitLValue(E: &DRE); |
938 | } |
939 | QualType Type = VD->getType(); |
940 | if (Type->isArrayType()) { |
941 | // Emit VarDecl with copy init for arrays. |
942 | // Get the address of the original variable captured in current |
943 | // captured region. |
944 | AutoVarEmission Emission = EmitAutoVarAlloca(var: *VD); |
945 | const Expr *Init = VD->getInit(); |
946 | if (!isa<CXXConstructExpr>(Val: Init) || isTrivialInitializer(Init)) { |
947 | // Perform simple memcpy. |
948 | LValue Dest = MakeAddrLValue(Addr: Emission.getAllocatedAddress(), T: Type); |
949 | EmitAggregateAssign(Dest, Src: OriginalLVal, EltTy: Type); |
950 | } else { |
951 | EmitOMPAggregateAssign( |
952 | DestAddr: Emission.getAllocatedAddress(), SrcAddr: OriginalLVal.getAddress(), OriginalType: Type, |
953 | CopyGen: [this, VDInit, Init](Address DestElement, Address SrcElement) { |
954 | // Clean up any temporaries needed by the |
955 | // initialization. |
956 | RunCleanupsScope InitScope(*this); |
957 | // Emit initialization for single element. |
958 | setAddrOfLocalVar(VD: VDInit, Addr: SrcElement); |
959 | EmitAnyExprToMem(E: Init, Location: DestElement, |
960 | Quals: Init->getType().getQualifiers(), |
961 | /*IsInitializer*/ false); |
962 | LocalDeclMap.erase(Val: VDInit); |
963 | }); |
964 | } |
965 | EmitAutoVarCleanups(emission: Emission); |
966 | IsRegistered = |
967 | PrivateScope.addPrivate(LocalVD: OrigVD, Addr: Emission.getAllocatedAddress()); |
968 | } else { |
969 | Address OriginalAddr = OriginalLVal.getAddress(); |
970 | // Emit private VarDecl with copy init. |
971 | // Remap temp VDInit variable to the address of the original |
972 | // variable (for proper handling of captured global variables). |
973 | setAddrOfLocalVar(VD: VDInit, Addr: OriginalAddr); |
974 | EmitDecl(D: *VD); |
975 | LocalDeclMap.erase(Val: VDInit); |
976 | Address VDAddr = GetAddrOfLocalVar(VD); |
977 | if (ThisFirstprivateIsLastprivate && |
978 | Lastprivates[OrigVD->getCanonicalDecl()] == |
979 | OMPC_LASTPRIVATE_conditional) { |
980 | // Create/init special variable for lastprivate conditionals. |
981 | llvm::Value *V = |
982 | EmitLoadOfScalar(lvalue: MakeAddrLValue(Addr: VDAddr, T: (*IRef)->getType(), |
983 | Source: AlignmentSource::Decl), |
984 | Loc: (*IRef)->getExprLoc()); |
985 | VDAddr = CGM.getOpenMPRuntime().emitLastprivateConditionalInit( |
986 | CGF&: *this, VD: OrigVD); |
987 | EmitStoreOfScalar(value: V, lvalue: MakeAddrLValue(Addr: VDAddr, T: (*IRef)->getType(), |
988 | Source: AlignmentSource::Decl)); |
989 | LocalDeclMap.erase(Val: VD); |
990 | setAddrOfLocalVar(VD, Addr: VDAddr); |
991 | } |
992 | IsRegistered = PrivateScope.addPrivate(LocalVD: OrigVD, Addr: VDAddr); |
993 | } |
994 | assert(IsRegistered && |
995 | "firstprivate var already registered as private" ); |
996 | // Silence the warning about unused variable. |
997 | (void)IsRegistered; |
998 | } |
999 | ++IRef; |
1000 | ++InitsRef; |
1001 | } |
1002 | } |
1003 | return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty(); |
1004 | } |
1005 | |
1006 | void CodeGenFunction::EmitOMPPrivateClause( |
1007 | const OMPExecutableDirective &D, |
1008 | CodeGenFunction::OMPPrivateScope &PrivateScope) { |
1009 | if (!HaveInsertPoint()) |
1010 | return; |
1011 | llvm::DenseSet<const VarDecl *> EmittedAsPrivate; |
1012 | for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) { |
1013 | auto IRef = C->varlist_begin(); |
1014 | for (const Expr *IInit : C->private_copies()) { |
1015 | const auto *OrigVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IRef)->getDecl()); |
1016 | if (EmittedAsPrivate.insert(V: OrigVD->getCanonicalDecl()).second) { |
1017 | const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: IInit)->getDecl()); |
1018 | EmitDecl(D: *VD); |
1019 | // Emit private VarDecl with copy init. |
1020 | bool IsRegistered = |
1021 | PrivateScope.addPrivate(LocalVD: OrigVD, Addr: GetAddrOfLocalVar(VD)); |
1022 | assert(IsRegistered && "private var already registered as private" ); |
1023 | // Silence the warning about unused variable. |
1024 | (void)IsRegistered; |
1025 | } |
1026 | ++IRef; |
1027 | } |
1028 | } |
1029 | } |
1030 | |
1031 | bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) { |
1032 | if (!HaveInsertPoint()) |
1033 | return false; |
1034 | // threadprivate_var1 = master_threadprivate_var1; |
1035 | // operator=(threadprivate_var2, master_threadprivate_var2); |
1036 | // ... |
1037 | // __kmpc_barrier(&loc, global_tid); |
1038 | llvm::DenseSet<const VarDecl *> CopiedVars; |
1039 | llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr; |
1040 | for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) { |
1041 | auto IRef = C->varlist_begin(); |
1042 | auto ISrcRef = C->source_exprs().begin(); |
1043 | auto IDestRef = C->destination_exprs().begin(); |
1044 | for (const Expr *AssignOp : C->assignment_ops()) { |
1045 | const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IRef)->getDecl()); |
1046 | QualType Type = VD->getType(); |
1047 | if (CopiedVars.insert(V: VD->getCanonicalDecl()).second) { |
1048 | // Get the address of the master variable. If we are emitting code with |
1049 | // TLS support, the address is passed from the master as field in the |
1050 | // captured declaration. |
1051 | Address MasterAddr = Address::invalid(); |
1052 | if (getLangOpts().OpenMPUseTLS && |
1053 | getContext().getTargetInfo().isTLSSupported()) { |
1054 | assert(CapturedStmtInfo->lookup(VD) && |
1055 | "Copyin threadprivates should have been captured!" ); |
1056 | DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD), true, |
1057 | (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc()); |
1058 | MasterAddr = EmitLValue(E: &DRE).getAddress(); |
1059 | LocalDeclMap.erase(Val: VD); |
1060 | } else { |
1061 | MasterAddr = |
1062 | Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(D: VD) |
1063 | : CGM.GetAddrOfGlobal(GD: VD), |
1064 | CGM.getTypes().ConvertTypeForMem(T: VD->getType()), |
1065 | getContext().getDeclAlign(D: VD)); |
1066 | } |
1067 | // Get the address of the threadprivate variable. |
1068 | Address PrivateAddr = EmitLValue(E: *IRef).getAddress(); |
1069 | if (CopiedVars.size() == 1) { |
1070 | // At first check if current thread is a master thread. If it is, no |
1071 | // need to copy data. |
1072 | CopyBegin = createBasicBlock(name: "copyin.not.master" ); |
1073 | CopyEnd = createBasicBlock(name: "copyin.not.master.end" ); |
1074 | // TODO: Avoid ptrtoint conversion. |
1075 | auto *MasterAddrInt = Builder.CreatePtrToInt( |
1076 | V: MasterAddr.emitRawPointer(CGF&: *this), DestTy: CGM.IntPtrTy); |
1077 | auto *PrivateAddrInt = Builder.CreatePtrToInt( |
1078 | V: PrivateAddr.emitRawPointer(CGF&: *this), DestTy: CGM.IntPtrTy); |
1079 | Builder.CreateCondBr( |
1080 | Cond: Builder.CreateICmpNE(LHS: MasterAddrInt, RHS: PrivateAddrInt), True: CopyBegin, |
1081 | False: CopyEnd); |
1082 | EmitBlock(BB: CopyBegin); |
1083 | } |
1084 | const auto *SrcVD = |
1085 | cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *ISrcRef)->getDecl()); |
1086 | const auto *DestVD = |
1087 | cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IDestRef)->getDecl()); |
1088 | EmitOMPCopy(OriginalType: Type, DestAddr: PrivateAddr, SrcAddr: MasterAddr, DestVD, SrcVD, Copy: AssignOp); |
1089 | } |
1090 | ++IRef; |
1091 | ++ISrcRef; |
1092 | ++IDestRef; |
1093 | } |
1094 | } |
1095 | if (CopyEnd) { |
1096 | // Exit out of copying procedure for non-master thread. |
1097 | EmitBlock(BB: CopyEnd, /*IsFinished=*/true); |
1098 | return true; |
1099 | } |
1100 | return false; |
1101 | } |
1102 | |
1103 | bool CodeGenFunction::EmitOMPLastprivateClauseInit( |
1104 | const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) { |
1105 | if (!HaveInsertPoint()) |
1106 | return false; |
1107 | bool HasAtLeastOneLastprivate = false; |
1108 | OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S: D); |
1109 | llvm::DenseSet<const VarDecl *> SIMDLCVs; |
1110 | if (isOpenMPSimdDirective(DKind: EKind)) { |
1111 | const auto *LoopDirective = cast<OMPLoopDirective>(Val: &D); |
1112 | for (const Expr *C : LoopDirective->counters()) { |
1113 | SIMDLCVs.insert( |
1114 | V: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: C)->getDecl())->getCanonicalDecl()); |
1115 | } |
1116 | } |
1117 | llvm::DenseSet<const VarDecl *> AlreadyEmittedVars; |
1118 | for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) { |
1119 | HasAtLeastOneLastprivate = true; |
1120 | if (isOpenMPTaskLoopDirective(DKind: EKind) && !getLangOpts().OpenMPSimd) |
1121 | break; |
1122 | const auto *IRef = C->varlist_begin(); |
1123 | const auto *IDestRef = C->destination_exprs().begin(); |
1124 | for (const Expr *IInit : C->private_copies()) { |
1125 | // Keep the address of the original variable for future update at the end |
1126 | // of the loop. |
1127 | const auto *OrigVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IRef)->getDecl()); |
1128 | // Taskloops do not require additional initialization, it is done in |
1129 | // runtime support library. |
1130 | if (AlreadyEmittedVars.insert(V: OrigVD->getCanonicalDecl()).second) { |
1131 | const auto *DestVD = |
1132 | cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IDestRef)->getDecl()); |
1133 | DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD), |
1134 | /*RefersToEnclosingVariableOrCapture=*/ |
1135 | CapturedStmtInfo->lookup(VD: OrigVD) != nullptr, |
1136 | (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc()); |
1137 | PrivateScope.addPrivate(LocalVD: DestVD, Addr: EmitLValue(E: &DRE).getAddress()); |
1138 | // Check if the variable is also a firstprivate: in this case IInit is |
1139 | // not generated. Initialization of this variable will happen in codegen |
1140 | // for 'firstprivate' clause. |
1141 | if (IInit && !SIMDLCVs.count(V: OrigVD->getCanonicalDecl())) { |
1142 | const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: IInit)->getDecl()); |
1143 | Address VDAddr = Address::invalid(); |
1144 | if (C->getKind() == OMPC_LASTPRIVATE_conditional) { |
1145 | VDAddr = CGM.getOpenMPRuntime().emitLastprivateConditionalInit( |
1146 | CGF&: *this, VD: OrigVD); |
1147 | setAddrOfLocalVar(VD, Addr: VDAddr); |
1148 | } else { |
1149 | // Emit private VarDecl with copy init. |
1150 | EmitDecl(D: *VD); |
1151 | VDAddr = GetAddrOfLocalVar(VD); |
1152 | } |
1153 | bool IsRegistered = PrivateScope.addPrivate(LocalVD: OrigVD, Addr: VDAddr); |
1154 | assert(IsRegistered && |
1155 | "lastprivate var already registered as private" ); |
1156 | (void)IsRegistered; |
1157 | } |
1158 | } |
1159 | ++IRef; |
1160 | ++IDestRef; |
1161 | } |
1162 | } |
1163 | return HasAtLeastOneLastprivate; |
1164 | } |
1165 | |
1166 | void CodeGenFunction::EmitOMPLastprivateClauseFinal( |
1167 | const OMPExecutableDirective &D, bool NoFinals, |
1168 | llvm::Value *IsLastIterCond) { |
1169 | if (!HaveInsertPoint()) |
1170 | return; |
1171 | // Emit following code: |
1172 | // if (<IsLastIterCond>) { |
1173 | // orig_var1 = private_orig_var1; |
1174 | // ... |
1175 | // orig_varn = private_orig_varn; |
1176 | // } |
1177 | llvm::BasicBlock *ThenBB = nullptr; |
1178 | llvm::BasicBlock *DoneBB = nullptr; |
1179 | if (IsLastIterCond) { |
1180 | // Emit implicit barrier if at least one lastprivate conditional is found |
1181 | // and this is not a simd mode. |
1182 | if (!getLangOpts().OpenMPSimd && |
1183 | llvm::any_of(Range: D.getClausesOfKind<OMPLastprivateClause>(), |
1184 | P: [](const OMPLastprivateClause *C) { |
1185 | return C->getKind() == OMPC_LASTPRIVATE_conditional; |
1186 | })) { |
1187 | CGM.getOpenMPRuntime().emitBarrierCall(CGF&: *this, Loc: D.getBeginLoc(), |
1188 | Kind: OMPD_unknown, |
1189 | /*EmitChecks=*/false, |
1190 | /*ForceSimpleCall=*/true); |
1191 | } |
1192 | ThenBB = createBasicBlock(name: ".omp.lastprivate.then" ); |
1193 | DoneBB = createBasicBlock(name: ".omp.lastprivate.done" ); |
1194 | Builder.CreateCondBr(Cond: IsLastIterCond, True: ThenBB, False: DoneBB); |
1195 | EmitBlock(BB: ThenBB); |
1196 | } |
1197 | llvm::DenseSet<const VarDecl *> AlreadyEmittedVars; |
1198 | llvm::DenseMap<const VarDecl *, const Expr *> LoopCountersAndUpdates; |
1199 | if (const auto *LoopDirective = dyn_cast<OMPLoopDirective>(Val: &D)) { |
1200 | auto IC = LoopDirective->counters().begin(); |
1201 | for (const Expr *F : LoopDirective->finals()) { |
1202 | const auto *D = |
1203 | cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IC)->getDecl())->getCanonicalDecl(); |
1204 | if (NoFinals) |
1205 | AlreadyEmittedVars.insert(V: D); |
1206 | else |
1207 | LoopCountersAndUpdates[D] = F; |
1208 | ++IC; |
1209 | } |
1210 | } |
1211 | for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) { |
1212 | auto IRef = C->varlist_begin(); |
1213 | auto ISrcRef = C->source_exprs().begin(); |
1214 | auto IDestRef = C->destination_exprs().begin(); |
1215 | for (const Expr *AssignOp : C->assignment_ops()) { |
1216 | const auto *PrivateVD = |
1217 | cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IRef)->getDecl()); |
1218 | QualType Type = PrivateVD->getType(); |
1219 | const auto *CanonicalVD = PrivateVD->getCanonicalDecl(); |
1220 | if (AlreadyEmittedVars.insert(V: CanonicalVD).second) { |
1221 | // If lastprivate variable is a loop control variable for loop-based |
1222 | // directive, update its value before copyin back to original |
1223 | // variable. |
1224 | if (const Expr *FinalExpr = LoopCountersAndUpdates.lookup(Val: CanonicalVD)) |
1225 | EmitIgnoredExpr(E: FinalExpr); |
1226 | const auto *SrcVD = |
1227 | cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *ISrcRef)->getDecl()); |
1228 | const auto *DestVD = |
1229 | cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IDestRef)->getDecl()); |
1230 | // Get the address of the private variable. |
1231 | Address PrivateAddr = GetAddrOfLocalVar(VD: PrivateVD); |
1232 | if (const auto *RefTy = PrivateVD->getType()->getAs<ReferenceType>()) |
1233 | PrivateAddr = Address( |
1234 | Builder.CreateLoad(Addr: PrivateAddr), |
1235 | CGM.getTypes().ConvertTypeForMem(T: RefTy->getPointeeType()), |
1236 | CGM.getNaturalTypeAlignment(T: RefTy->getPointeeType())); |
1237 | // Store the last value to the private copy in the last iteration. |
1238 | if (C->getKind() == OMPC_LASTPRIVATE_conditional) |
1239 | CGM.getOpenMPRuntime().emitLastprivateConditionalFinalUpdate( |
1240 | CGF&: *this, PrivLVal: MakeAddrLValue(Addr: PrivateAddr, T: (*IRef)->getType()), VD: PrivateVD, |
1241 | Loc: (*IRef)->getExprLoc()); |
1242 | // Get the address of the original variable. |
1243 | Address OriginalAddr = GetAddrOfLocalVar(VD: DestVD); |
1244 | EmitOMPCopy(OriginalType: Type, DestAddr: OriginalAddr, SrcAddr: PrivateAddr, DestVD, SrcVD, Copy: AssignOp); |
1245 | } |
1246 | ++IRef; |
1247 | ++ISrcRef; |
1248 | ++IDestRef; |
1249 | } |
1250 | if (const Expr *PostUpdate = C->getPostUpdateExpr()) |
1251 | EmitIgnoredExpr(E: PostUpdate); |
1252 | } |
1253 | if (IsLastIterCond) |
1254 | EmitBlock(BB: DoneBB, /*IsFinished=*/true); |
1255 | } |
1256 | |
1257 | void CodeGenFunction::EmitOMPReductionClauseInit( |
1258 | const OMPExecutableDirective &D, |
1259 | CodeGenFunction::OMPPrivateScope &PrivateScope, bool ForInscan) { |
1260 | if (!HaveInsertPoint()) |
1261 | return; |
1262 | SmallVector<const Expr *, 4> Shareds; |
1263 | SmallVector<const Expr *, 4> Privates; |
1264 | SmallVector<const Expr *, 4> ReductionOps; |
1265 | SmallVector<const Expr *, 4> LHSs; |
1266 | SmallVector<const Expr *, 4> RHSs; |
1267 | OMPTaskDataTy Data; |
1268 | SmallVector<const Expr *, 4> TaskLHSs; |
1269 | SmallVector<const Expr *, 4> TaskRHSs; |
1270 | for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) { |
1271 | if (ForInscan != (C->getModifier() == OMPC_REDUCTION_inscan)) |
1272 | continue; |
1273 | Shareds.append(in_start: C->varlist_begin(), in_end: C->varlist_end()); |
1274 | Privates.append(in_start: C->privates().begin(), in_end: C->privates().end()); |
1275 | ReductionOps.append(in_start: C->reduction_ops().begin(), in_end: C->reduction_ops().end()); |
1276 | LHSs.append(in_start: C->lhs_exprs().begin(), in_end: C->lhs_exprs().end()); |
1277 | RHSs.append(in_start: C->rhs_exprs().begin(), in_end: C->rhs_exprs().end()); |
1278 | if (C->getModifier() == OMPC_REDUCTION_task) { |
1279 | Data.ReductionVars.append(in_start: C->privates().begin(), in_end: C->privates().end()); |
1280 | Data.ReductionOrigs.append(in_start: C->varlist_begin(), in_end: C->varlist_end()); |
1281 | Data.ReductionCopies.append(in_start: C->privates().begin(), in_end: C->privates().end()); |
1282 | Data.ReductionOps.append(in_start: C->reduction_ops().begin(), |
1283 | in_end: C->reduction_ops().end()); |
1284 | TaskLHSs.append(in_start: C->lhs_exprs().begin(), in_end: C->lhs_exprs().end()); |
1285 | TaskRHSs.append(in_start: C->rhs_exprs().begin(), in_end: C->rhs_exprs().end()); |
1286 | } |
1287 | } |
1288 | ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps); |
1289 | unsigned Count = 0; |
1290 | auto *ILHS = LHSs.begin(); |
1291 | auto *IRHS = RHSs.begin(); |
1292 | auto *IPriv = Privates.begin(); |
1293 | for (const Expr *IRef : Shareds) { |
1294 | const auto *PrivateVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IPriv)->getDecl()); |
1295 | // Emit private VarDecl with reduction init. |
1296 | RedCG.emitSharedOrigLValue(CGF&: *this, N: Count); |
1297 | RedCG.emitAggregateType(CGF&: *this, N: Count); |
1298 | AutoVarEmission Emission = EmitAutoVarAlloca(var: *PrivateVD); |
1299 | RedCG.emitInitialization(CGF&: *this, N: Count, PrivateAddr: Emission.getAllocatedAddress(), |
1300 | SharedAddr: RedCG.getSharedLValue(N: Count).getAddress(), |
1301 | DefaultInit: [&Emission](CodeGenFunction &CGF) { |
1302 | CGF.EmitAutoVarInit(emission: Emission); |
1303 | return true; |
1304 | }); |
1305 | EmitAutoVarCleanups(emission: Emission); |
1306 | Address BaseAddr = RedCG.adjustPrivateAddress( |
1307 | CGF&: *this, N: Count, PrivateAddr: Emission.getAllocatedAddress()); |
1308 | bool IsRegistered = |
1309 | PrivateScope.addPrivate(LocalVD: RedCG.getBaseDecl(N: Count), Addr: BaseAddr); |
1310 | assert(IsRegistered && "private var already registered as private" ); |
1311 | // Silence the warning about unused variable. |
1312 | (void)IsRegistered; |
1313 | |
1314 | const auto *LHSVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *ILHS)->getDecl()); |
1315 | const auto *RHSVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IRHS)->getDecl()); |
1316 | QualType Type = PrivateVD->getType(); |
1317 | bool isaOMPArraySectionExpr = isa<ArraySectionExpr>(Val: IRef); |
1318 | if (isaOMPArraySectionExpr && Type->isVariablyModifiedType()) { |
1319 | // Store the address of the original variable associated with the LHS |
1320 | // implicit variable. |
1321 | PrivateScope.addPrivate(LocalVD: LHSVD, Addr: RedCG.getSharedLValue(N: Count).getAddress()); |
1322 | PrivateScope.addPrivate(LocalVD: RHSVD, Addr: GetAddrOfLocalVar(VD: PrivateVD)); |
1323 | } else if ((isaOMPArraySectionExpr && Type->isScalarType()) || |
1324 | isa<ArraySubscriptExpr>(Val: IRef)) { |
1325 | // Store the address of the original variable associated with the LHS |
1326 | // implicit variable. |
1327 | PrivateScope.addPrivate(LocalVD: LHSVD, Addr: RedCG.getSharedLValue(N: Count).getAddress()); |
1328 | PrivateScope.addPrivate(LocalVD: RHSVD, |
1329 | Addr: GetAddrOfLocalVar(VD: PrivateVD).withElementType( |
1330 | ElemTy: ConvertTypeForMem(T: RHSVD->getType()))); |
1331 | } else { |
1332 | QualType Type = PrivateVD->getType(); |
1333 | bool IsArray = getContext().getAsArrayType(T: Type) != nullptr; |
1334 | Address OriginalAddr = RedCG.getSharedLValue(N: Count).getAddress(); |
1335 | // Store the address of the original variable associated with the LHS |
1336 | // implicit variable. |
1337 | if (IsArray) { |
1338 | OriginalAddr = |
1339 | OriginalAddr.withElementType(ElemTy: ConvertTypeForMem(T: LHSVD->getType())); |
1340 | } |
1341 | PrivateScope.addPrivate(LocalVD: LHSVD, Addr: OriginalAddr); |
1342 | PrivateScope.addPrivate( |
1343 | LocalVD: RHSVD, Addr: IsArray ? GetAddrOfLocalVar(VD: PrivateVD).withElementType( |
1344 | ElemTy: ConvertTypeForMem(T: RHSVD->getType())) |
1345 | : GetAddrOfLocalVar(VD: PrivateVD)); |
1346 | } |
1347 | ++ILHS; |
1348 | ++IRHS; |
1349 | ++IPriv; |
1350 | ++Count; |
1351 | } |
1352 | if (!Data.ReductionVars.empty()) { |
1353 | OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S: D); |
1354 | Data.IsReductionWithTaskMod = true; |
1355 | Data.IsWorksharingReduction = isOpenMPWorksharingDirective(DKind: EKind); |
1356 | llvm::Value *ReductionDesc = CGM.getOpenMPRuntime().emitTaskReductionInit( |
1357 | CGF&: *this, Loc: D.getBeginLoc(), LHSExprs: TaskLHSs, RHSExprs: TaskRHSs, Data); |
1358 | const Expr *TaskRedRef = nullptr; |
1359 | switch (EKind) { |
1360 | case OMPD_parallel: |
1361 | TaskRedRef = cast<OMPParallelDirective>(Val: D).getTaskReductionRefExpr(); |
1362 | break; |
1363 | case OMPD_for: |
1364 | TaskRedRef = cast<OMPForDirective>(Val: D).getTaskReductionRefExpr(); |
1365 | break; |
1366 | case OMPD_sections: |
1367 | TaskRedRef = cast<OMPSectionsDirective>(Val: D).getTaskReductionRefExpr(); |
1368 | break; |
1369 | case OMPD_parallel_for: |
1370 | TaskRedRef = cast<OMPParallelForDirective>(Val: D).getTaskReductionRefExpr(); |
1371 | break; |
1372 | case OMPD_parallel_master: |
1373 | TaskRedRef = |
1374 | cast<OMPParallelMasterDirective>(Val: D).getTaskReductionRefExpr(); |
1375 | break; |
1376 | case OMPD_parallel_sections: |
1377 | TaskRedRef = |
1378 | cast<OMPParallelSectionsDirective>(Val: D).getTaskReductionRefExpr(); |
1379 | break; |
1380 | case OMPD_target_parallel: |
1381 | TaskRedRef = |
1382 | cast<OMPTargetParallelDirective>(Val: D).getTaskReductionRefExpr(); |
1383 | break; |
1384 | case OMPD_target_parallel_for: |
1385 | TaskRedRef = |
1386 | cast<OMPTargetParallelForDirective>(Val: D).getTaskReductionRefExpr(); |
1387 | break; |
1388 | case OMPD_distribute_parallel_for: |
1389 | TaskRedRef = |
1390 | cast<OMPDistributeParallelForDirective>(Val: D).getTaskReductionRefExpr(); |
1391 | break; |
1392 | case OMPD_teams_distribute_parallel_for: |
1393 | TaskRedRef = cast<OMPTeamsDistributeParallelForDirective>(Val: D) |
1394 | .getTaskReductionRefExpr(); |
1395 | break; |
1396 | case OMPD_target_teams_distribute_parallel_for: |
1397 | TaskRedRef = cast<OMPTargetTeamsDistributeParallelForDirective>(Val: D) |
1398 | .getTaskReductionRefExpr(); |
1399 | break; |
1400 | case OMPD_simd: |
1401 | case OMPD_for_simd: |
1402 | case OMPD_section: |
1403 | case OMPD_single: |
1404 | case OMPD_master: |
1405 | case OMPD_critical: |
1406 | case OMPD_parallel_for_simd: |
1407 | case OMPD_task: |
1408 | case OMPD_taskyield: |
1409 | case OMPD_error: |
1410 | case OMPD_barrier: |
1411 | case OMPD_taskwait: |
1412 | case OMPD_taskgroup: |
1413 | case OMPD_flush: |
1414 | case OMPD_depobj: |
1415 | case OMPD_scan: |
1416 | case OMPD_ordered: |
1417 | case OMPD_atomic: |
1418 | case OMPD_teams: |
1419 | case OMPD_target: |
1420 | case OMPD_cancellation_point: |
1421 | case OMPD_cancel: |
1422 | case OMPD_target_data: |
1423 | case OMPD_target_enter_data: |
1424 | case OMPD_target_exit_data: |
1425 | case OMPD_taskloop: |
1426 | case OMPD_taskloop_simd: |
1427 | case OMPD_master_taskloop: |
1428 | case OMPD_master_taskloop_simd: |
1429 | case OMPD_parallel_master_taskloop: |
1430 | case OMPD_parallel_master_taskloop_simd: |
1431 | case OMPD_distribute: |
1432 | case OMPD_target_update: |
1433 | case OMPD_distribute_parallel_for_simd: |
1434 | case OMPD_distribute_simd: |
1435 | case OMPD_target_parallel_for_simd: |
1436 | case OMPD_target_simd: |
1437 | case OMPD_teams_distribute: |
1438 | case OMPD_teams_distribute_simd: |
1439 | case OMPD_teams_distribute_parallel_for_simd: |
1440 | case OMPD_target_teams: |
1441 | case OMPD_target_teams_distribute: |
1442 | case OMPD_target_teams_distribute_parallel_for_simd: |
1443 | case OMPD_target_teams_distribute_simd: |
1444 | case OMPD_declare_target: |
1445 | case OMPD_end_declare_target: |
1446 | case OMPD_threadprivate: |
1447 | case OMPD_allocate: |
1448 | case OMPD_declare_reduction: |
1449 | case OMPD_declare_mapper: |
1450 | case OMPD_declare_simd: |
1451 | case OMPD_requires: |
1452 | case OMPD_declare_variant: |
1453 | case OMPD_begin_declare_variant: |
1454 | case OMPD_end_declare_variant: |
1455 | case OMPD_unknown: |
1456 | default: |
1457 | llvm_unreachable("Unexpected directive with task reductions." ); |
1458 | } |
1459 | |
1460 | const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: TaskRedRef)->getDecl()); |
1461 | EmitVarDecl(D: *VD); |
1462 | EmitStoreOfScalar(Value: ReductionDesc, Addr: GetAddrOfLocalVar(VD), |
1463 | /*Volatile=*/false, Ty: TaskRedRef->getType()); |
1464 | } |
1465 | } |
1466 | |
1467 | void CodeGenFunction::EmitOMPReductionClauseFinal( |
1468 | const OMPExecutableDirective &D, const OpenMPDirectiveKind ReductionKind) { |
1469 | if (!HaveInsertPoint()) |
1470 | return; |
1471 | llvm::SmallVector<const Expr *, 8> Privates; |
1472 | llvm::SmallVector<const Expr *, 8> LHSExprs; |
1473 | llvm::SmallVector<const Expr *, 8> RHSExprs; |
1474 | llvm::SmallVector<const Expr *, 8> ReductionOps; |
1475 | llvm::SmallVector<bool, 8> IsPrivateVarReduction; |
1476 | bool HasAtLeastOneReduction = false; |
1477 | bool IsReductionWithTaskMod = false; |
1478 | for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) { |
1479 | // Do not emit for inscan reductions. |
1480 | if (C->getModifier() == OMPC_REDUCTION_inscan) |
1481 | continue; |
1482 | HasAtLeastOneReduction = true; |
1483 | Privates.append(in_start: C->privates().begin(), in_end: C->privates().end()); |
1484 | LHSExprs.append(in_start: C->lhs_exprs().begin(), in_end: C->lhs_exprs().end()); |
1485 | RHSExprs.append(in_start: C->rhs_exprs().begin(), in_end: C->rhs_exprs().end()); |
1486 | IsPrivateVarReduction.append(in_start: C->private_var_reduction_flags().begin(), |
1487 | in_end: C->private_var_reduction_flags().end()); |
1488 | ReductionOps.append(in_start: C->reduction_ops().begin(), in_end: C->reduction_ops().end()); |
1489 | IsReductionWithTaskMod = |
1490 | IsReductionWithTaskMod || C->getModifier() == OMPC_REDUCTION_task; |
1491 | } |
1492 | if (HasAtLeastOneReduction) { |
1493 | OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S: D); |
1494 | if (IsReductionWithTaskMod) { |
1495 | CGM.getOpenMPRuntime().emitTaskReductionFini( |
1496 | CGF&: *this, Loc: D.getBeginLoc(), IsWorksharingReduction: isOpenMPWorksharingDirective(DKind: EKind)); |
1497 | } |
1498 | bool TeamsLoopCanBeParallel = false; |
1499 | if (auto *TTLD = dyn_cast<OMPTargetTeamsGenericLoopDirective>(Val: &D)) |
1500 | TeamsLoopCanBeParallel = TTLD->canBeParallelFor(); |
1501 | bool WithNowait = D.getSingleClause<OMPNowaitClause>() || |
1502 | isOpenMPParallelDirective(DKind: EKind) || |
1503 | TeamsLoopCanBeParallel || ReductionKind == OMPD_simd; |
1504 | bool SimpleReduction = ReductionKind == OMPD_simd; |
1505 | // Emit nowait reduction if nowait clause is present or directive is a |
1506 | // parallel directive (it always has implicit barrier). |
1507 | CGM.getOpenMPRuntime().emitReduction( |
1508 | CGF&: *this, Loc: D.getEndLoc(), Privates, LHSExprs, RHSExprs, ReductionOps, |
1509 | Options: {.WithNowait: WithNowait, .SimpleReduction: SimpleReduction, .IsPrivateVarReduction: IsPrivateVarReduction, .ReductionKind: ReductionKind}); |
1510 | } |
1511 | } |
1512 | |
1513 | static void emitPostUpdateForReductionClause( |
1514 | CodeGenFunction &CGF, const OMPExecutableDirective &D, |
1515 | const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) { |
1516 | if (!CGF.HaveInsertPoint()) |
1517 | return; |
1518 | llvm::BasicBlock *DoneBB = nullptr; |
1519 | for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) { |
1520 | if (const Expr *PostUpdate = C->getPostUpdateExpr()) { |
1521 | if (!DoneBB) { |
1522 | if (llvm::Value *Cond = CondGen(CGF)) { |
1523 | // If the first post-update expression is found, emit conditional |
1524 | // block if it was requested. |
1525 | llvm::BasicBlock *ThenBB = CGF.createBasicBlock(name: ".omp.reduction.pu" ); |
1526 | DoneBB = CGF.createBasicBlock(name: ".omp.reduction.pu.done" ); |
1527 | CGF.Builder.CreateCondBr(Cond, True: ThenBB, False: DoneBB); |
1528 | CGF.EmitBlock(BB: ThenBB); |
1529 | } |
1530 | } |
1531 | CGF.EmitIgnoredExpr(E: PostUpdate); |
1532 | } |
1533 | } |
1534 | if (DoneBB) |
1535 | CGF.EmitBlock(BB: DoneBB, /*IsFinished=*/true); |
1536 | } |
1537 | |
1538 | namespace { |
1539 | /// Codegen lambda for appending distribute lower and upper bounds to outlined |
1540 | /// parallel function. This is necessary for combined constructs such as |
1541 | /// 'distribute parallel for' |
1542 | typedef llvm::function_ref<void(CodeGenFunction &, |
1543 | const OMPExecutableDirective &, |
1544 | llvm::SmallVectorImpl<llvm::Value *> &)> |
1545 | CodeGenBoundParametersTy; |
1546 | } // anonymous namespace |
1547 | |
1548 | static void |
1549 | checkForLastprivateConditionalUpdate(CodeGenFunction &CGF, |
1550 | const OMPExecutableDirective &S) { |
1551 | if (CGF.getLangOpts().OpenMP < 50) |
1552 | return; |
1553 | llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> PrivateDecls; |
1554 | for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) { |
1555 | for (const Expr *Ref : C->varlist()) { |
1556 | if (!Ref->getType()->isScalarType()) |
1557 | continue; |
1558 | const auto *DRE = dyn_cast<DeclRefExpr>(Val: Ref->IgnoreParenImpCasts()); |
1559 | if (!DRE) |
1560 | continue; |
1561 | PrivateDecls.insert(V: cast<VarDecl>(Val: DRE->getDecl())); |
1562 | CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, LHS: Ref); |
1563 | } |
1564 | } |
1565 | for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { |
1566 | for (const Expr *Ref : C->varlist()) { |
1567 | if (!Ref->getType()->isScalarType()) |
1568 | continue; |
1569 | const auto *DRE = dyn_cast<DeclRefExpr>(Val: Ref->IgnoreParenImpCasts()); |
1570 | if (!DRE) |
1571 | continue; |
1572 | PrivateDecls.insert(V: cast<VarDecl>(Val: DRE->getDecl())); |
1573 | CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, LHS: Ref); |
1574 | } |
1575 | } |
1576 | for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) { |
1577 | for (const Expr *Ref : C->varlist()) { |
1578 | if (!Ref->getType()->isScalarType()) |
1579 | continue; |
1580 | const auto *DRE = dyn_cast<DeclRefExpr>(Val: Ref->IgnoreParenImpCasts()); |
1581 | if (!DRE) |
1582 | continue; |
1583 | PrivateDecls.insert(V: cast<VarDecl>(Val: DRE->getDecl())); |
1584 | CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, LHS: Ref); |
1585 | } |
1586 | } |
1587 | // Privates should ne analyzed since they are not captured at all. |
1588 | // Task reductions may be skipped - tasks are ignored. |
1589 | // Firstprivates do not return value but may be passed by reference - no need |
1590 | // to check for updated lastprivate conditional. |
1591 | for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) { |
1592 | for (const Expr *Ref : C->varlist()) { |
1593 | if (!Ref->getType()->isScalarType()) |
1594 | continue; |
1595 | const auto *DRE = dyn_cast<DeclRefExpr>(Val: Ref->IgnoreParenImpCasts()); |
1596 | if (!DRE) |
1597 | continue; |
1598 | PrivateDecls.insert(V: cast<VarDecl>(Val: DRE->getDecl())); |
1599 | } |
1600 | } |
1601 | CGF.CGM.getOpenMPRuntime().checkAndEmitSharedLastprivateConditional( |
1602 | CGF, D: S, IgnoredDecls: PrivateDecls); |
1603 | } |
1604 | |
1605 | static void emitCommonOMPParallelDirective( |
1606 | CodeGenFunction &CGF, const OMPExecutableDirective &S, |
1607 | OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, |
1608 | const CodeGenBoundParametersTy &CodeGenBoundParameters) { |
1609 | const CapturedStmt *CS = S.getCapturedStmt(RegionKind: OMPD_parallel); |
1610 | llvm::Value *NumThreads = nullptr; |
1611 | llvm::Function *OutlinedFn = |
1612 | CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction( |
1613 | CGF, D: S, ThreadIDVar: *CS->getCapturedDecl()->param_begin(), InnermostKind, |
1614 | CodeGen); |
1615 | if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) { |
1616 | CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF); |
1617 | NumThreads = CGF.EmitScalarExpr(E: NumThreadsClause->getNumThreads(), |
1618 | /*IgnoreResultAssign=*/true); |
1619 | CGF.CGM.getOpenMPRuntime().emitNumThreadsClause( |
1620 | CGF, NumThreads, Loc: NumThreadsClause->getBeginLoc()); |
1621 | } |
1622 | if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) { |
1623 | CodeGenFunction::RunCleanupsScope ProcBindScope(CGF); |
1624 | CGF.CGM.getOpenMPRuntime().emitProcBindClause( |
1625 | CGF, ProcBind: ProcBindClause->getProcBindKind(), Loc: ProcBindClause->getBeginLoc()); |
1626 | } |
1627 | const Expr *IfCond = nullptr; |
1628 | for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { |
1629 | if (C->getNameModifier() == OMPD_unknown || |
1630 | C->getNameModifier() == OMPD_parallel) { |
1631 | IfCond = C->getCondition(); |
1632 | break; |
1633 | } |
1634 | } |
1635 | |
1636 | OMPParallelScope Scope(CGF, S); |
1637 | llvm::SmallVector<llvm::Value *, 16> CapturedVars; |
1638 | // Combining 'distribute' with 'for' requires sharing each 'distribute' chunk |
1639 | // lower and upper bounds with the pragma 'for' chunking mechanism. |
1640 | // The following lambda takes care of appending the lower and upper bound |
1641 | // parameters when necessary |
1642 | CodeGenBoundParameters(CGF, S, CapturedVars); |
1643 | CGF.GenerateOpenMPCapturedVars(S: *CS, CapturedVars); |
1644 | CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, Loc: S.getBeginLoc(), OutlinedFn, |
1645 | CapturedVars, IfCond, NumThreads); |
1646 | } |
1647 | |
1648 | static bool isAllocatableDecl(const VarDecl *VD) { |
1649 | const VarDecl *CVD = VD->getCanonicalDecl(); |
1650 | if (!CVD->hasAttr<OMPAllocateDeclAttr>()) |
1651 | return false; |
1652 | const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>(); |
1653 | // Use the default allocation. |
1654 | return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc || |
1655 | AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) && |
1656 | !AA->getAllocator()); |
1657 | } |
1658 | |
1659 | static void emitEmptyBoundParameters(CodeGenFunction &, |
1660 | const OMPExecutableDirective &, |
1661 | llvm::SmallVectorImpl<llvm::Value *> &) {} |
1662 | |
1663 | static void emitOMPCopyinClause(CodeGenFunction &CGF, |
1664 | const OMPExecutableDirective &S) { |
1665 | bool Copyins = CGF.EmitOMPCopyinClause(D: S); |
1666 | if (Copyins) { |
1667 | // Emit implicit barrier to synchronize threads and avoid data races on |
1668 | // propagation master's thread values of threadprivate variables to local |
1669 | // instances of that variables of all other implicit threads. |
1670 | CGF.CGM.getOpenMPRuntime().emitBarrierCall( |
1671 | CGF, Loc: S.getBeginLoc(), Kind: OMPD_unknown, /*EmitChecks=*/false, |
1672 | /*ForceSimpleCall=*/true); |
1673 | } |
1674 | } |
1675 | |
1676 | Address CodeGenFunction::OMPBuilderCBHelpers::getAddressOfLocalVariable( |
1677 | CodeGenFunction &CGF, const VarDecl *VD) { |
1678 | CodeGenModule &CGM = CGF.CGM; |
1679 | auto &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); |
1680 | |
1681 | if (!VD) |
1682 | return Address::invalid(); |
1683 | const VarDecl *CVD = VD->getCanonicalDecl(); |
1684 | if (!isAllocatableDecl(VD: CVD)) |
1685 | return Address::invalid(); |
1686 | llvm::Value *Size; |
1687 | CharUnits Align = CGM.getContext().getDeclAlign(D: CVD); |
1688 | if (CVD->getType()->isVariablyModifiedType()) { |
1689 | Size = CGF.getTypeSize(Ty: CVD->getType()); |
1690 | // Align the size: ((size + align - 1) / align) * align |
1691 | Size = CGF.Builder.CreateNUWAdd( |
1692 | LHS: Size, RHS: CGM.getSize(numChars: Align - CharUnits::fromQuantity(Quantity: 1))); |
1693 | Size = CGF.Builder.CreateUDiv(LHS: Size, RHS: CGM.getSize(numChars: Align)); |
1694 | Size = CGF.Builder.CreateNUWMul(LHS: Size, RHS: CGM.getSize(numChars: Align)); |
1695 | } else { |
1696 | CharUnits Sz = CGM.getContext().getTypeSizeInChars(T: CVD->getType()); |
1697 | Size = CGM.getSize(numChars: Sz.alignTo(Align)); |
1698 | } |
1699 | |
1700 | const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>(); |
1701 | assert(AA->getAllocator() && |
1702 | "Expected allocator expression for non-default allocator." ); |
1703 | llvm::Value *Allocator = CGF.EmitScalarExpr(E: AA->getAllocator()); |
1704 | // According to the standard, the original allocator type is a enum (integer). |
1705 | // Convert to pointer type, if required. |
1706 | if (Allocator->getType()->isIntegerTy()) |
1707 | Allocator = CGF.Builder.CreateIntToPtr(V: Allocator, DestTy: CGM.VoidPtrTy); |
1708 | else if (Allocator->getType()->isPointerTy()) |
1709 | Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(V: Allocator, |
1710 | DestTy: CGM.VoidPtrTy); |
1711 | |
1712 | llvm::Value *Addr = OMPBuilder.createOMPAlloc( |
1713 | Loc: CGF.Builder, Size, Allocator, |
1714 | Name: getNameWithSeparators(Parts: {CVD->getName(), ".void.addr" }, FirstSeparator: "." , Separator: "." )); |
1715 | llvm::CallInst *FreeCI = |
1716 | OMPBuilder.createOMPFree(Loc: CGF.Builder, Addr, Allocator); |
1717 | |
1718 | CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(Kind: NormalAndEHCleanup, A: FreeCI); |
1719 | Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( |
1720 | V: Addr, |
1721 | DestTy: CGF.ConvertTypeForMem(T: CGM.getContext().getPointerType(T: CVD->getType())), |
1722 | Name: getNameWithSeparators(Parts: {CVD->getName(), ".addr" }, FirstSeparator: "." , Separator: "." )); |
1723 | return Address(Addr, CGF.ConvertTypeForMem(T: CVD->getType()), Align); |
1724 | } |
1725 | |
1726 | Address CodeGenFunction::OMPBuilderCBHelpers::getAddrOfThreadPrivate( |
1727 | CodeGenFunction &CGF, const VarDecl *VD, Address VDAddr, |
1728 | SourceLocation Loc) { |
1729 | CodeGenModule &CGM = CGF.CGM; |
1730 | if (CGM.getLangOpts().OpenMPUseTLS && |
1731 | CGM.getContext().getTargetInfo().isTLSSupported()) |
1732 | return VDAddr; |
1733 | |
1734 | llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); |
1735 | |
1736 | llvm::Type *VarTy = VDAddr.getElementType(); |
1737 | llvm::Value *Data = |
1738 | CGF.Builder.CreatePointerCast(V: VDAddr.emitRawPointer(CGF), DestTy: CGM.Int8PtrTy); |
1739 | llvm::ConstantInt *Size = CGM.getSize(numChars: CGM.GetTargetTypeStoreSize(Ty: VarTy)); |
1740 | std::string Suffix = getNameWithSeparators(Parts: {"cache" , "" }); |
1741 | llvm::Twine CacheName = Twine(CGM.getMangledName(GD: VD)).concat(Suffix); |
1742 | |
1743 | llvm::CallInst *ThreadPrivateCacheCall = |
1744 | OMPBuilder.createCachedThreadPrivate(Loc: CGF.Builder, Pointer: Data, Size, Name: CacheName); |
1745 | |
1746 | return Address(ThreadPrivateCacheCall, CGM.Int8Ty, VDAddr.getAlignment()); |
1747 | } |
1748 | |
1749 | std::string CodeGenFunction::OMPBuilderCBHelpers::getNameWithSeparators( |
1750 | ArrayRef<StringRef> Parts, StringRef FirstSeparator, StringRef Separator) { |
1751 | SmallString<128> Buffer; |
1752 | llvm::raw_svector_ostream OS(Buffer); |
1753 | StringRef Sep = FirstSeparator; |
1754 | for (StringRef Part : Parts) { |
1755 | OS << Sep << Part; |
1756 | Sep = Separator; |
1757 | } |
1758 | return OS.str().str(); |
1759 | } |
1760 | |
1761 | void CodeGenFunction::OMPBuilderCBHelpers::EmitOMPInlinedRegionBody( |
1762 | CodeGenFunction &CGF, const Stmt *RegionBodyStmt, InsertPointTy AllocaIP, |
1763 | InsertPointTy CodeGenIP, Twine RegionName) { |
1764 | CGBuilderTy &Builder = CGF.Builder; |
1765 | Builder.restoreIP(IP: CodeGenIP); |
1766 | llvm::BasicBlock *FiniBB = splitBBWithSuffix(Builder, /*CreateBranch=*/false, |
1767 | Suffix: "." + RegionName + ".after" ); |
1768 | |
1769 | { |
1770 | OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(CGF, AllocaIP, *FiniBB); |
1771 | CGF.EmitStmt(S: RegionBodyStmt); |
1772 | } |
1773 | |
1774 | if (Builder.saveIP().isSet()) |
1775 | Builder.CreateBr(Dest: FiniBB); |
1776 | } |
1777 | |
1778 | void CodeGenFunction::OMPBuilderCBHelpers::EmitOMPOutlinedRegionBody( |
1779 | CodeGenFunction &CGF, const Stmt *RegionBodyStmt, InsertPointTy AllocaIP, |
1780 | InsertPointTy CodeGenIP, Twine RegionName) { |
1781 | CGBuilderTy &Builder = CGF.Builder; |
1782 | Builder.restoreIP(IP: CodeGenIP); |
1783 | llvm::BasicBlock *FiniBB = splitBBWithSuffix(Builder, /*CreateBranch=*/false, |
1784 | Suffix: "." + RegionName + ".after" ); |
1785 | |
1786 | { |
1787 | OMPBuilderCBHelpers::OutlinedRegionBodyRAII IRB(CGF, AllocaIP, *FiniBB); |
1788 | CGF.EmitStmt(S: RegionBodyStmt); |
1789 | } |
1790 | |
1791 | if (Builder.saveIP().isSet()) |
1792 | Builder.CreateBr(Dest: FiniBB); |
1793 | } |
1794 | |
1795 | void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { |
1796 | if (CGM.getLangOpts().OpenMPIRBuilder) { |
1797 | llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); |
1798 | // Check if we have any if clause associated with the directive. |
1799 | llvm::Value *IfCond = nullptr; |
1800 | if (const auto *C = S.getSingleClause<OMPIfClause>()) |
1801 | IfCond = EmitScalarExpr(E: C->getCondition(), |
1802 | /*IgnoreResultAssign=*/true); |
1803 | |
1804 | llvm::Value *NumThreads = nullptr; |
1805 | if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) |
1806 | NumThreads = EmitScalarExpr(E: NumThreadsClause->getNumThreads(), |
1807 | /*IgnoreResultAssign=*/true); |
1808 | |
1809 | ProcBindKind ProcBind = OMP_PROC_BIND_default; |
1810 | if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) |
1811 | ProcBind = ProcBindClause->getProcBindKind(); |
1812 | |
1813 | using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; |
1814 | |
1815 | // The cleanup callback that finalizes all variables at the given location, |
1816 | // thus calls destructors etc. |
1817 | auto FiniCB = [this](InsertPointTy IP) { |
1818 | OMPBuilderCBHelpers::FinalizeOMPRegion(CGF&: *this, IP); |
1819 | return llvm::Error::success(); |
1820 | }; |
1821 | |
1822 | // Privatization callback that performs appropriate action for |
1823 | // shared/private/firstprivate/lastprivate/copyin/... variables. |
1824 | // |
1825 | // TODO: This defaults to shared right now. |
1826 | auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, |
1827 | llvm::Value &, llvm::Value &Val, llvm::Value *&ReplVal) { |
1828 | // The next line is appropriate only for variables (Val) with the |
1829 | // data-sharing attribute "shared". |
1830 | ReplVal = &Val; |
1831 | |
1832 | return CodeGenIP; |
1833 | }; |
1834 | |
1835 | const CapturedStmt *CS = S.getCapturedStmt(RegionKind: OMPD_parallel); |
1836 | const Stmt *ParallelRegionBodyStmt = CS->getCapturedStmt(); |
1837 | |
1838 | auto BodyGenCB = [&, this](InsertPointTy AllocaIP, |
1839 | InsertPointTy CodeGenIP) { |
1840 | OMPBuilderCBHelpers::EmitOMPOutlinedRegionBody( |
1841 | CGF&: *this, RegionBodyStmt: ParallelRegionBodyStmt, AllocaIP, CodeGenIP, RegionName: "parallel" ); |
1842 | return llvm::Error::success(); |
1843 | }; |
1844 | |
1845 | CGCapturedStmtInfo CGSI(*CS, CR_OpenMP); |
1846 | CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI); |
1847 | llvm::OpenMPIRBuilder::InsertPointTy AllocaIP( |
1848 | AllocaInsertPt->getParent(), AllocaInsertPt->getIterator()); |
1849 | llvm::OpenMPIRBuilder::InsertPointTy AfterIP = cantFail( |
1850 | ValOrErr: OMPBuilder.createParallel(Loc: Builder, AllocaIP, BodyGenCB, PrivCB, FiniCB, |
1851 | IfCondition: IfCond, NumThreads, ProcBind, IsCancellable: S.hasCancel())); |
1852 | Builder.restoreIP(IP: AfterIP); |
1853 | return; |
1854 | } |
1855 | |
1856 | // Emit parallel region as a standalone region. |
1857 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
1858 | Action.Enter(CGF); |
1859 | OMPPrivateScope PrivateScope(CGF); |
1860 | emitOMPCopyinClause(CGF, S); |
1861 | (void)CGF.EmitOMPFirstprivateClause(D: S, PrivateScope); |
1862 | CGF.EmitOMPPrivateClause(D: S, PrivateScope); |
1863 | CGF.EmitOMPReductionClauseInit(D: S, PrivateScope); |
1864 | (void)PrivateScope.Privatize(); |
1865 | CGF.EmitStmt(S: S.getCapturedStmt(RegionKind: OMPD_parallel)->getCapturedStmt()); |
1866 | CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_parallel); |
1867 | }; |
1868 | { |
1869 | auto LPCRegion = |
1870 | CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S); |
1871 | emitCommonOMPParallelDirective(CGF&: *this, S, InnermostKind: OMPD_parallel, CodeGen, |
1872 | CodeGenBoundParameters: emitEmptyBoundParameters); |
1873 | emitPostUpdateForReductionClause(CGF&: *this, D: S, |
1874 | CondGen: [](CodeGenFunction &) { return nullptr; }); |
1875 | } |
1876 | // Check for outer lastprivate conditional update. |
1877 | checkForLastprivateConditionalUpdate(CGF&: *this, S); |
1878 | } |
1879 | |
1880 | void CodeGenFunction::EmitOMPMetaDirective(const OMPMetaDirective &S) { |
1881 | EmitStmt(S: S.getIfStmt()); |
1882 | } |
1883 | |
1884 | namespace { |
1885 | /// RAII to handle scopes for loop transformation directives. |
1886 | class OMPTransformDirectiveScopeRAII { |
1887 | OMPLoopScope *Scope = nullptr; |
1888 | CodeGenFunction::CGCapturedStmtInfo *CGSI = nullptr; |
1889 | CodeGenFunction::CGCapturedStmtRAII *CapInfoRAII = nullptr; |
1890 | |
1891 | OMPTransformDirectiveScopeRAII(const OMPTransformDirectiveScopeRAII &) = |
1892 | delete; |
1893 | OMPTransformDirectiveScopeRAII & |
1894 | operator=(const OMPTransformDirectiveScopeRAII &) = delete; |
1895 | |
1896 | public: |
1897 | OMPTransformDirectiveScopeRAII(CodeGenFunction &CGF, const Stmt *S) { |
1898 | if (const auto *Dir = dyn_cast<OMPLoopBasedDirective>(Val: S)) { |
1899 | Scope = new OMPLoopScope(CGF, *Dir); |
1900 | CGSI = new CodeGenFunction::CGCapturedStmtInfo(CR_OpenMP); |
1901 | CapInfoRAII = new CodeGenFunction::CGCapturedStmtRAII(CGF, CGSI); |
1902 | } |
1903 | } |
1904 | ~OMPTransformDirectiveScopeRAII() { |
1905 | if (!Scope) |
1906 | return; |
1907 | delete CapInfoRAII; |
1908 | delete CGSI; |
1909 | delete Scope; |
1910 | } |
1911 | }; |
1912 | } // namespace |
1913 | |
1914 | static void emitBody(CodeGenFunction &CGF, const Stmt *S, const Stmt *NextLoop, |
1915 | int MaxLevel, int Level = 0) { |
1916 | assert(Level < MaxLevel && "Too deep lookup during loop body codegen." ); |
1917 | const Stmt *SimplifiedS = S->IgnoreContainers(); |
1918 | if (const auto *CS = dyn_cast<CompoundStmt>(Val: SimplifiedS)) { |
1919 | PrettyStackTraceLoc CrashInfo( |
1920 | CGF.getContext().getSourceManager(), CS->getLBracLoc(), |
1921 | "LLVM IR generation of compound statement ('{}')" ); |
1922 | |
1923 | // Keep track of the current cleanup stack depth, including debug scopes. |
1924 | CodeGenFunction::LexicalScope Scope(CGF, S->getSourceRange()); |
1925 | for (const Stmt *CurStmt : CS->body()) |
1926 | emitBody(CGF, S: CurStmt, NextLoop, MaxLevel, Level); |
1927 | return; |
1928 | } |
1929 | if (SimplifiedS == NextLoop) { |
1930 | if (auto *Dir = dyn_cast<OMPLoopTransformationDirective>(Val: SimplifiedS)) |
1931 | SimplifiedS = Dir->getTransformedStmt(); |
1932 | if (const auto *CanonLoop = dyn_cast<OMPCanonicalLoop>(Val: SimplifiedS)) |
1933 | SimplifiedS = CanonLoop->getLoopStmt(); |
1934 | if (const auto *For = dyn_cast<ForStmt>(Val: SimplifiedS)) { |
1935 | S = For->getBody(); |
1936 | } else { |
1937 | assert(isa<CXXForRangeStmt>(SimplifiedS) && |
1938 | "Expected canonical for loop or range-based for loop." ); |
1939 | const auto *CXXFor = cast<CXXForRangeStmt>(Val: SimplifiedS); |
1940 | CGF.EmitStmt(S: CXXFor->getLoopVarStmt()); |
1941 | S = CXXFor->getBody(); |
1942 | } |
1943 | if (Level + 1 < MaxLevel) { |
1944 | NextLoop = OMPLoopDirective::tryToFindNextInnerLoop( |
1945 | CurStmt: S, /*TryImperfectlyNestedLoops=*/true); |
1946 | emitBody(CGF, S, NextLoop, MaxLevel, Level: Level + 1); |
1947 | return; |
1948 | } |
1949 | } |
1950 | CGF.EmitStmt(S); |
1951 | } |
1952 | |
1953 | void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D, |
1954 | JumpDest LoopExit) { |
1955 | RunCleanupsScope BodyScope(*this); |
1956 | // Update counters values on current iteration. |
1957 | for (const Expr *UE : D.updates()) |
1958 | EmitIgnoredExpr(E: UE); |
1959 | // Update the linear variables. |
1960 | // In distribute directives only loop counters may be marked as linear, no |
1961 | // need to generate the code for them. |
1962 | OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S: D); |
1963 | if (!isOpenMPDistributeDirective(DKind: EKind)) { |
1964 | for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) { |
1965 | for (const Expr *UE : C->updates()) |
1966 | EmitIgnoredExpr(E: UE); |
1967 | } |
1968 | } |
1969 | |
1970 | // On a continue in the body, jump to the end. |
1971 | JumpDest Continue = getJumpDestInCurrentScope(Name: "omp.body.continue" ); |
1972 | BreakContinueStack.push_back(Elt: BreakContinue(LoopExit, Continue)); |
1973 | for (const Expr *E : D.finals_conditions()) { |
1974 | if (!E) |
1975 | continue; |
1976 | // Check that loop counter in non-rectangular nest fits into the iteration |
1977 | // space. |
1978 | llvm::BasicBlock *NextBB = createBasicBlock(name: "omp.body.next" ); |
1979 | EmitBranchOnBoolExpr(Cond: E, TrueBlock: NextBB, FalseBlock: Continue.getBlock(), |
1980 | TrueCount: getProfileCount(S: D.getBody())); |
1981 | EmitBlock(BB: NextBB); |
1982 | } |
1983 | |
1984 | OMPPrivateScope InscanScope(*this); |
1985 | EmitOMPReductionClauseInit(D, PrivateScope&: InscanScope, /*ForInscan=*/true); |
1986 | bool IsInscanRegion = InscanScope.Privatize(); |
1987 | if (IsInscanRegion) { |
1988 | // Need to remember the block before and after scan directive |
1989 | // to dispatch them correctly depending on the clause used in |
1990 | // this directive, inclusive or exclusive. For inclusive scan the natural |
1991 | // order of the blocks is used, for exclusive clause the blocks must be |
1992 | // executed in reverse order. |
1993 | OMPBeforeScanBlock = createBasicBlock(name: "omp.before.scan.bb" ); |
1994 | OMPAfterScanBlock = createBasicBlock(name: "omp.after.scan.bb" ); |
1995 | // No need to allocate inscan exit block, in simd mode it is selected in the |
1996 | // codegen for the scan directive. |
1997 | if (EKind != OMPD_simd && !getLangOpts().OpenMPSimd) |
1998 | OMPScanExitBlock = createBasicBlock(name: "omp.exit.inscan.bb" ); |
1999 | OMPScanDispatch = createBasicBlock(name: "omp.inscan.dispatch" ); |
2000 | EmitBranch(Block: OMPScanDispatch); |
2001 | EmitBlock(BB: OMPBeforeScanBlock); |
2002 | } |
2003 | |
2004 | // Emit loop variables for C++ range loops. |
2005 | const Stmt *Body = |
2006 | D.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers(); |
2007 | // Emit loop body. |
2008 | emitBody(CGF&: *this, S: Body, |
2009 | NextLoop: OMPLoopBasedDirective::tryToFindNextInnerLoop( |
2010 | CurStmt: Body, /*TryImperfectlyNestedLoops=*/true), |
2011 | MaxLevel: D.getLoopsNumber()); |
2012 | |
2013 | // Jump to the dispatcher at the end of the loop body. |
2014 | if (IsInscanRegion) |
2015 | EmitBranch(Block: OMPScanExitBlock); |
2016 | |
2017 | // The end (updates/cleanups). |
2018 | EmitBlock(BB: Continue.getBlock()); |
2019 | BreakContinueStack.pop_back(); |
2020 | } |
2021 | |
2022 | using EmittedClosureTy = std::pair<llvm::Function *, llvm::Value *>; |
2023 | |
2024 | /// Emit a captured statement and return the function as well as its captured |
2025 | /// closure context. |
2026 | static EmittedClosureTy emitCapturedStmtFunc(CodeGenFunction &ParentCGF, |
2027 | const CapturedStmt *S) { |
2028 | LValue CapStruct = ParentCGF.InitCapturedStruct(S: *S); |
2029 | CodeGenFunction CGF(ParentCGF.CGM, /*suppressNewContext=*/true); |
2030 | std::unique_ptr<CodeGenFunction::CGCapturedStmtInfo> CSI = |
2031 | std::make_unique<CodeGenFunction::CGCapturedStmtInfo>(args: *S); |
2032 | CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, CSI.get()); |
2033 | llvm::Function *F = CGF.GenerateCapturedStmtFunction(S: *S); |
2034 | |
2035 | return {F, CapStruct.getPointer(CGF&: ParentCGF)}; |
2036 | } |
2037 | |
2038 | /// Emit a call to a previously captured closure. |
2039 | static llvm::CallInst * |
2040 | emitCapturedStmtCall(CodeGenFunction &ParentCGF, EmittedClosureTy Cap, |
2041 | llvm::ArrayRef<llvm::Value *> Args) { |
2042 | // Append the closure context to the argument. |
2043 | SmallVector<llvm::Value *> EffectiveArgs; |
2044 | EffectiveArgs.reserve(N: Args.size() + 1); |
2045 | llvm::append_range(C&: EffectiveArgs, R&: Args); |
2046 | EffectiveArgs.push_back(Elt: Cap.second); |
2047 | |
2048 | return ParentCGF.Builder.CreateCall(Callee: Cap.first, Args: EffectiveArgs); |
2049 | } |
2050 | |
2051 | llvm::CanonicalLoopInfo * |
2052 | CodeGenFunction::EmitOMPCollapsedCanonicalLoopNest(const Stmt *S, int Depth) { |
2053 | assert(Depth == 1 && "Nested loops with OpenMPIRBuilder not yet implemented" ); |
2054 | |
2055 | // The caller is processing the loop-associated directive processing the \p |
2056 | // Depth loops nested in \p S. Put the previous pending loop-associated |
2057 | // directive to the stack. If the current loop-associated directive is a loop |
2058 | // transformation directive, it will push its generated loops onto the stack |
2059 | // such that together with the loops left here they form the combined loop |
2060 | // nest for the parent loop-associated directive. |
2061 | int ParentExpectedOMPLoopDepth = ExpectedOMPLoopDepth; |
2062 | ExpectedOMPLoopDepth = Depth; |
2063 | |
2064 | EmitStmt(S); |
2065 | assert(OMPLoopNestStack.size() >= (size_t)Depth && "Found too few loops" ); |
2066 | |
2067 | // The last added loop is the outermost one. |
2068 | llvm::CanonicalLoopInfo *Result = OMPLoopNestStack.back(); |
2069 | |
2070 | // Pop the \p Depth loops requested by the call from that stack and restore |
2071 | // the previous context. |
2072 | OMPLoopNestStack.pop_back_n(NumItems: Depth); |
2073 | ExpectedOMPLoopDepth = ParentExpectedOMPLoopDepth; |
2074 | |
2075 | return Result; |
2076 | } |
2077 | |
2078 | void CodeGenFunction::EmitOMPCanonicalLoop(const OMPCanonicalLoop *S) { |
2079 | const Stmt *SyntacticalLoop = S->getLoopStmt(); |
2080 | if (!getLangOpts().OpenMPIRBuilder) { |
2081 | // Ignore if OpenMPIRBuilder is not enabled. |
2082 | EmitStmt(S: SyntacticalLoop); |
2083 | return; |
2084 | } |
2085 | |
2086 | LexicalScope ForScope(*this, S->getSourceRange()); |
2087 | |
2088 | // Emit init statements. The Distance/LoopVar funcs may reference variable |
2089 | // declarations they contain. |
2090 | const Stmt *BodyStmt; |
2091 | if (const auto *For = dyn_cast<ForStmt>(Val: SyntacticalLoop)) { |
2092 | if (const Stmt *InitStmt = For->getInit()) |
2093 | EmitStmt(S: InitStmt); |
2094 | BodyStmt = For->getBody(); |
2095 | } else if (const auto *RangeFor = |
2096 | dyn_cast<CXXForRangeStmt>(Val: SyntacticalLoop)) { |
2097 | if (const DeclStmt *RangeStmt = RangeFor->getRangeStmt()) |
2098 | EmitStmt(S: RangeStmt); |
2099 | if (const DeclStmt *BeginStmt = RangeFor->getBeginStmt()) |
2100 | EmitStmt(S: BeginStmt); |
2101 | if (const DeclStmt *EndStmt = RangeFor->getEndStmt()) |
2102 | EmitStmt(S: EndStmt); |
2103 | if (const DeclStmt *LoopVarStmt = RangeFor->getLoopVarStmt()) |
2104 | EmitStmt(S: LoopVarStmt); |
2105 | BodyStmt = RangeFor->getBody(); |
2106 | } else |
2107 | llvm_unreachable("Expected for-stmt or range-based for-stmt" ); |
2108 | |
2109 | // Emit closure for later use. By-value captures will be captured here. |
2110 | const CapturedStmt *DistanceFunc = S->getDistanceFunc(); |
2111 | EmittedClosureTy DistanceClosure = emitCapturedStmtFunc(ParentCGF&: *this, S: DistanceFunc); |
2112 | const CapturedStmt *LoopVarFunc = S->getLoopVarFunc(); |
2113 | EmittedClosureTy LoopVarClosure = emitCapturedStmtFunc(ParentCGF&: *this, S: LoopVarFunc); |
2114 | |
2115 | // Call the distance function to get the number of iterations of the loop to |
2116 | // come. |
2117 | QualType LogicalTy = DistanceFunc->getCapturedDecl() |
2118 | ->getParam(i: 0) |
2119 | ->getType() |
2120 | .getNonReferenceType(); |
2121 | RawAddress CountAddr = CreateMemTemp(T: LogicalTy, Name: ".count.addr" ); |
2122 | emitCapturedStmtCall(ParentCGF&: *this, Cap: DistanceClosure, Args: {CountAddr.getPointer()}); |
2123 | llvm::Value *DistVal = Builder.CreateLoad(Addr: CountAddr, Name: ".count" ); |
2124 | |
2125 | // Emit the loop structure. |
2126 | llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); |
2127 | auto BodyGen = [&, this](llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP, |
2128 | llvm::Value *IndVar) { |
2129 | Builder.restoreIP(IP: CodeGenIP); |
2130 | |
2131 | // Emit the loop body: Convert the logical iteration number to the loop |
2132 | // variable and emit the body. |
2133 | const DeclRefExpr *LoopVarRef = S->getLoopVarRef(); |
2134 | LValue LCVal = EmitLValue(E: LoopVarRef); |
2135 | Address LoopVarAddress = LCVal.getAddress(); |
2136 | emitCapturedStmtCall(ParentCGF&: *this, Cap: LoopVarClosure, |
2137 | Args: {LoopVarAddress.emitRawPointer(CGF&: *this), IndVar}); |
2138 | |
2139 | RunCleanupsScope BodyScope(*this); |
2140 | EmitStmt(S: BodyStmt); |
2141 | return llvm::Error::success(); |
2142 | }; |
2143 | |
2144 | llvm::CanonicalLoopInfo *CL = |
2145 | cantFail(ValOrErr: OMPBuilder.createCanonicalLoop(Loc: Builder, BodyGenCB: BodyGen, TripCount: DistVal)); |
2146 | |
2147 | // Finish up the loop. |
2148 | Builder.restoreIP(IP: CL->getAfterIP()); |
2149 | ForScope.ForceCleanup(); |
2150 | |
2151 | // Remember the CanonicalLoopInfo for parent AST nodes consuming it. |
2152 | OMPLoopNestStack.push_back(Elt: CL); |
2153 | } |
2154 | |
2155 | void CodeGenFunction::EmitOMPInnerLoop( |
2156 | const OMPExecutableDirective &S, bool RequiresCleanup, const Expr *LoopCond, |
2157 | const Expr *IncExpr, |
2158 | const llvm::function_ref<void(CodeGenFunction &)> BodyGen, |
2159 | const llvm::function_ref<void(CodeGenFunction &)> PostIncGen) { |
2160 | auto LoopExit = getJumpDestInCurrentScope(Name: "omp.inner.for.end" ); |
2161 | |
2162 | // Start the loop with a block that tests the condition. |
2163 | auto CondBlock = createBasicBlock(name: "omp.inner.for.cond" ); |
2164 | EmitBlock(BB: CondBlock); |
2165 | const SourceRange R = S.getSourceRange(); |
2166 | |
2167 | // If attributes are attached, push to the basic block with them. |
2168 | const auto &OMPED = cast<OMPExecutableDirective>(Val: S); |
2169 | const CapturedStmt *ICS = OMPED.getInnermostCapturedStmt(); |
2170 | const Stmt *SS = ICS->getCapturedStmt(); |
2171 | const AttributedStmt *AS = dyn_cast_or_null<AttributedStmt>(Val: SS); |
2172 | OMPLoopNestStack.clear(); |
2173 | if (AS) |
2174 | LoopStack.push(Header: CondBlock, Ctx&: CGM.getContext(), CGOpts: CGM.getCodeGenOpts(), |
2175 | Attrs: AS->getAttrs(), StartLoc: SourceLocToDebugLoc(Location: R.getBegin()), |
2176 | EndLoc: SourceLocToDebugLoc(Location: R.getEnd())); |
2177 | else |
2178 | LoopStack.push(Header: CondBlock, StartLoc: SourceLocToDebugLoc(Location: R.getBegin()), |
2179 | EndLoc: SourceLocToDebugLoc(Location: R.getEnd())); |
2180 | |
2181 | // If there are any cleanups between here and the loop-exit scope, |
2182 | // create a block to stage a loop exit along. |
2183 | llvm::BasicBlock *ExitBlock = LoopExit.getBlock(); |
2184 | if (RequiresCleanup) |
2185 | ExitBlock = createBasicBlock(name: "omp.inner.for.cond.cleanup" ); |
2186 | |
2187 | llvm::BasicBlock *LoopBody = createBasicBlock(name: "omp.inner.for.body" ); |
2188 | |
2189 | // Emit condition. |
2190 | EmitBranchOnBoolExpr(Cond: LoopCond, TrueBlock: LoopBody, FalseBlock: ExitBlock, TrueCount: getProfileCount(S: &S)); |
2191 | if (ExitBlock != LoopExit.getBlock()) { |
2192 | EmitBlock(BB: ExitBlock); |
2193 | EmitBranchThroughCleanup(Dest: LoopExit); |
2194 | } |
2195 | |
2196 | EmitBlock(BB: LoopBody); |
2197 | incrementProfileCounter(S: &S); |
2198 | |
2199 | // Create a block for the increment. |
2200 | JumpDest Continue = getJumpDestInCurrentScope(Name: "omp.inner.for.inc" ); |
2201 | BreakContinueStack.push_back(Elt: BreakContinue(LoopExit, Continue)); |
2202 | |
2203 | BodyGen(*this); |
2204 | |
2205 | // Emit "IV = IV + 1" and a back-edge to the condition block. |
2206 | EmitBlock(BB: Continue.getBlock()); |
2207 | EmitIgnoredExpr(E: IncExpr); |
2208 | PostIncGen(*this); |
2209 | BreakContinueStack.pop_back(); |
2210 | EmitBranch(Block: CondBlock); |
2211 | LoopStack.pop(); |
2212 | // Emit the fall-through block. |
2213 | EmitBlock(BB: LoopExit.getBlock()); |
2214 | } |
2215 | |
2216 | bool CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) { |
2217 | if (!HaveInsertPoint()) |
2218 | return false; |
2219 | // Emit inits for the linear variables. |
2220 | bool HasLinears = false; |
2221 | for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) { |
2222 | for (const Expr *Init : C->inits()) { |
2223 | HasLinears = true; |
2224 | const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: Init)->getDecl()); |
2225 | if (const auto *Ref = |
2226 | dyn_cast<DeclRefExpr>(Val: VD->getInit()->IgnoreImpCasts())) { |
2227 | AutoVarEmission Emission = EmitAutoVarAlloca(var: *VD); |
2228 | const auto *OrigVD = cast<VarDecl>(Val: Ref->getDecl()); |
2229 | DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD), |
2230 | CapturedStmtInfo->lookup(VD: OrigVD) != nullptr, |
2231 | VD->getInit()->getType(), VK_LValue, |
2232 | VD->getInit()->getExprLoc()); |
2233 | EmitExprAsInit( |
2234 | init: &DRE, D: VD, |
2235 | lvalue: MakeAddrLValue(Addr: Emission.getAllocatedAddress(), T: VD->getType()), |
2236 | /*capturedByInit=*/false); |
2237 | EmitAutoVarCleanups(emission: Emission); |
2238 | } else { |
2239 | EmitVarDecl(D: *VD); |
2240 | } |
2241 | } |
2242 | // Emit the linear steps for the linear clauses. |
2243 | // If a step is not constant, it is pre-calculated before the loop. |
2244 | if (const auto *CS = cast_or_null<BinaryOperator>(Val: C->getCalcStep())) |
2245 | if (const auto *SaveRef = cast<DeclRefExpr>(Val: CS->getLHS())) { |
2246 | EmitVarDecl(D: *cast<VarDecl>(Val: SaveRef->getDecl())); |
2247 | // Emit calculation of the linear step. |
2248 | EmitIgnoredExpr(E: CS); |
2249 | } |
2250 | } |
2251 | return HasLinears; |
2252 | } |
2253 | |
2254 | void CodeGenFunction::EmitOMPLinearClauseFinal( |
2255 | const OMPLoopDirective &D, |
2256 | const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) { |
2257 | if (!HaveInsertPoint()) |
2258 | return; |
2259 | llvm::BasicBlock *DoneBB = nullptr; |
2260 | // Emit the final values of the linear variables. |
2261 | for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) { |
2262 | auto IC = C->varlist_begin(); |
2263 | for (const Expr *F : C->finals()) { |
2264 | if (!DoneBB) { |
2265 | if (llvm::Value *Cond = CondGen(*this)) { |
2266 | // If the first post-update expression is found, emit conditional |
2267 | // block if it was requested. |
2268 | llvm::BasicBlock *ThenBB = createBasicBlock(name: ".omp.linear.pu" ); |
2269 | DoneBB = createBasicBlock(name: ".omp.linear.pu.done" ); |
2270 | Builder.CreateCondBr(Cond, True: ThenBB, False: DoneBB); |
2271 | EmitBlock(BB: ThenBB); |
2272 | } |
2273 | } |
2274 | const auto *OrigVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IC)->getDecl()); |
2275 | DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD), |
2276 | CapturedStmtInfo->lookup(VD: OrigVD) != nullptr, |
2277 | (*IC)->getType(), VK_LValue, (*IC)->getExprLoc()); |
2278 | Address OrigAddr = EmitLValue(E: &DRE).getAddress(); |
2279 | CodeGenFunction::OMPPrivateScope VarScope(*this); |
2280 | VarScope.addPrivate(LocalVD: OrigVD, Addr: OrigAddr); |
2281 | (void)VarScope.Privatize(); |
2282 | EmitIgnoredExpr(E: F); |
2283 | ++IC; |
2284 | } |
2285 | if (const Expr *PostUpdate = C->getPostUpdateExpr()) |
2286 | EmitIgnoredExpr(E: PostUpdate); |
2287 | } |
2288 | if (DoneBB) |
2289 | EmitBlock(BB: DoneBB, /*IsFinished=*/true); |
2290 | } |
2291 | |
2292 | static void emitAlignedClause(CodeGenFunction &CGF, |
2293 | const OMPExecutableDirective &D) { |
2294 | if (!CGF.HaveInsertPoint()) |
2295 | return; |
2296 | for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) { |
2297 | llvm::APInt ClauseAlignment(64, 0); |
2298 | if (const Expr *AlignmentExpr = Clause->getAlignment()) { |
2299 | auto *AlignmentCI = |
2300 | cast<llvm::ConstantInt>(Val: CGF.EmitScalarExpr(E: AlignmentExpr)); |
2301 | ClauseAlignment = AlignmentCI->getValue(); |
2302 | } |
2303 | for (const Expr *E : Clause->varlist()) { |
2304 | llvm::APInt Alignment(ClauseAlignment); |
2305 | if (Alignment == 0) { |
2306 | // OpenMP [2.8.1, Description] |
2307 | // If no optional parameter is specified, implementation-defined default |
2308 | // alignments for SIMD instructions on the target platforms are assumed. |
2309 | Alignment = |
2310 | CGF.getContext() |
2311 | .toCharUnitsFromBits(BitSize: CGF.getContext().getOpenMPDefaultSimdAlign( |
2312 | T: E->getType()->getPointeeType())) |
2313 | .getQuantity(); |
2314 | } |
2315 | assert((Alignment == 0 || Alignment.isPowerOf2()) && |
2316 | "alignment is not power of 2" ); |
2317 | if (Alignment != 0) { |
2318 | llvm::Value *PtrValue = CGF.EmitScalarExpr(E); |
2319 | CGF.emitAlignmentAssumption( |
2320 | PtrValue, E, /*No second loc needed*/ AssumptionLoc: SourceLocation(), |
2321 | Alignment: llvm::ConstantInt::get(Context&: CGF.getLLVMContext(), V: Alignment)); |
2322 | } |
2323 | } |
2324 | } |
2325 | } |
2326 | |
2327 | void CodeGenFunction::EmitOMPPrivateLoopCounters( |
2328 | const OMPLoopDirective &S, CodeGenFunction::OMPPrivateScope &LoopScope) { |
2329 | if (!HaveInsertPoint()) |
2330 | return; |
2331 | auto I = S.private_counters().begin(); |
2332 | for (const Expr *E : S.counters()) { |
2333 | const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl()); |
2334 | const auto *PrivateVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *I)->getDecl()); |
2335 | // Emit var without initialization. |
2336 | AutoVarEmission VarEmission = EmitAutoVarAlloca(var: *PrivateVD); |
2337 | EmitAutoVarCleanups(emission: VarEmission); |
2338 | LocalDeclMap.erase(Val: PrivateVD); |
2339 | (void)LoopScope.addPrivate(LocalVD: VD, Addr: VarEmission.getAllocatedAddress()); |
2340 | if (LocalDeclMap.count(Val: VD) || CapturedStmtInfo->lookup(VD) || |
2341 | VD->hasGlobalStorage()) { |
2342 | DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD), |
2343 | LocalDeclMap.count(Val: VD) || CapturedStmtInfo->lookup(VD), |
2344 | E->getType(), VK_LValue, E->getExprLoc()); |
2345 | (void)LoopScope.addPrivate(LocalVD: PrivateVD, Addr: EmitLValue(E: &DRE).getAddress()); |
2346 | } else { |
2347 | (void)LoopScope.addPrivate(LocalVD: PrivateVD, Addr: VarEmission.getAllocatedAddress()); |
2348 | } |
2349 | ++I; |
2350 | } |
2351 | // Privatize extra loop counters used in loops for ordered(n) clauses. |
2352 | for (const auto *C : S.getClausesOfKind<OMPOrderedClause>()) { |
2353 | if (!C->getNumForLoops()) |
2354 | continue; |
2355 | for (unsigned I = S.getLoopsNumber(), E = C->getLoopNumIterations().size(); |
2356 | I < E; ++I) { |
2357 | const auto *DRE = cast<DeclRefExpr>(Val: C->getLoopCounter(NumLoop: I)); |
2358 | const auto *VD = cast<VarDecl>(Val: DRE->getDecl()); |
2359 | // Override only those variables that can be captured to avoid re-emission |
2360 | // of the variables declared within the loops. |
2361 | if (DRE->refersToEnclosingVariableOrCapture()) { |
2362 | (void)LoopScope.addPrivate( |
2363 | LocalVD: VD, Addr: CreateMemTemp(T: DRE->getType(), Name: VD->getName())); |
2364 | } |
2365 | } |
2366 | } |
2367 | } |
2368 | |
2369 | static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S, |
2370 | const Expr *Cond, llvm::BasicBlock *TrueBlock, |
2371 | llvm::BasicBlock *FalseBlock, uint64_t TrueCount) { |
2372 | if (!CGF.HaveInsertPoint()) |
2373 | return; |
2374 | { |
2375 | CodeGenFunction::OMPPrivateScope PreCondScope(CGF); |
2376 | CGF.EmitOMPPrivateLoopCounters(S, LoopScope&: PreCondScope); |
2377 | (void)PreCondScope.Privatize(); |
2378 | // Get initial values of real counters. |
2379 | for (const Expr *I : S.inits()) { |
2380 | CGF.EmitIgnoredExpr(E: I); |
2381 | } |
2382 | } |
2383 | // Create temp loop control variables with their init values to support |
2384 | // non-rectangular loops. |
2385 | CodeGenFunction::OMPMapVars PreCondVars; |
2386 | for (const Expr *E : S.dependent_counters()) { |
2387 | if (!E) |
2388 | continue; |
2389 | assert(!E->getType().getNonReferenceType()->isRecordType() && |
2390 | "dependent counter must not be an iterator." ); |
2391 | const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl()); |
2392 | Address CounterAddr = |
2393 | CGF.CreateMemTemp(T: VD->getType().getNonReferenceType()); |
2394 | (void)PreCondVars.setVarAddr(CGF, LocalVD: VD, TempAddr: CounterAddr); |
2395 | } |
2396 | (void)PreCondVars.apply(CGF); |
2397 | for (const Expr *E : S.dependent_inits()) { |
2398 | if (!E) |
2399 | continue; |
2400 | CGF.EmitIgnoredExpr(E); |
2401 | } |
2402 | // Check that loop is executed at least one time. |
2403 | CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount); |
2404 | PreCondVars.restore(CGF); |
2405 | } |
2406 | |
2407 | void CodeGenFunction::EmitOMPLinearClause( |
2408 | const OMPLoopDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) { |
2409 | if (!HaveInsertPoint()) |
2410 | return; |
2411 | llvm::DenseSet<const VarDecl *> SIMDLCVs; |
2412 | OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S: D); |
2413 | if (isOpenMPSimdDirective(DKind: EKind)) { |
2414 | const auto *LoopDirective = cast<OMPLoopDirective>(Val: &D); |
2415 | for (const Expr *C : LoopDirective->counters()) { |
2416 | SIMDLCVs.insert( |
2417 | V: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: C)->getDecl())->getCanonicalDecl()); |
2418 | } |
2419 | } |
2420 | for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) { |
2421 | auto CurPrivate = C->privates().begin(); |
2422 | for (const Expr *E : C->varlist()) { |
2423 | const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl()); |
2424 | const auto *PrivateVD = |
2425 | cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *CurPrivate)->getDecl()); |
2426 | if (!SIMDLCVs.count(V: VD->getCanonicalDecl())) { |
2427 | // Emit private VarDecl with copy init. |
2428 | EmitVarDecl(D: *PrivateVD); |
2429 | bool IsRegistered = |
2430 | PrivateScope.addPrivate(LocalVD: VD, Addr: GetAddrOfLocalVar(VD: PrivateVD)); |
2431 | assert(IsRegistered && "linear var already registered as private" ); |
2432 | // Silence the warning about unused variable. |
2433 | (void)IsRegistered; |
2434 | } else { |
2435 | EmitVarDecl(D: *PrivateVD); |
2436 | } |
2437 | ++CurPrivate; |
2438 | } |
2439 | } |
2440 | } |
2441 | |
2442 | static void emitSimdlenSafelenClause(CodeGenFunction &CGF, |
2443 | const OMPExecutableDirective &D) { |
2444 | if (!CGF.HaveInsertPoint()) |
2445 | return; |
2446 | if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) { |
2447 | RValue Len = CGF.EmitAnyExpr(E: C->getSimdlen(), aggSlot: AggValueSlot::ignored(), |
2448 | /*ignoreResult=*/true); |
2449 | auto *Val = cast<llvm::ConstantInt>(Val: Len.getScalarVal()); |
2450 | CGF.LoopStack.setVectorizeWidth(Val->getZExtValue()); |
2451 | // In presence of finite 'safelen', it may be unsafe to mark all |
2452 | // the memory instructions parallel, because loop-carried |
2453 | // dependences of 'safelen' iterations are possible. |
2454 | CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>()); |
2455 | } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) { |
2456 | RValue Len = CGF.EmitAnyExpr(E: C->getSafelen(), aggSlot: AggValueSlot::ignored(), |
2457 | /*ignoreResult=*/true); |
2458 | auto *Val = cast<llvm::ConstantInt>(Val: Len.getScalarVal()); |
2459 | CGF.LoopStack.setVectorizeWidth(Val->getZExtValue()); |
2460 | // In presence of finite 'safelen', it may be unsafe to mark all |
2461 | // the memory instructions parallel, because loop-carried |
2462 | // dependences of 'safelen' iterations are possible. |
2463 | CGF.LoopStack.setParallel(/*Enable=*/false); |
2464 | } |
2465 | } |
2466 | |
2467 | // Check for the presence of an `OMPOrderedDirective`, |
2468 | // i.e., `ordered` in `#pragma omp ordered simd`. |
2469 | // |
2470 | // Consider the following source code: |
2471 | // ``` |
2472 | // __attribute__((noinline)) void omp_simd_loop(float X[ARRAY_SIZE][ARRAY_SIZE]) |
2473 | // { |
2474 | // for (int r = 1; r < ARRAY_SIZE; ++r) { |
2475 | // for (int c = 1; c < ARRAY_SIZE; ++c) { |
2476 | // #pragma omp simd |
2477 | // for (int k = 2; k < ARRAY_SIZE; ++k) { |
2478 | // #pragma omp ordered simd |
2479 | // X[r][k] = X[r][k - 2] + sinf((float)(r / c)); |
2480 | // } |
2481 | // } |
2482 | // } |
2483 | // } |
2484 | // ``` |
2485 | // |
2486 | // Suppose we are in `CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective |
2487 | // &D)`. By examining `D.dump()` we have the following AST containing |
2488 | // `OMPOrderedDirective`: |
2489 | // |
2490 | // ``` |
2491 | // OMPSimdDirective 0x1c32950 |
2492 | // `-CapturedStmt 0x1c32028 |
2493 | // |-CapturedDecl 0x1c310e8 |
2494 | // | |-ForStmt 0x1c31e30 |
2495 | // | | |-DeclStmt 0x1c31298 |
2496 | // | | | `-VarDecl 0x1c31208 used k 'int' cinit |
2497 | // | | | `-IntegerLiteral 0x1c31278 'int' 2 |
2498 | // | | |-<<<NULL>>> |
2499 | // | | |-BinaryOperator 0x1c31308 'int' '<' |
2500 | // | | | |-ImplicitCastExpr 0x1c312f0 'int' <LValueToRValue> |
2501 | // | | | | `-DeclRefExpr 0x1c312b0 'int' lvalue Var 0x1c31208 'k' 'int' |
2502 | // | | | `-IntegerLiteral 0x1c312d0 'int' 256 |
2503 | // | | |-UnaryOperator 0x1c31348 'int' prefix '++' |
2504 | // | | | `-DeclRefExpr 0x1c31328 'int' lvalue Var 0x1c31208 'k' 'int' |
2505 | // | | `-CompoundStmt 0x1c31e18 |
2506 | // | | `-OMPOrderedDirective 0x1c31dd8 |
2507 | // | | |-OMPSimdClause 0x1c31380 |
2508 | // | | `-CapturedStmt 0x1c31cd0 |
2509 | // ``` |
2510 | // |
2511 | // Note the presence of `OMPOrderedDirective` above: |
2512 | // It's (transitively) nested in a `CapturedStmt` representing the pragma |
2513 | // annotated compound statement. Thus, we need to consider this nesting and |
2514 | // include checking the `getCapturedStmt` in this case. |
2515 | static bool hasOrderedDirective(const Stmt *S) { |
2516 | if (isa<OMPOrderedDirective>(Val: S)) |
2517 | return true; |
2518 | |
2519 | if (const auto *CS = dyn_cast<CapturedStmt>(Val: S)) |
2520 | return hasOrderedDirective(S: CS->getCapturedStmt()); |
2521 | |
2522 | for (const Stmt *Child : S->children()) { |
2523 | if (Child && hasOrderedDirective(S: Child)) |
2524 | return true; |
2525 | } |
2526 | |
2527 | return false; |
2528 | } |
2529 | |
2530 | static void applyConservativeSimdOrderedDirective(const Stmt &AssociatedStmt, |
2531 | LoopInfoStack &LoopStack) { |
2532 | // Check for the presence of an `OMPOrderedDirective` |
2533 | // i.e., `ordered` in `#pragma omp ordered simd` |
2534 | bool HasOrderedDirective = hasOrderedDirective(S: &AssociatedStmt); |
2535 | // If present then conservatively disable loop vectorization |
2536 | // analogously to how `emitSimdlenSafelenClause` does. |
2537 | if (HasOrderedDirective) |
2538 | LoopStack.setParallel(/*Enable=*/false); |
2539 | } |
2540 | |
2541 | void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D) { |
2542 | // Walk clauses and process safelen/lastprivate. |
2543 | LoopStack.setParallel(/*Enable=*/true); |
2544 | LoopStack.setVectorizeEnable(); |
2545 | const Stmt *AssociatedStmt = D.getAssociatedStmt(); |
2546 | applyConservativeSimdOrderedDirective(AssociatedStmt: *AssociatedStmt, LoopStack); |
2547 | emitSimdlenSafelenClause(CGF&: *this, D); |
2548 | if (const auto *C = D.getSingleClause<OMPOrderClause>()) |
2549 | if (C->getKind() == OMPC_ORDER_concurrent) |
2550 | LoopStack.setParallel(/*Enable=*/true); |
2551 | OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S: D); |
2552 | if ((EKind == OMPD_simd || |
2553 | (getLangOpts().OpenMPSimd && isOpenMPSimdDirective(DKind: EKind))) && |
2554 | llvm::any_of(Range: D.getClausesOfKind<OMPReductionClause>(), |
2555 | P: [](const OMPReductionClause *C) { |
2556 | return C->getModifier() == OMPC_REDUCTION_inscan; |
2557 | })) |
2558 | // Disable parallel access in case of prefix sum. |
2559 | LoopStack.setParallel(/*Enable=*/false); |
2560 | } |
2561 | |
2562 | void CodeGenFunction::EmitOMPSimdFinal( |
2563 | const OMPLoopDirective &D, |
2564 | const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) { |
2565 | if (!HaveInsertPoint()) |
2566 | return; |
2567 | llvm::BasicBlock *DoneBB = nullptr; |
2568 | auto IC = D.counters().begin(); |
2569 | auto IPC = D.private_counters().begin(); |
2570 | for (const Expr *F : D.finals()) { |
2571 | const auto *OrigVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: (*IC))->getDecl()); |
2572 | const auto *PrivateVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: (*IPC))->getDecl()); |
2573 | const auto *CED = dyn_cast<OMPCapturedExprDecl>(Val: OrigVD); |
2574 | if (LocalDeclMap.count(Val: OrigVD) || CapturedStmtInfo->lookup(VD: OrigVD) || |
2575 | OrigVD->hasGlobalStorage() || CED) { |
2576 | if (!DoneBB) { |
2577 | if (llvm::Value *Cond = CondGen(*this)) { |
2578 | // If the first post-update expression is found, emit conditional |
2579 | // block if it was requested. |
2580 | llvm::BasicBlock *ThenBB = createBasicBlock(name: ".omp.final.then" ); |
2581 | DoneBB = createBasicBlock(name: ".omp.final.done" ); |
2582 | Builder.CreateCondBr(Cond, True: ThenBB, False: DoneBB); |
2583 | EmitBlock(BB: ThenBB); |
2584 | } |
2585 | } |
2586 | Address OrigAddr = Address::invalid(); |
2587 | if (CED) { |
2588 | OrigAddr = EmitLValue(E: CED->getInit()->IgnoreImpCasts()).getAddress(); |
2589 | } else { |
2590 | DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(PrivateVD), |
2591 | /*RefersToEnclosingVariableOrCapture=*/false, |
2592 | (*IPC)->getType(), VK_LValue, (*IPC)->getExprLoc()); |
2593 | OrigAddr = EmitLValue(E: &DRE).getAddress(); |
2594 | } |
2595 | OMPPrivateScope VarScope(*this); |
2596 | VarScope.addPrivate(LocalVD: OrigVD, Addr: OrigAddr); |
2597 | (void)VarScope.Privatize(); |
2598 | EmitIgnoredExpr(E: F); |
2599 | } |
2600 | ++IC; |
2601 | ++IPC; |
2602 | } |
2603 | if (DoneBB) |
2604 | EmitBlock(BB: DoneBB, /*IsFinished=*/true); |
2605 | } |
2606 | |
2607 | static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF, |
2608 | const OMPLoopDirective &S, |
2609 | CodeGenFunction::JumpDest LoopExit) { |
2610 | CGF.EmitOMPLoopBody(D: S, LoopExit); |
2611 | CGF.EmitStopPoint(S: &S); |
2612 | } |
2613 | |
2614 | /// Emit a helper variable and return corresponding lvalue. |
2615 | static LValue EmitOMPHelperVar(CodeGenFunction &CGF, |
2616 | const DeclRefExpr *Helper) { |
2617 | auto VDecl = cast<VarDecl>(Val: Helper->getDecl()); |
2618 | CGF.EmitVarDecl(D: *VDecl); |
2619 | return CGF.EmitLValue(E: Helper); |
2620 | } |
2621 | |
2622 | static void emitCommonSimdLoop(CodeGenFunction &CGF, const OMPLoopDirective &S, |
2623 | const RegionCodeGenTy &SimdInitGen, |
2624 | const RegionCodeGenTy &BodyCodeGen) { |
2625 | auto &&ThenGen = [&S, &SimdInitGen, &BodyCodeGen](CodeGenFunction &CGF, |
2626 | PrePostActionTy &) { |
2627 | CGOpenMPRuntime::NontemporalDeclsRAII NontemporalsRegion(CGF.CGM, S); |
2628 | CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF); |
2629 | SimdInitGen(CGF); |
2630 | |
2631 | BodyCodeGen(CGF); |
2632 | }; |
2633 | auto &&ElseGen = [&BodyCodeGen](CodeGenFunction &CGF, PrePostActionTy &) { |
2634 | CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF); |
2635 | CGF.LoopStack.setVectorizeEnable(/*Enable=*/false); |
2636 | |
2637 | BodyCodeGen(CGF); |
2638 | }; |
2639 | const Expr *IfCond = nullptr; |
2640 | OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S); |
2641 | if (isOpenMPSimdDirective(DKind: EKind)) { |
2642 | for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { |
2643 | if (CGF.getLangOpts().OpenMP >= 50 && |
2644 | (C->getNameModifier() == OMPD_unknown || |
2645 | C->getNameModifier() == OMPD_simd)) { |
2646 | IfCond = C->getCondition(); |
2647 | break; |
2648 | } |
2649 | } |
2650 | } |
2651 | if (IfCond) { |
2652 | CGF.CGM.getOpenMPRuntime().emitIfClause(CGF, Cond: IfCond, ThenGen, ElseGen); |
2653 | } else { |
2654 | RegionCodeGenTy ThenRCG(ThenGen); |
2655 | ThenRCG(CGF); |
2656 | } |
2657 | } |
2658 | |
2659 | static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S, |
2660 | PrePostActionTy &Action) { |
2661 | Action.Enter(CGF); |
2662 | OMPLoopScope PreInitScope(CGF, S); |
2663 | // if (PreCond) { |
2664 | // for (IV in 0..LastIteration) BODY; |
2665 | // <Final counter/linear vars updates>; |
2666 | // } |
2667 | |
2668 | // The presence of lower/upper bound variable depends on the actual directive |
2669 | // kind in the AST node. The variables must be emitted because some of the |
2670 | // expressions associated with the loop will use them. |
2671 | OpenMPDirectiveKind DKind = S.getDirectiveKind(); |
2672 | if (isOpenMPDistributeDirective(DKind) || |
2673 | isOpenMPWorksharingDirective(DKind) || isOpenMPTaskLoopDirective(DKind) || |
2674 | isOpenMPGenericLoopDirective(DKind)) { |
2675 | (void)EmitOMPHelperVar(CGF, Helper: cast<DeclRefExpr>(Val: S.getLowerBoundVariable())); |
2676 | (void)EmitOMPHelperVar(CGF, Helper: cast<DeclRefExpr>(Val: S.getUpperBoundVariable())); |
2677 | } |
2678 | |
2679 | OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S); |
2680 | // Emit: if (PreCond) - begin. |
2681 | // If the condition constant folds and can be elided, avoid emitting the |
2682 | // whole loop. |
2683 | bool CondConstant; |
2684 | llvm::BasicBlock *ContBlock = nullptr; |
2685 | if (CGF.ConstantFoldsToSimpleInteger(Cond: S.getPreCond(), Result&: CondConstant)) { |
2686 | if (!CondConstant) |
2687 | return; |
2688 | } else { |
2689 | llvm::BasicBlock *ThenBlock = CGF.createBasicBlock(name: "simd.if.then" ); |
2690 | ContBlock = CGF.createBasicBlock(name: "simd.if.end" ); |
2691 | emitPreCond(CGF, S, Cond: S.getPreCond(), TrueBlock: ThenBlock, FalseBlock: ContBlock, |
2692 | TrueCount: CGF.getProfileCount(S: &S)); |
2693 | CGF.EmitBlock(BB: ThenBlock); |
2694 | CGF.incrementProfileCounter(S: &S); |
2695 | } |
2696 | |
2697 | // Emit the loop iteration variable. |
2698 | const Expr *IVExpr = S.getIterationVariable(); |
2699 | const auto *IVDecl = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: IVExpr)->getDecl()); |
2700 | CGF.EmitVarDecl(D: *IVDecl); |
2701 | CGF.EmitIgnoredExpr(E: S.getInit()); |
2702 | |
2703 | // Emit the iterations count variable. |
2704 | // If it is not a variable, Sema decided to calculate iterations count on |
2705 | // each iteration (e.g., it is foldable into a constant). |
2706 | if (const auto *LIExpr = dyn_cast<DeclRefExpr>(Val: S.getLastIteration())) { |
2707 | CGF.EmitVarDecl(D: *cast<VarDecl>(Val: LIExpr->getDecl())); |
2708 | // Emit calculation of the iterations count. |
2709 | CGF.EmitIgnoredExpr(E: S.getCalcLastIteration()); |
2710 | } |
2711 | |
2712 | emitAlignedClause(CGF, D: S); |
2713 | (void)CGF.EmitOMPLinearClauseInit(D: S); |
2714 | { |
2715 | CodeGenFunction::OMPPrivateScope LoopScope(CGF); |
2716 | CGF.EmitOMPPrivateClause(D: S, PrivateScope&: LoopScope); |
2717 | CGF.EmitOMPPrivateLoopCounters(S, LoopScope); |
2718 | CGF.EmitOMPLinearClause(D: S, PrivateScope&: LoopScope); |
2719 | CGF.EmitOMPReductionClauseInit(D: S, PrivateScope&: LoopScope); |
2720 | CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion( |
2721 | CGF, S, CGF.EmitLValue(E: S.getIterationVariable())); |
2722 | bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(D: S, PrivateScope&: LoopScope); |
2723 | (void)LoopScope.Privatize(); |
2724 | if (isOpenMPTargetExecutionDirective(DKind: EKind)) |
2725 | CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, D: S); |
2726 | |
2727 | emitCommonSimdLoop( |
2728 | CGF, S, |
2729 | SimdInitGen: [&S](CodeGenFunction &CGF, PrePostActionTy &) { |
2730 | CGF.EmitOMPSimdInit(D: S); |
2731 | }, |
2732 | BodyCodeGen: [&S, &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) { |
2733 | CGF.EmitOMPInnerLoop( |
2734 | S, RequiresCleanup: LoopScope.requiresCleanups(), LoopCond: S.getCond(), IncExpr: S.getInc(), |
2735 | BodyGen: [&S](CodeGenFunction &CGF) { |
2736 | emitOMPLoopBodyWithStopPoint(CGF, S, |
2737 | LoopExit: CodeGenFunction::JumpDest()); |
2738 | }, |
2739 | PostIncGen: [](CodeGenFunction &) {}); |
2740 | }); |
2741 | CGF.EmitOMPSimdFinal(D: S, CondGen: [](CodeGenFunction &) { return nullptr; }); |
2742 | // Emit final copy of the lastprivate variables at the end of loops. |
2743 | if (HasLastprivateClause) |
2744 | CGF.EmitOMPLastprivateClauseFinal(D: S, /*NoFinals=*/true); |
2745 | CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_simd); |
2746 | emitPostUpdateForReductionClause(CGF, D: S, |
2747 | CondGen: [](CodeGenFunction &) { return nullptr; }); |
2748 | LoopScope.restoreMap(); |
2749 | CGF.EmitOMPLinearClauseFinal(D: S, CondGen: [](CodeGenFunction &) { return nullptr; }); |
2750 | } |
2751 | // Emit: if (PreCond) - end. |
2752 | if (ContBlock) { |
2753 | CGF.EmitBranch(Block: ContBlock); |
2754 | CGF.EmitBlock(BB: ContBlock, IsFinished: true); |
2755 | } |
2756 | } |
2757 | |
2758 | // Pass OMPLoopDirective (instead of OMPSimdDirective) to make this function |
2759 | // available for "loop bind(thread)", which maps to "simd". |
2760 | static bool isSimdSupportedByOpenMPIRBuilder(const OMPLoopDirective &S) { |
2761 | // Check for unsupported clauses |
2762 | for (OMPClause *C : S.clauses()) { |
2763 | // Currently only order, simdlen and safelen clauses are supported |
2764 | if (!(isa<OMPSimdlenClause>(Val: C) || isa<OMPSafelenClause>(Val: C) || |
2765 | isa<OMPOrderClause>(Val: C) || isa<OMPAlignedClause>(Val: C))) |
2766 | return false; |
2767 | } |
2768 | |
2769 | // Check if we have a statement with the ordered directive. |
2770 | // Visit the statement hierarchy to find a compound statement |
2771 | // with a ordered directive in it. |
2772 | if (const auto *CanonLoop = dyn_cast<OMPCanonicalLoop>(Val: S.getRawStmt())) { |
2773 | if (const Stmt *SyntacticalLoop = CanonLoop->getLoopStmt()) { |
2774 | for (const Stmt *SubStmt : SyntacticalLoop->children()) { |
2775 | if (!SubStmt) |
2776 | continue; |
2777 | if (const CompoundStmt *CS = dyn_cast<CompoundStmt>(Val: SubStmt)) { |
2778 | for (const Stmt *CSSubStmt : CS->children()) { |
2779 | if (!CSSubStmt) |
2780 | continue; |
2781 | if (isa<OMPOrderedDirective>(Val: CSSubStmt)) { |
2782 | return false; |
2783 | } |
2784 | } |
2785 | } |
2786 | } |
2787 | } |
2788 | } |
2789 | return true; |
2790 | } |
2791 | |
2792 | static llvm::MapVector<llvm::Value *, llvm::Value *> |
2793 | GetAlignedMapping(const OMPLoopDirective &S, CodeGenFunction &CGF) { |
2794 | llvm::MapVector<llvm::Value *, llvm::Value *> AlignedVars; |
2795 | for (const auto *Clause : S.getClausesOfKind<OMPAlignedClause>()) { |
2796 | llvm::APInt ClauseAlignment(64, 0); |
2797 | if (const Expr *AlignmentExpr = Clause->getAlignment()) { |
2798 | auto *AlignmentCI = |
2799 | cast<llvm::ConstantInt>(Val: CGF.EmitScalarExpr(E: AlignmentExpr)); |
2800 | ClauseAlignment = AlignmentCI->getValue(); |
2801 | } |
2802 | for (const Expr *E : Clause->varlist()) { |
2803 | llvm::APInt Alignment(ClauseAlignment); |
2804 | if (Alignment == 0) { |
2805 | // OpenMP [2.8.1, Description] |
2806 | // If no optional parameter is specified, implementation-defined default |
2807 | // alignments for SIMD instructions on the target platforms are assumed. |
2808 | Alignment = |
2809 | CGF.getContext() |
2810 | .toCharUnitsFromBits(BitSize: CGF.getContext().getOpenMPDefaultSimdAlign( |
2811 | T: E->getType()->getPointeeType())) |
2812 | .getQuantity(); |
2813 | } |
2814 | assert((Alignment == 0 || Alignment.isPowerOf2()) && |
2815 | "alignment is not power of 2" ); |
2816 | llvm::Value *PtrValue = CGF.EmitScalarExpr(E); |
2817 | AlignedVars[PtrValue] = CGF.Builder.getInt64(C: Alignment.getSExtValue()); |
2818 | } |
2819 | } |
2820 | return AlignedVars; |
2821 | } |
2822 | |
2823 | // Pass OMPLoopDirective (instead of OMPSimdDirective) to make this function |
2824 | // available for "loop bind(thread)", which maps to "simd". |
2825 | static void emitOMPSimdDirective(const OMPLoopDirective &S, |
2826 | CodeGenFunction &CGF, CodeGenModule &CGM) { |
2827 | bool UseOMPIRBuilder = |
2828 | CGM.getLangOpts().OpenMPIRBuilder && isSimdSupportedByOpenMPIRBuilder(S); |
2829 | if (UseOMPIRBuilder) { |
2830 | auto &&CodeGenIRBuilder = [&S, &CGM, UseOMPIRBuilder](CodeGenFunction &CGF, |
2831 | PrePostActionTy &) { |
2832 | // Use the OpenMPIRBuilder if enabled. |
2833 | if (UseOMPIRBuilder) { |
2834 | llvm::MapVector<llvm::Value *, llvm::Value *> AlignedVars = |
2835 | GetAlignedMapping(S, CGF); |
2836 | // Emit the associated statement and get its loop representation. |
2837 | const Stmt *Inner = S.getRawStmt(); |
2838 | llvm::CanonicalLoopInfo *CLI = |
2839 | CGF.EmitOMPCollapsedCanonicalLoopNest(S: Inner, Depth: 1); |
2840 | |
2841 | llvm::OpenMPIRBuilder &OMPBuilder = |
2842 | CGM.getOpenMPRuntime().getOMPBuilder(); |
2843 | // Add SIMD specific metadata |
2844 | llvm::ConstantInt *Simdlen = nullptr; |
2845 | if (const auto *C = S.getSingleClause<OMPSimdlenClause>()) { |
2846 | RValue Len = CGF.EmitAnyExpr(E: C->getSimdlen(), aggSlot: AggValueSlot::ignored(), |
2847 | /*ignoreResult=*/true); |
2848 | auto *Val = cast<llvm::ConstantInt>(Val: Len.getScalarVal()); |
2849 | Simdlen = Val; |
2850 | } |
2851 | llvm::ConstantInt *Safelen = nullptr; |
2852 | if (const auto *C = S.getSingleClause<OMPSafelenClause>()) { |
2853 | RValue Len = CGF.EmitAnyExpr(E: C->getSafelen(), aggSlot: AggValueSlot::ignored(), |
2854 | /*ignoreResult=*/true); |
2855 | auto *Val = cast<llvm::ConstantInt>(Val: Len.getScalarVal()); |
2856 | Safelen = Val; |
2857 | } |
2858 | llvm::omp::OrderKind Order = llvm::omp::OrderKind::OMP_ORDER_unknown; |
2859 | if (const auto *C = S.getSingleClause<OMPOrderClause>()) { |
2860 | if (C->getKind() == OpenMPOrderClauseKind::OMPC_ORDER_concurrent) { |
2861 | Order = llvm::omp::OrderKind::OMP_ORDER_concurrent; |
2862 | } |
2863 | } |
2864 | // Add simd metadata to the collapsed loop. Do not generate |
2865 | // another loop for if clause. Support for if clause is done earlier. |
2866 | OMPBuilder.applySimd(Loop: CLI, AlignedVars, |
2867 | /*IfCond*/ nullptr, Order, Simdlen, Safelen); |
2868 | return; |
2869 | } |
2870 | }; |
2871 | { |
2872 | auto LPCRegion = |
2873 | CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF, S); |
2874 | OMPLexicalScope Scope(CGF, S, OMPD_unknown); |
2875 | CGM.getOpenMPRuntime().emitInlinedDirective(CGF, InnermostKind: OMPD_simd, |
2876 | CodeGen: CodeGenIRBuilder); |
2877 | } |
2878 | return; |
2879 | } |
2880 | |
2881 | CodeGenFunction::ParentLoopDirectiveForScanRegion ScanRegion(CGF, S); |
2882 | CGF.OMPFirstScanLoop = true; |
2883 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
2884 | emitOMPSimdRegion(CGF, S, Action); |
2885 | }; |
2886 | { |
2887 | auto LPCRegion = |
2888 | CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF, S); |
2889 | OMPLexicalScope Scope(CGF, S, OMPD_unknown); |
2890 | CGM.getOpenMPRuntime().emitInlinedDirective(CGF, InnermostKind: OMPD_simd, CodeGen); |
2891 | } |
2892 | // Check for outer lastprivate conditional update. |
2893 | checkForLastprivateConditionalUpdate(CGF, S); |
2894 | } |
2895 | |
2896 | void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { |
2897 | emitOMPSimdDirective(S, CGF&: *this, CGM); |
2898 | } |
2899 | |
2900 | void CodeGenFunction::EmitOMPTileDirective(const OMPTileDirective &S) { |
2901 | // Emit the de-sugared statement. |
2902 | OMPTransformDirectiveScopeRAII TileScope(*this, &S); |
2903 | EmitStmt(S: S.getTransformedStmt()); |
2904 | } |
2905 | |
2906 | void CodeGenFunction::EmitOMPStripeDirective(const OMPStripeDirective &S) { |
2907 | // Emit the de-sugared statement. |
2908 | OMPTransformDirectiveScopeRAII StripeScope(*this, &S); |
2909 | EmitStmt(S: S.getTransformedStmt()); |
2910 | } |
2911 | |
2912 | void CodeGenFunction::EmitOMPReverseDirective(const OMPReverseDirective &S) { |
2913 | // Emit the de-sugared statement. |
2914 | OMPTransformDirectiveScopeRAII ReverseScope(*this, &S); |
2915 | EmitStmt(S: S.getTransformedStmt()); |
2916 | } |
2917 | |
2918 | void CodeGenFunction::EmitOMPInterchangeDirective( |
2919 | const OMPInterchangeDirective &S) { |
2920 | // Emit the de-sugared statement. |
2921 | OMPTransformDirectiveScopeRAII InterchangeScope(*this, &S); |
2922 | EmitStmt(S: S.getTransformedStmt()); |
2923 | } |
2924 | |
2925 | void CodeGenFunction::EmitOMPUnrollDirective(const OMPUnrollDirective &S) { |
2926 | bool UseOMPIRBuilder = CGM.getLangOpts().OpenMPIRBuilder; |
2927 | |
2928 | if (UseOMPIRBuilder) { |
2929 | auto DL = SourceLocToDebugLoc(Location: S.getBeginLoc()); |
2930 | const Stmt *Inner = S.getRawStmt(); |
2931 | |
2932 | // Consume nested loop. Clear the entire remaining loop stack because a |
2933 | // fully unrolled loop is non-transformable. For partial unrolling the |
2934 | // generated outer loop is pushed back to the stack. |
2935 | llvm::CanonicalLoopInfo *CLI = EmitOMPCollapsedCanonicalLoopNest(S: Inner, Depth: 1); |
2936 | OMPLoopNestStack.clear(); |
2937 | |
2938 | llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); |
2939 | |
2940 | bool NeedsUnrolledCLI = ExpectedOMPLoopDepth >= 1; |
2941 | llvm::CanonicalLoopInfo *UnrolledCLI = nullptr; |
2942 | |
2943 | if (S.hasClausesOfKind<OMPFullClause>()) { |
2944 | assert(ExpectedOMPLoopDepth == 0); |
2945 | OMPBuilder.unrollLoopFull(DL, Loop: CLI); |
2946 | } else if (auto *PartialClause = S.getSingleClause<OMPPartialClause>()) { |
2947 | uint64_t Factor = 0; |
2948 | if (Expr *FactorExpr = PartialClause->getFactor()) { |
2949 | Factor = FactorExpr->EvaluateKnownConstInt(Ctx: getContext()).getZExtValue(); |
2950 | assert(Factor >= 1 && "Only positive factors are valid" ); |
2951 | } |
2952 | OMPBuilder.unrollLoopPartial(DL, Loop: CLI, Factor, |
2953 | UnrolledCLI: NeedsUnrolledCLI ? &UnrolledCLI : nullptr); |
2954 | } else { |
2955 | OMPBuilder.unrollLoopHeuristic(DL, Loop: CLI); |
2956 | } |
2957 | |
2958 | assert((!NeedsUnrolledCLI || UnrolledCLI) && |
2959 | "NeedsUnrolledCLI implies UnrolledCLI to be set" ); |
2960 | if (UnrolledCLI) |
2961 | OMPLoopNestStack.push_back(Elt: UnrolledCLI); |
2962 | |
2963 | return; |
2964 | } |
2965 | |
2966 | // This function is only called if the unrolled loop is not consumed by any |
2967 | // other loop-associated construct. Such a loop-associated construct will have |
2968 | // used the transformed AST. |
2969 | |
2970 | // Set the unroll metadata for the next emitted loop. |
2971 | LoopStack.setUnrollState(LoopAttributes::Enable); |
2972 | |
2973 | if (S.hasClausesOfKind<OMPFullClause>()) { |
2974 | LoopStack.setUnrollState(LoopAttributes::Full); |
2975 | } else if (auto *PartialClause = S.getSingleClause<OMPPartialClause>()) { |
2976 | if (Expr *FactorExpr = PartialClause->getFactor()) { |
2977 | uint64_t Factor = |
2978 | FactorExpr->EvaluateKnownConstInt(Ctx: getContext()).getZExtValue(); |
2979 | assert(Factor >= 1 && "Only positive factors are valid" ); |
2980 | LoopStack.setUnrollCount(Factor); |
2981 | } |
2982 | } |
2983 | |
2984 | EmitStmt(S: S.getAssociatedStmt()); |
2985 | } |
2986 | |
2987 | void CodeGenFunction::EmitOMPOuterLoop( |
2988 | bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S, |
2989 | CodeGenFunction::OMPPrivateScope &LoopScope, |
2990 | const CodeGenFunction::OMPLoopArguments &LoopArgs, |
2991 | const CodeGenFunction::CodeGenLoopTy &CodeGenLoop, |
2992 | const CodeGenFunction::CodeGenOrderedTy &CodeGenOrdered) { |
2993 | CGOpenMPRuntime &RT = CGM.getOpenMPRuntime(); |
2994 | |
2995 | const Expr *IVExpr = S.getIterationVariable(); |
2996 | const unsigned IVSize = getContext().getTypeSize(T: IVExpr->getType()); |
2997 | const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); |
2998 | |
2999 | JumpDest LoopExit = getJumpDestInCurrentScope(Name: "omp.dispatch.end" ); |
3000 | |
3001 | // Start the loop with a block that tests the condition. |
3002 | llvm::BasicBlock *CondBlock = createBasicBlock(name: "omp.dispatch.cond" ); |
3003 | EmitBlock(BB: CondBlock); |
3004 | const SourceRange R = S.getSourceRange(); |
3005 | OMPLoopNestStack.clear(); |
3006 | LoopStack.push(Header: CondBlock, StartLoc: SourceLocToDebugLoc(Location: R.getBegin()), |
3007 | EndLoc: SourceLocToDebugLoc(Location: R.getEnd())); |
3008 | |
3009 | llvm::Value *BoolCondVal = nullptr; |
3010 | if (!DynamicOrOrdered) { |
3011 | // UB = min(UB, GlobalUB) or |
3012 | // UB = min(UB, PrevUB) for combined loop sharing constructs (e.g. |
3013 | // 'distribute parallel for') |
3014 | EmitIgnoredExpr(E: LoopArgs.EUB); |
3015 | // IV = LB |
3016 | EmitIgnoredExpr(E: LoopArgs.Init); |
3017 | // IV < UB |
3018 | BoolCondVal = EvaluateExprAsBool(E: LoopArgs.Cond); |
3019 | } else { |
3020 | BoolCondVal = |
3021 | RT.emitForNext(CGF&: *this, Loc: S.getBeginLoc(), IVSize, IVSigned, IL: LoopArgs.IL, |
3022 | LB: LoopArgs.LB, UB: LoopArgs.UB, ST: LoopArgs.ST); |
3023 | } |
3024 | |
3025 | // If there are any cleanups between here and the loop-exit scope, |
3026 | // create a block to stage a loop exit along. |
3027 | llvm::BasicBlock *ExitBlock = LoopExit.getBlock(); |
3028 | if (LoopScope.requiresCleanups()) |
3029 | ExitBlock = createBasicBlock(name: "omp.dispatch.cleanup" ); |
3030 | |
3031 | llvm::BasicBlock *LoopBody = createBasicBlock(name: "omp.dispatch.body" ); |
3032 | Builder.CreateCondBr(Cond: BoolCondVal, True: LoopBody, False: ExitBlock); |
3033 | if (ExitBlock != LoopExit.getBlock()) { |
3034 | EmitBlock(BB: ExitBlock); |
3035 | EmitBranchThroughCleanup(Dest: LoopExit); |
3036 | } |
3037 | EmitBlock(BB: LoopBody); |
3038 | |
3039 | // Emit "IV = LB" (in case of static schedule, we have already calculated new |
3040 | // LB for loop condition and emitted it above). |
3041 | if (DynamicOrOrdered) |
3042 | EmitIgnoredExpr(E: LoopArgs.Init); |
3043 | |
3044 | // Create a block for the increment. |
3045 | JumpDest Continue = getJumpDestInCurrentScope(Name: "omp.dispatch.inc" ); |
3046 | BreakContinueStack.push_back(Elt: BreakContinue(LoopExit, Continue)); |
3047 | |
3048 | OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S); |
3049 | emitCommonSimdLoop( |
3050 | CGF&: *this, S, |
3051 | SimdInitGen: [&S, IsMonotonic, EKind](CodeGenFunction &CGF, PrePostActionTy &) { |
3052 | // Generate !llvm.loop.parallel metadata for loads and stores for loops |
3053 | // with dynamic/guided scheduling and without ordered clause. |
3054 | if (!isOpenMPSimdDirective(DKind: EKind)) { |
3055 | CGF.LoopStack.setParallel(!IsMonotonic); |
3056 | if (const auto *C = S.getSingleClause<OMPOrderClause>()) |
3057 | if (C->getKind() == OMPC_ORDER_concurrent) |
3058 | CGF.LoopStack.setParallel(/*Enable=*/true); |
3059 | } else { |
3060 | CGF.EmitOMPSimdInit(D: S); |
3061 | } |
3062 | }, |
3063 | BodyCodeGen: [&S, &LoopArgs, LoopExit, &CodeGenLoop, IVSize, IVSigned, &CodeGenOrdered, |
3064 | &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) { |
3065 | SourceLocation Loc = S.getBeginLoc(); |
3066 | // when 'distribute' is not combined with a 'for': |
3067 | // while (idx <= UB) { BODY; ++idx; } |
3068 | // when 'distribute' is combined with a 'for' |
3069 | // (e.g. 'distribute parallel for') |
3070 | // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; } |
3071 | CGF.EmitOMPInnerLoop( |
3072 | S, RequiresCleanup: LoopScope.requiresCleanups(), LoopCond: LoopArgs.Cond, IncExpr: LoopArgs.IncExpr, |
3073 | BodyGen: [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) { |
3074 | CodeGenLoop(CGF, S, LoopExit); |
3075 | }, |
3076 | PostIncGen: [IVSize, IVSigned, Loc, &CodeGenOrdered](CodeGenFunction &CGF) { |
3077 | CodeGenOrdered(CGF, Loc, IVSize, IVSigned); |
3078 | }); |
3079 | }); |
3080 | |
3081 | EmitBlock(BB: Continue.getBlock()); |
3082 | BreakContinueStack.pop_back(); |
3083 | if (!DynamicOrOrdered) { |
3084 | // Emit "LB = LB + Stride", "UB = UB + Stride". |
3085 | EmitIgnoredExpr(E: LoopArgs.NextLB); |
3086 | EmitIgnoredExpr(E: LoopArgs.NextUB); |
3087 | } |
3088 | |
3089 | EmitBranch(Block: CondBlock); |
3090 | OMPLoopNestStack.clear(); |
3091 | LoopStack.pop(); |
3092 | // Emit the fall-through block. |
3093 | EmitBlock(BB: LoopExit.getBlock()); |
3094 | |
3095 | // Tell the runtime we are done. |
3096 | auto &&CodeGen = [DynamicOrOrdered, &S, &LoopArgs](CodeGenFunction &CGF) { |
3097 | if (!DynamicOrOrdered) |
3098 | CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, Loc: S.getEndLoc(), |
3099 | DKind: LoopArgs.DKind); |
3100 | }; |
3101 | OMPCancelStack.emitExit(CGF&: *this, Kind: EKind, CodeGen); |
3102 | } |
3103 | |
3104 | void CodeGenFunction::EmitOMPForOuterLoop( |
3105 | const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic, |
3106 | const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered, |
3107 | const OMPLoopArguments &LoopArgs, |
3108 | const CodeGenDispatchBoundsTy &CGDispatchBounds) { |
3109 | CGOpenMPRuntime &RT = CGM.getOpenMPRuntime(); |
3110 | |
3111 | // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime). |
3112 | const bool DynamicOrOrdered = Ordered || RT.isDynamic(ScheduleKind: ScheduleKind.Schedule); |
3113 | |
3114 | assert((Ordered || !RT.isStaticNonchunked(ScheduleKind.Schedule, |
3115 | LoopArgs.Chunk != nullptr)) && |
3116 | "static non-chunked schedule does not need outer loop" ); |
3117 | |
3118 | // Emit outer loop. |
3119 | // |
3120 | // OpenMP [2.7.1, Loop Construct, Description, table 2-1] |
3121 | // When schedule(dynamic,chunk_size) is specified, the iterations are |
3122 | // distributed to threads in the team in chunks as the threads request them. |
3123 | // Each thread executes a chunk of iterations, then requests another chunk, |
3124 | // until no chunks remain to be distributed. Each chunk contains chunk_size |
3125 | // iterations, except for the last chunk to be distributed, which may have |
3126 | // fewer iterations. When no chunk_size is specified, it defaults to 1. |
3127 | // |
3128 | // When schedule(guided,chunk_size) is specified, the iterations are assigned |
3129 | // to threads in the team in chunks as the executing threads request them. |
3130 | // Each thread executes a chunk of iterations, then requests another chunk, |
3131 | // until no chunks remain to be assigned. For a chunk_size of 1, the size of |
3132 | // each chunk is proportional to the number of unassigned iterations divided |
3133 | // by the number of threads in the team, decreasing to 1. For a chunk_size |
3134 | // with value k (greater than 1), the size of each chunk is determined in the |
3135 | // same way, with the restriction that the chunks do not contain fewer than k |
3136 | // iterations (except for the last chunk to be assigned, which may have fewer |
3137 | // than k iterations). |
3138 | // |
3139 | // When schedule(auto) is specified, the decision regarding scheduling is |
3140 | // delegated to the compiler and/or runtime system. The programmer gives the |
3141 | // implementation the freedom to choose any possible mapping of iterations to |
3142 | // threads in the team. |
3143 | // |
3144 | // When schedule(runtime) is specified, the decision regarding scheduling is |
3145 | // deferred until run time, and the schedule and chunk size are taken from the |
3146 | // run-sched-var ICV. If the ICV is set to auto, the schedule is |
3147 | // implementation defined |
3148 | // |
3149 | // __kmpc_dispatch_init(); |
3150 | // while(__kmpc_dispatch_next(&LB, &UB)) { |
3151 | // idx = LB; |
3152 | // while (idx <= UB) { BODY; ++idx; |
3153 | // __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only. |
3154 | // } // inner loop |
3155 | // } |
3156 | // __kmpc_dispatch_deinit(); |
3157 | // |
3158 | // OpenMP [2.7.1, Loop Construct, Description, table 2-1] |
3159 | // When schedule(static, chunk_size) is specified, iterations are divided into |
3160 | // chunks of size chunk_size, and the chunks are assigned to the threads in |
3161 | // the team in a round-robin fashion in the order of the thread number. |
3162 | // |
3163 | // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) { |
3164 | // while (idx <= UB) { BODY; ++idx; } // inner loop |
3165 | // LB = LB + ST; |
3166 | // UB = UB + ST; |
3167 | // } |
3168 | // |
3169 | |
3170 | const Expr *IVExpr = S.getIterationVariable(); |
3171 | const unsigned IVSize = getContext().getTypeSize(T: IVExpr->getType()); |
3172 | const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); |
3173 | |
3174 | if (DynamicOrOrdered) { |
3175 | const std::pair<llvm::Value *, llvm::Value *> DispatchBounds = |
3176 | CGDispatchBounds(*this, S, LoopArgs.LB, LoopArgs.UB); |
3177 | llvm::Value *LBVal = DispatchBounds.first; |
3178 | llvm::Value *UBVal = DispatchBounds.second; |
3179 | CGOpenMPRuntime::DispatchRTInput DipatchRTInputValues = {LBVal, UBVal, |
3180 | LoopArgs.Chunk}; |
3181 | RT.emitForDispatchInit(CGF&: *this, Loc: S.getBeginLoc(), ScheduleKind, IVSize, |
3182 | IVSigned, Ordered, DispatchValues: DipatchRTInputValues); |
3183 | } else { |
3184 | CGOpenMPRuntime::StaticRTInput StaticInit( |
3185 | IVSize, IVSigned, Ordered, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB, |
3186 | LoopArgs.ST, LoopArgs.Chunk); |
3187 | OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S); |
3188 | RT.emitForStaticInit(CGF&: *this, Loc: S.getBeginLoc(), DKind: EKind, ScheduleKind, |
3189 | Values: StaticInit); |
3190 | } |
3191 | |
3192 | auto &&CodeGenOrdered = [Ordered](CodeGenFunction &CGF, SourceLocation Loc, |
3193 | const unsigned IVSize, |
3194 | const bool IVSigned) { |
3195 | if (Ordered) { |
3196 | CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(CGF, Loc, IVSize, |
3197 | IVSigned); |
3198 | } |
3199 | }; |
3200 | |
3201 | OMPLoopArguments OuterLoopArgs(LoopArgs.LB, LoopArgs.UB, LoopArgs.ST, |
3202 | LoopArgs.IL, LoopArgs.Chunk, LoopArgs.EUB); |
3203 | OuterLoopArgs.IncExpr = S.getInc(); |
3204 | OuterLoopArgs.Init = S.getInit(); |
3205 | OuterLoopArgs.Cond = S.getCond(); |
3206 | OuterLoopArgs.NextLB = S.getNextLowerBound(); |
3207 | OuterLoopArgs.NextUB = S.getNextUpperBound(); |
3208 | OuterLoopArgs.DKind = LoopArgs.DKind; |
3209 | EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, LoopArgs: OuterLoopArgs, |
3210 | CodeGenLoop: emitOMPLoopBodyWithStopPoint, CodeGenOrdered); |
3211 | if (DynamicOrOrdered) { |
3212 | RT.emitForDispatchDeinit(CGF&: *this, Loc: S.getBeginLoc()); |
3213 | } |
3214 | } |
3215 | |
3216 | static void emitEmptyOrdered(CodeGenFunction &, SourceLocation Loc, |
3217 | const unsigned IVSize, const bool IVSigned) {} |
3218 | |
3219 | void CodeGenFunction::EmitOMPDistributeOuterLoop( |
3220 | OpenMPDistScheduleClauseKind ScheduleKind, const OMPLoopDirective &S, |
3221 | OMPPrivateScope &LoopScope, const OMPLoopArguments &LoopArgs, |
3222 | const CodeGenLoopTy &CodeGenLoopContent) { |
3223 | |
3224 | CGOpenMPRuntime &RT = CGM.getOpenMPRuntime(); |
3225 | |
3226 | // Emit outer loop. |
3227 | // Same behavior as a OMPForOuterLoop, except that schedule cannot be |
3228 | // dynamic |
3229 | // |
3230 | |
3231 | const Expr *IVExpr = S.getIterationVariable(); |
3232 | const unsigned IVSize = getContext().getTypeSize(T: IVExpr->getType()); |
3233 | const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); |
3234 | OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S); |
3235 | |
3236 | CGOpenMPRuntime::StaticRTInput StaticInit( |
3237 | IVSize, IVSigned, /* Ordered = */ false, LoopArgs.IL, LoopArgs.LB, |
3238 | LoopArgs.UB, LoopArgs.ST, LoopArgs.Chunk); |
3239 | RT.emitDistributeStaticInit(CGF&: *this, Loc: S.getBeginLoc(), SchedKind: ScheduleKind, Values: StaticInit); |
3240 | |
3241 | // for combined 'distribute' and 'for' the increment expression of distribute |
3242 | // is stored in DistInc. For 'distribute' alone, it is in Inc. |
3243 | Expr *IncExpr; |
3244 | if (isOpenMPLoopBoundSharingDirective(Kind: EKind)) |
3245 | IncExpr = S.getDistInc(); |
3246 | else |
3247 | IncExpr = S.getInc(); |
3248 | |
3249 | // this routine is shared by 'omp distribute parallel for' and |
3250 | // 'omp distribute': select the right EUB expression depending on the |
3251 | // directive |
3252 | OMPLoopArguments OuterLoopArgs; |
3253 | OuterLoopArgs.LB = LoopArgs.LB; |
3254 | OuterLoopArgs.UB = LoopArgs.UB; |
3255 | OuterLoopArgs.ST = LoopArgs.ST; |
3256 | OuterLoopArgs.IL = LoopArgs.IL; |
3257 | OuterLoopArgs.Chunk = LoopArgs.Chunk; |
3258 | OuterLoopArgs.EUB = isOpenMPLoopBoundSharingDirective(Kind: EKind) |
3259 | ? S.getCombinedEnsureUpperBound() |
3260 | : S.getEnsureUpperBound(); |
3261 | OuterLoopArgs.IncExpr = IncExpr; |
3262 | OuterLoopArgs.Init = isOpenMPLoopBoundSharingDirective(Kind: EKind) |
3263 | ? S.getCombinedInit() |
3264 | : S.getInit(); |
3265 | OuterLoopArgs.Cond = isOpenMPLoopBoundSharingDirective(Kind: EKind) |
3266 | ? S.getCombinedCond() |
3267 | : S.getCond(); |
3268 | OuterLoopArgs.NextLB = isOpenMPLoopBoundSharingDirective(Kind: EKind) |
3269 | ? S.getCombinedNextLowerBound() |
3270 | : S.getNextLowerBound(); |
3271 | OuterLoopArgs.NextUB = isOpenMPLoopBoundSharingDirective(Kind: EKind) |
3272 | ? S.getCombinedNextUpperBound() |
3273 | : S.getNextUpperBound(); |
3274 | OuterLoopArgs.DKind = OMPD_distribute; |
3275 | |
3276 | EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, S, |
3277 | LoopScope, LoopArgs: OuterLoopArgs, CodeGenLoop: CodeGenLoopContent, |
3278 | CodeGenOrdered: emitEmptyOrdered); |
3279 | } |
3280 | |
3281 | static std::pair<LValue, LValue> |
3282 | emitDistributeParallelForInnerBounds(CodeGenFunction &CGF, |
3283 | const OMPExecutableDirective &S) { |
3284 | const OMPLoopDirective &LS = cast<OMPLoopDirective>(Val: S); |
3285 | LValue LB = |
3286 | EmitOMPHelperVar(CGF, Helper: cast<DeclRefExpr>(Val: LS.getLowerBoundVariable())); |
3287 | LValue UB = |
3288 | EmitOMPHelperVar(CGF, Helper: cast<DeclRefExpr>(Val: LS.getUpperBoundVariable())); |
3289 | |
3290 | // When composing 'distribute' with 'for' (e.g. as in 'distribute |
3291 | // parallel for') we need to use the 'distribute' |
3292 | // chunk lower and upper bounds rather than the whole loop iteration |
3293 | // space. These are parameters to the outlined function for 'parallel' |
3294 | // and we copy the bounds of the previous schedule into the |
3295 | // the current ones. |
3296 | LValue PrevLB = CGF.EmitLValue(E: LS.getPrevLowerBoundVariable()); |
3297 | LValue PrevUB = CGF.EmitLValue(E: LS.getPrevUpperBoundVariable()); |
3298 | llvm::Value *PrevLBVal = CGF.EmitLoadOfScalar( |
3299 | lvalue: PrevLB, Loc: LS.getPrevLowerBoundVariable()->getExprLoc()); |
3300 | PrevLBVal = CGF.EmitScalarConversion( |
3301 | Src: PrevLBVal, SrcTy: LS.getPrevLowerBoundVariable()->getType(), |
3302 | DstTy: LS.getIterationVariable()->getType(), |
3303 | Loc: LS.getPrevLowerBoundVariable()->getExprLoc()); |
3304 | llvm::Value *PrevUBVal = CGF.EmitLoadOfScalar( |
3305 | lvalue: PrevUB, Loc: LS.getPrevUpperBoundVariable()->getExprLoc()); |
3306 | PrevUBVal = CGF.EmitScalarConversion( |
3307 | Src: PrevUBVal, SrcTy: LS.getPrevUpperBoundVariable()->getType(), |
3308 | DstTy: LS.getIterationVariable()->getType(), |
3309 | Loc: LS.getPrevUpperBoundVariable()->getExprLoc()); |
3310 | |
3311 | CGF.EmitStoreOfScalar(value: PrevLBVal, lvalue: LB); |
3312 | CGF.EmitStoreOfScalar(value: PrevUBVal, lvalue: UB); |
3313 | |
3314 | return {LB, UB}; |
3315 | } |
3316 | |
3317 | /// if the 'for' loop has a dispatch schedule (e.g. dynamic, guided) then |
3318 | /// we need to use the LB and UB expressions generated by the worksharing |
3319 | /// code generation support, whereas in non combined situations we would |
3320 | /// just emit 0 and the LastIteration expression |
3321 | /// This function is necessary due to the difference of the LB and UB |
3322 | /// types for the RT emission routines for 'for_static_init' and |
3323 | /// 'for_dispatch_init' |
3324 | static std::pair<llvm::Value *, llvm::Value *> |
3325 | emitDistributeParallelForDispatchBounds(CodeGenFunction &CGF, |
3326 | const OMPExecutableDirective &S, |
3327 | Address LB, Address UB) { |
3328 | const OMPLoopDirective &LS = cast<OMPLoopDirective>(Val: S); |
3329 | const Expr *IVExpr = LS.getIterationVariable(); |
3330 | // when implementing a dynamic schedule for a 'for' combined with a |
3331 | // 'distribute' (e.g. 'distribute parallel for'), the 'for' loop |
3332 | // is not normalized as each team only executes its own assigned |
3333 | // distribute chunk |
3334 | QualType IteratorTy = IVExpr->getType(); |
3335 | llvm::Value *LBVal = |
3336 | CGF.EmitLoadOfScalar(Addr: LB, /*Volatile=*/false, Ty: IteratorTy, Loc: S.getBeginLoc()); |
3337 | llvm::Value *UBVal = |
3338 | CGF.EmitLoadOfScalar(Addr: UB, /*Volatile=*/false, Ty: IteratorTy, Loc: S.getBeginLoc()); |
3339 | return {LBVal, UBVal}; |
3340 | } |
3341 | |
3342 | static void emitDistributeParallelForDistributeInnerBoundParams( |
3343 | CodeGenFunction &CGF, const OMPExecutableDirective &S, |
3344 | llvm::SmallVectorImpl<llvm::Value *> &CapturedVars) { |
3345 | const auto &Dir = cast<OMPLoopDirective>(Val: S); |
3346 | LValue LB = |
3347 | CGF.EmitLValue(E: cast<DeclRefExpr>(Val: Dir.getCombinedLowerBoundVariable())); |
3348 | llvm::Value *LBCast = CGF.Builder.CreateIntCast( |
3349 | V: CGF.Builder.CreateLoad(Addr: LB.getAddress()), DestTy: CGF.SizeTy, /*isSigned=*/false); |
3350 | CapturedVars.push_back(Elt: LBCast); |
3351 | LValue UB = |
3352 | CGF.EmitLValue(E: cast<DeclRefExpr>(Val: Dir.getCombinedUpperBoundVariable())); |
3353 | |
3354 | llvm::Value *UBCast = CGF.Builder.CreateIntCast( |
3355 | V: CGF.Builder.CreateLoad(Addr: UB.getAddress()), DestTy: CGF.SizeTy, /*isSigned=*/false); |
3356 | CapturedVars.push_back(Elt: UBCast); |
3357 | } |
3358 | |
3359 | static void |
3360 | emitInnerParallelForWhenCombined(CodeGenFunction &CGF, |
3361 | const OMPLoopDirective &S, |
3362 | CodeGenFunction::JumpDest LoopExit) { |
3363 | OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S); |
3364 | auto &&CGInlinedWorksharingLoop = [&S, EKind](CodeGenFunction &CGF, |
3365 | PrePostActionTy &Action) { |
3366 | Action.Enter(CGF); |
3367 | bool HasCancel = false; |
3368 | if (!isOpenMPSimdDirective(DKind: EKind)) { |
3369 | if (const auto *D = dyn_cast<OMPTeamsDistributeParallelForDirective>(Val: &S)) |
3370 | HasCancel = D->hasCancel(); |
3371 | else if (const auto *D = dyn_cast<OMPDistributeParallelForDirective>(Val: &S)) |
3372 | HasCancel = D->hasCancel(); |
3373 | else if (const auto *D = |
3374 | dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(Val: &S)) |
3375 | HasCancel = D->hasCancel(); |
3376 | } |
3377 | CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, EKind, HasCancel); |
3378 | CGF.EmitOMPWorksharingLoop(S, EUB: S.getPrevEnsureUpperBound(), |
3379 | CodeGenLoopBounds: emitDistributeParallelForInnerBounds, |
3380 | CGDispatchBounds: emitDistributeParallelForDispatchBounds); |
3381 | }; |
3382 | |
3383 | emitCommonOMPParallelDirective( |
3384 | CGF, S, InnermostKind: isOpenMPSimdDirective(DKind: EKind) ? OMPD_for_simd : OMPD_for, |
3385 | CodeGen: CGInlinedWorksharingLoop, |
3386 | CodeGenBoundParameters: emitDistributeParallelForDistributeInnerBoundParams); |
3387 | } |
3388 | |
3389 | void CodeGenFunction::EmitOMPDistributeParallelForDirective( |
3390 | const OMPDistributeParallelForDirective &S) { |
3391 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { |
3392 | CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitInnerParallelForWhenCombined, |
3393 | IncExpr: S.getDistInc()); |
3394 | }; |
3395 | OMPLexicalScope Scope(*this, S, OMPD_parallel); |
3396 | CGM.getOpenMPRuntime().emitInlinedDirective(CGF&: *this, InnermostKind: OMPD_distribute, CodeGen); |
3397 | } |
3398 | |
3399 | void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective( |
3400 | const OMPDistributeParallelForSimdDirective &S) { |
3401 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { |
3402 | CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitInnerParallelForWhenCombined, |
3403 | IncExpr: S.getDistInc()); |
3404 | }; |
3405 | OMPLexicalScope Scope(*this, S, OMPD_parallel); |
3406 | CGM.getOpenMPRuntime().emitInlinedDirective(CGF&: *this, InnermostKind: OMPD_distribute, CodeGen); |
3407 | } |
3408 | |
3409 | void CodeGenFunction::EmitOMPDistributeSimdDirective( |
3410 | const OMPDistributeSimdDirective &S) { |
3411 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { |
3412 | CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitOMPLoopBodyWithStopPoint, IncExpr: S.getInc()); |
3413 | }; |
3414 | OMPLexicalScope Scope(*this, S, OMPD_unknown); |
3415 | CGM.getOpenMPRuntime().emitInlinedDirective(CGF&: *this, InnermostKind: OMPD_simd, CodeGen); |
3416 | } |
3417 | |
3418 | void CodeGenFunction::EmitOMPTargetSimdDeviceFunction( |
3419 | CodeGenModule &CGM, StringRef ParentName, const OMPTargetSimdDirective &S) { |
3420 | // Emit SPMD target parallel for region as a standalone region. |
3421 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
3422 | emitOMPSimdRegion(CGF, S, Action); |
3423 | }; |
3424 | llvm::Function *Fn; |
3425 | llvm::Constant *Addr; |
3426 | // Emit target region as a standalone region. |
3427 | CGM.getOpenMPRuntime().emitTargetOutlinedFunction( |
3428 | D: S, ParentName, OutlinedFn&: Fn, OutlinedFnID&: Addr, /*IsOffloadEntry=*/true, CodeGen); |
3429 | assert(Fn && Addr && "Target device function emission failed." ); |
3430 | } |
3431 | |
3432 | void CodeGenFunction::EmitOMPTargetSimdDirective( |
3433 | const OMPTargetSimdDirective &S) { |
3434 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
3435 | emitOMPSimdRegion(CGF, S, Action); |
3436 | }; |
3437 | emitCommonOMPTargetDirective(CGF&: *this, S, CodeGen); |
3438 | } |
3439 | |
3440 | namespace { |
3441 | struct ScheduleKindModifiersTy { |
3442 | OpenMPScheduleClauseKind Kind; |
3443 | OpenMPScheduleClauseModifier M1; |
3444 | OpenMPScheduleClauseModifier M2; |
3445 | ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind, |
3446 | OpenMPScheduleClauseModifier M1, |
3447 | OpenMPScheduleClauseModifier M2) |
3448 | : Kind(Kind), M1(M1), M2(M2) {} |
3449 | }; |
3450 | } // namespace |
3451 | |
/// Emits the worksharing loop for the loop directive \p S.
///
/// The loop is skipped entirely when its precondition constant-folds to false.
/// Otherwise the privatization clauses are set up, the schedule kind and chunk
/// are determined, and one of two shapes is emitted:
///  - a statically scheduled loop (static non-chunked, or static with a
///    constant chunk size of one on a loop-bound-sharing directive) when the
///    loop is not ordered, or
///  - the outer dispatch loop via EmitOMPForOuterLoop.
/// Afterwards the simd, reduction, lastprivate and linear finalization code is
/// emitted.
///
/// \param S The loop directive being emitted.
/// \param EUB Expression passed on to the outer-loop arguments on the dispatch
/// path.
/// \param CodeGenLoopBounds Callback that yields the lower/upper bound lvalues.
/// \param CGDispatchBounds Callback forwarded to EmitOMPForOuterLoop.
/// \returns true if EmitOMPLastprivateClauseInit reported a lastprivate clause;
/// false if it did not, or if the precondition folded to a constant false.
bool CodeGenFunction::EmitOMPWorksharingLoop(
    const OMPLoopDirective &S, Expr *EUB,
    const CodeGenLoopBoundsTy &CodeGenLoopBounds,
    const CodeGenDispatchBoundsTy &CGDispatchBounds) {
  // Emit the loop iteration variable.
  const auto *IVExpr = cast<DeclRefExpr>(Val: S.getIterationVariable());
  const auto *IVDecl = cast<VarDecl>(Val: IVExpr->getDecl());
  EmitVarDecl(D: *IVDecl);

  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate iterations count on each
  // iteration (e.g., it is foldable into a constant).
  if (const auto *LIExpr = dyn_cast<DeclRefExpr>(Val: S.getLastIteration())) {
    EmitVarDecl(D: *cast<VarDecl>(Val: LIExpr->getDecl()));
    // Emit calculation of the iterations count.
    EmitIgnoredExpr(E: S.getCalcLastIteration());
  }

  CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();

  // Assigned in the 'then' region below. The only exit before that assignment
  // is the constant-false precondition, which returns false directly.
  bool HasLastprivateClause;
  // Check pre-condition.
  {
    OMPLoopScope PreInitScope(*this, S);
    // Skip the entire loop if we don't meet the precondition.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    // Stays null when the precondition folds to a constant true, in which case
    // no continuation block is needed.
    llvm::BasicBlock *ContBlock = nullptr;
    if (ConstantFoldsToSimpleInteger(Cond: S.getPreCond(), Result&: CondConstant)) {
      if (!CondConstant)
        return false;
    } else {
      llvm::BasicBlock *ThenBlock = createBasicBlock(name: "omp.precond.then" );
      ContBlock = createBasicBlock(name: "omp.precond.end" );
      emitPreCond(CGF&: *this, S, Cond: S.getPreCond(), TrueBlock: ThenBlock, FalseBlock: ContBlock,
                  TrueCount: getProfileCount(S: &S));
      EmitBlock(BB: ThenBlock);
      incrementProfileCounter(S: &S);
    }

    // Cleanups registered from here on are forced out by the ForceCleanup()
    // call at the end of this block.
    RunCleanupsScope DoacrossCleanupScope(*this);
    // 'Ordered' is set only for an ordered clause without a loop count; an
    // ordered clause with a loop count initializes doacross support instead.
    bool Ordered = false;
    if (const auto *OrderedClause = S.getSingleClause<OMPOrderedClause>()) {
      if (OrderedClause->getNumForLoops())
        RT.emitDoacrossInit(CGF&: *this, D: S, NumIterations: OrderedClause->getLoopNumIterations());
      else
        Ordered = true;
    }

    emitAlignedClause(CGF&: *this, D: S);
    bool HasLinears = EmitOMPLinearClauseInit(D: S);
    // Emit helper vars inits.

    // Lower/upper bounds come from the caller-provided callback; stride and
    // is-last-iteration helpers are emitted here.
    std::pair<LValue, LValue> Bounds = CodeGenLoopBounds(*this, S);
    LValue LB = Bounds.first;
    LValue UB = Bounds.second;
    LValue ST =
        EmitOMPHelperVar(CGF&: *this, Helper: cast<DeclRefExpr>(Val: S.getStrideVariable()));
    LValue IL =
        EmitOMPHelperVar(CGF&: *this, Helper: cast<DeclRefExpr>(Val: S.getIsLastIterVariable()));

    // Emit 'then' code.
    {
      OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S);
      OMPPrivateScope LoopScope(*this);
      if (EmitOMPFirstprivateClause(D: S, PrivateScope&: LoopScope) || HasLinears) {
        // Emit implicit barrier to synchronize threads and avoid data races on
        // initialization of firstprivate variables and post-update of
        // lastprivate variables.
        CGM.getOpenMPRuntime().emitBarrierCall(
            CGF&: *this, Loc: S.getBeginLoc(), Kind: OMPD_unknown, /*EmitChecks=*/false,
            /*ForceSimpleCall=*/true);
      }
      EmitOMPPrivateClause(D: S, PrivateScope&: LoopScope);
      CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(
          *this, S, EmitLValue(E: S.getIterationVariable()));
      HasLastprivateClause = EmitOMPLastprivateClauseInit(D: S, PrivateScope&: LoopScope);
      EmitOMPReductionClauseInit(D: S, PrivateScope&: LoopScope);
      EmitOMPPrivateLoopCounters(S, LoopScope);
      EmitOMPLinearClause(D: S, PrivateScope&: LoopScope);
      (void)LoopScope.Privatize();
      if (isOpenMPTargetExecutionDirective(DKind: EKind))
        CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF&: *this, D: S);

      // Detect the loop schedule kind and chunk.
      const Expr *ChunkExpr = nullptr;
      OpenMPScheduleTy ScheduleKind;
      if (const auto *C = S.getSingleClause<OMPScheduleClause>()) {
        ScheduleKind.Schedule = C->getScheduleKind();
        ScheduleKind.M1 = C->getFirstScheduleModifier();
        ScheduleKind.M2 = C->getSecondScheduleModifier();
        ChunkExpr = C->getChunkSize();
      } else {
        // Default behaviour for schedule clause.
        CGM.getOpenMPRuntime().getDefaultScheduleAndChunk(
            CGF&: *this, S, ScheduleKind&: ScheduleKind.Schedule, ChunkExpr);
      }
      // True only when the chunk expression evaluates to the integer
      // constant 1.
      bool HasChunkSizeOne = false;
      llvm::Value *Chunk = nullptr;
      if (ChunkExpr) {
        // The chunk value is converted to the iteration variable's type.
        Chunk = EmitScalarExpr(E: ChunkExpr);
        Chunk = EmitScalarConversion(Src: Chunk, SrcTy: ChunkExpr->getType(),
                                     DstTy: S.getIterationVariable()->getType(),
                                     Loc: S.getBeginLoc());
        Expr::EvalResult Result;
        if (ChunkExpr->EvaluateAsInt(Result, Ctx: getContext())) {
          llvm::APSInt EvaluatedChunk = Result.Val.getInt();
          HasChunkSizeOne = (EvaluatedChunk.getLimitedValue() == 1);
        }
      }
      const unsigned IVSize = getContext().getTypeSize(T: IVExpr->getType());
      const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
      // Static chunked schedule with a constant chunk size of one on a
      // loop-bound-sharing directive; handled by the static path below.
      bool StaticChunkedOne =
          RT.isStaticChunked(ScheduleKind: ScheduleKind.Schedule,
                             /* Chunked */ Chunk != nullptr) &&
          HasChunkSizeOne && isOpenMPLoopBoundSharingDirective(Kind: EKind);
      // OpenMP 4.5, 2.7.1 Loop Construct, Description.
      // If the static schedule kind is specified or if the ordered clause is
      // specified, and if no monotonic modifier is specified, the effect will
      // be as if the monotonic modifier was specified.
      bool IsMonotonic =
          Ordered ||
          (ScheduleKind.Schedule == OMPC_SCHEDULE_static &&
           !(ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_nonmonotonic ||
             ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_nonmonotonic)) ||
          ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic ||
          ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic;
      // Static path: static non-chunked or static chunk-one, and not ordered.
      if ((RT.isStaticNonchunked(ScheduleKind: ScheduleKind.Schedule,
                                 /* Chunked */ Chunk != nullptr) ||
           StaticChunkedOne) &&
          !Ordered) {
        JumpDest LoopExit =
            getJumpDestInCurrentScope(Target: createBasicBlock(name: "omp.loop.exit" ));
        emitCommonSimdLoop(
            CGF&: *this, S,
            SimdInitGen: [&S, EKind](CodeGenFunction &CGF, PrePostActionTy &) {
              // Simd directives get simd loop setup; otherwise an
              // order(concurrent) clause marks the loop as parallel.
              if (isOpenMPSimdDirective(DKind: EKind)) {
                CGF.EmitOMPSimdInit(D: S);
              } else if (const auto *C = S.getSingleClause<OMPOrderClause>()) {
                if (C->getKind() == OMPC_ORDER_concurrent)
                  CGF.LoopStack.setParallel(/*Enable=*/true);
              }
            },
            BodyCodeGen: [IVSize, IVSigned, Ordered, IL, LB, UB, ST, StaticChunkedOne, Chunk,
             &S, ScheduleKind, LoopExit, EKind,
             &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
              // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
              // When no chunk_size is specified, the iteration space is divided
              // into chunks that are approximately equal in size, and at most
              // one chunk is distributed to each thread. Note that the size of
              // the chunks is unspecified in this case.
              // The chunk value is only passed for the chunk-one case.
              CGOpenMPRuntime::StaticRTInput StaticInit(
                  IVSize, IVSigned, Ordered, IL.getAddress(), LB.getAddress(),
                  UB.getAddress(), ST.getAddress(),
                  StaticChunkedOne ? Chunk : nullptr);
              CGF.CGM.getOpenMPRuntime().emitForStaticInit(
                  CGF, Loc: S.getBeginLoc(), DKind: EKind, ScheduleKind, Values: StaticInit);
              // UB = min(UB, GlobalUB);
              if (!StaticChunkedOne)
                CGF.EmitIgnoredExpr(E: S.getEnsureUpperBound());
              // IV = LB;
              CGF.EmitIgnoredExpr(E: S.getInit());
              // For unchunked static schedule generate:
              //
              // while (idx <= UB) {
              //   BODY;
              //   ++idx;
              // }
              //
              // For static schedule with chunk one:
              //
              // while (IV <= PrevUB) {
              //   BODY;
              //   IV += ST;
              // }
              CGF.EmitOMPInnerLoop(
                  S, RequiresCleanup: LoopScope.requiresCleanups(),
                  LoopCond: StaticChunkedOne ? S.getCombinedParForInDistCond()
                                   : S.getCond(),
                  IncExpr: StaticChunkedOne ? S.getDistInc() : S.getInc(),
                  BodyGen: [&S, LoopExit](CodeGenFunction &CGF) {
                    emitOMPLoopBodyWithStopPoint(CGF, S, LoopExit);
                  },
                  PostIncGen: [](CodeGenFunction &) {});
            });
        EmitBlock(BB: LoopExit.getBlock());
        // Tell the runtime we are done.
        auto &&CodeGen = [&S](CodeGenFunction &CGF) {
          CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, Loc: S.getEndLoc(),
                                                         DKind: OMPD_for);
        };
        OMPCancelStack.emitExit(CGF&: *this, Kind: EKind, CodeGen);
      } else {
        // Emit the outer loop, which requests its work chunk [LB..UB] from
        // runtime and runs the inner loop to process it.
        OMPLoopArguments LoopArguments(LB.getAddress(), UB.getAddress(),
                                       ST.getAddress(), IL.getAddress(), Chunk,
                                       EUB);
        LoopArguments.DKind = OMPD_for;
        EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered,
                            LoopArgs: LoopArguments, CGDispatchBounds);
      }
      // Simd finalization, conditioned on the is-last-iteration flag being
      // non-zero.
      if (isOpenMPSimdDirective(DKind: EKind)) {
        EmitOMPSimdFinal(D: S, CondGen: [IL, &S](CodeGenFunction &CGF) {
          return CGF.Builder.CreateIsNotNull(
              Arg: CGF.EmitLoadOfScalar(lvalue: IL, Loc: S.getBeginLoc()));
        });
      }
      EmitOMPReductionClauseFinal(
          D: S, /*ReductionKind=*/isOpenMPSimdDirective(DKind: EKind)
                 ? /*Parallel and Simd*/ OMPD_parallel_for_simd
                 : /*Parallel only*/ OMPD_parallel);
      // Emit post-update of the reduction variables if IsLastIter != 0.
      emitPostUpdateForReductionClause(
          CGF&: *this, D: S, CondGen: [IL, &S](CodeGenFunction &CGF) {
            return CGF.Builder.CreateIsNotNull(
                Arg: CGF.EmitLoadOfScalar(lvalue: IL, Loc: S.getBeginLoc()));
          });
      // Emit final copy of the lastprivate variables if IsLastIter != 0.
      if (HasLastprivateClause)
        EmitOMPLastprivateClauseFinal(
            D: S, NoFinals: isOpenMPSimdDirective(DKind: EKind),
            IsLastIterCond: Builder.CreateIsNotNull(Arg: EmitLoadOfScalar(lvalue: IL, Loc: S.getBeginLoc())));
      // The private mapping is restored before the linear finalization is
      // emitted.
      LoopScope.restoreMap();
      EmitOMPLinearClauseFinal(D: S, CondGen: [IL, &S](CodeGenFunction &CGF) {
        return CGF.Builder.CreateIsNotNull(
            Arg: CGF.EmitLoadOfScalar(lvalue: IL, Loc: S.getBeginLoc()));
      });
    }
    DoacrossCleanupScope.ForceCleanup();
    // We're now done with the loop, so jump to the continuation block.
    if (ContBlock) {
      EmitBranch(Block: ContBlock);
      EmitBlock(BB: ContBlock, /*IsFinished=*/true);
    }
  }
  return HasLastprivateClause;
}
3691 | |
3692 | /// The following two functions generate expressions for the loop lower |
3693 | /// and upper bounds in case of static and dynamic (dispatch) schedule |
3694 | /// of the associated 'for' or 'distribute' loop. |
3695 | static std::pair<LValue, LValue> |
3696 | emitForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S) { |
3697 | const auto &LS = cast<OMPLoopDirective>(Val: S); |
3698 | LValue LB = |
3699 | EmitOMPHelperVar(CGF, Helper: cast<DeclRefExpr>(Val: LS.getLowerBoundVariable())); |
3700 | LValue UB = |
3701 | EmitOMPHelperVar(CGF, Helper: cast<DeclRefExpr>(Val: LS.getUpperBoundVariable())); |
3702 | return {LB, UB}; |
3703 | } |
3704 | |
3705 | /// When dealing with dispatch schedules (e.g. dynamic, guided) we do not |
3706 | /// consider the lower and upper bound expressions generated by the |
3707 | /// worksharing loop support, but we use 0 and the iteration space size as |
3708 | /// constants |
3709 | static std::pair<llvm::Value *, llvm::Value *> |
3710 | emitDispatchForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S, |
3711 | Address LB, Address UB) { |
3712 | const auto &LS = cast<OMPLoopDirective>(Val: S); |
3713 | const Expr *IVExpr = LS.getIterationVariable(); |
3714 | const unsigned IVSize = CGF.getContext().getTypeSize(T: IVExpr->getType()); |
3715 | llvm::Value *LBVal = CGF.Builder.getIntN(N: IVSize, C: 0); |
3716 | llvm::Value *UBVal = CGF.EmitScalarExpr(E: LS.getLastIteration()); |
3717 | return {LBVal, UBVal}; |
3718 | } |
3719 | |
/// Emits internal temp array declarations for the directive with inscan
/// reductions.
/// The code is the following:
/// \code
/// size num_iters = <num_iters>;
/// <type> buffer[num_iters];
/// \endcode
///
/// \param CGF The function being emitted into.
/// \param S The loop directive; every reduction clause on it is asserted to be
/// an inscan reduction.
/// \param NumIteratorsGen Callback that emits the number of iterations; the
/// result is cast (as unsigned) to the size type.
static void emitScanBasedDirectiveDecls(
    CodeGenFunction &CGF, const OMPLoopDirective &S,
    llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen) {
  llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast(
      V: NumIteratorsGen(CGF), DestTy: CGF.SizeTy, /*isSigned=*/false);
  // Flatten the per-clause lists into parallel vectors: entry I of each vector
  // belongs to the same reduction item.
  SmallVector<const Expr *, 4> Shareds;
  SmallVector<const Expr *, 4> Privates;
  SmallVector<const Expr *, 4> ReductionOps;
  SmallVector<const Expr *, 4> CopyArrayTemps;
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
    assert(C->getModifier() == OMPC_REDUCTION_inscan &&
           "Only inscan reductions are expected." );
    Shareds.append(in_start: C->varlist_begin(), in_end: C->varlist_end());
    Privates.append(in_start: C->privates().begin(), in_end: C->privates().end());
    ReductionOps.append(in_start: C->reduction_ops().begin(), in_end: C->reduction_ops().end());
    CopyArrayTemps.append(in_start: C->copy_array_temps().begin(),
                          in_end: C->copy_array_temps().end());
  }
  {
    // Emit buffers for each reduction variables.
    // ReductionCodeGen is required to emit correctly the code for array
    // reductions.
    ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps);
    // Count indexes the reduction item inside RedCG; ITA walks the temp
    // buffers in lockstep with Privates.
    unsigned Count = 0;
    auto *ITA = CopyArrayTemps.begin();
    for (const Expr *IRef : Privates) {
      const auto *PrivateVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: IRef)->getDecl());
      // Emit variably modified arrays, used for arrays/array sections
      // reductions.
      if (PrivateVD->getType()->isVariablyModifiedType()) {
        RedCG.emitSharedOrigLValue(CGF, N: Count);
        RedCG.emitAggregateType(CGF, N: Count);
      }
      // Bind the temp buffer's variable-array size expression to the number of
      // iterations while the buffer declaration is emitted.
      CodeGenFunction::OpaqueValueMapping DimMapping(
          CGF,
          cast<OpaqueValueExpr>(
              Val: cast<VariableArrayType>(Val: (*ITA)->getType()->getAsArrayTypeUnsafe())
                  ->getSizeExpr()),
          RValue::get(V: OMPScanNumIterations));
      // Emit temp buffer.
      CGF.EmitVarDecl(D: *cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *ITA)->getDecl()));
      ++ITA;
      ++Count;
    }
  }
}
3773 | |
/// Copies final inscan reductions values to the original variables.
/// The code is the following:
/// \code
/// <orig_var> = buffer[num_iters-1];
/// \endcode
///
/// \param CGF The function being emitted into.
/// \param S The loop directive; every reduction clause on it is asserted to be
/// an inscan reduction.
/// \param NumIteratorsGen Callback that emits the number of iterations; the
/// result is cast (as unsigned) to the size type.
static void emitScanBasedDirectiveFinals(
    CodeGenFunction &CGF, const OMPLoopDirective &S,
    llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen) {
  llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast(
      V: NumIteratorsGen(CGF), DestTy: CGF.SizeTy, /*isSigned=*/false);
  // Flatten the per-clause lists into parallel vectors: entry I of each vector
  // belongs to the same reduction item.
  SmallVector<const Expr *, 4> Shareds;
  SmallVector<const Expr *, 4> LHSs;
  SmallVector<const Expr *, 4> RHSs;
  SmallVector<const Expr *, 4> Privates;
  SmallVector<const Expr *, 4> CopyOps;
  SmallVector<const Expr *, 4> CopyArrayElems;
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
    assert(C->getModifier() == OMPC_REDUCTION_inscan &&
           "Only inscan reductions are expected." );
    Shareds.append(in_start: C->varlist_begin(), in_end: C->varlist_end());
    LHSs.append(in_start: C->lhs_exprs().begin(), in_end: C->lhs_exprs().end());
    RHSs.append(in_start: C->rhs_exprs().begin(), in_end: C->rhs_exprs().end());
    Privates.append(in_start: C->privates().begin(), in_end: C->privates().end());
    CopyOps.append(in_start: C->copy_ops().begin(), in_end: C->copy_ops().end());
    CopyArrayElems.append(in_start: C->copy_array_elems().begin(),
                          in_end: C->copy_array_elems().end());
  }
  // Create temp var and copy LHS value to this temp value.
  // LHS = TMP[LastIter];
  // OMPLast is the index of the last buffer element (num_iters - 1).
  llvm::Value *OMPLast = CGF.Builder.CreateNSWSub(
      LHS: OMPScanNumIterations,
      RHS: llvm::ConstantInt::get(Ty: CGF.SizeTy, V: 1, /*isSigned=*/IsSigned: false));
  for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
    const Expr *PrivateExpr = Privates[I];
    const Expr *OrigExpr = Shareds[I];
    const Expr *CopyArrayElem = CopyArrayElems[I];
    // Bind the subscript's opaque index to OMPLast so that CopyArrayElem
    // denotes the last buffer element.
    CodeGenFunction::OpaqueValueMapping IdxMapping(
        CGF,
        cast<OpaqueValueExpr>(
            Val: cast<ArraySubscriptExpr>(Val: CopyArrayElem)->getIdx()),
        RValue::get(V: OMPLast));
    // Destination is the original variable; source is the buffer element.
    LValue DestLVal = CGF.EmitLValue(E: OrigExpr);
    LValue SrcLVal = CGF.EmitLValue(E: CopyArrayElem);
    CGF.EmitOMPCopy(
        OriginalType: PrivateExpr->getType(), DestAddr: DestLVal.getAddress(), SrcAddr: SrcLVal.getAddress(),
        DestVD: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: LHSs[I])->getDecl()),
        SrcVD: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: RHSs[I])->getDecl()), Copy: CopyOps[I]);
  }
}
3823 | |
/// Emits the code for the directive with inscan reductions.
/// The code is the following:
/// \code
/// #pragma omp ...
/// for (i: 0..<num_iters>) {
///   <input phase>;
///   buffer[i] = red;
/// }
/// #pragma omp master // in parallel region
/// for (int k = 0; k != ceil(log2(num_iters)); ++k)
/// for (size cnt = last_iter; cnt >= pow(2, k); --cnt)
///   buffer[cnt] op= buffer[cnt-pow(2,k)];
/// #pragma omp barrier // in parallel region
/// #pragma omp ...
/// for (0..<num_iters>) {
///   red = InclusiveScan ? buffer[i] : buffer[i-1];
///   <scan phase>;
/// }
/// \endcode
///
/// \param CGF The function being emitted into.
/// \param S The loop directive; every reduction clause on it is asserted to be
/// an inscan reduction.
/// \param NumIteratorsGen Callback that emits the number of iterations; the
/// result is cast (as unsigned) to the size type.
/// \param FirstGen Emits the first loop (input phase); called with
/// OMPFirstScanLoop set to true.
/// \param SecondGen Emits the second loop (scan phase); called with
/// OMPFirstScanLoop set to false.
static void emitScanBasedDirective(
    CodeGenFunction &CGF, const OMPLoopDirective &S,
    llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen,
    llvm::function_ref<void(CodeGenFunction &)> FirstGen,
    llvm::function_ref<void(CodeGenFunction &)> SecondGen) {
  llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast(
      V: NumIteratorsGen(CGF), DestTy: CGF.SizeTy, /*isSigned=*/false);
  // Flatten the per-clause lists into parallel vectors: entry I of each vector
  // belongs to the same reduction item.
  SmallVector<const Expr *, 4> Privates;
  SmallVector<const Expr *, 4> ReductionOps;
  SmallVector<const Expr *, 4> LHSs;
  SmallVector<const Expr *, 4> RHSs;
  SmallVector<const Expr *, 4> CopyArrayElems;
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
    assert(C->getModifier() == OMPC_REDUCTION_inscan &&
           "Only inscan reductions are expected." );
    Privates.append(in_start: C->privates().begin(), in_end: C->privates().end());
    ReductionOps.append(in_start: C->reduction_ops().begin(), in_end: C->reduction_ops().end());
    LHSs.append(in_start: C->lhs_exprs().begin(), in_end: C->lhs_exprs().end());
    RHSs.append(in_start: C->rhs_exprs().begin(), in_end: C->rhs_exprs().end());
    CopyArrayElems.append(in_start: C->copy_array_elems().begin(),
                          in_end: C->copy_array_elems().end());
  }
  CodeGenFunction::ParentLoopDirectiveForScanRegion ScanRegion(CGF, S);
  {
    // Emit loop with input phase:
    // #pragma omp ...
    // for (i: 0..<num_iters>) {
    //   <input phase>;
    //   buffer[i] = red;
    // }
    CGF.OMPFirstScanLoop = true;
    CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
    FirstGen(CGF);
  }
  // Generator for the prefix-reduction over the buffers. It is run inside a
  // master region followed by a barrier for parallel directives, and inline
  // otherwise (see below).
  auto &&CodeGen = [&S, OMPScanNumIterations, &LHSs, &RHSs, &CopyArrayElems,
                    &ReductionOps,
                    &Privates](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    // Emit prefix reduction:
    // #pragma omp master // in parallel region
    // for (int k = 0; k != ceil(log2(n)); ++k)
    llvm::BasicBlock *InputBB = CGF.Builder.GetInsertBlock();
    llvm::BasicBlock *LoopBB = CGF.createBasicBlock(name: "omp.outer.log.scan.body" );
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock(name: "omp.outer.log.scan.exit" );
    // LogVal = (unsigned)ceil(log2((double)n)), computed with the log2 and
    // ceil intrinsics.
    llvm::Function *F =
        CGF.CGM.getIntrinsic(IID: llvm::Intrinsic::log2, Tys: CGF.DoubleTy);
    llvm::Value *Arg =
        CGF.Builder.CreateUIToFP(V: OMPScanNumIterations, DestTy: CGF.DoubleTy);
    llvm::Value *LogVal = CGF.EmitNounwindRuntimeCall(callee: F, args: Arg);
    F = CGF.CGM.getIntrinsic(IID: llvm::Intrinsic::ceil, Tys: CGF.DoubleTy);
    LogVal = CGF.EmitNounwindRuntimeCall(callee: F, args: LogVal);
    LogVal = CGF.Builder.CreateFPToUI(V: LogVal, DestTy: CGF.IntTy);
    // NMin1 = n - 1, the index of the last buffer element.
    llvm::Value *NMin1 = CGF.Builder.CreateNUWSub(
        LHS: OMPScanNumIterations, RHS: llvm::ConstantInt::get(Ty: CGF.SizeTy, V: 1));
    auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, TemporaryLocation: S.getBeginLoc());
    // NOTE(review): the outer loop body is entered unconditionally, and its
    // exit test at the bottom compares the already incremented counter with
    // LogVal using '!='. If LogVal can be 0 here (presumably when the
    // iteration count is 1), that first test is 1 != 0 -- confirm this case
    // cannot reach this code or is handled elsewhere.
    CGF.EmitBlock(BB: LoopBB);
    // Outer loop counter k, starting at 0.
    auto *Counter = CGF.Builder.CreatePHI(Ty: CGF.IntTy, NumReservedValues: 2);
    // size pow2k = 1;
    auto *Pow2K = CGF.Builder.CreatePHI(Ty: CGF.SizeTy, NumReservedValues: 2);
    Counter->addIncoming(V: llvm::ConstantInt::get(Ty: CGF.IntTy, V: 0), BB: InputBB);
    Pow2K->addIncoming(V: llvm::ConstantInt::get(Ty: CGF.SizeTy, V: 1), BB: InputBB);
    // for (size i = n - 1; i >= 2 ^ k; --i)
    //   tmp[i] op= tmp[i-pow2k];
    llvm::BasicBlock *InnerLoopBB =
        CGF.createBasicBlock(name: "omp.inner.log.scan.body" );
    llvm::BasicBlock *InnerExitBB =
        CGF.createBasicBlock(name: "omp.inner.log.scan.exit" );
    // Skip the inner loop when n - 1 < pow2k.
    llvm::Value *CmpI = CGF.Builder.CreateICmpUGE(LHS: NMin1, RHS: Pow2K);
    CGF.Builder.CreateCondBr(Cond: CmpI, True: InnerLoopBB, False: InnerExitBB);
    CGF.EmitBlock(BB: InnerLoopBB);
    // Inner loop index i, starting at n - 1 and counting down.
    auto *IVal = CGF.Builder.CreatePHI(Ty: CGF.SizeTy, NumReservedValues: 2);
    IVal->addIncoming(V: NMin1, BB: LoopBB);
    {
      // Map each reduction's LHS variable to tmp[i] and its RHS variable to
      // tmp[i - pow2k], then emit the reduction over those addresses.
      CodeGenFunction::OMPPrivateScope PrivScope(CGF);
      auto *ILHS = LHSs.begin();
      auto *IRHS = RHSs.begin();
      for (const Expr *CopyArrayElem : CopyArrayElems) {
        const auto *LHSVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *ILHS)->getDecl());
        const auto *RHSVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IRHS)->getDecl());
        Address LHSAddr = Address::invalid();
        {
          // Address of tmp[i].
          CodeGenFunction::OpaqueValueMapping IdxMapping(
              CGF,
              cast<OpaqueValueExpr>(
                  Val: cast<ArraySubscriptExpr>(Val: CopyArrayElem)->getIdx()),
              RValue::get(V: IVal));
          LHSAddr = CGF.EmitLValue(E: CopyArrayElem).getAddress();
        }
        PrivScope.addPrivate(LocalVD: LHSVD, Addr: LHSAddr);
        Address RHSAddr = Address::invalid();
        {
          // Address of tmp[i - pow2k].
          llvm::Value *OffsetIVal = CGF.Builder.CreateNUWSub(LHS: IVal, RHS: Pow2K);
          CodeGenFunction::OpaqueValueMapping IdxMapping(
              CGF,
              cast<OpaqueValueExpr>(
                  Val: cast<ArraySubscriptExpr>(Val: CopyArrayElem)->getIdx()),
              RValue::get(V: OffsetIVal));
          RHSAddr = CGF.EmitLValue(E: CopyArrayElem).getAddress();
        }
        PrivScope.addPrivate(LocalVD: RHSVD, Addr: RHSAddr);
        ++ILHS;
        ++IRHS;
      }
      PrivScope.Privatize();
      CGF.CGM.getOpenMPRuntime().emitReduction(
          CGF, Loc: S.getEndLoc(), Privates, LHSExprs: LHSs, RHSExprs: RHSs, ReductionOps,
          Options: {/*WithNowait=*/true, /*SimpleReduction=*/true,
           /*IsPrivateVarReduction*/ {}, .ReductionKind: OMPD_unknown});
    }
    // --i; continue the inner loop while i >= pow2k.
    llvm::Value *NextIVal =
        CGF.Builder.CreateNUWSub(LHS: IVal, RHS: llvm::ConstantInt::get(Ty: CGF.SizeTy, V: 1));
    IVal->addIncoming(V: NextIVal, BB: CGF.Builder.GetInsertBlock());
    CmpI = CGF.Builder.CreateICmpUGE(LHS: NextIVal, RHS: Pow2K);
    CGF.Builder.CreateCondBr(Cond: CmpI, True: InnerLoopBB, False: InnerExitBB);
    CGF.EmitBlock(BB: InnerExitBB);
    // ++k;
    llvm::Value *Next =
        CGF.Builder.CreateNUWAdd(LHS: Counter, RHS: llvm::ConstantInt::get(Ty: CGF.IntTy, V: 1));
    Counter->addIncoming(V: Next, BB: CGF.Builder.GetInsertBlock());
    // pow2k <<= 1;
    llvm::Value *NextPow2K =
        CGF.Builder.CreateShl(LHS: Pow2K, RHS: 1, Name: "" , /*HasNUW=*/true);
    Pow2K->addIncoming(V: NextPow2K, BB: CGF.Builder.GetInsertBlock());
    // Repeat the outer loop until k reaches LogVal.
    llvm::Value *Cmp = CGF.Builder.CreateICmpNE(LHS: Next, RHS: LogVal);
    CGF.Builder.CreateCondBr(Cond: Cmp, True: LoopBB, False: ExitBB);
    auto DL1 = ApplyDebugLocation::CreateDefaultArtificial(CGF, TemporaryLocation: S.getEndLoc());
    CGF.EmitBlock(BB: ExitBB);
  };
  OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S);
  if (isOpenMPParallelDirective(DKind: EKind)) {
    // In a parallel region: run the prefix reduction in a master region, then
    // emit a barrier.
    CGF.CGM.getOpenMPRuntime().emitMasterRegion(CGF, MasterOpGen: CodeGen, Loc: S.getBeginLoc());
    CGF.CGM.getOpenMPRuntime().emitBarrierCall(
        CGF, Loc: S.getBeginLoc(), Kind: OMPD_unknown, /*EmitChecks=*/false,
        /*ForceSimpleCall=*/true);
  } else {
    // Otherwise emit the prefix reduction inline.
    RegionCodeGenTy RCG(CodeGen);
    RCG(CGF);
  }

  // Emit the second loop (scan phase).
  CGF.OMPFirstScanLoop = false;
  SecondGen(CGF);
}
3985 | |
3986 | static bool emitWorksharingDirective(CodeGenFunction &CGF, |
3987 | const OMPLoopDirective &S, |
3988 | bool HasCancel) { |
3989 | bool HasLastprivates; |
3990 | OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S); |
3991 | if (llvm::any_of(Range: S.getClausesOfKind<OMPReductionClause>(), |
3992 | P: [](const OMPReductionClause *C) { |
3993 | return C->getModifier() == OMPC_REDUCTION_inscan; |
3994 | })) { |
3995 | const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) { |
3996 | CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF); |
3997 | OMPLoopScope LoopScope(CGF, S); |
3998 | return CGF.EmitScalarExpr(E: S.getNumIterations()); |
3999 | }; |
4000 | const auto &&FirstGen = [&S, HasCancel, EKind](CodeGenFunction &CGF) { |
4001 | CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, EKind, HasCancel); |
4002 | (void)CGF.EmitOMPWorksharingLoop(S, EUB: S.getEnsureUpperBound(), |
4003 | CodeGenLoopBounds: emitForLoopBounds, |
4004 | CGDispatchBounds: emitDispatchForLoopBounds); |
4005 | // Emit an implicit barrier at the end. |
4006 | CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, Loc: S.getBeginLoc(), |
4007 | Kind: OMPD_for); |
4008 | }; |
4009 | const auto &&SecondGen = [&S, HasCancel, EKind, |
4010 | &HasLastprivates](CodeGenFunction &CGF) { |
4011 | CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, EKind, HasCancel); |
4012 | HasLastprivates = CGF.EmitOMPWorksharingLoop(S, EUB: S.getEnsureUpperBound(), |
4013 | CodeGenLoopBounds: emitForLoopBounds, |
4014 | CGDispatchBounds: emitDispatchForLoopBounds); |
4015 | }; |
4016 | if (!isOpenMPParallelDirective(DKind: EKind)) |
4017 | emitScanBasedDirectiveDecls(CGF, S, NumIteratorsGen); |
4018 | emitScanBasedDirective(CGF, S, NumIteratorsGen, FirstGen, SecondGen); |
4019 | if (!isOpenMPParallelDirective(DKind: EKind)) |
4020 | emitScanBasedDirectiveFinals(CGF, S, NumIteratorsGen); |
4021 | } else { |
4022 | CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, EKind, HasCancel); |
4023 | HasLastprivates = CGF.EmitOMPWorksharingLoop(S, EUB: S.getEnsureUpperBound(), |
4024 | CodeGenLoopBounds: emitForLoopBounds, |
4025 | CGDispatchBounds: emitDispatchForLoopBounds); |
4026 | } |
4027 | return HasLastprivates; |
4028 | } |
4029 | |
4030 | // Pass OMPLoopDirective (instead of OMPForDirective) to make this check |
4031 | // available for "loop bind(parallel)", which maps to "for". |
4032 | static bool isForSupportedByOpenMPIRBuilder(const OMPLoopDirective &S, |
4033 | bool HasCancel) { |
4034 | if (HasCancel) |
4035 | return false; |
4036 | for (OMPClause *C : S.clauses()) { |
4037 | if (isa<OMPNowaitClause, OMPBindClause>(Val: C)) |
4038 | continue; |
4039 | |
4040 | if (auto *SC = dyn_cast<OMPScheduleClause>(Val: C)) { |
4041 | if (SC->getFirstScheduleModifier() != OMPC_SCHEDULE_MODIFIER_unknown) |
4042 | return false; |
4043 | if (SC->getSecondScheduleModifier() != OMPC_SCHEDULE_MODIFIER_unknown) |
4044 | return false; |
4045 | switch (SC->getScheduleKind()) { |
4046 | case OMPC_SCHEDULE_auto: |
4047 | case OMPC_SCHEDULE_dynamic: |
4048 | case OMPC_SCHEDULE_runtime: |
4049 | case OMPC_SCHEDULE_guided: |
4050 | case OMPC_SCHEDULE_static: |
4051 | continue; |
4052 | case OMPC_SCHEDULE_unknown: |
4053 | return false; |
4054 | } |
4055 | } |
4056 | |
4057 | return false; |
4058 | } |
4059 | |
4060 | return true; |
4061 | } |
4062 | |
4063 | static llvm::omp::ScheduleKind |
4064 | convertClauseKindToSchedKind(OpenMPScheduleClauseKind ScheduleClauseKind) { |
4065 | switch (ScheduleClauseKind) { |
4066 | case OMPC_SCHEDULE_unknown: |
4067 | return llvm::omp::OMP_SCHEDULE_Default; |
4068 | case OMPC_SCHEDULE_auto: |
4069 | return llvm::omp::OMP_SCHEDULE_Auto; |
4070 | case OMPC_SCHEDULE_dynamic: |
4071 | return llvm::omp::OMP_SCHEDULE_Dynamic; |
4072 | case OMPC_SCHEDULE_guided: |
4073 | return llvm::omp::OMP_SCHEDULE_Guided; |
4074 | case OMPC_SCHEDULE_runtime: |
4075 | return llvm::omp::OMP_SCHEDULE_Runtime; |
4076 | case OMPC_SCHEDULE_static: |
4077 | return llvm::omp::OMP_SCHEDULE_Static; |
4078 | } |
4079 | llvm_unreachable("Unhandled schedule kind" ); |
4080 | } |
4081 | |
// Pass OMPLoopDirective (instead of OMPForDirective) to make this function
// available for "loop bind(parallel)", which maps to "for".
//
// Emits the directive S as an inlined 'for' region. Two lowerings exist:
//  * the OpenMPIRBuilder path, taken when the OpenMPIRBuilder language option
//    is set and isForSupportedByOpenMPIRBuilder() accepts the directive;
//  * otherwise the emitWorksharingDirective() path, followed here by an
//    explicit barrier call when required.
// HasCancel is forwarded to the support check, to the worksharing emission
// and to emitInlinedDirective().
static void emitOMPForDirective(const OMPLoopDirective &S, CodeGenFunction &CGF,
                                CodeGenModule &CGM, bool HasCancel) {
  // Written by the region callback below; read afterwards for the barrier
  // decision. It stays false on the OpenMPIRBuilder path.
  bool HasLastprivates = false;
  bool UseOMPIRBuilder = CGM.getLangOpts().OpenMPIRBuilder &&
                         isForSupportedByOpenMPIRBuilder(S, HasCancel);
  auto &&CodeGen = [&S, &CGM, HasCancel, &HasLastprivates,
                    UseOMPIRBuilder](CodeGenFunction &CGF, PrePostActionTy &) {
    // Use the OpenMPIRBuilder if enabled.
    if (UseOMPIRBuilder) {
      // Without 'nowait' the workshare loop is asked to include a barrier.
      bool NeedsBarrier = !S.getSingleClause<OMPNowaitClause>();

      // Schedule kind and optional chunk size come from the 'schedule'
      // clause, if there is one; otherwise the default schedule is used.
      llvm::omp::ScheduleKind SchedKind = llvm::omp::OMP_SCHEDULE_Default;
      llvm::Value *ChunkSize = nullptr;
      if (auto *SchedClause = S.getSingleClause<OMPScheduleClause>()) {
        SchedKind =
            convertClauseKindToSchedKind(ScheduleClauseKind: SchedClause->getScheduleKind());
        if (const Expr *ChunkSizeExpr = SchedClause->getChunkSize())
          ChunkSize = CGF.EmitScalarExpr(E: ChunkSizeExpr);
      }

      // Emit the associated statement and get its loop representation.
      const Stmt *Inner = S.getRawStmt();
      llvm::CanonicalLoopInfo *CLI =
          CGF.EmitOMPCollapsedCanonicalLoopNest(S: Inner, Depth: 1);

      llvm::OpenMPIRBuilder &OMPBuilder =
          CGM.getOpenMPRuntime().getOMPBuilder();
      llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
          CGF.AllocaInsertPt->getParent(), CGF.AllocaInsertPt->getIterator());
      // The modifier/ordered flags are all false: this path is only reached
      // when isForSupportedByOpenMPIRBuilder() accepted the directive, and
      // that check rejects schedule modifiers and every clause other than
      // nowait/bind/schedule.
      cantFail(ValOrErr: OMPBuilder.applyWorkshareLoop(
          DL: CGF.Builder.getCurrentDebugLocation(), CLI, AllocaIP, NeedsBarrier,
          SchedKind, ChunkSize, /*HasSimdModifier=*/false,
          /*HasMonotonicModifier=*/false, /*HasNonmonotonicModifier=*/false,
          /*HasOrderedClause=*/false));
      return;
    }

    HasLastprivates = emitWorksharingDirective(CGF, S, HasCancel);
  };
  {
    // Both RAII objects are scoped to the region emission only; they are
    // destroyed before the barrier below is emitted.
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF, S);
    OMPLexicalScope Scope(CGF, S, OMPD_unknown);
    CGM.getOpenMPRuntime().emitInlinedDirective(CGF, InnermostKind: OMPD_for, CodeGen,
                                                HasCancel);
  }

  // On the OpenMPIRBuilder path the barrier request was already passed to
  // applyWorkshareLoop() via NeedsBarrier, so none is emitted here.
  if (!UseOMPIRBuilder) {
    // Emit an implicit barrier at the end.
    if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates)
      CGM.getOpenMPRuntime().emitBarrierCall(CGF, Loc: S.getBeginLoc(), Kind: OMPD_for);
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(CGF, S);
}
4139 | |
4140 | void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) { |
4141 | return emitOMPForDirective(S, CGF&: *this, CGM, HasCancel: S.hasCancel()); |
4142 | } |
4143 | |
4144 | void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) { |
4145 | bool HasLastprivates = false; |
4146 | auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF, |
4147 | PrePostActionTy &) { |
4148 | HasLastprivates = emitWorksharingDirective(CGF, S, /*HasCancel=*/false); |
4149 | }; |
4150 | { |
4151 | auto LPCRegion = |
4152 | CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S); |
4153 | OMPLexicalScope Scope(*this, S, OMPD_unknown); |
4154 | CGM.getOpenMPRuntime().emitInlinedDirective(CGF&: *this, InnermostKind: OMPD_simd, CodeGen); |
4155 | } |
4156 | |
4157 | // Emit an implicit barrier at the end. |
4158 | if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) |
4159 | CGM.getOpenMPRuntime().emitBarrierCall(CGF&: *this, Loc: S.getBeginLoc(), Kind: OMPD_for); |
4160 | // Check for outer lastprivate conditional update. |
4161 | checkForLastprivateConditionalUpdate(CGF&: *this, S); |
4162 | } |
4163 | |
4164 | static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty, |
4165 | const Twine &Name, |
4166 | llvm::Value *Init = nullptr) { |
4167 | LValue LVal = CGF.MakeAddrLValue(Addr: CGF.CreateMemTemp(T: Ty, Name), T: Ty); |
4168 | if (Init) |
4169 | CGF.EmitStoreThroughLValue(Src: RValue::get(V: Init), Dst: LVal, /*isInit*/ true); |
4170 | return LVal; |
4171 | } |
4172 | |
/// Emit the body of a 'sections'-like directive S as a statically scheduled
/// loop over section indices whose body is a switch dispatching on the
/// iteration variable.
///
/// The innermost captured statement is either a CompoundStmt (one case per
/// child) or a single statement (exactly one case, index 0). The whole thing
/// is emitted as an inlined OMPD_sections region. If lastprivates are present
/// and the directive has a 'nowait' clause, an extra barrier is emitted after
/// the region.
void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
  const Stmt *CapturedStmt = S.getInnermostCapturedStmt()->getCapturedStmt();
  // CS is null when the captured statement is not a compound statement.
  const auto *CS = dyn_cast<CompoundStmt>(Val: CapturedStmt);
  // Set inside the region callback; read after the region for the barrier.
  bool HasLastprivates = false;
  OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S);
  auto &&CodeGen = [&S, CapturedStmt, CS, EKind,
                    &HasLastprivates](CodeGenFunction &CGF, PrePostActionTy &) {
    const ASTContext &C = CGF.getContext();
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    // Emit helper vars inits.
    LValue LB = createSectionLVal(CGF, Ty: KmpInt32Ty, Name: ".omp.sections.lb." ,
                                  Init: CGF.Builder.getInt32(C: 0));
    // Global upper bound: index of the last section (0 for a single,
    // non-compound statement).
    llvm::ConstantInt *GlobalUBVal = CS != nullptr
                                         ? CGF.Builder.getInt32(C: CS->size() - 1)
                                         : CGF.Builder.getInt32(C: 0);
    LValue UB =
        createSectionLVal(CGF, Ty: KmpInt32Ty, Name: ".omp.sections.ub." , Init: GlobalUBVal);
    LValue ST = createSectionLVal(CGF, Ty: KmpInt32Ty, Name: ".omp.sections.st." ,
                                  Init: CGF.Builder.getInt32(C: 1));
    LValue IL = createSectionLVal(CGF, Ty: KmpInt32Ty, Name: ".omp.sections.il." ,
                                  Init: CGF.Builder.getInt32(C: 0));
    // Loop counter.
    LValue IV = createSectionLVal(CGF, Ty: KmpInt32Ty, Name: ".omp.sections.iv." );
    // Opaque expressions bound to IV and UB so that the loop condition and
    // increment below can be expressed as AST nodes.
    OpaqueValueExpr IVRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue);
    CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV);
    OpaqueValueExpr UBRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue);
    CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB);
    // Generate condition for loop.
    BinaryOperator *Cond = BinaryOperator::Create(
        C, lhs: &IVRefExpr, rhs: &UBRefExpr, opc: BO_LE, ResTy: C.BoolTy, VK: VK_PRValue, OK: OK_Ordinary,
        opLoc: S.getBeginLoc(), FPFeatures: FPOptionsOverride());
    // Increment for loop counter.
    UnaryOperator *Inc = UnaryOperator::Create(
        C, input: &IVRefExpr, opc: UO_PreInc, type: KmpInt32Ty, VK: VK_PRValue, OK: OK_Ordinary,
        l: S.getBeginLoc(), CanOverflow: true, FPFeatures: FPOptionsOverride());
    auto &&BodyGen = [CapturedStmt, CS, &S, &IV](CodeGenFunction &CGF) {
      // Iterate through all sections and emit a switch construct:
      // switch (IV) {
      //   case 0:
      //     <SectionStmt[0]>;
      //     break;
      // ...
      //   case <NumSection> - 1:
      //     <SectionStmt[<NumSection> - 1]>;
      //     break;
      // }
      // .omp.sections.exit:
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(name: ".omp.sections.exit" );
      // The switch defaults to the exit block; the case count is a
      // reservation hint (1 when there is no compound statement).
      llvm::SwitchInst *SwitchStmt =
          CGF.Builder.CreateSwitch(V: CGF.EmitLoadOfScalar(lvalue: IV, Loc: S.getBeginLoc()),
                                   Dest: ExitBB, NumCases: CS == nullptr ? 1 : CS->size());
      if (CS) {
        // One case block per child of the compound statement, numbered in
        // child order starting at 0.
        unsigned CaseNumber = 0;
        for (const Stmt *SubStmt : CS->children()) {
          auto CaseBB = CGF.createBasicBlock(name: ".omp.sections.case" );
          CGF.EmitBlock(BB: CaseBB);
          SwitchStmt->addCase(OnVal: CGF.Builder.getInt32(C: CaseNumber), Dest: CaseBB);
          CGF.EmitStmt(S: SubStmt);
          CGF.EmitBranch(Block: ExitBB);
          ++CaseNumber;
        }
      } else {
        // Not a compound statement: the captured statement is the only
        // section and becomes case 0.
        llvm::BasicBlock *CaseBB = CGF.createBasicBlock(name: ".omp.sections.case" );
        CGF.EmitBlock(BB: CaseBB);
        SwitchStmt->addCase(OnVal: CGF.Builder.getInt32(C: 0), Dest: CaseBB);
        CGF.EmitStmt(S: CapturedStmt);
        CGF.EmitBranch(Block: ExitBB);
      }
      CGF.EmitBlock(BB: ExitBB, /*IsFinished=*/true);
    };

    CodeGenFunction::OMPPrivateScope LoopScope(CGF);
    if (CGF.EmitOMPFirstprivateClause(D: S, PrivateScope&: LoopScope)) {
      // Emit implicit barrier to synchronize threads and avoid data races on
      // initialization of firstprivate variables and post-update of lastprivate
      // variables.
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(
          CGF, Loc: S.getBeginLoc(), Kind: OMPD_unknown, /*EmitChecks=*/false,
          /*ForceSimpleCall=*/true);
    }
    CGF.EmitOMPPrivateClause(D: S, PrivateScope&: LoopScope);
    CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(CGF, S, IV);
    HasLastprivates = CGF.EmitOMPLastprivateClauseInit(D: S, PrivateScope&: LoopScope);
    CGF.EmitOMPReductionClauseInit(D: S, PrivateScope&: LoopScope);
    (void)LoopScope.Privatize();
    if (isOpenMPTargetExecutionDirective(DKind: EKind))
      CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, D: S);

    // Emit static non-chunked loop.
    OpenMPScheduleTy ScheduleKind;
    ScheduleKind.Schedule = OMPC_SCHEDULE_static;
    CGOpenMPRuntime::StaticRTInput StaticInit(
        /*IVSize=*/32, /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(),
        LB.getAddress(), UB.getAddress(), ST.getAddress());
    CGF.CGM.getOpenMPRuntime().emitForStaticInit(CGF, Loc: S.getBeginLoc(), DKind: EKind,
                                                 ScheduleKind, Values: StaticInit);
    // UB = min(UB, GlobalUB);
    llvm::Value *UBVal = CGF.EmitLoadOfScalar(lvalue: UB, Loc: S.getBeginLoc());
    llvm::Value *MinUBGlobalUB = CGF.Builder.CreateSelect(
        C: CGF.Builder.CreateICmpSLT(LHS: UBVal, RHS: GlobalUBVal), True: UBVal, False: GlobalUBVal);
    CGF.EmitStoreOfScalar(value: MinUBGlobalUB, lvalue: UB);
    // IV = LB;
    CGF.EmitStoreOfScalar(value: CGF.EmitLoadOfScalar(lvalue: LB, Loc: S.getBeginLoc()), lvalue: IV);
    // while (idx <= UB) { BODY; ++idx; }
    CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, LoopCond: Cond, IncExpr: Inc, BodyGen,
                         PostIncGen: [](CodeGenFunction &) {});
    // Tell the runtime we are done.
    // Note: this inner CodeGen intentionally shadows the enclosing lambda's
    // name; it is the exit action handed to the cancel stack.
    auto &&CodeGen = [&S](CodeGenFunction &CGF) {
      CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, Loc: S.getEndLoc(),
                                                     DKind: OMPD_sections);
    };
    CGF.OMPCancelStack.emitExit(CGF, Kind: EKind, CodeGen);
    CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_parallel);
    // Emit post-update of the reduction variables if IsLastIter != 0.
    emitPostUpdateForReductionClause(CGF, D: S, CondGen: [IL, &S](CodeGenFunction &CGF) {
      return CGF.Builder.CreateIsNotNull(
          Arg: CGF.EmitLoadOfScalar(lvalue: IL, Loc: S.getBeginLoc()));
    });

    // Emit final copy of the lastprivate variables if IsLastIter != 0.
    if (HasLastprivates)
      CGF.EmitOMPLastprivateClauseFinal(
          D: S, /*NoFinals=*/false,
          IsLastIterCond: CGF.Builder.CreateIsNotNull(
              Arg: CGF.EmitLoadOfScalar(lvalue: IL, Loc: S.getBeginLoc())));
  };

  // Only 'sections' and 'parallel sections' directives can report a cancel
  // region here; for any other directive kind HasCancel stays false.
  bool HasCancel = false;
  if (auto *OSD = dyn_cast<OMPSectionsDirective>(Val: &S))
    HasCancel = OSD->hasCancel();
  else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(Val: &S))
    HasCancel = OPSD->hasCancel();
  OMPCancelStackRAII CancelRegion(*this, EKind, HasCancel);
  CGM.getOpenMPRuntime().emitInlinedDirective(CGF&: *this, InnermostKind: OMPD_sections, CodeGen,
                                              HasCancel);
  // Emit barrier for lastprivates only if 'sections' directive has 'nowait'
  // clause. Otherwise the barrier will be generated by the codegen for the
  // directive.
  if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) {
    // Emit implicit barrier to synchronize threads. This barrier is keyed on
    // the presence of lastprivate variables (HasLastprivates), not on
    // firstprivate initialization.
    CGM.getOpenMPRuntime().emitBarrierCall(CGF&: *this, Loc: S.getBeginLoc(),
                                           Kind: OMPD_unknown);
  }
}
4319 | |
4320 | void CodeGenFunction::EmitOMPScopeDirective(const OMPScopeDirective &S) { |
4321 | { |
4322 | // Emit code for 'scope' region |
4323 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
4324 | Action.Enter(CGF); |
4325 | OMPPrivateScope PrivateScope(CGF); |
4326 | (void)CGF.EmitOMPFirstprivateClause(D: S, PrivateScope); |
4327 | CGF.EmitOMPPrivateClause(D: S, PrivateScope); |
4328 | CGF.EmitOMPReductionClauseInit(D: S, PrivateScope); |
4329 | (void)PrivateScope.Privatize(); |
4330 | CGF.EmitStmt(S: S.getInnermostCapturedStmt()->getCapturedStmt()); |
4331 | CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_parallel); |
4332 | }; |
4333 | auto LPCRegion = |
4334 | CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S); |
4335 | OMPLexicalScope Scope(*this, S, OMPD_unknown); |
4336 | CGM.getOpenMPRuntime().emitInlinedDirective(CGF&: *this, InnermostKind: OMPD_scope, CodeGen); |
4337 | } |
4338 | // Emit an implicit barrier at the end. |
4339 | if (!S.getSingleClause<OMPNowaitClause>()) { |
4340 | CGM.getOpenMPRuntime().emitBarrierCall(CGF&: *this, Loc: S.getBeginLoc(), Kind: OMPD_scope); |
4341 | } |
4342 | // Check for outer lastprivate conditional update. |
4343 | checkForLastprivateConditionalUpdate(CGF&: *this, S); |
4344 | } |
4345 | |
4346 | void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) { |
4347 | if (CGM.getLangOpts().OpenMPIRBuilder) { |
4348 | llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); |
4349 | using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; |
4350 | using BodyGenCallbackTy = llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy; |
4351 | |
4352 | auto FiniCB = [](InsertPointTy IP) { |
4353 | // Don't FinalizeOMPRegion because this is done inside of OMPIRBuilder for |
4354 | // sections. |
4355 | return llvm::Error::success(); |
4356 | }; |
4357 | |
4358 | const CapturedStmt *ICS = S.getInnermostCapturedStmt(); |
4359 | const Stmt *CapturedStmt = S.getInnermostCapturedStmt()->getCapturedStmt(); |
4360 | const auto *CS = dyn_cast<CompoundStmt>(Val: CapturedStmt); |
4361 | llvm::SmallVector<BodyGenCallbackTy, 4> SectionCBVector; |
4362 | if (CS) { |
4363 | for (const Stmt *SubStmt : CS->children()) { |
4364 | auto SectionCB = [this, SubStmt](InsertPointTy AllocaIP, |
4365 | InsertPointTy CodeGenIP) { |
4366 | OMPBuilderCBHelpers::EmitOMPInlinedRegionBody( |
4367 | CGF&: *this, RegionBodyStmt: SubStmt, AllocaIP, CodeGenIP, RegionName: "section" ); |
4368 | return llvm::Error::success(); |
4369 | }; |
4370 | SectionCBVector.push_back(Elt: SectionCB); |
4371 | } |
4372 | } else { |
4373 | auto SectionCB = [this, CapturedStmt](InsertPointTy AllocaIP, |
4374 | InsertPointTy CodeGenIP) { |
4375 | OMPBuilderCBHelpers::EmitOMPInlinedRegionBody( |
4376 | CGF&: *this, RegionBodyStmt: CapturedStmt, AllocaIP, CodeGenIP, RegionName: "section" ); |
4377 | return llvm::Error::success(); |
4378 | }; |
4379 | SectionCBVector.push_back(Elt: SectionCB); |
4380 | } |
4381 | |
4382 | // Privatization callback that performs appropriate action for |
4383 | // shared/private/firstprivate/lastprivate/copyin/... variables. |
4384 | // |
4385 | // TODO: This defaults to shared right now. |
4386 | auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, |
4387 | llvm::Value &, llvm::Value &Val, llvm::Value *&ReplVal) { |
4388 | // The next line is appropriate only for variables (Val) with the |
4389 | // data-sharing attribute "shared". |
4390 | ReplVal = &Val; |
4391 | |
4392 | return CodeGenIP; |
4393 | }; |
4394 | |
4395 | CGCapturedStmtInfo CGSI(*ICS, CR_OpenMP); |
4396 | CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI); |
4397 | llvm::OpenMPIRBuilder::InsertPointTy AllocaIP( |
4398 | AllocaInsertPt->getParent(), AllocaInsertPt->getIterator()); |
4399 | llvm::OpenMPIRBuilder::InsertPointTy AfterIP = |
4400 | cantFail(ValOrErr: OMPBuilder.createSections( |
4401 | Loc: Builder, AllocaIP, SectionCBs: SectionCBVector, PrivCB, FiniCB, IsCancellable: S.hasCancel(), |
4402 | IsNowait: S.getSingleClause<OMPNowaitClause>())); |
4403 | Builder.restoreIP(IP: AfterIP); |
4404 | return; |
4405 | } |
4406 | { |
4407 | auto LPCRegion = |
4408 | CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S); |
4409 | OMPLexicalScope Scope(*this, S, OMPD_unknown); |
4410 | EmitSections(S); |
4411 | } |
4412 | // Emit an implicit barrier at the end. |
4413 | if (!S.getSingleClause<OMPNowaitClause>()) { |
4414 | CGM.getOpenMPRuntime().emitBarrierCall(CGF&: *this, Loc: S.getBeginLoc(), |
4415 | Kind: OMPD_sections); |
4416 | } |
4417 | // Check for outer lastprivate conditional update. |
4418 | checkForLastprivateConditionalUpdate(CGF&: *this, S); |
4419 | } |
4420 | |
4421 | void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) { |
4422 | if (CGM.getLangOpts().OpenMPIRBuilder) { |
4423 | llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); |
4424 | using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; |
4425 | |
4426 | const Stmt *SectionRegionBodyStmt = S.getAssociatedStmt(); |
4427 | auto FiniCB = [this](InsertPointTy IP) { |
4428 | OMPBuilderCBHelpers::FinalizeOMPRegion(CGF&: *this, IP); |
4429 | return llvm::Error::success(); |
4430 | }; |
4431 | |
4432 | auto BodyGenCB = [SectionRegionBodyStmt, this](InsertPointTy AllocaIP, |
4433 | InsertPointTy CodeGenIP) { |
4434 | OMPBuilderCBHelpers::EmitOMPInlinedRegionBody( |
4435 | CGF&: *this, RegionBodyStmt: SectionRegionBodyStmt, AllocaIP, CodeGenIP, RegionName: "section" ); |
4436 | return llvm::Error::success(); |
4437 | }; |
4438 | |
4439 | LexicalScope Scope(*this, S.getSourceRange()); |
4440 | EmitStopPoint(S: &S); |
4441 | llvm::OpenMPIRBuilder::InsertPointTy AfterIP = |
4442 | cantFail(ValOrErr: OMPBuilder.createSection(Loc: Builder, BodyGenCB, FiniCB)); |
4443 | Builder.restoreIP(IP: AfterIP); |
4444 | |
4445 | return; |
4446 | } |
4447 | LexicalScope Scope(*this, S.getSourceRange()); |
4448 | EmitStopPoint(S: &S); |
4449 | EmitStmt(S: S.getAssociatedStmt()); |
4450 | } |
4451 | |
4452 | void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) { |
4453 | llvm::SmallVector<const Expr *, 8> CopyprivateVars; |
4454 | llvm::SmallVector<const Expr *, 8> DestExprs; |
4455 | llvm::SmallVector<const Expr *, 8> SrcExprs; |
4456 | llvm::SmallVector<const Expr *, 8> AssignmentOps; |
4457 | // Check if there are any 'copyprivate' clauses associated with this |
4458 | // 'single' construct. |
4459 | // Build a list of copyprivate variables along with helper expressions |
4460 | // (<source>, <destination>, <destination>=<source> expressions) |
4461 | for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) { |
4462 | CopyprivateVars.append(in_start: C->varlist_begin(), in_end: C->varlist_end()); |
4463 | DestExprs.append(in_start: C->destination_exprs().begin(), |
4464 | in_end: C->destination_exprs().end()); |
4465 | SrcExprs.append(in_start: C->source_exprs().begin(), in_end: C->source_exprs().end()); |
4466 | AssignmentOps.append(in_start: C->assignment_ops().begin(), |
4467 | in_end: C->assignment_ops().end()); |
4468 | } |
4469 | // Emit code for 'single' region along with 'copyprivate' clauses |
4470 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
4471 | Action.Enter(CGF); |
4472 | OMPPrivateScope SingleScope(CGF); |
4473 | (void)CGF.EmitOMPFirstprivateClause(D: S, PrivateScope&: SingleScope); |
4474 | CGF.EmitOMPPrivateClause(D: S, PrivateScope&: SingleScope); |
4475 | (void)SingleScope.Privatize(); |
4476 | CGF.EmitStmt(S: S.getInnermostCapturedStmt()->getCapturedStmt()); |
4477 | }; |
4478 | { |
4479 | auto LPCRegion = |
4480 | CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S); |
4481 | OMPLexicalScope Scope(*this, S, OMPD_unknown); |
4482 | CGM.getOpenMPRuntime().emitSingleRegion(CGF&: *this, SingleOpGen: CodeGen, Loc: S.getBeginLoc(), |
4483 | CopyprivateVars, DestExprs, |
4484 | SrcExprs, AssignmentOps); |
4485 | } |
4486 | // Emit an implicit barrier at the end (to avoid data race on firstprivate |
4487 | // init or if no 'nowait' clause was specified and no 'copyprivate' clause). |
4488 | if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) { |
4489 | CGM.getOpenMPRuntime().emitBarrierCall( |
4490 | CGF&: *this, Loc: S.getBeginLoc(), |
4491 | Kind: S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single); |
4492 | } |
4493 | // Check for outer lastprivate conditional update. |
4494 | checkForLastprivateConditionalUpdate(CGF&: *this, S); |
4495 | } |
4496 | |
4497 | static void emitMaster(CodeGenFunction &CGF, const OMPExecutableDirective &S) { |
4498 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
4499 | Action.Enter(CGF); |
4500 | CGF.EmitStmt(S: S.getRawStmt()); |
4501 | }; |
4502 | CGF.CGM.getOpenMPRuntime().emitMasterRegion(CGF, MasterOpGen: CodeGen, Loc: S.getBeginLoc()); |
4503 | } |
4504 | |
4505 | void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) { |
4506 | if (CGM.getLangOpts().OpenMPIRBuilder) { |
4507 | llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); |
4508 | using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; |
4509 | |
4510 | const Stmt *MasterRegionBodyStmt = S.getAssociatedStmt(); |
4511 | |
4512 | auto FiniCB = [this](InsertPointTy IP) { |
4513 | OMPBuilderCBHelpers::FinalizeOMPRegion(CGF&: *this, IP); |
4514 | return llvm::Error::success(); |
4515 | }; |
4516 | |
4517 | auto BodyGenCB = [MasterRegionBodyStmt, this](InsertPointTy AllocaIP, |
4518 | InsertPointTy CodeGenIP) { |
4519 | OMPBuilderCBHelpers::EmitOMPInlinedRegionBody( |
4520 | CGF&: *this, RegionBodyStmt: MasterRegionBodyStmt, AllocaIP, CodeGenIP, RegionName: "master" ); |
4521 | return llvm::Error::success(); |
4522 | }; |
4523 | |
4524 | LexicalScope Scope(*this, S.getSourceRange()); |
4525 | EmitStopPoint(S: &S); |
4526 | llvm::OpenMPIRBuilder::InsertPointTy AfterIP = |
4527 | cantFail(ValOrErr: OMPBuilder.createMaster(Loc: Builder, BodyGenCB, FiniCB)); |
4528 | Builder.restoreIP(IP: AfterIP); |
4529 | |
4530 | return; |
4531 | } |
4532 | LexicalScope Scope(*this, S.getSourceRange()); |
4533 | EmitStopPoint(S: &S); |
4534 | emitMaster(CGF&: *this, S); |
4535 | } |
4536 | |
4537 | static void emitMasked(CodeGenFunction &CGF, const OMPExecutableDirective &S) { |
4538 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
4539 | Action.Enter(CGF); |
4540 | CGF.EmitStmt(S: S.getRawStmt()); |
4541 | }; |
4542 | Expr *Filter = nullptr; |
4543 | if (const auto *FilterClause = S.getSingleClause<OMPFilterClause>()) |
4544 | Filter = FilterClause->getThreadID(); |
4545 | CGF.CGM.getOpenMPRuntime().emitMaskedRegion(CGF, MaskedOpGen: CodeGen, Loc: S.getBeginLoc(), |
4546 | Filter); |
4547 | } |
4548 | |
4549 | void CodeGenFunction::EmitOMPMaskedDirective(const OMPMaskedDirective &S) { |
4550 | if (CGM.getLangOpts().OpenMPIRBuilder) { |
4551 | llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); |
4552 | using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; |
4553 | |
4554 | const Stmt *MaskedRegionBodyStmt = S.getAssociatedStmt(); |
4555 | const Expr *Filter = nullptr; |
4556 | if (const auto *FilterClause = S.getSingleClause<OMPFilterClause>()) |
4557 | Filter = FilterClause->getThreadID(); |
4558 | llvm::Value *FilterVal = Filter |
4559 | ? EmitScalarExpr(E: Filter, IgnoreResultAssign: CGM.Int32Ty) |
4560 | : llvm::ConstantInt::get(Ty: CGM.Int32Ty, /*V=*/0); |
4561 | |
4562 | auto FiniCB = [this](InsertPointTy IP) { |
4563 | OMPBuilderCBHelpers::FinalizeOMPRegion(CGF&: *this, IP); |
4564 | return llvm::Error::success(); |
4565 | }; |
4566 | |
4567 | auto BodyGenCB = [MaskedRegionBodyStmt, this](InsertPointTy AllocaIP, |
4568 | InsertPointTy CodeGenIP) { |
4569 | OMPBuilderCBHelpers::EmitOMPInlinedRegionBody( |
4570 | CGF&: *this, RegionBodyStmt: MaskedRegionBodyStmt, AllocaIP, CodeGenIP, RegionName: "masked" ); |
4571 | return llvm::Error::success(); |
4572 | }; |
4573 | |
4574 | LexicalScope Scope(*this, S.getSourceRange()); |
4575 | EmitStopPoint(S: &S); |
4576 | llvm::OpenMPIRBuilder::InsertPointTy AfterIP = cantFail( |
4577 | ValOrErr: OMPBuilder.createMasked(Loc: Builder, BodyGenCB, FiniCB, Filter: FilterVal)); |
4578 | Builder.restoreIP(IP: AfterIP); |
4579 | |
4580 | return; |
4581 | } |
4582 | LexicalScope Scope(*this, S.getSourceRange()); |
4583 | EmitStopPoint(S: &S); |
4584 | emitMasked(CGF&: *this, S); |
4585 | } |
4586 | |
4587 | void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) { |
4588 | if (CGM.getLangOpts().OpenMPIRBuilder) { |
4589 | llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); |
4590 | using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; |
4591 | |
4592 | const Stmt *CriticalRegionBodyStmt = S.getAssociatedStmt(); |
4593 | const Expr *Hint = nullptr; |
4594 | if (const auto *HintClause = S.getSingleClause<OMPHintClause>()) |
4595 | Hint = HintClause->getHint(); |
4596 | |
4597 | // TODO: This is slightly different from what's currently being done in |
4598 | // clang. Fix the Int32Ty to IntPtrTy (pointer width size) when everything |
4599 | // about typing is final. |
4600 | llvm::Value *HintInst = nullptr; |
4601 | if (Hint) |
4602 | HintInst = |
4603 | Builder.CreateIntCast(V: EmitScalarExpr(E: Hint), DestTy: CGM.Int32Ty, isSigned: false); |
4604 | |
4605 | auto FiniCB = [this](InsertPointTy IP) { |
4606 | OMPBuilderCBHelpers::FinalizeOMPRegion(CGF&: *this, IP); |
4607 | return llvm::Error::success(); |
4608 | }; |
4609 | |
4610 | auto BodyGenCB = [CriticalRegionBodyStmt, this](InsertPointTy AllocaIP, |
4611 | InsertPointTy CodeGenIP) { |
4612 | OMPBuilderCBHelpers::EmitOMPInlinedRegionBody( |
4613 | CGF&: *this, RegionBodyStmt: CriticalRegionBodyStmt, AllocaIP, CodeGenIP, RegionName: "critical" ); |
4614 | return llvm::Error::success(); |
4615 | }; |
4616 | |
4617 | LexicalScope Scope(*this, S.getSourceRange()); |
4618 | EmitStopPoint(S: &S); |
4619 | llvm::OpenMPIRBuilder::InsertPointTy AfterIP = |
4620 | cantFail(ValOrErr: OMPBuilder.createCritical(Loc: Builder, BodyGenCB, FiniCB, |
4621 | CriticalName: S.getDirectiveName().getAsString(), |
4622 | HintInst)); |
4623 | Builder.restoreIP(IP: AfterIP); |
4624 | |
4625 | return; |
4626 | } |
4627 | |
4628 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
4629 | Action.Enter(CGF); |
4630 | CGF.EmitStmt(S: S.getAssociatedStmt()); |
4631 | }; |
4632 | const Expr *Hint = nullptr; |
4633 | if (const auto *HintClause = S.getSingleClause<OMPHintClause>()) |
4634 | Hint = HintClause->getHint(); |
4635 | LexicalScope Scope(*this, S.getSourceRange()); |
4636 | EmitStopPoint(S: &S); |
4637 | CGM.getOpenMPRuntime().emitCriticalRegion(CGF&: *this, |
4638 | CriticalName: S.getDirectiveName().getAsString(), |
4639 | CriticalOpGen: CodeGen, Loc: S.getBeginLoc(), Hint); |
4640 | } |
4641 | |
4642 | void CodeGenFunction::EmitOMPParallelForDirective( |
4643 | const OMPParallelForDirective &S) { |
4644 | // Emit directive as a combined directive that consists of two implicit |
4645 | // directives: 'parallel' with 'for' directive. |
4646 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
4647 | Action.Enter(CGF); |
4648 | emitOMPCopyinClause(CGF, S); |
4649 | (void)emitWorksharingDirective(CGF, S, HasCancel: S.hasCancel()); |
4650 | }; |
4651 | { |
4652 | const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) { |
4653 | CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF); |
4654 | CGCapturedStmtInfo CGSI(CR_OpenMP); |
4655 | CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGSI); |
4656 | OMPLoopScope LoopScope(CGF, S); |
4657 | return CGF.EmitScalarExpr(E: S.getNumIterations()); |
4658 | }; |
4659 | bool IsInscan = llvm::any_of(Range: S.getClausesOfKind<OMPReductionClause>(), |
4660 | P: [](const OMPReductionClause *C) { |
4661 | return C->getModifier() == OMPC_REDUCTION_inscan; |
4662 | }); |
4663 | if (IsInscan) |
4664 | emitScanBasedDirectiveDecls(CGF&: *this, S, NumIteratorsGen); |
4665 | auto LPCRegion = |
4666 | CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S); |
4667 | emitCommonOMPParallelDirective(CGF&: *this, S, InnermostKind: OMPD_for, CodeGen, |
4668 | CodeGenBoundParameters: emitEmptyBoundParameters); |
4669 | if (IsInscan) |
4670 | emitScanBasedDirectiveFinals(CGF&: *this, S, NumIteratorsGen); |
4671 | } |
4672 | // Check for outer lastprivate conditional update. |
4673 | checkForLastprivateConditionalUpdate(CGF&: *this, S); |
4674 | } |
4675 | |
4676 | void CodeGenFunction::EmitOMPParallelForSimdDirective( |
4677 | const OMPParallelForSimdDirective &S) { |
4678 | // Emit directive as a combined directive that consists of two implicit |
4679 | // directives: 'parallel' with 'for' directive. |
4680 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
4681 | Action.Enter(CGF); |
4682 | emitOMPCopyinClause(CGF, S); |
4683 | (void)emitWorksharingDirective(CGF, S, /*HasCancel=*/false); |
4684 | }; |
4685 | { |
4686 | const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) { |
4687 | CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF); |
4688 | CGCapturedStmtInfo CGSI(CR_OpenMP); |
4689 | CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGSI); |
4690 | OMPLoopScope LoopScope(CGF, S); |
4691 | return CGF.EmitScalarExpr(E: S.getNumIterations()); |
4692 | }; |
4693 | bool IsInscan = llvm::any_of(Range: S.getClausesOfKind<OMPReductionClause>(), |
4694 | P: [](const OMPReductionClause *C) { |
4695 | return C->getModifier() == OMPC_REDUCTION_inscan; |
4696 | }); |
4697 | if (IsInscan) |
4698 | emitScanBasedDirectiveDecls(CGF&: *this, S, NumIteratorsGen); |
4699 | auto LPCRegion = |
4700 | CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S); |
4701 | emitCommonOMPParallelDirective(CGF&: *this, S, InnermostKind: OMPD_for_simd, CodeGen, |
4702 | CodeGenBoundParameters: emitEmptyBoundParameters); |
4703 | if (IsInscan) |
4704 | emitScanBasedDirectiveFinals(CGF&: *this, S, NumIteratorsGen); |
4705 | } |
4706 | // Check for outer lastprivate conditional update. |
4707 | checkForLastprivateConditionalUpdate(CGF&: *this, S); |
4708 | } |
4709 | |
4710 | void CodeGenFunction::EmitOMPParallelMasterDirective( |
4711 | const OMPParallelMasterDirective &S) { |
4712 | // Emit directive as a combined directive that consists of two implicit |
4713 | // directives: 'parallel' with 'master' directive. |
4714 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
4715 | Action.Enter(CGF); |
4716 | OMPPrivateScope PrivateScope(CGF); |
4717 | emitOMPCopyinClause(CGF, S); |
4718 | (void)CGF.EmitOMPFirstprivateClause(D: S, PrivateScope); |
4719 | CGF.EmitOMPPrivateClause(D: S, PrivateScope); |
4720 | CGF.EmitOMPReductionClauseInit(D: S, PrivateScope); |
4721 | (void)PrivateScope.Privatize(); |
4722 | emitMaster(CGF, S); |
4723 | CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_parallel); |
4724 | }; |
4725 | { |
4726 | auto LPCRegion = |
4727 | CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S); |
4728 | emitCommonOMPParallelDirective(CGF&: *this, S, InnermostKind: OMPD_master, CodeGen, |
4729 | CodeGenBoundParameters: emitEmptyBoundParameters); |
4730 | emitPostUpdateForReductionClause(CGF&: *this, D: S, |
4731 | CondGen: [](CodeGenFunction &) { return nullptr; }); |
4732 | } |
4733 | // Check for outer lastprivate conditional update. |
4734 | checkForLastprivateConditionalUpdate(CGF&: *this, S); |
4735 | } |
4736 | |
4737 | void CodeGenFunction::EmitOMPParallelMaskedDirective( |
4738 | const OMPParallelMaskedDirective &S) { |
4739 | // Emit directive as a combined directive that consists of two implicit |
4740 | // directives: 'parallel' with 'masked' directive. |
4741 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
4742 | Action.Enter(CGF); |
4743 | OMPPrivateScope PrivateScope(CGF); |
4744 | emitOMPCopyinClause(CGF, S); |
4745 | (void)CGF.EmitOMPFirstprivateClause(D: S, PrivateScope); |
4746 | CGF.EmitOMPPrivateClause(D: S, PrivateScope); |
4747 | CGF.EmitOMPReductionClauseInit(D: S, PrivateScope); |
4748 | (void)PrivateScope.Privatize(); |
4749 | emitMasked(CGF, S); |
4750 | CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_parallel); |
4751 | }; |
4752 | { |
4753 | auto LPCRegion = |
4754 | CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S); |
4755 | emitCommonOMPParallelDirective(CGF&: *this, S, InnermostKind: OMPD_masked, CodeGen, |
4756 | CodeGenBoundParameters: emitEmptyBoundParameters); |
4757 | emitPostUpdateForReductionClause(CGF&: *this, D: S, |
4758 | CondGen: [](CodeGenFunction &) { return nullptr; }); |
4759 | } |
4760 | // Check for outer lastprivate conditional update. |
4761 | checkForLastprivateConditionalUpdate(CGF&: *this, S); |
4762 | } |
4763 | |
4764 | void CodeGenFunction::EmitOMPParallelSectionsDirective( |
4765 | const OMPParallelSectionsDirective &S) { |
4766 | // Emit directive as a combined directive that consists of two implicit |
4767 | // directives: 'parallel' with 'sections' directive. |
4768 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
4769 | Action.Enter(CGF); |
4770 | emitOMPCopyinClause(CGF, S); |
4771 | CGF.EmitSections(S); |
4772 | }; |
4773 | { |
4774 | auto LPCRegion = |
4775 | CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S); |
4776 | emitCommonOMPParallelDirective(CGF&: *this, S, InnermostKind: OMPD_sections, CodeGen, |
4777 | CodeGenBoundParameters: emitEmptyBoundParameters); |
4778 | } |
4779 | // Check for outer lastprivate conditional update. |
4780 | checkForLastprivateConditionalUpdate(CGF&: *this, S); |
4781 | } |
4782 | |
4783 | namespace { |
4784 | /// Get the list of variables declared in the context of the untied tasks. |
4785 | class CheckVarsEscapingUntiedTaskDeclContext final |
4786 | : public ConstStmtVisitor<CheckVarsEscapingUntiedTaskDeclContext> { |
4787 | llvm::SmallVector<const VarDecl *, 4> PrivateDecls; |
4788 | |
4789 | public: |
4790 | explicit CheckVarsEscapingUntiedTaskDeclContext() = default; |
4791 | ~CheckVarsEscapingUntiedTaskDeclContext() = default; |
4792 | void VisitDeclStmt(const DeclStmt *S) { |
4793 | if (!S) |
4794 | return; |
4795 | // Need to privatize only local vars, static locals can be processed as is. |
4796 | for (const Decl *D : S->decls()) { |
4797 | if (const auto *VD = dyn_cast_or_null<VarDecl>(Val: D)) |
4798 | if (VD->hasLocalStorage()) |
4799 | PrivateDecls.push_back(Elt: VD); |
4800 | } |
4801 | } |
4802 | void VisitOMPExecutableDirective(const OMPExecutableDirective *) {} |
4803 | void VisitCapturedStmt(const CapturedStmt *) {} |
4804 | void VisitLambdaExpr(const LambdaExpr *) {} |
4805 | void VisitBlockExpr(const BlockExpr *) {} |
4806 | void VisitStmt(const Stmt *S) { |
4807 | if (!S) |
4808 | return; |
4809 | for (const Stmt *Child : S->children()) |
4810 | if (Child) |
4811 | Visit(S: Child); |
4812 | } |
4813 | |
4814 | /// Swaps list of vars with the provided one. |
4815 | ArrayRef<const VarDecl *> getPrivateDecls() const { return PrivateDecls; } |
4816 | }; |
4817 | } // anonymous namespace |
4818 | |
4819 | static void buildDependences(const OMPExecutableDirective &S, |
4820 | OMPTaskDataTy &Data) { |
4821 | |
4822 | // First look for 'omp_all_memory' and add this first. |
4823 | bool OmpAllMemory = false; |
4824 | if (llvm::any_of( |
4825 | Range: S.getClausesOfKind<OMPDependClause>(), P: [](const OMPDependClause *C) { |
4826 | return C->getDependencyKind() == OMPC_DEPEND_outallmemory || |
4827 | C->getDependencyKind() == OMPC_DEPEND_inoutallmemory; |
4828 | })) { |
4829 | OmpAllMemory = true; |
4830 | // Since both OMPC_DEPEND_outallmemory and OMPC_DEPEND_inoutallmemory are |
4831 | // equivalent to the runtime, always use OMPC_DEPEND_outallmemory to |
4832 | // simplify. |
4833 | OMPTaskDataTy::DependData &DD = |
4834 | Data.Dependences.emplace_back(Args: OMPC_DEPEND_outallmemory, |
4835 | /*IteratorExpr=*/Args: nullptr); |
4836 | // Add a nullptr Expr to simplify the codegen in emitDependData. |
4837 | DD.DepExprs.push_back(Elt: nullptr); |
4838 | } |
4839 | // Add remaining dependences skipping any 'out' or 'inout' if they are |
4840 | // overridden by 'omp_all_memory'. |
4841 | for (const auto *C : S.getClausesOfKind<OMPDependClause>()) { |
4842 | OpenMPDependClauseKind Kind = C->getDependencyKind(); |
4843 | if (Kind == OMPC_DEPEND_outallmemory || Kind == OMPC_DEPEND_inoutallmemory) |
4844 | continue; |
4845 | if (OmpAllMemory && (Kind == OMPC_DEPEND_out || Kind == OMPC_DEPEND_inout)) |
4846 | continue; |
4847 | OMPTaskDataTy::DependData &DD = |
4848 | Data.Dependences.emplace_back(Args: C->getDependencyKind(), Args: C->getModifier()); |
4849 | DD.DepExprs.append(in_start: C->varlist_begin(), in_end: C->varlist_end()); |
4850 | } |
4851 | } |
4852 | |
4853 | void CodeGenFunction::EmitOMPTaskBasedDirective( |
4854 | const OMPExecutableDirective &S, const OpenMPDirectiveKind CapturedRegion, |
4855 | const RegionCodeGenTy &BodyGen, const TaskGenTy &TaskGen, |
4856 | OMPTaskDataTy &Data) { |
4857 | // Emit outlined function for task construct. |
4858 | const CapturedStmt *CS = S.getCapturedStmt(RegionKind: CapturedRegion); |
4859 | auto I = CS->getCapturedDecl()->param_begin(); |
4860 | auto PartId = std::next(x: I); |
4861 | auto TaskT = std::next(x: I, n: 4); |
4862 | // Check if the task is final |
4863 | if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) { |
4864 | // If the condition constant folds and can be elided, try to avoid emitting |
4865 | // the condition and the dead arm of the if/else. |
4866 | const Expr *Cond = Clause->getCondition(); |
4867 | bool CondConstant; |
4868 | if (ConstantFoldsToSimpleInteger(Cond, Result&: CondConstant)) |
4869 | Data.Final.setInt(CondConstant); |
4870 | else |
4871 | Data.Final.setPointer(EvaluateExprAsBool(E: Cond)); |
4872 | } else { |
4873 | // By default the task is not final. |
4874 | Data.Final.setInt(/*IntVal=*/false); |
4875 | } |
4876 | // Check if the task has 'priority' clause. |
4877 | if (const auto *Clause = S.getSingleClause<OMPPriorityClause>()) { |
4878 | const Expr *Prio = Clause->getPriority(); |
4879 | Data.Priority.setInt(/*IntVal=*/true); |
4880 | Data.Priority.setPointer(EmitScalarConversion( |
4881 | Src: EmitScalarExpr(E: Prio), SrcTy: Prio->getType(), |
4882 | DstTy: getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1), |
4883 | Loc: Prio->getExprLoc())); |
4884 | } |
4885 | // The first function argument for tasks is a thread id, the second one is a |
4886 | // part id (0 for tied tasks, >=0 for untied task). |
4887 | llvm::DenseSet<const VarDecl *> EmittedAsPrivate; |
4888 | // Get list of private variables. |
4889 | for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) { |
4890 | auto IRef = C->varlist_begin(); |
4891 | for (const Expr *IInit : C->private_copies()) { |
4892 | const auto *OrigVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IRef)->getDecl()); |
4893 | if (EmittedAsPrivate.insert(V: OrigVD->getCanonicalDecl()).second) { |
4894 | Data.PrivateVars.push_back(Elt: *IRef); |
4895 | Data.PrivateCopies.push_back(Elt: IInit); |
4896 | } |
4897 | ++IRef; |
4898 | } |
4899 | } |
4900 | EmittedAsPrivate.clear(); |
4901 | // Get list of firstprivate variables. |
4902 | for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) { |
4903 | auto IRef = C->varlist_begin(); |
4904 | auto IElemInitRef = C->inits().begin(); |
4905 | for (const Expr *IInit : C->private_copies()) { |
4906 | const auto *OrigVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IRef)->getDecl()); |
4907 | if (EmittedAsPrivate.insert(V: OrigVD->getCanonicalDecl()).second) { |
4908 | Data.FirstprivateVars.push_back(Elt: *IRef); |
4909 | Data.FirstprivateCopies.push_back(Elt: IInit); |
4910 | Data.FirstprivateInits.push_back(Elt: *IElemInitRef); |
4911 | } |
4912 | ++IRef; |
4913 | ++IElemInitRef; |
4914 | } |
4915 | } |
4916 | // Get list of lastprivate variables (for taskloops). |
4917 | llvm::MapVector<const VarDecl *, const DeclRefExpr *> LastprivateDstsOrigs; |
4918 | for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { |
4919 | auto IRef = C->varlist_begin(); |
4920 | auto ID = C->destination_exprs().begin(); |
4921 | for (const Expr *IInit : C->private_copies()) { |
4922 | const auto *OrigVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IRef)->getDecl()); |
4923 | if (EmittedAsPrivate.insert(V: OrigVD->getCanonicalDecl()).second) { |
4924 | Data.LastprivateVars.push_back(Elt: *IRef); |
4925 | Data.LastprivateCopies.push_back(Elt: IInit); |
4926 | } |
4927 | LastprivateDstsOrigs.insert( |
4928 | KV: std::make_pair(x: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *ID)->getDecl()), |
4929 | y: cast<DeclRefExpr>(Val: *IRef))); |
4930 | ++IRef; |
4931 | ++ID; |
4932 | } |
4933 | } |
4934 | SmallVector<const Expr *, 4> LHSs; |
4935 | SmallVector<const Expr *, 4> RHSs; |
4936 | for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) { |
4937 | Data.ReductionVars.append(in_start: C->varlist_begin(), in_end: C->varlist_end()); |
4938 | Data.ReductionOrigs.append(in_start: C->varlist_begin(), in_end: C->varlist_end()); |
4939 | Data.ReductionCopies.append(in_start: C->privates().begin(), in_end: C->privates().end()); |
4940 | Data.ReductionOps.append(in_start: C->reduction_ops().begin(), |
4941 | in_end: C->reduction_ops().end()); |
4942 | LHSs.append(in_start: C->lhs_exprs().begin(), in_end: C->lhs_exprs().end()); |
4943 | RHSs.append(in_start: C->rhs_exprs().begin(), in_end: C->rhs_exprs().end()); |
4944 | } |
4945 | Data.Reductions = CGM.getOpenMPRuntime().emitTaskReductionInit( |
4946 | CGF&: *this, Loc: S.getBeginLoc(), LHSExprs: LHSs, RHSExprs: RHSs, Data); |
4947 | // Build list of dependences. |
4948 | buildDependences(S, Data); |
4949 | // Get list of local vars for untied tasks. |
4950 | if (!Data.Tied) { |
4951 | CheckVarsEscapingUntiedTaskDeclContext Checker; |
4952 | Checker.Visit(S: S.getInnermostCapturedStmt()->getCapturedStmt()); |
4953 | Data.PrivateLocals.append(in_start: Checker.getPrivateDecls().begin(), |
4954 | in_end: Checker.getPrivateDecls().end()); |
4955 | } |
4956 | auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs, |
4957 | CapturedRegion](CodeGenFunction &CGF, |
4958 | PrePostActionTy &Action) { |
4959 | llvm::MapVector<CanonicalDeclPtr<const VarDecl>, |
4960 | std::pair<Address, Address>> |
4961 | UntiedLocalVars; |
4962 | // Set proper addresses for generated private copies. |
4963 | OMPPrivateScope Scope(CGF); |
4964 | // Generate debug info for variables present in shared clause. |
4965 | if (auto *DI = CGF.getDebugInfo()) { |
4966 | llvm::SmallDenseMap<const VarDecl *, FieldDecl *> CaptureFields = |
4967 | CGF.CapturedStmtInfo->getCaptureFields(); |
4968 | llvm::Value *ContextValue = CGF.CapturedStmtInfo->getContextValue(); |
4969 | if (CaptureFields.size() && ContextValue) { |
4970 | unsigned CharWidth = CGF.getContext().getCharWidth(); |
4971 | // The shared variables are packed together as members of structure. |
4972 | // So the address of each shared variable can be computed by adding |
4973 | // offset of it (within record) to the base address of record. For each |
4974 | // shared variable, debug intrinsic llvm.dbg.declare is generated with |
4975 | // appropriate expressions (DIExpression). |
4976 | // Ex: |
4977 | // %12 = load %struct.anon*, %struct.anon** %__context.addr.i |
4978 | // call void @llvm.dbg.declare(metadata %struct.anon* %12, |
4979 | // metadata !svar1, |
4980 | // metadata !DIExpression(DW_OP_deref)) |
4981 | // call void @llvm.dbg.declare(metadata %struct.anon* %12, |
4982 | // metadata !svar2, |
4983 | // metadata !DIExpression(DW_OP_plus_uconst, 8, DW_OP_deref)) |
4984 | for (auto It = CaptureFields.begin(); It != CaptureFields.end(); ++It) { |
4985 | const VarDecl *SharedVar = It->first; |
4986 | RecordDecl *CaptureRecord = It->second->getParent(); |
4987 | const ASTRecordLayout &Layout = |
4988 | CGF.getContext().getASTRecordLayout(D: CaptureRecord); |
4989 | unsigned Offset = |
4990 | Layout.getFieldOffset(FieldNo: It->second->getFieldIndex()) / CharWidth; |
4991 | if (CGF.CGM.getCodeGenOpts().hasReducedDebugInfo()) |
4992 | (void)DI->EmitDeclareOfAutoVariable(Decl: SharedVar, AI: ContextValue, |
4993 | Builder&: CGF.Builder, UsePointerValue: false); |
4994 | // Get the call dbg.declare instruction we just created and update |
4995 | // its DIExpression to add offset to base address. |
4996 | auto UpdateExpr = [](llvm::LLVMContext &Ctx, auto *Declare, |
4997 | unsigned Offset) { |
4998 | SmallVector<uint64_t, 8> Ops; |
4999 | // Add offset to the base address if non zero. |
5000 | if (Offset) { |
5001 | Ops.push_back(Elt: llvm::dwarf::DW_OP_plus_uconst); |
5002 | Ops.push_back(Elt: Offset); |
5003 | } |
5004 | Ops.push_back(Elt: llvm::dwarf::DW_OP_deref); |
5005 | Declare->setExpression(llvm::DIExpression::get(Context&: Ctx, Elements: Ops)); |
5006 | }; |
5007 | llvm::Instruction &Last = CGF.Builder.GetInsertBlock()->back(); |
5008 | if (auto DDI = dyn_cast<llvm::DbgVariableIntrinsic>(Val: &Last)) |
5009 | UpdateExpr(DDI->getContext(), DDI, Offset); |
5010 | // If we're emitting using the new debug info format into a block |
5011 | // without a terminator, the record will be "trailing". |
5012 | assert(!Last.isTerminator() && "unexpected terminator" ); |
5013 | if (auto *Marker = |
5014 | CGF.Builder.GetInsertBlock()->getTrailingDbgRecords()) { |
5015 | for (llvm::DbgVariableRecord &DVR : llvm::reverse( |
5016 | C: llvm::filterDbgVars(R: Marker->getDbgRecordRange()))) { |
5017 | UpdateExpr(Last.getContext(), &DVR, Offset); |
5018 | break; |
5019 | } |
5020 | } |
5021 | } |
5022 | } |
5023 | } |
5024 | llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> FirstprivatePtrs; |
5025 | if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() || |
5026 | !Data.LastprivateVars.empty() || !Data.PrivateLocals.empty()) { |
5027 | enum { PrivatesParam = 2, CopyFnParam = 3 }; |
5028 | llvm::Value *CopyFn = CGF.Builder.CreateLoad( |
5029 | Addr: CGF.GetAddrOfLocalVar(VD: CS->getCapturedDecl()->getParam(i: CopyFnParam))); |
5030 | llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(Addr: CGF.GetAddrOfLocalVar( |
5031 | VD: CS->getCapturedDecl()->getParam(i: PrivatesParam))); |
5032 | // Map privates. |
5033 | llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs; |
5034 | llvm::SmallVector<llvm::Value *, 16> CallArgs; |
5035 | llvm::SmallVector<llvm::Type *, 4> ParamTypes; |
5036 | CallArgs.push_back(Elt: PrivatesPtr); |
5037 | ParamTypes.push_back(Elt: PrivatesPtr->getType()); |
5038 | for (const Expr *E : Data.PrivateVars) { |
5039 | const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl()); |
5040 | RawAddress PrivatePtr = CGF.CreateMemTemp( |
5041 | T: CGF.getContext().getPointerType(T: E->getType()), Name: ".priv.ptr.addr" ); |
5042 | PrivatePtrs.emplace_back(Args&: VD, Args&: PrivatePtr); |
5043 | CallArgs.push_back(Elt: PrivatePtr.getPointer()); |
5044 | ParamTypes.push_back(Elt: PrivatePtr.getType()); |
5045 | } |
5046 | for (const Expr *E : Data.FirstprivateVars) { |
5047 | const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl()); |
5048 | RawAddress PrivatePtr = |
5049 | CGF.CreateMemTemp(T: CGF.getContext().getPointerType(T: E->getType()), |
5050 | Name: ".firstpriv.ptr.addr" ); |
5051 | PrivatePtrs.emplace_back(Args&: VD, Args&: PrivatePtr); |
5052 | FirstprivatePtrs.emplace_back(Args&: VD, Args&: PrivatePtr); |
5053 | CallArgs.push_back(Elt: PrivatePtr.getPointer()); |
5054 | ParamTypes.push_back(Elt: PrivatePtr.getType()); |
5055 | } |
5056 | for (const Expr *E : Data.LastprivateVars) { |
5057 | const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl()); |
5058 | RawAddress PrivatePtr = |
5059 | CGF.CreateMemTemp(T: CGF.getContext().getPointerType(T: E->getType()), |
5060 | Name: ".lastpriv.ptr.addr" ); |
5061 | PrivatePtrs.emplace_back(Args&: VD, Args&: PrivatePtr); |
5062 | CallArgs.push_back(Elt: PrivatePtr.getPointer()); |
5063 | ParamTypes.push_back(Elt: PrivatePtr.getType()); |
5064 | } |
5065 | for (const VarDecl *VD : Data.PrivateLocals) { |
5066 | QualType Ty = VD->getType().getNonReferenceType(); |
5067 | if (VD->getType()->isLValueReferenceType()) |
5068 | Ty = CGF.getContext().getPointerType(T: Ty); |
5069 | if (isAllocatableDecl(VD)) |
5070 | Ty = CGF.getContext().getPointerType(T: Ty); |
5071 | RawAddress PrivatePtr = CGF.CreateMemTemp( |
5072 | T: CGF.getContext().getPointerType(T: Ty), Name: ".local.ptr.addr" ); |
5073 | auto Result = UntiedLocalVars.insert( |
5074 | KV: std::make_pair(x&: VD, y: std::make_pair(x&: PrivatePtr, y: Address::invalid()))); |
5075 | // If key exists update in place. |
5076 | if (Result.second == false) |
5077 | *Result.first = std::make_pair( |
5078 | x&: VD, y: std::make_pair(x&: PrivatePtr, y: Address::invalid())); |
5079 | CallArgs.push_back(Elt: PrivatePtr.getPointer()); |
5080 | ParamTypes.push_back(Elt: PrivatePtr.getType()); |
5081 | } |
5082 | auto *CopyFnTy = llvm::FunctionType::get(Result: CGF.Builder.getVoidTy(), |
5083 | Params: ParamTypes, /*isVarArg=*/false); |
5084 | CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall( |
5085 | CGF, Loc: S.getBeginLoc(), OutlinedFn: {CopyFnTy, CopyFn}, Args: CallArgs); |
5086 | for (const auto &Pair : LastprivateDstsOrigs) { |
5087 | const auto *OrigVD = cast<VarDecl>(Val: Pair.second->getDecl()); |
5088 | DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(OrigVD), |
5089 | /*RefersToEnclosingVariableOrCapture=*/ |
5090 | CGF.CapturedStmtInfo->lookup(VD: OrigVD) != nullptr, |
5091 | Pair.second->getType(), VK_LValue, |
5092 | Pair.second->getExprLoc()); |
5093 | Scope.addPrivate(LocalVD: Pair.first, Addr: CGF.EmitLValue(E: &DRE).getAddress()); |
5094 | } |
5095 | for (const auto &Pair : PrivatePtrs) { |
5096 | Address Replacement = Address( |
5097 | CGF.Builder.CreateLoad(Addr: Pair.second), |
5098 | CGF.ConvertTypeForMem(T: Pair.first->getType().getNonReferenceType()), |
5099 | CGF.getContext().getDeclAlign(D: Pair.first)); |
5100 | Scope.addPrivate(LocalVD: Pair.first, Addr: Replacement); |
5101 | if (auto *DI = CGF.getDebugInfo()) |
5102 | if (CGF.CGM.getCodeGenOpts().hasReducedDebugInfo()) |
5103 | (void)DI->EmitDeclareOfAutoVariable( |
5104 | Decl: Pair.first, AI: Pair.second.getBasePointer(), Builder&: CGF.Builder, |
5105 | /*UsePointerValue*/ true); |
5106 | } |
5107 | // Adjust mapping for internal locals by mapping actual memory instead of |
5108 | // a pointer to this memory. |
5109 | for (auto &Pair : UntiedLocalVars) { |
5110 | QualType VDType = Pair.first->getType().getNonReferenceType(); |
5111 | if (Pair.first->getType()->isLValueReferenceType()) |
5112 | VDType = CGF.getContext().getPointerType(T: VDType); |
5113 | if (isAllocatableDecl(VD: Pair.first)) { |
5114 | llvm::Value *Ptr = CGF.Builder.CreateLoad(Addr: Pair.second.first); |
5115 | Address Replacement( |
5116 | Ptr, |
5117 | CGF.ConvertTypeForMem(T: CGF.getContext().getPointerType(T: VDType)), |
5118 | CGF.getPointerAlign()); |
5119 | Pair.second.first = Replacement; |
5120 | Ptr = CGF.Builder.CreateLoad(Addr: Replacement); |
5121 | Replacement = Address(Ptr, CGF.ConvertTypeForMem(T: VDType), |
5122 | CGF.getContext().getDeclAlign(D: Pair.first)); |
5123 | Pair.second.second = Replacement; |
5124 | } else { |
5125 | llvm::Value *Ptr = CGF.Builder.CreateLoad(Addr: Pair.second.first); |
5126 | Address Replacement(Ptr, CGF.ConvertTypeForMem(T: VDType), |
5127 | CGF.getContext().getDeclAlign(D: Pair.first)); |
5128 | Pair.second.first = Replacement; |
5129 | } |
5130 | } |
5131 | } |
5132 | if (Data.Reductions) { |
5133 | OMPPrivateScope FirstprivateScope(CGF); |
5134 | for (const auto &Pair : FirstprivatePtrs) { |
5135 | Address Replacement( |
5136 | CGF.Builder.CreateLoad(Addr: Pair.second), |
5137 | CGF.ConvertTypeForMem(T: Pair.first->getType().getNonReferenceType()), |
5138 | CGF.getContext().getDeclAlign(D: Pair.first)); |
5139 | FirstprivateScope.addPrivate(LocalVD: Pair.first, Addr: Replacement); |
5140 | } |
5141 | (void)FirstprivateScope.Privatize(); |
5142 | OMPLexicalScope LexScope(CGF, S, CapturedRegion); |
5143 | ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionVars, |
5144 | Data.ReductionCopies, Data.ReductionOps); |
5145 | llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad( |
5146 | Addr: CGF.GetAddrOfLocalVar(VD: CS->getCapturedDecl()->getParam(i: 9))); |
5147 | for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) { |
5148 | RedCG.emitSharedOrigLValue(CGF, N: Cnt); |
5149 | RedCG.emitAggregateType(CGF, N: Cnt); |
5150 | // FIXME: This must removed once the runtime library is fixed. |
5151 | // Emit required threadprivate variables for |
5152 | // initializer/combiner/finalizer. |
5153 | CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, Loc: S.getBeginLoc(), |
5154 | RCG&: RedCG, N: Cnt); |
5155 | Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem( |
5156 | CGF, Loc: S.getBeginLoc(), ReductionsPtr, SharedLVal: RedCG.getSharedLValue(N: Cnt)); |
5157 | Replacement = Address( |
5158 | CGF.EmitScalarConversion(Src: Replacement.emitRawPointer(CGF), |
5159 | SrcTy: CGF.getContext().VoidPtrTy, |
5160 | DstTy: CGF.getContext().getPointerType( |
5161 | T: Data.ReductionCopies[Cnt]->getType()), |
5162 | Loc: Data.ReductionCopies[Cnt]->getExprLoc()), |
5163 | CGF.ConvertTypeForMem(T: Data.ReductionCopies[Cnt]->getType()), |
5164 | Replacement.getAlignment()); |
5165 | Replacement = RedCG.adjustPrivateAddress(CGF, N: Cnt, PrivateAddr: Replacement); |
5166 | Scope.addPrivate(LocalVD: RedCG.getBaseDecl(N: Cnt), Addr: Replacement); |
5167 | } |
5168 | } |
5169 | // Privatize all private variables except for in_reduction items. |
5170 | (void)Scope.Privatize(); |
5171 | SmallVector<const Expr *, 4> InRedVars; |
5172 | SmallVector<const Expr *, 4> InRedPrivs; |
5173 | SmallVector<const Expr *, 4> InRedOps; |
5174 | SmallVector<const Expr *, 4> TaskgroupDescriptors; |
5175 | for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) { |
5176 | auto IPriv = C->privates().begin(); |
5177 | auto IRed = C->reduction_ops().begin(); |
5178 | auto ITD = C->taskgroup_descriptors().begin(); |
5179 | for (const Expr *Ref : C->varlist()) { |
5180 | InRedVars.emplace_back(Args&: Ref); |
5181 | InRedPrivs.emplace_back(Args: *IPriv); |
5182 | InRedOps.emplace_back(Args: *IRed); |
5183 | TaskgroupDescriptors.emplace_back(Args: *ITD); |
5184 | std::advance(i&: IPriv, n: 1); |
5185 | std::advance(i&: IRed, n: 1); |
5186 | std::advance(i&: ITD, n: 1); |
5187 | } |
5188 | } |
5189 | // Privatize in_reduction items here, because taskgroup descriptors must be |
5190 | // privatized earlier. |
5191 | OMPPrivateScope InRedScope(CGF); |
5192 | if (!InRedVars.empty()) { |
5193 | ReductionCodeGen RedCG(InRedVars, InRedVars, InRedPrivs, InRedOps); |
5194 | for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) { |
5195 | RedCG.emitSharedOrigLValue(CGF, N: Cnt); |
5196 | RedCG.emitAggregateType(CGF, N: Cnt); |
5197 | // The taskgroup descriptor variable is always implicit firstprivate and |
5198 | // privatized already during processing of the firstprivates. |
5199 | // FIXME: This must removed once the runtime library is fixed. |
5200 | // Emit required threadprivate variables for |
5201 | // initializer/combiner/finalizer. |
5202 | CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, Loc: S.getBeginLoc(), |
5203 | RCG&: RedCG, N: Cnt); |
5204 | llvm::Value *ReductionsPtr; |
5205 | if (const Expr *TRExpr = TaskgroupDescriptors[Cnt]) { |
5206 | ReductionsPtr = CGF.EmitLoadOfScalar(lvalue: CGF.EmitLValue(E: TRExpr), |
5207 | Loc: TRExpr->getExprLoc()); |
5208 | } else { |
5209 | ReductionsPtr = llvm::ConstantPointerNull::get(T: CGF.VoidPtrTy); |
5210 | } |
5211 | Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem( |
5212 | CGF, Loc: S.getBeginLoc(), ReductionsPtr, SharedLVal: RedCG.getSharedLValue(N: Cnt)); |
5213 | Replacement = Address( |
5214 | CGF.EmitScalarConversion( |
5215 | Src: Replacement.emitRawPointer(CGF), SrcTy: CGF.getContext().VoidPtrTy, |
5216 | DstTy: CGF.getContext().getPointerType(T: InRedPrivs[Cnt]->getType()), |
5217 | Loc: InRedPrivs[Cnt]->getExprLoc()), |
5218 | CGF.ConvertTypeForMem(T: InRedPrivs[Cnt]->getType()), |
5219 | Replacement.getAlignment()); |
5220 | Replacement = RedCG.adjustPrivateAddress(CGF, N: Cnt, PrivateAddr: Replacement); |
5221 | InRedScope.addPrivate(LocalVD: RedCG.getBaseDecl(N: Cnt), Addr: Replacement); |
5222 | } |
5223 | } |
5224 | (void)InRedScope.Privatize(); |
5225 | |
5226 | CGOpenMPRuntime::UntiedTaskLocalDeclsRAII (CGF, |
5227 | UntiedLocalVars); |
5228 | Action.Enter(CGF); |
5229 | BodyGen(CGF); |
5230 | }; |
5231 | OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S); |
5232 | llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction( |
5233 | D: S, ThreadIDVar: *I, PartIDVar: *PartId, TaskTVar: *TaskT, InnermostKind: EKind, CodeGen, Tied: Data.Tied, NumberOfParts&: Data.NumberOfParts); |
5234 | OMPLexicalScope Scope(*this, S, std::nullopt, |
5235 | !isOpenMPParallelDirective(DKind: EKind) && |
5236 | !isOpenMPSimdDirective(DKind: EKind)); |
5237 | TaskGen(*this, OutlinedFn, Data); |
5238 | } |
5239 | |
5240 | static ImplicitParamDecl * |
5241 | createImplicitFirstprivateForType(ASTContext &C, OMPTaskDataTy &Data, |
5242 | QualType Ty, CapturedDecl *CD, |
5243 | SourceLocation Loc) { |
5244 | auto *OrigVD = ImplicitParamDecl::Create(C, DC: CD, IdLoc: Loc, /*Id=*/nullptr, T: Ty, |
5245 | ParamKind: ImplicitParamKind::Other); |
5246 | auto *OrigRef = DeclRefExpr::Create( |
5247 | Context: C, QualifierLoc: NestedNameSpecifierLoc(), TemplateKWLoc: SourceLocation(), D: OrigVD, |
5248 | /*RefersToEnclosingVariableOrCapture=*/false, NameLoc: Loc, T: Ty, VK: VK_LValue); |
5249 | auto *PrivateVD = ImplicitParamDecl::Create(C, DC: CD, IdLoc: Loc, /*Id=*/nullptr, T: Ty, |
5250 | ParamKind: ImplicitParamKind::Other); |
5251 | auto *PrivateRef = DeclRefExpr::Create( |
5252 | Context: C, QualifierLoc: NestedNameSpecifierLoc(), TemplateKWLoc: SourceLocation(), D: PrivateVD, |
5253 | /*RefersToEnclosingVariableOrCapture=*/false, NameLoc: Loc, T: Ty, VK: VK_LValue); |
5254 | QualType ElemType = C.getBaseElementType(QT: Ty); |
5255 | auto *InitVD = ImplicitParamDecl::Create(C, DC: CD, IdLoc: Loc, /*Id=*/nullptr, T: ElemType, |
5256 | ParamKind: ImplicitParamKind::Other); |
5257 | auto *InitRef = DeclRefExpr::Create( |
5258 | Context: C, QualifierLoc: NestedNameSpecifierLoc(), TemplateKWLoc: SourceLocation(), D: InitVD, |
5259 | /*RefersToEnclosingVariableOrCapture=*/false, NameLoc: Loc, T: ElemType, VK: VK_LValue); |
5260 | PrivateVD->setInitStyle(VarDecl::CInit); |
5261 | PrivateVD->setInit(ImplicitCastExpr::Create(Context: C, T: ElemType, Kind: CK_LValueToRValue, |
5262 | Operand: InitRef, /*BasePath=*/nullptr, |
5263 | Cat: VK_PRValue, FPO: FPOptionsOverride())); |
5264 | Data.FirstprivateVars.emplace_back(Args&: OrigRef); |
5265 | Data.FirstprivateCopies.emplace_back(Args&: PrivateRef); |
5266 | Data.FirstprivateInits.emplace_back(Args&: InitRef); |
5267 | return OrigVD; |
5268 | } |
5269 | |
5270 | void CodeGenFunction::EmitOMPTargetTaskBasedDirective( |
5271 | const OMPExecutableDirective &S, const RegionCodeGenTy &BodyGen, |
5272 | OMPTargetDataInfo &InputInfo) { |
5273 | // Emit outlined function for task construct. |
5274 | const CapturedStmt *CS = S.getCapturedStmt(RegionKind: OMPD_task); |
5275 | Address CapturedStruct = GenerateCapturedStmtArgument(S: *CS); |
5276 | QualType SharedsTy = getContext().getRecordType(Decl: CS->getCapturedRecordDecl()); |
5277 | auto I = CS->getCapturedDecl()->param_begin(); |
5278 | auto PartId = std::next(x: I); |
5279 | auto TaskT = std::next(x: I, n: 4); |
5280 | OMPTaskDataTy Data; |
5281 | // The task is not final. |
5282 | Data.Final.setInt(/*IntVal=*/false); |
5283 | // Get list of firstprivate variables. |
5284 | for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) { |
5285 | auto IRef = C->varlist_begin(); |
5286 | auto IElemInitRef = C->inits().begin(); |
5287 | for (auto *IInit : C->private_copies()) { |
5288 | Data.FirstprivateVars.push_back(Elt: *IRef); |
5289 | Data.FirstprivateCopies.push_back(Elt: IInit); |
5290 | Data.FirstprivateInits.push_back(Elt: *IElemInitRef); |
5291 | ++IRef; |
5292 | ++IElemInitRef; |
5293 | } |
5294 | } |
5295 | SmallVector<const Expr *, 4> LHSs; |
5296 | SmallVector<const Expr *, 4> RHSs; |
5297 | for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) { |
5298 | Data.ReductionVars.append(in_start: C->varlist_begin(), in_end: C->varlist_end()); |
5299 | Data.ReductionOrigs.append(in_start: C->varlist_begin(), in_end: C->varlist_end()); |
5300 | Data.ReductionCopies.append(in_start: C->privates().begin(), in_end: C->privates().end()); |
5301 | Data.ReductionOps.append(in_start: C->reduction_ops().begin(), |
5302 | in_end: C->reduction_ops().end()); |
5303 | LHSs.append(in_start: C->lhs_exprs().begin(), in_end: C->lhs_exprs().end()); |
5304 | RHSs.append(in_start: C->rhs_exprs().begin(), in_end: C->rhs_exprs().end()); |
5305 | } |
5306 | OMPPrivateScope TargetScope(*this); |
5307 | VarDecl *BPVD = nullptr; |
5308 | VarDecl *PVD = nullptr; |
5309 | VarDecl *SVD = nullptr; |
5310 | VarDecl *MVD = nullptr; |
5311 | if (InputInfo.NumberOfTargetItems > 0) { |
5312 | auto *CD = CapturedDecl::Create( |
5313 | C&: getContext(), DC: getContext().getTranslationUnitDecl(), /*NumParams=*/0); |
5314 | llvm::APInt ArrSize(/*numBits=*/32, InputInfo.NumberOfTargetItems); |
5315 | QualType BaseAndPointerAndMapperType = getContext().getConstantArrayType( |
5316 | EltTy: getContext().VoidPtrTy, ArySize: ArrSize, SizeExpr: nullptr, ASM: ArraySizeModifier::Normal, |
5317 | /*IndexTypeQuals=*/0); |
5318 | BPVD = createImplicitFirstprivateForType( |
5319 | C&: getContext(), Data, Ty: BaseAndPointerAndMapperType, CD, Loc: S.getBeginLoc()); |
5320 | PVD = createImplicitFirstprivateForType( |
5321 | C&: getContext(), Data, Ty: BaseAndPointerAndMapperType, CD, Loc: S.getBeginLoc()); |
5322 | QualType SizesType = getContext().getConstantArrayType( |
5323 | EltTy: getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1), |
5324 | ArySize: ArrSize, SizeExpr: nullptr, ASM: ArraySizeModifier::Normal, |
5325 | /*IndexTypeQuals=*/0); |
5326 | SVD = createImplicitFirstprivateForType(C&: getContext(), Data, Ty: SizesType, CD, |
5327 | Loc: S.getBeginLoc()); |
5328 | TargetScope.addPrivate(LocalVD: BPVD, Addr: InputInfo.BasePointersArray); |
5329 | TargetScope.addPrivate(LocalVD: PVD, Addr: InputInfo.PointersArray); |
5330 | TargetScope.addPrivate(LocalVD: SVD, Addr: InputInfo.SizesArray); |
5331 | // If there is no user-defined mapper, the mapper array will be nullptr. In |
5332 | // this case, we don't need to privatize it. |
5333 | if (!isa_and_nonnull<llvm::ConstantPointerNull>( |
5334 | Val: InputInfo.MappersArray.emitRawPointer(CGF&: *this))) { |
5335 | MVD = createImplicitFirstprivateForType( |
5336 | C&: getContext(), Data, Ty: BaseAndPointerAndMapperType, CD, Loc: S.getBeginLoc()); |
5337 | TargetScope.addPrivate(LocalVD: MVD, Addr: InputInfo.MappersArray); |
5338 | } |
5339 | } |
5340 | (void)TargetScope.Privatize(); |
5341 | buildDependences(S, Data); |
5342 | OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S); |
5343 | auto &&CodeGen = [&Data, &S, CS, &BodyGen, BPVD, PVD, SVD, MVD, EKind, |
5344 | &InputInfo](CodeGenFunction &CGF, PrePostActionTy &Action) { |
5345 | // Set proper addresses for generated private copies. |
5346 | OMPPrivateScope Scope(CGF); |
5347 | if (!Data.FirstprivateVars.empty()) { |
5348 | enum { PrivatesParam = 2, CopyFnParam = 3 }; |
5349 | llvm::Value *CopyFn = CGF.Builder.CreateLoad( |
5350 | Addr: CGF.GetAddrOfLocalVar(VD: CS->getCapturedDecl()->getParam(i: CopyFnParam))); |
5351 | llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(Addr: CGF.GetAddrOfLocalVar( |
5352 | VD: CS->getCapturedDecl()->getParam(i: PrivatesParam))); |
5353 | // Map privates. |
5354 | llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs; |
5355 | llvm::SmallVector<llvm::Value *, 16> CallArgs; |
5356 | llvm::SmallVector<llvm::Type *, 4> ParamTypes; |
5357 | CallArgs.push_back(Elt: PrivatesPtr); |
5358 | ParamTypes.push_back(Elt: PrivatesPtr->getType()); |
5359 | for (const Expr *E : Data.FirstprivateVars) { |
5360 | const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl()); |
5361 | RawAddress PrivatePtr = |
5362 | CGF.CreateMemTemp(T: CGF.getContext().getPointerType(T: E->getType()), |
5363 | Name: ".firstpriv.ptr.addr" ); |
5364 | PrivatePtrs.emplace_back(Args&: VD, Args&: PrivatePtr); |
5365 | CallArgs.push_back(Elt: PrivatePtr.getPointer()); |
5366 | ParamTypes.push_back(Elt: PrivatePtr.getType()); |
5367 | } |
5368 | auto *CopyFnTy = llvm::FunctionType::get(Result: CGF.Builder.getVoidTy(), |
5369 | Params: ParamTypes, /*isVarArg=*/false); |
5370 | CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall( |
5371 | CGF, Loc: S.getBeginLoc(), OutlinedFn: {CopyFnTy, CopyFn}, Args: CallArgs); |
5372 | for (const auto &Pair : PrivatePtrs) { |
5373 | Address Replacement( |
5374 | CGF.Builder.CreateLoad(Addr: Pair.second), |
5375 | CGF.ConvertTypeForMem(T: Pair.first->getType().getNonReferenceType()), |
5376 | CGF.getContext().getDeclAlign(D: Pair.first)); |
5377 | Scope.addPrivate(LocalVD: Pair.first, Addr: Replacement); |
5378 | } |
5379 | } |
5380 | CGF.processInReduction(S, Data, CGF, CS, Scope); |
5381 | if (InputInfo.NumberOfTargetItems > 0) { |
5382 | InputInfo.BasePointersArray = CGF.Builder.CreateConstArrayGEP( |
5383 | Addr: CGF.GetAddrOfLocalVar(VD: BPVD), /*Index=*/0); |
5384 | InputInfo.PointersArray = CGF.Builder.CreateConstArrayGEP( |
5385 | Addr: CGF.GetAddrOfLocalVar(VD: PVD), /*Index=*/0); |
5386 | InputInfo.SizesArray = CGF.Builder.CreateConstArrayGEP( |
5387 | Addr: CGF.GetAddrOfLocalVar(VD: SVD), /*Index=*/0); |
5388 | // If MVD is nullptr, the mapper array is not privatized |
5389 | if (MVD) |
5390 | InputInfo.MappersArray = CGF.Builder.CreateConstArrayGEP( |
5391 | Addr: CGF.GetAddrOfLocalVar(VD: MVD), /*Index=*/0); |
5392 | } |
5393 | |
5394 | Action.Enter(CGF); |
5395 | OMPLexicalScope LexScope(CGF, S, OMPD_task, /*EmitPreInitStmt=*/false); |
5396 | auto *TL = S.getSingleClause<OMPThreadLimitClause>(); |
5397 | if (CGF.CGM.getLangOpts().OpenMP >= 51 && |
5398 | needsTaskBasedThreadLimit(DKind: EKind) && TL) { |
5399 | // Emit __kmpc_set_thread_limit() to set the thread_limit for the task |
5400 | // enclosing this target region. This will indirectly set the thread_limit |
5401 | // for every applicable construct within target region. |
5402 | CGF.CGM.getOpenMPRuntime().emitThreadLimitClause( |
5403 | CGF, ThreadLimit: TL->getThreadLimit().front(), Loc: S.getBeginLoc()); |
5404 | } |
5405 | BodyGen(CGF); |
5406 | }; |
5407 | llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction( |
5408 | D: S, ThreadIDVar: *I, PartIDVar: *PartId, TaskTVar: *TaskT, InnermostKind: EKind, CodeGen, /*Tied=*/true, |
5409 | NumberOfParts&: Data.NumberOfParts); |
5410 | llvm::APInt TrueOrFalse(32, S.hasClausesOfKind<OMPNowaitClause>() ? 1 : 0); |
5411 | IntegerLiteral IfCond(getContext(), TrueOrFalse, |
5412 | getContext().getIntTypeForBitwidth(DestWidth: 32, /*Signed=*/0), |
5413 | SourceLocation()); |
5414 | CGM.getOpenMPRuntime().emitTaskCall(CGF&: *this, Loc: S.getBeginLoc(), D: S, TaskFunction: OutlinedFn, |
5415 | SharedsTy, Shareds: CapturedStruct, IfCond: &IfCond, Data); |
5416 | } |
5417 | |
5418 | void CodeGenFunction::processInReduction(const OMPExecutableDirective &S, |
5419 | OMPTaskDataTy &Data, |
5420 | CodeGenFunction &CGF, |
5421 | const CapturedStmt *CS, |
5422 | OMPPrivateScope &Scope) { |
5423 | OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S); |
5424 | if (Data.Reductions) { |
5425 | OpenMPDirectiveKind CapturedRegion = EKind; |
5426 | OMPLexicalScope LexScope(CGF, S, CapturedRegion); |
5427 | ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionVars, |
5428 | Data.ReductionCopies, Data.ReductionOps); |
5429 | llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad( |
5430 | Addr: CGF.GetAddrOfLocalVar(VD: CS->getCapturedDecl()->getParam(i: 4))); |
5431 | for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) { |
5432 | RedCG.emitSharedOrigLValue(CGF, N: Cnt); |
5433 | RedCG.emitAggregateType(CGF, N: Cnt); |
5434 | // FIXME: This must removed once the runtime library is fixed. |
5435 | // Emit required threadprivate variables for |
5436 | // initializer/combiner/finalizer. |
5437 | CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, Loc: S.getBeginLoc(), |
5438 | RCG&: RedCG, N: Cnt); |
5439 | Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem( |
5440 | CGF, Loc: S.getBeginLoc(), ReductionsPtr, SharedLVal: RedCG.getSharedLValue(N: Cnt)); |
5441 | Replacement = Address( |
5442 | CGF.EmitScalarConversion(Src: Replacement.emitRawPointer(CGF), |
5443 | SrcTy: CGF.getContext().VoidPtrTy, |
5444 | DstTy: CGF.getContext().getPointerType( |
5445 | T: Data.ReductionCopies[Cnt]->getType()), |
5446 | Loc: Data.ReductionCopies[Cnt]->getExprLoc()), |
5447 | CGF.ConvertTypeForMem(T: Data.ReductionCopies[Cnt]->getType()), |
5448 | Replacement.getAlignment()); |
5449 | Replacement = RedCG.adjustPrivateAddress(CGF, N: Cnt, PrivateAddr: Replacement); |
5450 | Scope.addPrivate(LocalVD: RedCG.getBaseDecl(N: Cnt), Addr: Replacement); |
5451 | } |
5452 | } |
5453 | (void)Scope.Privatize(); |
5454 | SmallVector<const Expr *, 4> InRedVars; |
5455 | SmallVector<const Expr *, 4> InRedPrivs; |
5456 | SmallVector<const Expr *, 4> InRedOps; |
5457 | SmallVector<const Expr *, 4> TaskgroupDescriptors; |
5458 | for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) { |
5459 | auto IPriv = C->privates().begin(); |
5460 | auto IRed = C->reduction_ops().begin(); |
5461 | auto ITD = C->taskgroup_descriptors().begin(); |
5462 | for (const Expr *Ref : C->varlist()) { |
5463 | InRedVars.emplace_back(Args&: Ref); |
5464 | InRedPrivs.emplace_back(Args: *IPriv); |
5465 | InRedOps.emplace_back(Args: *IRed); |
5466 | TaskgroupDescriptors.emplace_back(Args: *ITD); |
5467 | std::advance(i&: IPriv, n: 1); |
5468 | std::advance(i&: IRed, n: 1); |
5469 | std::advance(i&: ITD, n: 1); |
5470 | } |
5471 | } |
5472 | OMPPrivateScope InRedScope(CGF); |
5473 | if (!InRedVars.empty()) { |
5474 | ReductionCodeGen RedCG(InRedVars, InRedVars, InRedPrivs, InRedOps); |
5475 | for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) { |
5476 | RedCG.emitSharedOrigLValue(CGF, N: Cnt); |
5477 | RedCG.emitAggregateType(CGF, N: Cnt); |
5478 | // FIXME: This must removed once the runtime library is fixed. |
5479 | // Emit required threadprivate variables for |
5480 | // initializer/combiner/finalizer. |
5481 | CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, Loc: S.getBeginLoc(), |
5482 | RCG&: RedCG, N: Cnt); |
5483 | llvm::Value *ReductionsPtr; |
5484 | if (const Expr *TRExpr = TaskgroupDescriptors[Cnt]) { |
5485 | ReductionsPtr = |
5486 | CGF.EmitLoadOfScalar(lvalue: CGF.EmitLValue(E: TRExpr), Loc: TRExpr->getExprLoc()); |
5487 | } else { |
5488 | ReductionsPtr = llvm::ConstantPointerNull::get(T: CGF.VoidPtrTy); |
5489 | } |
5490 | Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem( |
5491 | CGF, Loc: S.getBeginLoc(), ReductionsPtr, SharedLVal: RedCG.getSharedLValue(N: Cnt)); |
5492 | Replacement = Address( |
5493 | CGF.EmitScalarConversion( |
5494 | Src: Replacement.emitRawPointer(CGF), SrcTy: CGF.getContext().VoidPtrTy, |
5495 | DstTy: CGF.getContext().getPointerType(T: InRedPrivs[Cnt]->getType()), |
5496 | Loc: InRedPrivs[Cnt]->getExprLoc()), |
5497 | CGF.ConvertTypeForMem(T: InRedPrivs[Cnt]->getType()), |
5498 | Replacement.getAlignment()); |
5499 | Replacement = RedCG.adjustPrivateAddress(CGF, N: Cnt, PrivateAddr: Replacement); |
5500 | InRedScope.addPrivate(LocalVD: RedCG.getBaseDecl(N: Cnt), Addr: Replacement); |
5501 | } |
5502 | } |
5503 | (void)InRedScope.Privatize(); |
5504 | } |
5505 | |
5506 | void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) { |
5507 | // Emit outlined function for task construct. |
5508 | const CapturedStmt *CS = S.getCapturedStmt(RegionKind: OMPD_task); |
5509 | Address CapturedStruct = GenerateCapturedStmtArgument(S: *CS); |
5510 | QualType SharedsTy = getContext().getRecordType(Decl: CS->getCapturedRecordDecl()); |
5511 | const Expr *IfCond = nullptr; |
5512 | for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { |
5513 | if (C->getNameModifier() == OMPD_unknown || |
5514 | C->getNameModifier() == OMPD_task) { |
5515 | IfCond = C->getCondition(); |
5516 | break; |
5517 | } |
5518 | } |
5519 | |
5520 | OMPTaskDataTy Data; |
5521 | // Check if we should emit tied or untied task. |
5522 | Data.Tied = !S.getSingleClause<OMPUntiedClause>(); |
5523 | auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) { |
5524 | CGF.EmitStmt(S: CS->getCapturedStmt()); |
5525 | }; |
5526 | auto &&TaskGen = [&S, SharedsTy, CapturedStruct, |
5527 | IfCond](CodeGenFunction &CGF, llvm::Function *OutlinedFn, |
5528 | const OMPTaskDataTy &Data) { |
5529 | CGF.CGM.getOpenMPRuntime().emitTaskCall(CGF, Loc: S.getBeginLoc(), D: S, TaskFunction: OutlinedFn, |
5530 | SharedsTy, Shareds: CapturedStruct, IfCond, |
5531 | Data); |
5532 | }; |
5533 | auto LPCRegion = |
5534 | CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S); |
5535 | EmitOMPTaskBasedDirective(S, CapturedRegion: OMPD_task, BodyGen, TaskGen, Data); |
5536 | } |
5537 | |
5538 | void CodeGenFunction::EmitOMPTaskyieldDirective( |
5539 | const OMPTaskyieldDirective &S) { |
5540 | CGM.getOpenMPRuntime().emitTaskyieldCall(CGF&: *this, Loc: S.getBeginLoc()); |
5541 | } |
5542 | |
5543 | void CodeGenFunction::EmitOMPErrorDirective(const OMPErrorDirective &S) { |
5544 | const OMPMessageClause *MC = S.getSingleClause<OMPMessageClause>(); |
5545 | Expr *ME = MC ? MC->getMessageString() : nullptr; |
5546 | const OMPSeverityClause *SC = S.getSingleClause<OMPSeverityClause>(); |
5547 | bool IsFatal = false; |
5548 | if (!SC || SC->getSeverityKind() == OMPC_SEVERITY_fatal) |
5549 | IsFatal = true; |
5550 | CGM.getOpenMPRuntime().emitErrorCall(CGF&: *this, Loc: S.getBeginLoc(), ME, IsFatal); |
5551 | } |
5552 | |
5553 | void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) { |
5554 | CGM.getOpenMPRuntime().emitBarrierCall(CGF&: *this, Loc: S.getBeginLoc(), Kind: OMPD_barrier); |
5555 | } |
5556 | |
5557 | void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) { |
5558 | OMPTaskDataTy Data; |
5559 | // Build list of dependences |
5560 | buildDependences(S, Data); |
5561 | Data.HasNowaitClause = S.hasClausesOfKind<OMPNowaitClause>(); |
5562 | CGM.getOpenMPRuntime().emitTaskwaitCall(CGF&: *this, Loc: S.getBeginLoc(), Data); |
5563 | } |
5564 | |
5565 | static bool isSupportedByOpenMPIRBuilder(const OMPTaskgroupDirective &T) { |
5566 | return T.clauses().empty(); |
5567 | } |
5568 | |
5569 | void CodeGenFunction::EmitOMPTaskgroupDirective( |
5570 | const OMPTaskgroupDirective &S) { |
5571 | OMPLexicalScope Scope(*this, S, OMPD_unknown); |
5572 | if (CGM.getLangOpts().OpenMPIRBuilder && isSupportedByOpenMPIRBuilder(T: S)) { |
5573 | llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); |
5574 | using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; |
5575 | InsertPointTy AllocaIP(AllocaInsertPt->getParent(), |
5576 | AllocaInsertPt->getIterator()); |
5577 | |
5578 | auto BodyGenCB = [&, this](InsertPointTy AllocaIP, |
5579 | InsertPointTy CodeGenIP) { |
5580 | Builder.restoreIP(IP: CodeGenIP); |
5581 | EmitStmt(S: S.getInnermostCapturedStmt()->getCapturedStmt()); |
5582 | return llvm::Error::success(); |
5583 | }; |
5584 | CodeGenFunction::CGCapturedStmtInfo CapStmtInfo; |
5585 | if (!CapturedStmtInfo) |
5586 | CapturedStmtInfo = &CapStmtInfo; |
5587 | llvm::OpenMPIRBuilder::InsertPointTy AfterIP = |
5588 | cantFail(ValOrErr: OMPBuilder.createTaskgroup(Loc: Builder, AllocaIP, BodyGenCB)); |
5589 | Builder.restoreIP(IP: AfterIP); |
5590 | return; |
5591 | } |
5592 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
5593 | Action.Enter(CGF); |
5594 | if (const Expr *E = S.getReductionRef()) { |
5595 | SmallVector<const Expr *, 4> LHSs; |
5596 | SmallVector<const Expr *, 4> RHSs; |
5597 | OMPTaskDataTy Data; |
5598 | for (const auto *C : S.getClausesOfKind<OMPTaskReductionClause>()) { |
5599 | Data.ReductionVars.append(in_start: C->varlist_begin(), in_end: C->varlist_end()); |
5600 | Data.ReductionOrigs.append(in_start: C->varlist_begin(), in_end: C->varlist_end()); |
5601 | Data.ReductionCopies.append(in_start: C->privates().begin(), in_end: C->privates().end()); |
5602 | Data.ReductionOps.append(in_start: C->reduction_ops().begin(), |
5603 | in_end: C->reduction_ops().end()); |
5604 | LHSs.append(in_start: C->lhs_exprs().begin(), in_end: C->lhs_exprs().end()); |
5605 | RHSs.append(in_start: C->rhs_exprs().begin(), in_end: C->rhs_exprs().end()); |
5606 | } |
5607 | llvm::Value *ReductionDesc = |
5608 | CGF.CGM.getOpenMPRuntime().emitTaskReductionInit(CGF, Loc: S.getBeginLoc(), |
5609 | LHSExprs: LHSs, RHSExprs: RHSs, Data); |
5610 | const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl()); |
5611 | CGF.EmitVarDecl(D: *VD); |
5612 | CGF.EmitStoreOfScalar(Value: ReductionDesc, Addr: CGF.GetAddrOfLocalVar(VD), |
5613 | /*Volatile=*/false, Ty: E->getType()); |
5614 | } |
5615 | CGF.EmitStmt(S: S.getInnermostCapturedStmt()->getCapturedStmt()); |
5616 | }; |
5617 | CGM.getOpenMPRuntime().emitTaskgroupRegion(CGF&: *this, TaskgroupOpGen: CodeGen, Loc: S.getBeginLoc()); |
5618 | } |
5619 | |
5620 | void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) { |
5621 | llvm::AtomicOrdering AO = S.getSingleClause<OMPFlushClause>() |
5622 | ? llvm::AtomicOrdering::NotAtomic |
5623 | : llvm::AtomicOrdering::AcquireRelease; |
5624 | CGM.getOpenMPRuntime().emitFlush( |
5625 | CGF&: *this, |
5626 | Vars: [&S]() -> ArrayRef<const Expr *> { |
5627 | if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>()) |
5628 | return llvm::ArrayRef(FlushClause->varlist_begin(), |
5629 | FlushClause->varlist_end()); |
5630 | return {}; |
5631 | }(), |
5632 | Loc: S.getBeginLoc(), AO); |
5633 | } |
5634 | |
5635 | void CodeGenFunction::EmitOMPDepobjDirective(const OMPDepobjDirective &S) { |
5636 | const auto *DO = S.getSingleClause<OMPDepobjClause>(); |
5637 | LValue DOLVal = EmitLValue(E: DO->getDepobj()); |
5638 | if (const auto *DC = S.getSingleClause<OMPDependClause>()) { |
5639 | // Build list and emit dependences |
5640 | OMPTaskDataTy Data; |
5641 | buildDependences(S, Data); |
5642 | for (auto &Dep : Data.Dependences) { |
5643 | Address DepAddr = CGM.getOpenMPRuntime().emitDepobjDependClause( |
5644 | CGF&: *this, Dependencies: Dep, Loc: DC->getBeginLoc()); |
5645 | EmitStoreOfScalar(value: DepAddr.emitRawPointer(CGF&: *this), lvalue: DOLVal); |
5646 | } |
5647 | return; |
5648 | } |
5649 | if (const auto *DC = S.getSingleClause<OMPDestroyClause>()) { |
5650 | CGM.getOpenMPRuntime().emitDestroyClause(CGF&: *this, DepobjLVal: DOLVal, Loc: DC->getBeginLoc()); |
5651 | return; |
5652 | } |
5653 | if (const auto *UC = S.getSingleClause<OMPUpdateClause>()) { |
5654 | CGM.getOpenMPRuntime().emitUpdateClause( |
5655 | CGF&: *this, DepobjLVal: DOLVal, NewDepKind: UC->getDependencyKind(), Loc: UC->getBeginLoc()); |
5656 | return; |
5657 | } |
5658 | } |
5659 | |
5660 | void CodeGenFunction::EmitOMPScanDirective(const OMPScanDirective &S) { |
5661 | if (!OMPParentLoopDirectiveForScan) |
5662 | return; |
5663 | const OMPExecutableDirective &ParentDir = *OMPParentLoopDirectiveForScan; |
5664 | bool IsInclusive = S.hasClausesOfKind<OMPInclusiveClause>(); |
5665 | SmallVector<const Expr *, 4> Shareds; |
5666 | SmallVector<const Expr *, 4> Privates; |
5667 | SmallVector<const Expr *, 4> LHSs; |
5668 | SmallVector<const Expr *, 4> RHSs; |
5669 | SmallVector<const Expr *, 4> ReductionOps; |
5670 | SmallVector<const Expr *, 4> CopyOps; |
5671 | SmallVector<const Expr *, 4> CopyArrayTemps; |
5672 | SmallVector<const Expr *, 4> CopyArrayElems; |
5673 | for (const auto *C : ParentDir.getClausesOfKind<OMPReductionClause>()) { |
5674 | if (C->getModifier() != OMPC_REDUCTION_inscan) |
5675 | continue; |
5676 | Shareds.append(in_start: C->varlist_begin(), in_end: C->varlist_end()); |
5677 | Privates.append(in_start: C->privates().begin(), in_end: C->privates().end()); |
5678 | LHSs.append(in_start: C->lhs_exprs().begin(), in_end: C->lhs_exprs().end()); |
5679 | RHSs.append(in_start: C->rhs_exprs().begin(), in_end: C->rhs_exprs().end()); |
5680 | ReductionOps.append(in_start: C->reduction_ops().begin(), in_end: C->reduction_ops().end()); |
5681 | CopyOps.append(in_start: C->copy_ops().begin(), in_end: C->copy_ops().end()); |
5682 | CopyArrayTemps.append(in_start: C->copy_array_temps().begin(), |
5683 | in_end: C->copy_array_temps().end()); |
5684 | CopyArrayElems.append(in_start: C->copy_array_elems().begin(), |
5685 | in_end: C->copy_array_elems().end()); |
5686 | } |
5687 | if (ParentDir.getDirectiveKind() == OMPD_simd || |
5688 | (getLangOpts().OpenMPSimd && |
5689 | isOpenMPSimdDirective(DKind: ParentDir.getDirectiveKind()))) { |
5690 | // For simd directive and simd-based directives in simd only mode, use the |
5691 | // following codegen: |
5692 | // int x = 0; |
5693 | // #pragma omp simd reduction(inscan, +: x) |
5694 | // for (..) { |
5695 | // <first part> |
5696 | // #pragma omp scan inclusive(x) |
5697 | // <second part> |
5698 | // } |
5699 | // is transformed to: |
5700 | // int x = 0; |
5701 | // for (..) { |
5702 | // int x_priv = 0; |
5703 | // <first part> |
5704 | // x = x_priv + x; |
5705 | // x_priv = x; |
5706 | // <second part> |
5707 | // } |
5708 | // and |
5709 | // int x = 0; |
5710 | // #pragma omp simd reduction(inscan, +: x) |
5711 | // for (..) { |
5712 | // <first part> |
5713 | // #pragma omp scan exclusive(x) |
5714 | // <second part> |
5715 | // } |
5716 | // to |
5717 | // int x = 0; |
5718 | // for (..) { |
5719 | // int x_priv = 0; |
5720 | // <second part> |
5721 | // int temp = x; |
5722 | // x = x_priv + x; |
5723 | // x_priv = temp; |
5724 | // <first part> |
5725 | // } |
5726 | llvm::BasicBlock *OMPScanReduce = createBasicBlock(name: "omp.inscan.reduce" ); |
5727 | EmitBranch(Block: IsInclusive |
5728 | ? OMPScanReduce |
5729 | : BreakContinueStack.back().ContinueBlock.getBlock()); |
5730 | EmitBlock(BB: OMPScanDispatch); |
5731 | { |
5732 | // New scope for correct construction/destruction of temp variables for |
5733 | // exclusive scan. |
5734 | LexicalScope Scope(*this, S.getSourceRange()); |
5735 | EmitBranch(Block: IsInclusive ? OMPBeforeScanBlock : OMPAfterScanBlock); |
5736 | EmitBlock(BB: OMPScanReduce); |
5737 | if (!IsInclusive) { |
5738 | // Create temp var and copy LHS value to this temp value. |
5739 | // TMP = LHS; |
5740 | for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) { |
5741 | const Expr *PrivateExpr = Privates[I]; |
5742 | const Expr *TempExpr = CopyArrayTemps[I]; |
5743 | EmitAutoVarDecl( |
5744 | D: *cast<VarDecl>(Val: cast<DeclRefExpr>(Val: TempExpr)->getDecl())); |
5745 | LValue DestLVal = EmitLValue(E: TempExpr); |
5746 | LValue SrcLVal = EmitLValue(E: LHSs[I]); |
5747 | EmitOMPCopy(OriginalType: PrivateExpr->getType(), DestAddr: DestLVal.getAddress(), |
5748 | SrcAddr: SrcLVal.getAddress(), |
5749 | DestVD: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: LHSs[I])->getDecl()), |
5750 | SrcVD: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: RHSs[I])->getDecl()), |
5751 | Copy: CopyOps[I]); |
5752 | } |
5753 | } |
5754 | CGM.getOpenMPRuntime().emitReduction( |
5755 | CGF&: *this, Loc: ParentDir.getEndLoc(), Privates, LHSExprs: LHSs, RHSExprs: RHSs, ReductionOps, |
5756 | Options: {/*WithNowait=*/true, /*SimpleReduction=*/true, |
5757 | /*IsPrivateVarReduction*/ {}, .ReductionKind: OMPD_simd}); |
5758 | for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) { |
5759 | const Expr *PrivateExpr = Privates[I]; |
5760 | LValue DestLVal; |
5761 | LValue SrcLVal; |
5762 | if (IsInclusive) { |
5763 | DestLVal = EmitLValue(E: RHSs[I]); |
5764 | SrcLVal = EmitLValue(E: LHSs[I]); |
5765 | } else { |
5766 | const Expr *TempExpr = CopyArrayTemps[I]; |
5767 | DestLVal = EmitLValue(E: RHSs[I]); |
5768 | SrcLVal = EmitLValue(E: TempExpr); |
5769 | } |
5770 | EmitOMPCopy( |
5771 | OriginalType: PrivateExpr->getType(), DestAddr: DestLVal.getAddress(), SrcAddr: SrcLVal.getAddress(), |
5772 | DestVD: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: LHSs[I])->getDecl()), |
5773 | SrcVD: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: RHSs[I])->getDecl()), Copy: CopyOps[I]); |
5774 | } |
5775 | } |
5776 | EmitBranch(Block: IsInclusive ? OMPAfterScanBlock : OMPBeforeScanBlock); |
5777 | OMPScanExitBlock = IsInclusive |
5778 | ? BreakContinueStack.back().ContinueBlock.getBlock() |
5779 | : OMPScanReduce; |
5780 | EmitBlock(BB: OMPAfterScanBlock); |
5781 | return; |
5782 | } |
5783 | if (!IsInclusive) { |
5784 | EmitBranch(Block: BreakContinueStack.back().ContinueBlock.getBlock()); |
5785 | EmitBlock(BB: OMPScanExitBlock); |
5786 | } |
5787 | if (OMPFirstScanLoop) { |
5788 | // Emit buffer[i] = red; at the end of the input phase. |
5789 | const auto *IVExpr = cast<OMPLoopDirective>(Val: ParentDir) |
5790 | .getIterationVariable() |
5791 | ->IgnoreParenImpCasts(); |
5792 | LValue IdxLVal = EmitLValue(E: IVExpr); |
5793 | llvm::Value *IdxVal = EmitLoadOfScalar(lvalue: IdxLVal, Loc: IVExpr->getExprLoc()); |
5794 | IdxVal = Builder.CreateIntCast(V: IdxVal, DestTy: SizeTy, /*isSigned=*/false); |
5795 | for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) { |
5796 | const Expr *PrivateExpr = Privates[I]; |
5797 | const Expr *OrigExpr = Shareds[I]; |
5798 | const Expr *CopyArrayElem = CopyArrayElems[I]; |
5799 | OpaqueValueMapping IdxMapping( |
5800 | *this, |
5801 | cast<OpaqueValueExpr>( |
5802 | Val: cast<ArraySubscriptExpr>(Val: CopyArrayElem)->getIdx()), |
5803 | RValue::get(V: IdxVal)); |
5804 | LValue DestLVal = EmitLValue(E: CopyArrayElem); |
5805 | LValue SrcLVal = EmitLValue(E: OrigExpr); |
5806 | EmitOMPCopy( |
5807 | OriginalType: PrivateExpr->getType(), DestAddr: DestLVal.getAddress(), SrcAddr: SrcLVal.getAddress(), |
5808 | DestVD: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: LHSs[I])->getDecl()), |
5809 | SrcVD: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: RHSs[I])->getDecl()), Copy: CopyOps[I]); |
5810 | } |
5811 | } |
5812 | EmitBranch(Block: BreakContinueStack.back().ContinueBlock.getBlock()); |
5813 | if (IsInclusive) { |
5814 | EmitBlock(BB: OMPScanExitBlock); |
5815 | EmitBranch(Block: BreakContinueStack.back().ContinueBlock.getBlock()); |
5816 | } |
5817 | EmitBlock(BB: OMPScanDispatch); |
5818 | if (!OMPFirstScanLoop) { |
5819 | // Emit red = buffer[i]; at the entrance to the scan phase. |
5820 | const auto *IVExpr = cast<OMPLoopDirective>(Val: ParentDir) |
5821 | .getIterationVariable() |
5822 | ->IgnoreParenImpCasts(); |
5823 | LValue IdxLVal = EmitLValue(E: IVExpr); |
5824 | llvm::Value *IdxVal = EmitLoadOfScalar(lvalue: IdxLVal, Loc: IVExpr->getExprLoc()); |
5825 | IdxVal = Builder.CreateIntCast(V: IdxVal, DestTy: SizeTy, /*isSigned=*/false); |
5826 | llvm::BasicBlock *ExclusiveExitBB = nullptr; |
5827 | if (!IsInclusive) { |
5828 | llvm::BasicBlock *ContBB = createBasicBlock(name: "omp.exclusive.dec" ); |
5829 | ExclusiveExitBB = createBasicBlock(name: "omp.exclusive.copy.exit" ); |
5830 | llvm::Value *Cmp = Builder.CreateIsNull(Arg: IdxVal); |
5831 | Builder.CreateCondBr(Cond: Cmp, True: ExclusiveExitBB, False: ContBB); |
5832 | EmitBlock(BB: ContBB); |
5833 | // Use idx - 1 iteration for exclusive scan. |
5834 | IdxVal = Builder.CreateNUWSub(LHS: IdxVal, RHS: llvm::ConstantInt::get(Ty: SizeTy, V: 1)); |
5835 | } |
5836 | for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) { |
5837 | const Expr *PrivateExpr = Privates[I]; |
5838 | const Expr *OrigExpr = Shareds[I]; |
5839 | const Expr *CopyArrayElem = CopyArrayElems[I]; |
5840 | OpaqueValueMapping IdxMapping( |
5841 | *this, |
5842 | cast<OpaqueValueExpr>( |
5843 | Val: cast<ArraySubscriptExpr>(Val: CopyArrayElem)->getIdx()), |
5844 | RValue::get(V: IdxVal)); |
5845 | LValue SrcLVal = EmitLValue(E: CopyArrayElem); |
5846 | LValue DestLVal = EmitLValue(E: OrigExpr); |
5847 | EmitOMPCopy( |
5848 | OriginalType: PrivateExpr->getType(), DestAddr: DestLVal.getAddress(), SrcAddr: SrcLVal.getAddress(), |
5849 | DestVD: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: LHSs[I])->getDecl()), |
5850 | SrcVD: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: RHSs[I])->getDecl()), Copy: CopyOps[I]); |
5851 | } |
5852 | if (!IsInclusive) { |
5853 | EmitBlock(BB: ExclusiveExitBB); |
5854 | } |
5855 | } |
5856 | EmitBranch(Block: (OMPFirstScanLoop == IsInclusive) ? OMPBeforeScanBlock |
5857 | : OMPAfterScanBlock); |
5858 | EmitBlock(BB: OMPAfterScanBlock); |
5859 | } |
5860 | |
5861 | void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S, |
5862 | const CodeGenLoopTy &CodeGenLoop, |
5863 | Expr *IncExpr) { |
5864 | // Emit the loop iteration variable. |
5865 | const auto *IVExpr = cast<DeclRefExpr>(Val: S.getIterationVariable()); |
5866 | const auto *IVDecl = cast<VarDecl>(Val: IVExpr->getDecl()); |
5867 | EmitVarDecl(D: *IVDecl); |
5868 | |
5869 | // Emit the iterations count variable. |
5870 | // If it is not a variable, Sema decided to calculate iterations count on each |
5871 | // iteration (e.g., it is foldable into a constant). |
5872 | if (const auto *LIExpr = dyn_cast<DeclRefExpr>(Val: S.getLastIteration())) { |
5873 | EmitVarDecl(D: *cast<VarDecl>(Val: LIExpr->getDecl())); |
5874 | // Emit calculation of the iterations count. |
5875 | EmitIgnoredExpr(E: S.getCalcLastIteration()); |
5876 | } |
5877 | |
5878 | CGOpenMPRuntime &RT = CGM.getOpenMPRuntime(); |
5879 | |
5880 | bool HasLastprivateClause = false; |
5881 | // Check pre-condition. |
5882 | { |
5883 | OMPLoopScope PreInitScope(*this, S); |
5884 | // Skip the entire loop if we don't meet the precondition. |
5885 | // If the condition constant folds and can be elided, avoid emitting the |
5886 | // whole loop. |
5887 | bool CondConstant; |
5888 | llvm::BasicBlock *ContBlock = nullptr; |
5889 | if (ConstantFoldsToSimpleInteger(Cond: S.getPreCond(), Result&: CondConstant)) { |
5890 | if (!CondConstant) |
5891 | return; |
5892 | } else { |
5893 | llvm::BasicBlock *ThenBlock = createBasicBlock(name: "omp.precond.then" ); |
5894 | ContBlock = createBasicBlock(name: "omp.precond.end" ); |
5895 | emitPreCond(CGF&: *this, S, Cond: S.getPreCond(), TrueBlock: ThenBlock, FalseBlock: ContBlock, |
5896 | TrueCount: getProfileCount(S: &S)); |
5897 | EmitBlock(BB: ThenBlock); |
5898 | incrementProfileCounter(S: &S); |
5899 | } |
5900 | |
5901 | emitAlignedClause(CGF&: *this, D: S); |
5902 | // Emit 'then' code. |
5903 | { |
5904 | // Emit helper vars inits. |
5905 | |
5906 | LValue LB = EmitOMPHelperVar( |
5907 | CGF&: *this, Helper: cast<DeclRefExpr>( |
5908 | Val: (isOpenMPLoopBoundSharingDirective(Kind: S.getDirectiveKind()) |
5909 | ? S.getCombinedLowerBoundVariable() |
5910 | : S.getLowerBoundVariable()))); |
5911 | LValue UB = EmitOMPHelperVar( |
5912 | CGF&: *this, Helper: cast<DeclRefExpr>( |
5913 | Val: (isOpenMPLoopBoundSharingDirective(Kind: S.getDirectiveKind()) |
5914 | ? S.getCombinedUpperBoundVariable() |
5915 | : S.getUpperBoundVariable()))); |
5916 | LValue ST = |
5917 | EmitOMPHelperVar(CGF&: *this, Helper: cast<DeclRefExpr>(Val: S.getStrideVariable())); |
5918 | LValue IL = |
5919 | EmitOMPHelperVar(CGF&: *this, Helper: cast<DeclRefExpr>(Val: S.getIsLastIterVariable())); |
5920 | |
5921 | OMPPrivateScope LoopScope(*this); |
5922 | if (EmitOMPFirstprivateClause(D: S, PrivateScope&: LoopScope)) { |
5923 | // Emit implicit barrier to synchronize threads and avoid data races |
5924 | // on initialization of firstprivate variables and post-update of |
5925 | // lastprivate variables. |
5926 | CGM.getOpenMPRuntime().emitBarrierCall( |
5927 | CGF&: *this, Loc: S.getBeginLoc(), Kind: OMPD_unknown, /*EmitChecks=*/false, |
5928 | /*ForceSimpleCall=*/true); |
5929 | } |
5930 | EmitOMPPrivateClause(D: S, PrivateScope&: LoopScope); |
5931 | if (isOpenMPSimdDirective(DKind: S.getDirectiveKind()) && |
5932 | !isOpenMPParallelDirective(DKind: S.getDirectiveKind()) && |
5933 | !isOpenMPTeamsDirective(DKind: S.getDirectiveKind())) |
5934 | EmitOMPReductionClauseInit(D: S, PrivateScope&: LoopScope); |
5935 | HasLastprivateClause = EmitOMPLastprivateClauseInit(D: S, PrivateScope&: LoopScope); |
5936 | EmitOMPPrivateLoopCounters(S, LoopScope); |
5937 | (void)LoopScope.Privatize(); |
5938 | if (isOpenMPTargetExecutionDirective(DKind: S.getDirectiveKind())) |
5939 | CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF&: *this, D: S); |
5940 | |
5941 | // Detect the distribute schedule kind and chunk. |
5942 | llvm::Value *Chunk = nullptr; |
5943 | OpenMPDistScheduleClauseKind ScheduleKind = OMPC_DIST_SCHEDULE_unknown; |
5944 | if (const auto *C = S.getSingleClause<OMPDistScheduleClause>()) { |
5945 | ScheduleKind = C->getDistScheduleKind(); |
5946 | if (const Expr *Ch = C->getChunkSize()) { |
5947 | Chunk = EmitScalarExpr(E: Ch); |
5948 | Chunk = EmitScalarConversion(Src: Chunk, SrcTy: Ch->getType(), |
5949 | DstTy: S.getIterationVariable()->getType(), |
5950 | Loc: S.getBeginLoc()); |
5951 | } |
5952 | } else { |
5953 | // Default behaviour for dist_schedule clause. |
5954 | CGM.getOpenMPRuntime().getDefaultDistScheduleAndChunk( |
5955 | CGF&: *this, S, ScheduleKind, Chunk); |
5956 | } |
5957 | const unsigned IVSize = getContext().getTypeSize(T: IVExpr->getType()); |
5958 | const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); |
5959 | |
5960 | // OpenMP [2.10.8, distribute Construct, Description] |
5961 | // If dist_schedule is specified, kind must be static. If specified, |
5962 | // iterations are divided into chunks of size chunk_size, chunks are |
5963 | // assigned to the teams of the league in a round-robin fashion in the |
5964 | // order of the team number. When no chunk_size is specified, the |
5965 | // iteration space is divided into chunks that are approximately equal |
5966 | // in size, and at most one chunk is distributed to each team of the |
5967 | // league. The size of the chunks is unspecified in this case. |
5968 | bool StaticChunked = |
5969 | RT.isStaticChunked(ScheduleKind, /* Chunked */ Chunk != nullptr) && |
5970 | isOpenMPLoopBoundSharingDirective(Kind: S.getDirectiveKind()); |
5971 | if (RT.isStaticNonchunked(ScheduleKind, |
5972 | /* Chunked */ Chunk != nullptr) || |
5973 | StaticChunked) { |
5974 | CGOpenMPRuntime::StaticRTInput StaticInit( |
5975 | IVSize, IVSigned, /* Ordered = */ false, IL.getAddress(), |
5976 | LB.getAddress(), UB.getAddress(), ST.getAddress(), |
5977 | StaticChunked ? Chunk : nullptr); |
5978 | RT.emitDistributeStaticInit(CGF&: *this, Loc: S.getBeginLoc(), SchedKind: ScheduleKind, |
5979 | Values: StaticInit); |
5980 | JumpDest LoopExit = |
5981 | getJumpDestInCurrentScope(Target: createBasicBlock(name: "omp.loop.exit" )); |
5982 | // UB = min(UB, GlobalUB); |
5983 | EmitIgnoredExpr(E: isOpenMPLoopBoundSharingDirective(Kind: S.getDirectiveKind()) |
5984 | ? S.getCombinedEnsureUpperBound() |
5985 | : S.getEnsureUpperBound()); |
5986 | // IV = LB; |
5987 | EmitIgnoredExpr(E: isOpenMPLoopBoundSharingDirective(Kind: S.getDirectiveKind()) |
5988 | ? S.getCombinedInit() |
5989 | : S.getInit()); |
5990 | |
5991 | const Expr *Cond = |
5992 | isOpenMPLoopBoundSharingDirective(Kind: S.getDirectiveKind()) |
5993 | ? S.getCombinedCond() |
5994 | : S.getCond(); |
5995 | |
5996 | if (StaticChunked) |
5997 | Cond = S.getCombinedDistCond(); |
5998 | |
5999 | // For static unchunked schedules generate: |
6000 | // |
6001 | // 1. For distribute alone, codegen |
6002 | // while (idx <= UB) { |
6003 | // BODY; |
6004 | // ++idx; |
6005 | // } |
6006 | // |
6007 | // 2. When combined with 'for' (e.g. as in 'distribute parallel for') |
6008 | // while (idx <= UB) { |
6009 | // <CodeGen rest of pragma>(LB, UB); |
6010 | // idx += ST; |
6011 | // } |
6012 | // |
6013 | // For static chunk one schedule generate: |
6014 | // |
6015 | // while (IV <= GlobalUB) { |
6016 | // <CodeGen rest of pragma>(LB, UB); |
6017 | // LB += ST; |
6018 | // UB += ST; |
6019 | // UB = min(UB, GlobalUB); |
6020 | // IV = LB; |
6021 | // } |
6022 | // |
6023 | emitCommonSimdLoop( |
6024 | CGF&: *this, S, |
6025 | SimdInitGen: [&S](CodeGenFunction &CGF, PrePostActionTy &) { |
6026 | if (isOpenMPSimdDirective(DKind: S.getDirectiveKind())) |
6027 | CGF.EmitOMPSimdInit(D: S); |
6028 | }, |
6029 | BodyCodeGen: [&S, &LoopScope, Cond, IncExpr, LoopExit, &CodeGenLoop, |
6030 | StaticChunked](CodeGenFunction &CGF, PrePostActionTy &) { |
6031 | CGF.EmitOMPInnerLoop( |
6032 | S, RequiresCleanup: LoopScope.requiresCleanups(), LoopCond: Cond, IncExpr, |
6033 | BodyGen: [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) { |
6034 | CodeGenLoop(CGF, S, LoopExit); |
6035 | }, |
6036 | PostIncGen: [&S, StaticChunked](CodeGenFunction &CGF) { |
6037 | if (StaticChunked) { |
6038 | CGF.EmitIgnoredExpr(E: S.getCombinedNextLowerBound()); |
6039 | CGF.EmitIgnoredExpr(E: S.getCombinedNextUpperBound()); |
6040 | CGF.EmitIgnoredExpr(E: S.getCombinedEnsureUpperBound()); |
6041 | CGF.EmitIgnoredExpr(E: S.getCombinedInit()); |
6042 | } |
6043 | }); |
6044 | }); |
6045 | EmitBlock(BB: LoopExit.getBlock()); |
6046 | // Tell the runtime we are done. |
6047 | RT.emitForStaticFinish(CGF&: *this, Loc: S.getEndLoc(), DKind: OMPD_distribute); |
6048 | } else { |
6049 | // Emit the outer loop, which requests its work chunk [LB..UB] from |
6050 | // runtime and runs the inner loop to process it. |
6051 | const OMPLoopArguments LoopArguments = { |
6052 | LB.getAddress(), UB.getAddress(), ST.getAddress(), IL.getAddress(), |
6053 | Chunk}; |
6054 | EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, LoopArgs: LoopArguments, |
6055 | CodeGenLoopContent: CodeGenLoop); |
6056 | } |
6057 | if (isOpenMPSimdDirective(DKind: S.getDirectiveKind())) { |
6058 | EmitOMPSimdFinal(D: S, CondGen: [IL, &S](CodeGenFunction &CGF) { |
6059 | return CGF.Builder.CreateIsNotNull( |
6060 | Arg: CGF.EmitLoadOfScalar(lvalue: IL, Loc: S.getBeginLoc())); |
6061 | }); |
6062 | } |
6063 | if (isOpenMPSimdDirective(DKind: S.getDirectiveKind()) && |
6064 | !isOpenMPParallelDirective(DKind: S.getDirectiveKind()) && |
6065 | !isOpenMPTeamsDirective(DKind: S.getDirectiveKind())) { |
6066 | EmitOMPReductionClauseFinal(D: S, ReductionKind: OMPD_simd); |
6067 | // Emit post-update of the reduction variables if IsLastIter != 0. |
6068 | emitPostUpdateForReductionClause( |
6069 | CGF&: *this, D: S, CondGen: [IL, &S](CodeGenFunction &CGF) { |
6070 | return CGF.Builder.CreateIsNotNull( |
6071 | Arg: CGF.EmitLoadOfScalar(lvalue: IL, Loc: S.getBeginLoc())); |
6072 | }); |
6073 | } |
6074 | // Emit final copy of the lastprivate variables if IsLastIter != 0. |
6075 | if (HasLastprivateClause) { |
6076 | EmitOMPLastprivateClauseFinal( |
6077 | D: S, /*NoFinals=*/false, |
6078 | IsLastIterCond: Builder.CreateIsNotNull(Arg: EmitLoadOfScalar(lvalue: IL, Loc: S.getBeginLoc()))); |
6079 | } |
6080 | } |
6081 | |
6082 | // We're now done with the loop, so jump to the continuation block. |
6083 | if (ContBlock) { |
6084 | EmitBranch(Block: ContBlock); |
6085 | EmitBlock(BB: ContBlock, IsFinished: true); |
6086 | } |
6087 | } |
6088 | } |
6089 | |
6090 | // Pass OMPLoopDirective (instead of OMPDistributeDirective) to make this |
6091 | // function available for "loop bind(teams)", which maps to "distribute". |
6092 | static void emitOMPDistributeDirective(const OMPLoopDirective &S, |
6093 | CodeGenFunction &CGF, |
6094 | CodeGenModule &CGM) { |
6095 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { |
6096 | CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitOMPLoopBodyWithStopPoint, IncExpr: S.getInc()); |
6097 | }; |
6098 | OMPLexicalScope Scope(CGF, S, OMPD_unknown); |
6099 | CGM.getOpenMPRuntime().emitInlinedDirective(CGF, InnermostKind: OMPD_distribute, CodeGen); |
6100 | } |
6101 | |
6102 | void CodeGenFunction::EmitOMPDistributeDirective( |
6103 | const OMPDistributeDirective &S) { |
6104 | emitOMPDistributeDirective(S, CGF&: *this, CGM); |
6105 | } |
6106 | |
6107 | static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM, |
6108 | const CapturedStmt *S, |
6109 | SourceLocation Loc) { |
6110 | CodeGenFunction CGF(CGM, /*suppressNewContext=*/true); |
6111 | CodeGenFunction::CGCapturedStmtInfo CapStmtInfo; |
6112 | CGF.CapturedStmtInfo = &CapStmtInfo; |
6113 | llvm::Function *Fn = CGF.GenerateOpenMPCapturedStmtFunction(S: *S, Loc); |
6114 | Fn->setDoesNotRecurse(); |
6115 | return Fn; |
6116 | } |
6117 | |
6118 | template <typename T> |
6119 | static void emitRestoreIP(CodeGenFunction &CGF, const T *C, |
6120 | llvm::OpenMPIRBuilder::InsertPointTy AllocaIP, |
6121 | llvm::OpenMPIRBuilder &OMPBuilder) { |
6122 | |
6123 | unsigned NumLoops = C->getNumLoops(); |
6124 | QualType Int64Ty = CGF.CGM.getContext().getIntTypeForBitwidth( |
6125 | /*DestWidth=*/64, /*Signed=*/1); |
6126 | llvm::SmallVector<llvm::Value *> StoreValues; |
6127 | for (unsigned I = 0; I < NumLoops; I++) { |
6128 | const Expr *CounterVal = C->getLoopData(I); |
6129 | assert(CounterVal); |
6130 | llvm::Value *StoreValue = CGF.EmitScalarConversion( |
6131 | Src: CGF.EmitScalarExpr(E: CounterVal), SrcTy: CounterVal->getType(), DstTy: Int64Ty, |
6132 | Loc: CounterVal->getExprLoc()); |
6133 | StoreValues.emplace_back(Args&: StoreValue); |
6134 | } |
6135 | OMPDoacrossKind<T> ODK; |
6136 | bool IsDependSource = ODK.isSource(C); |
6137 | CGF.Builder.restoreIP( |
6138 | IP: OMPBuilder.createOrderedDepend(Loc: CGF.Builder, AllocaIP, NumLoops, |
6139 | StoreValues, Name: ".cnt.addr" , IsDependSource)); |
6140 | } |
6141 | |
6142 | void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) { |
6143 | if (CGM.getLangOpts().OpenMPIRBuilder) { |
6144 | llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); |
6145 | using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; |
6146 | |
6147 | if (S.hasClausesOfKind<OMPDependClause>() || |
6148 | S.hasClausesOfKind<OMPDoacrossClause>()) { |
6149 | // The ordered directive with depend clause. |
6150 | assert(!S.hasAssociatedStmt() && "No associated statement must be in " |
6151 | "ordered depend|doacross construct." ); |
6152 | InsertPointTy AllocaIP(AllocaInsertPt->getParent(), |
6153 | AllocaInsertPt->getIterator()); |
6154 | for (const auto *DC : S.getClausesOfKind<OMPDependClause>()) |
6155 | emitRestoreIP(CGF&: *this, C: DC, AllocaIP, OMPBuilder); |
6156 | for (const auto *DC : S.getClausesOfKind<OMPDoacrossClause>()) |
6157 | emitRestoreIP(CGF&: *this, C: DC, AllocaIP, OMPBuilder); |
6158 | } else { |
6159 | // The ordered directive with threads or simd clause, or without clause. |
6160 | // Without clause, it behaves as if the threads clause is specified. |
6161 | const auto *C = S.getSingleClause<OMPSIMDClause>(); |
6162 | |
6163 | auto FiniCB = [this](InsertPointTy IP) { |
6164 | OMPBuilderCBHelpers::FinalizeOMPRegion(CGF&: *this, IP); |
6165 | return llvm::Error::success(); |
6166 | }; |
6167 | |
6168 | auto BodyGenCB = [&S, C, this](InsertPointTy AllocaIP, |
6169 | InsertPointTy CodeGenIP) { |
6170 | Builder.restoreIP(IP: CodeGenIP); |
6171 | |
6172 | const CapturedStmt *CS = S.getInnermostCapturedStmt(); |
6173 | if (C) { |
6174 | llvm::BasicBlock *FiniBB = splitBBWithSuffix( |
6175 | Builder, /*CreateBranch=*/false, Suffix: ".ordered.after" ); |
6176 | llvm::SmallVector<llvm::Value *, 16> CapturedVars; |
6177 | GenerateOpenMPCapturedVars(S: *CS, CapturedVars); |
6178 | llvm::Function *OutlinedFn = |
6179 | emitOutlinedOrderedFunction(CGM, S: CS, Loc: S.getBeginLoc()); |
6180 | assert(S.getBeginLoc().isValid() && |
6181 | "Outlined function call location must be valid." ); |
6182 | ApplyDebugLocation::CreateDefaultArtificial(CGF&: *this, TemporaryLocation: S.getBeginLoc()); |
6183 | OMPBuilderCBHelpers::EmitCaptureStmt(CGF&: *this, CodeGenIP, FiniBB&: *FiniBB, |
6184 | Fn: OutlinedFn, Args: CapturedVars); |
6185 | } else { |
6186 | OMPBuilderCBHelpers::EmitOMPInlinedRegionBody( |
6187 | CGF&: *this, RegionBodyStmt: CS->getCapturedStmt(), AllocaIP, CodeGenIP, RegionName: "ordered" ); |
6188 | } |
6189 | return llvm::Error::success(); |
6190 | }; |
6191 | |
6192 | OMPLexicalScope Scope(*this, S, OMPD_unknown); |
6193 | llvm::OpenMPIRBuilder::InsertPointTy AfterIP = cantFail( |
6194 | ValOrErr: OMPBuilder.createOrderedThreadsSimd(Loc: Builder, BodyGenCB, FiniCB, IsThreads: !C)); |
6195 | Builder.restoreIP(IP: AfterIP); |
6196 | } |
6197 | return; |
6198 | } |
6199 | |
6200 | if (S.hasClausesOfKind<OMPDependClause>()) { |
6201 | assert(!S.hasAssociatedStmt() && |
6202 | "No associated statement must be in ordered depend construct." ); |
6203 | for (const auto *DC : S.getClausesOfKind<OMPDependClause>()) |
6204 | CGM.getOpenMPRuntime().emitDoacrossOrdered(CGF&: *this, C: DC); |
6205 | return; |
6206 | } |
6207 | if (S.hasClausesOfKind<OMPDoacrossClause>()) { |
6208 | assert(!S.hasAssociatedStmt() && |
6209 | "No associated statement must be in ordered doacross construct." ); |
6210 | for (const auto *DC : S.getClausesOfKind<OMPDoacrossClause>()) |
6211 | CGM.getOpenMPRuntime().emitDoacrossOrdered(CGF&: *this, C: DC); |
6212 | return; |
6213 | } |
6214 | const auto *C = S.getSingleClause<OMPSIMDClause>(); |
6215 | auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF, |
6216 | PrePostActionTy &Action) { |
6217 | const CapturedStmt *CS = S.getInnermostCapturedStmt(); |
6218 | if (C) { |
6219 | llvm::SmallVector<llvm::Value *, 16> CapturedVars; |
6220 | CGF.GenerateOpenMPCapturedVars(S: *CS, CapturedVars); |
6221 | llvm::Function *OutlinedFn = |
6222 | emitOutlinedOrderedFunction(CGM, S: CS, Loc: S.getBeginLoc()); |
6223 | CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc: S.getBeginLoc(), |
6224 | OutlinedFn, Args: CapturedVars); |
6225 | } else { |
6226 | Action.Enter(CGF); |
6227 | CGF.EmitStmt(S: CS->getCapturedStmt()); |
6228 | } |
6229 | }; |
6230 | OMPLexicalScope Scope(*this, S, OMPD_unknown); |
6231 | CGM.getOpenMPRuntime().emitOrderedRegion(CGF&: *this, OrderedOpGen: CodeGen, Loc: S.getBeginLoc(), IsThreads: !C); |
6232 | } |
6233 | |
6234 | static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val, |
6235 | QualType SrcType, QualType DestType, |
6236 | SourceLocation Loc) { |
6237 | assert(CGF.hasScalarEvaluationKind(DestType) && |
6238 | "DestType must have scalar evaluation kind." ); |
6239 | assert(!Val.isAggregate() && "Must be a scalar or complex." ); |
6240 | return Val.isScalar() ? CGF.EmitScalarConversion(Src: Val.getScalarVal(), SrcTy: SrcType, |
6241 | DstTy: DestType, Loc) |
6242 | : CGF.EmitComplexToScalarConversion( |
6243 | Src: Val.getComplexVal(), SrcTy: SrcType, DstTy: DestType, Loc); |
6244 | } |
6245 | |
6246 | static CodeGenFunction::ComplexPairTy |
6247 | convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType, |
6248 | QualType DestType, SourceLocation Loc) { |
6249 | assert(CGF.getEvaluationKind(DestType) == TEK_Complex && |
6250 | "DestType must have complex evaluation kind." ); |
6251 | CodeGenFunction::ComplexPairTy ComplexVal; |
6252 | if (Val.isScalar()) { |
6253 | // Convert the input element to the element type of the complex. |
6254 | QualType DestElementType = |
6255 | DestType->castAs<ComplexType>()->getElementType(); |
6256 | llvm::Value *ScalarVal = CGF.EmitScalarConversion( |
6257 | Src: Val.getScalarVal(), SrcTy: SrcType, DstTy: DestElementType, Loc); |
6258 | ComplexVal = CodeGenFunction::ComplexPairTy( |
6259 | ScalarVal, llvm::Constant::getNullValue(Ty: ScalarVal->getType())); |
6260 | } else { |
6261 | assert(Val.isComplex() && "Must be a scalar or complex." ); |
6262 | QualType SrcElementType = SrcType->castAs<ComplexType>()->getElementType(); |
6263 | QualType DestElementType = |
6264 | DestType->castAs<ComplexType>()->getElementType(); |
6265 | ComplexVal.first = CGF.EmitScalarConversion( |
6266 | Src: Val.getComplexVal().first, SrcTy: SrcElementType, DstTy: DestElementType, Loc); |
6267 | ComplexVal.second = CGF.EmitScalarConversion( |
6268 | Src: Val.getComplexVal().second, SrcTy: SrcElementType, DstTy: DestElementType, Loc); |
6269 | } |
6270 | return ComplexVal; |
6271 | } |
6272 | |
6273 | static void emitSimpleAtomicStore(CodeGenFunction &CGF, llvm::AtomicOrdering AO, |
6274 | LValue LVal, RValue RVal) { |
6275 | if (LVal.isGlobalReg()) |
6276 | CGF.EmitStoreThroughGlobalRegLValue(Src: RVal, Dst: LVal); |
6277 | else |
6278 | CGF.EmitAtomicStore(rvalue: RVal, lvalue: LVal, AO, IsVolatile: LVal.isVolatile(), /*isInit=*/false); |
6279 | } |
6280 | |
6281 | static RValue emitSimpleAtomicLoad(CodeGenFunction &CGF, |
6282 | llvm::AtomicOrdering AO, LValue LVal, |
6283 | SourceLocation Loc) { |
6284 | if (LVal.isGlobalReg()) |
6285 | return CGF.EmitLoadOfLValue(V: LVal, Loc); |
6286 | return CGF.EmitAtomicLoad( |
6287 | lvalue: LVal, loc: Loc, AO: llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(SuccessOrdering: AO), |
6288 | IsVolatile: LVal.isVolatile()); |
6289 | } |
6290 | |
6291 | void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal, |
6292 | QualType RValTy, SourceLocation Loc) { |
6293 | switch (getEvaluationKind(T: LVal.getType())) { |
6294 | case TEK_Scalar: |
6295 | EmitStoreThroughLValue(Src: RValue::get(V: convertToScalarValue( |
6296 | CGF&: *this, Val: RVal, SrcType: RValTy, DestType: LVal.getType(), Loc)), |
6297 | Dst: LVal); |
6298 | break; |
6299 | case TEK_Complex: |
6300 | EmitStoreOfComplex( |
6301 | V: convertToComplexValue(CGF&: *this, Val: RVal, SrcType: RValTy, DestType: LVal.getType(), Loc), dest: LVal, |
6302 | /*isInit=*/false); |
6303 | break; |
6304 | case TEK_Aggregate: |
6305 | llvm_unreachable("Must be a scalar or complex." ); |
6306 | } |
6307 | } |
6308 | |
6309 | static void emitOMPAtomicReadExpr(CodeGenFunction &CGF, llvm::AtomicOrdering AO, |
6310 | const Expr *X, const Expr *V, |
6311 | SourceLocation Loc) { |
6312 | // v = x; |
6313 | assert(V->isLValue() && "V of 'omp atomic read' is not lvalue" ); |
6314 | assert(X->isLValue() && "X of 'omp atomic read' is not lvalue" ); |
6315 | LValue XLValue = CGF.EmitLValue(E: X); |
6316 | LValue VLValue = CGF.EmitLValue(E: V); |
6317 | RValue Res = emitSimpleAtomicLoad(CGF, AO, LVal: XLValue, Loc); |
6318 | // OpenMP, 2.17.7, atomic Construct |
6319 | // If the read or capture clause is specified and the acquire, acq_rel, or |
6320 | // seq_cst clause is specified then the strong flush on exit from the atomic |
6321 | // operation is also an acquire flush. |
6322 | switch (AO) { |
6323 | case llvm::AtomicOrdering::Acquire: |
6324 | case llvm::AtomicOrdering::AcquireRelease: |
6325 | case llvm::AtomicOrdering::SequentiallyConsistent: |
6326 | CGF.CGM.getOpenMPRuntime().emitFlush(CGF, Vars: {}, Loc, |
6327 | AO: llvm::AtomicOrdering::Acquire); |
6328 | break; |
6329 | case llvm::AtomicOrdering::Monotonic: |
6330 | case llvm::AtomicOrdering::Release: |
6331 | break; |
6332 | case llvm::AtomicOrdering::NotAtomic: |
6333 | case llvm::AtomicOrdering::Unordered: |
6334 | llvm_unreachable("Unexpected ordering." ); |
6335 | } |
6336 | CGF.emitOMPSimpleStore(LVal: VLValue, RVal: Res, RValTy: X->getType().getNonReferenceType(), Loc); |
6337 | CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, LHS: V); |
6338 | } |
6339 | |
6340 | static void emitOMPAtomicWriteExpr(CodeGenFunction &CGF, |
6341 | llvm::AtomicOrdering AO, const Expr *X, |
6342 | const Expr *E, SourceLocation Loc) { |
6343 | // x = expr; |
6344 | assert(X->isLValue() && "X of 'omp atomic write' is not lvalue" ); |
6345 | emitSimpleAtomicStore(CGF, AO, LVal: CGF.EmitLValue(E: X), RVal: CGF.EmitAnyExpr(E)); |
6346 | CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, LHS: X); |
6347 | // OpenMP, 2.17.7, atomic Construct |
6348 | // If the write, update, or capture clause is specified and the release, |
6349 | // acq_rel, or seq_cst clause is specified then the strong flush on entry to |
6350 | // the atomic operation is also a release flush. |
6351 | switch (AO) { |
6352 | case llvm::AtomicOrdering::Release: |
6353 | case llvm::AtomicOrdering::AcquireRelease: |
6354 | case llvm::AtomicOrdering::SequentiallyConsistent: |
6355 | CGF.CGM.getOpenMPRuntime().emitFlush(CGF, Vars: {}, Loc, |
6356 | AO: llvm::AtomicOrdering::Release); |
6357 | break; |
6358 | case llvm::AtomicOrdering::Acquire: |
6359 | case llvm::AtomicOrdering::Monotonic: |
6360 | break; |
6361 | case llvm::AtomicOrdering::NotAtomic: |
6362 | case llvm::AtomicOrdering::Unordered: |
6363 | llvm_unreachable("Unexpected ordering." ); |
6364 | } |
6365 | } |
6366 | |
6367 | static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X, |
6368 | RValue Update, |
6369 | BinaryOperatorKind BO, |
6370 | llvm::AtomicOrdering AO, |
6371 | bool IsXLHSInRHSPart) { |
6372 | ASTContext &Context = CGF.getContext(); |
6373 | // Allow atomicrmw only if 'x' and 'update' are integer values, lvalue for 'x' |
6374 | // expression is simple and atomic is allowed for the given type for the |
6375 | // target platform. |
6376 | if (BO == BO_Comma || !Update.isScalar() || !X.isSimple() || |
6377 | (!isa<llvm::ConstantInt>(Val: Update.getScalarVal()) && |
6378 | (Update.getScalarVal()->getType() != X.getAddress().getElementType())) || |
6379 | !Context.getTargetInfo().hasBuiltinAtomic( |
6380 | AtomicSizeInBits: Context.getTypeSize(T: X.getType()), AlignmentInBits: Context.toBits(CharSize: X.getAlignment()))) |
6381 | return std::make_pair(x: false, y: RValue::get(V: nullptr)); |
6382 | |
6383 | auto &&CheckAtomicSupport = [&CGF](llvm::Type *T, BinaryOperatorKind BO) { |
6384 | if (T->isIntegerTy()) |
6385 | return true; |
6386 | |
6387 | if (T->isFloatingPointTy() && (BO == BO_Add || BO == BO_Sub)) |
6388 | return llvm::isPowerOf2_64(Value: CGF.CGM.getDataLayout().getTypeStoreSize(Ty: T)); |
6389 | |
6390 | return false; |
6391 | }; |
6392 | |
6393 | if (!CheckAtomicSupport(Update.getScalarVal()->getType(), BO) || |
6394 | !CheckAtomicSupport(X.getAddress().getElementType(), BO)) |
6395 | return std::make_pair(x: false, y: RValue::get(V: nullptr)); |
6396 | |
6397 | bool IsInteger = X.getAddress().getElementType()->isIntegerTy(); |
6398 | llvm::AtomicRMWInst::BinOp RMWOp; |
6399 | switch (BO) { |
6400 | case BO_Add: |
6401 | RMWOp = IsInteger ? llvm::AtomicRMWInst::Add : llvm::AtomicRMWInst::FAdd; |
6402 | break; |
6403 | case BO_Sub: |
6404 | if (!IsXLHSInRHSPart) |
6405 | return std::make_pair(x: false, y: RValue::get(V: nullptr)); |
6406 | RMWOp = IsInteger ? llvm::AtomicRMWInst::Sub : llvm::AtomicRMWInst::FSub; |
6407 | break; |
6408 | case BO_And: |
6409 | RMWOp = llvm::AtomicRMWInst::And; |
6410 | break; |
6411 | case BO_Or: |
6412 | RMWOp = llvm::AtomicRMWInst::Or; |
6413 | break; |
6414 | case BO_Xor: |
6415 | RMWOp = llvm::AtomicRMWInst::Xor; |
6416 | break; |
6417 | case BO_LT: |
6418 | if (IsInteger) |
6419 | RMWOp = X.getType()->hasSignedIntegerRepresentation() |
6420 | ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min |
6421 | : llvm::AtomicRMWInst::Max) |
6422 | : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin |
6423 | : llvm::AtomicRMWInst::UMax); |
6424 | else |
6425 | RMWOp = IsXLHSInRHSPart ? llvm::AtomicRMWInst::FMin |
6426 | : llvm::AtomicRMWInst::FMax; |
6427 | break; |
6428 | case BO_GT: |
6429 | if (IsInteger) |
6430 | RMWOp = X.getType()->hasSignedIntegerRepresentation() |
6431 | ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max |
6432 | : llvm::AtomicRMWInst::Min) |
6433 | : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax |
6434 | : llvm::AtomicRMWInst::UMin); |
6435 | else |
6436 | RMWOp = IsXLHSInRHSPart ? llvm::AtomicRMWInst::FMax |
6437 | : llvm::AtomicRMWInst::FMin; |
6438 | break; |
6439 | case BO_Assign: |
6440 | RMWOp = llvm::AtomicRMWInst::Xchg; |
6441 | break; |
6442 | case BO_Mul: |
6443 | case BO_Div: |
6444 | case BO_Rem: |
6445 | case BO_Shl: |
6446 | case BO_Shr: |
6447 | case BO_LAnd: |
6448 | case BO_LOr: |
6449 | return std::make_pair(x: false, y: RValue::get(V: nullptr)); |
6450 | case BO_PtrMemD: |
6451 | case BO_PtrMemI: |
6452 | case BO_LE: |
6453 | case BO_GE: |
6454 | case BO_EQ: |
6455 | case BO_NE: |
6456 | case BO_Cmp: |
6457 | case BO_AddAssign: |
6458 | case BO_SubAssign: |
6459 | case BO_AndAssign: |
6460 | case BO_OrAssign: |
6461 | case BO_XorAssign: |
6462 | case BO_MulAssign: |
6463 | case BO_DivAssign: |
6464 | case BO_RemAssign: |
6465 | case BO_ShlAssign: |
6466 | case BO_ShrAssign: |
6467 | case BO_Comma: |
6468 | llvm_unreachable("Unsupported atomic update operation" ); |
6469 | } |
6470 | llvm::Value *UpdateVal = Update.getScalarVal(); |
6471 | if (auto *IC = dyn_cast<llvm::ConstantInt>(Val: UpdateVal)) { |
6472 | if (IsInteger) |
6473 | UpdateVal = CGF.Builder.CreateIntCast( |
6474 | V: IC, DestTy: X.getAddress().getElementType(), |
6475 | isSigned: X.getType()->hasSignedIntegerRepresentation()); |
6476 | else |
6477 | UpdateVal = CGF.Builder.CreateCast(Op: llvm::Instruction::CastOps::UIToFP, V: IC, |
6478 | DestTy: X.getAddress().getElementType()); |
6479 | } |
6480 | llvm::AtomicRMWInst *Res = |
6481 | CGF.emitAtomicRMWInst(Op: RMWOp, Addr: X.getAddress(), Val: UpdateVal, Order: AO); |
6482 | return std::make_pair(x: true, y: RValue::get(V: Res)); |
6483 | } |
6484 | |
6485 | std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr( |
6486 | LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart, |
6487 | llvm::AtomicOrdering AO, SourceLocation Loc, |
6488 | const llvm::function_ref<RValue(RValue)> CommonGen) { |
6489 | // Update expressions are allowed to have the following forms: |
6490 | // x binop= expr; -> xrval + expr; |
6491 | // x++, ++x -> xrval + 1; |
6492 | // x--, --x -> xrval - 1; |
6493 | // x = x binop expr; -> xrval binop expr |
6494 | // x = expr Op x; - > expr binop xrval; |
6495 | auto Res = emitOMPAtomicRMW(CGF&: *this, X, Update: E, BO, AO, IsXLHSInRHSPart); |
6496 | if (!Res.first) { |
6497 | if (X.isGlobalReg()) { |
6498 | // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop |
6499 | // 'xrval'. |
6500 | EmitStoreThroughLValue(Src: CommonGen(EmitLoadOfLValue(V: X, Loc)), Dst: X); |
6501 | } else { |
6502 | // Perform compare-and-swap procedure. |
6503 | EmitAtomicUpdate(LVal: X, AO, UpdateOp: CommonGen, IsVolatile: X.getType().isVolatileQualified()); |
6504 | } |
6505 | } |
6506 | return Res; |
6507 | } |
6508 | |
6509 | static void emitOMPAtomicUpdateExpr(CodeGenFunction &CGF, |
6510 | llvm::AtomicOrdering AO, const Expr *X, |
6511 | const Expr *E, const Expr *UE, |
6512 | bool IsXLHSInRHSPart, SourceLocation Loc) { |
6513 | assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) && |
6514 | "Update expr in 'atomic update' must be a binary operator." ); |
6515 | const auto *BOUE = cast<BinaryOperator>(Val: UE->IgnoreImpCasts()); |
6516 | // Update expressions are allowed to have the following forms: |
6517 | // x binop= expr; -> xrval + expr; |
6518 | // x++, ++x -> xrval + 1; |
6519 | // x--, --x -> xrval - 1; |
6520 | // x = x binop expr; -> xrval binop expr |
6521 | // x = expr Op x; - > expr binop xrval; |
6522 | assert(X->isLValue() && "X of 'omp atomic update' is not lvalue" ); |
6523 | LValue XLValue = CGF.EmitLValue(E: X); |
6524 | RValue ExprRValue = CGF.EmitAnyExpr(E); |
6525 | const auto *LHS = cast<OpaqueValueExpr>(Val: BOUE->getLHS()->IgnoreImpCasts()); |
6526 | const auto *RHS = cast<OpaqueValueExpr>(Val: BOUE->getRHS()->IgnoreImpCasts()); |
6527 | const OpaqueValueExpr *XRValExpr = IsXLHSInRHSPart ? LHS : RHS; |
6528 | const OpaqueValueExpr *ERValExpr = IsXLHSInRHSPart ? RHS : LHS; |
6529 | auto &&Gen = [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) { |
6530 | CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue); |
6531 | CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue); |
6532 | return CGF.EmitAnyExpr(E: UE); |
6533 | }; |
6534 | (void)CGF.EmitOMPAtomicSimpleUpdateExpr( |
6535 | X: XLValue, E: ExprRValue, BO: BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, CommonGen: Gen); |
6536 | CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, LHS: X); |
6537 | // OpenMP, 2.17.7, atomic Construct |
6538 | // If the write, update, or capture clause is specified and the release, |
6539 | // acq_rel, or seq_cst clause is specified then the strong flush on entry to |
6540 | // the atomic operation is also a release flush. |
6541 | switch (AO) { |
6542 | case llvm::AtomicOrdering::Release: |
6543 | case llvm::AtomicOrdering::AcquireRelease: |
6544 | case llvm::AtomicOrdering::SequentiallyConsistent: |
6545 | CGF.CGM.getOpenMPRuntime().emitFlush(CGF, Vars: {}, Loc, |
6546 | AO: llvm::AtomicOrdering::Release); |
6547 | break; |
6548 | case llvm::AtomicOrdering::Acquire: |
6549 | case llvm::AtomicOrdering::Monotonic: |
6550 | break; |
6551 | case llvm::AtomicOrdering::NotAtomic: |
6552 | case llvm::AtomicOrdering::Unordered: |
6553 | llvm_unreachable("Unexpected ordering." ); |
6554 | } |
6555 | } |
6556 | |
6557 | static RValue convertToType(CodeGenFunction &CGF, RValue Value, |
6558 | QualType SourceType, QualType ResType, |
6559 | SourceLocation Loc) { |
6560 | switch (CGF.getEvaluationKind(T: ResType)) { |
6561 | case TEK_Scalar: |
6562 | return RValue::get( |
6563 | V: convertToScalarValue(CGF, Val: Value, SrcType: SourceType, DestType: ResType, Loc)); |
6564 | case TEK_Complex: { |
6565 | auto Res = convertToComplexValue(CGF, Val: Value, SrcType: SourceType, DestType: ResType, Loc); |
6566 | return RValue::getComplex(V1: Res.first, V2: Res.second); |
6567 | } |
6568 | case TEK_Aggregate: |
6569 | break; |
6570 | } |
6571 | llvm_unreachable("Must be a scalar or complex." ); |
6572 | } |
6573 | |
6574 | static void emitOMPAtomicCaptureExpr(CodeGenFunction &CGF, |
6575 | llvm::AtomicOrdering AO, |
6576 | bool IsPostfixUpdate, const Expr *V, |
6577 | const Expr *X, const Expr *E, |
6578 | const Expr *UE, bool IsXLHSInRHSPart, |
6579 | SourceLocation Loc) { |
6580 | assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue" ); |
6581 | assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue" ); |
6582 | RValue NewVVal; |
6583 | LValue VLValue = CGF.EmitLValue(E: V); |
6584 | LValue XLValue = CGF.EmitLValue(E: X); |
6585 | RValue ExprRValue = CGF.EmitAnyExpr(E); |
6586 | QualType NewVValType; |
6587 | if (UE) { |
6588 | // 'x' is updated with some additional value. |
6589 | assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) && |
6590 | "Update expr in 'atomic capture' must be a binary operator." ); |
6591 | const auto *BOUE = cast<BinaryOperator>(Val: UE->IgnoreImpCasts()); |
6592 | // Update expressions are allowed to have the following forms: |
6593 | // x binop= expr; -> xrval + expr; |
6594 | // x++, ++x -> xrval + 1; |
6595 | // x--, --x -> xrval - 1; |
6596 | // x = x binop expr; -> xrval binop expr |
6597 | // x = expr Op x; - > expr binop xrval; |
6598 | const auto *LHS = cast<OpaqueValueExpr>(Val: BOUE->getLHS()->IgnoreImpCasts()); |
6599 | const auto *RHS = cast<OpaqueValueExpr>(Val: BOUE->getRHS()->IgnoreImpCasts()); |
6600 | const OpaqueValueExpr *XRValExpr = IsXLHSInRHSPart ? LHS : RHS; |
6601 | NewVValType = XRValExpr->getType(); |
6602 | const OpaqueValueExpr *ERValExpr = IsXLHSInRHSPart ? RHS : LHS; |
6603 | auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr, |
6604 | IsPostfixUpdate](RValue XRValue) { |
6605 | CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue); |
6606 | CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue); |
6607 | RValue Res = CGF.EmitAnyExpr(E: UE); |
6608 | NewVVal = IsPostfixUpdate ? XRValue : Res; |
6609 | return Res; |
6610 | }; |
6611 | auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr( |
6612 | X: XLValue, E: ExprRValue, BO: BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, CommonGen: Gen); |
6613 | CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, LHS: X); |
6614 | if (Res.first) { |
6615 | // 'atomicrmw' instruction was generated. |
6616 | if (IsPostfixUpdate) { |
6617 | // Use old value from 'atomicrmw'. |
6618 | NewVVal = Res.second; |
6619 | } else { |
6620 | // 'atomicrmw' does not provide new value, so evaluate it using old |
6621 | // value of 'x'. |
6622 | CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue); |
6623 | CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, Res.second); |
6624 | NewVVal = CGF.EmitAnyExpr(E: UE); |
6625 | } |
6626 | } |
6627 | } else { |
6628 | // 'x' is simply rewritten with some 'expr'. |
6629 | NewVValType = X->getType().getNonReferenceType(); |
6630 | ExprRValue = convertToType(CGF, Value: ExprRValue, SourceType: E->getType(), |
6631 | ResType: X->getType().getNonReferenceType(), Loc); |
6632 | auto &&Gen = [&NewVVal, ExprRValue](RValue XRValue) { |
6633 | NewVVal = XRValue; |
6634 | return ExprRValue; |
6635 | }; |
6636 | // Try to perform atomicrmw xchg, otherwise simple exchange. |
6637 | auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr( |
6638 | X: XLValue, E: ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO, |
6639 | Loc, CommonGen: Gen); |
6640 | CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, LHS: X); |
6641 | if (Res.first) { |
6642 | // 'atomicrmw' instruction was generated. |
6643 | NewVVal = IsPostfixUpdate ? Res.second : ExprRValue; |
6644 | } |
6645 | } |
6646 | // Emit post-update store to 'v' of old/new 'x' value. |
6647 | CGF.emitOMPSimpleStore(LVal: VLValue, RVal: NewVVal, RValTy: NewVValType, Loc); |
6648 | CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, LHS: V); |
6649 | // OpenMP 5.1 removes the required flush for capture clause. |
6650 | if (CGF.CGM.getLangOpts().OpenMP < 51) { |
6651 | // OpenMP, 2.17.7, atomic Construct |
6652 | // If the write, update, or capture clause is specified and the release, |
6653 | // acq_rel, or seq_cst clause is specified then the strong flush on entry to |
6654 | // the atomic operation is also a release flush. |
6655 | // If the read or capture clause is specified and the acquire, acq_rel, or |
6656 | // seq_cst clause is specified then the strong flush on exit from the atomic |
6657 | // operation is also an acquire flush. |
6658 | switch (AO) { |
6659 | case llvm::AtomicOrdering::Release: |
6660 | CGF.CGM.getOpenMPRuntime().emitFlush(CGF, Vars: {}, Loc, |
6661 | AO: llvm::AtomicOrdering::Release); |
6662 | break; |
6663 | case llvm::AtomicOrdering::Acquire: |
6664 | CGF.CGM.getOpenMPRuntime().emitFlush(CGF, Vars: {}, Loc, |
6665 | AO: llvm::AtomicOrdering::Acquire); |
6666 | break; |
6667 | case llvm::AtomicOrdering::AcquireRelease: |
6668 | case llvm::AtomicOrdering::SequentiallyConsistent: |
6669 | CGF.CGM.getOpenMPRuntime().emitFlush( |
6670 | CGF, Vars: {}, Loc, AO: llvm::AtomicOrdering::AcquireRelease); |
6671 | break; |
6672 | case llvm::AtomicOrdering::Monotonic: |
6673 | break; |
6674 | case llvm::AtomicOrdering::NotAtomic: |
6675 | case llvm::AtomicOrdering::Unordered: |
6676 | llvm_unreachable("Unexpected ordering." ); |
6677 | } |
6678 | } |
6679 | } |
6680 | |
6681 | static void emitOMPAtomicCompareExpr( |
6682 | CodeGenFunction &CGF, llvm::AtomicOrdering AO, llvm::AtomicOrdering FailAO, |
6683 | const Expr *X, const Expr *V, const Expr *R, const Expr *E, const Expr *D, |
6684 | const Expr *CE, bool IsXBinopExpr, bool IsPostfixUpdate, bool IsFailOnly, |
6685 | SourceLocation Loc) { |
6686 | llvm::OpenMPIRBuilder &OMPBuilder = |
6687 | CGF.CGM.getOpenMPRuntime().getOMPBuilder(); |
6688 | |
6689 | OMPAtomicCompareOp Op; |
6690 | assert(isa<BinaryOperator>(CE) && "CE is not a BinaryOperator" ); |
6691 | switch (cast<BinaryOperator>(Val: CE)->getOpcode()) { |
6692 | case BO_EQ: |
6693 | Op = OMPAtomicCompareOp::EQ; |
6694 | break; |
6695 | case BO_LT: |
6696 | Op = OMPAtomicCompareOp::MIN; |
6697 | break; |
6698 | case BO_GT: |
6699 | Op = OMPAtomicCompareOp::MAX; |
6700 | break; |
6701 | default: |
6702 | llvm_unreachable("unsupported atomic compare binary operator" ); |
6703 | } |
6704 | |
6705 | LValue XLVal = CGF.EmitLValue(E: X); |
6706 | Address XAddr = XLVal.getAddress(); |
6707 | |
6708 | auto EmitRValueWithCastIfNeeded = [&CGF, Loc](const Expr *X, const Expr *E) { |
6709 | if (X->getType() == E->getType()) |
6710 | return CGF.EmitScalarExpr(E); |
6711 | const Expr *NewE = E->IgnoreImplicitAsWritten(); |
6712 | llvm::Value *V = CGF.EmitScalarExpr(E: NewE); |
6713 | if (NewE->getType() == X->getType()) |
6714 | return V; |
6715 | return CGF.EmitScalarConversion(Src: V, SrcTy: NewE->getType(), DstTy: X->getType(), Loc); |
6716 | }; |
6717 | |
6718 | llvm::Value *EVal = EmitRValueWithCastIfNeeded(X, E); |
6719 | llvm::Value *DVal = D ? EmitRValueWithCastIfNeeded(X, D) : nullptr; |
6720 | if (auto *CI = dyn_cast<llvm::ConstantInt>(Val: EVal)) |
6721 | EVal = CGF.Builder.CreateIntCast( |
6722 | V: CI, DestTy: XLVal.getAddress().getElementType(), |
6723 | isSigned: E->getType()->hasSignedIntegerRepresentation()); |
6724 | if (DVal) |
6725 | if (auto *CI = dyn_cast<llvm::ConstantInt>(Val: DVal)) |
6726 | DVal = CGF.Builder.CreateIntCast( |
6727 | V: CI, DestTy: XLVal.getAddress().getElementType(), |
6728 | isSigned: D->getType()->hasSignedIntegerRepresentation()); |
6729 | |
6730 | llvm::OpenMPIRBuilder::AtomicOpValue XOpVal{ |
6731 | .Var: XAddr.emitRawPointer(CGF), .ElemTy: XAddr.getElementType(), |
6732 | .IsSigned: X->getType()->hasSignedIntegerRepresentation(), |
6733 | .IsVolatile: X->getType().isVolatileQualified()}; |
6734 | llvm::OpenMPIRBuilder::AtomicOpValue VOpVal, ROpVal; |
6735 | if (V) { |
6736 | LValue LV = CGF.EmitLValue(E: V); |
6737 | Address Addr = LV.getAddress(); |
6738 | VOpVal = {.Var: Addr.emitRawPointer(CGF), .ElemTy: Addr.getElementType(), |
6739 | .IsSigned: V->getType()->hasSignedIntegerRepresentation(), |
6740 | .IsVolatile: V->getType().isVolatileQualified()}; |
6741 | } |
6742 | if (R) { |
6743 | LValue LV = CGF.EmitLValue(E: R); |
6744 | Address Addr = LV.getAddress(); |
6745 | ROpVal = {.Var: Addr.emitRawPointer(CGF), .ElemTy: Addr.getElementType(), |
6746 | .IsSigned: R->getType()->hasSignedIntegerRepresentation(), |
6747 | .IsVolatile: R->getType().isVolatileQualified()}; |
6748 | } |
6749 | |
6750 | if (FailAO == llvm::AtomicOrdering::NotAtomic) { |
6751 | // fail clause was not mentioned on the |
6752 | // "#pragma omp atomic compare" construct. |
6753 | CGF.Builder.restoreIP(IP: OMPBuilder.createAtomicCompare( |
6754 | Loc: CGF.Builder, X&: XOpVal, V&: VOpVal, R&: ROpVal, E: EVal, D: DVal, AO, Op, IsXBinopExpr, |
6755 | IsPostfixUpdate, IsFailOnly)); |
6756 | } else |
6757 | CGF.Builder.restoreIP(IP: OMPBuilder.createAtomicCompare( |
6758 | Loc: CGF.Builder, X&: XOpVal, V&: VOpVal, R&: ROpVal, E: EVal, D: DVal, AO, Op, IsXBinopExpr, |
6759 | IsPostfixUpdate, IsFailOnly, Failure: FailAO)); |
6760 | } |
6761 | |
6762 | static void emitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind, |
6763 | llvm::AtomicOrdering AO, |
6764 | llvm::AtomicOrdering FailAO, bool IsPostfixUpdate, |
6765 | const Expr *X, const Expr *V, const Expr *R, |
6766 | const Expr *E, const Expr *UE, const Expr *D, |
6767 | const Expr *CE, bool IsXLHSInRHSPart, |
6768 | bool IsFailOnly, SourceLocation Loc) { |
6769 | switch (Kind) { |
6770 | case OMPC_read: |
6771 | emitOMPAtomicReadExpr(CGF, AO, X, V, Loc); |
6772 | break; |
6773 | case OMPC_write: |
6774 | emitOMPAtomicWriteExpr(CGF, AO, X, E, Loc); |
6775 | break; |
6776 | case OMPC_unknown: |
6777 | case OMPC_update: |
6778 | emitOMPAtomicUpdateExpr(CGF, AO, X, E, UE, IsXLHSInRHSPart, Loc); |
6779 | break; |
6780 | case OMPC_capture: |
6781 | emitOMPAtomicCaptureExpr(CGF, AO, IsPostfixUpdate, V, X, E, UE, |
6782 | IsXLHSInRHSPart, Loc); |
6783 | break; |
6784 | case OMPC_compare: { |
6785 | emitOMPAtomicCompareExpr(CGF, AO, FailAO, X, V, R, E, D, CE, |
6786 | IsXBinopExpr: IsXLHSInRHSPart, IsPostfixUpdate, IsFailOnly, Loc); |
6787 | break; |
6788 | } |
6789 | default: |
6790 | llvm_unreachable("Clause is not allowed in 'omp atomic'." ); |
6791 | } |
6792 | } |
6793 | |
6794 | void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) { |
6795 | llvm::AtomicOrdering AO = CGM.getOpenMPRuntime().getDefaultMemoryOrdering(); |
6796 | // Fail Memory Clause Ordering. |
6797 | llvm::AtomicOrdering FailAO = llvm::AtomicOrdering::NotAtomic; |
6798 | bool MemOrderingSpecified = false; |
6799 | if (S.getSingleClause<OMPSeqCstClause>()) { |
6800 | AO = llvm::AtomicOrdering::SequentiallyConsistent; |
6801 | MemOrderingSpecified = true; |
6802 | } else if (S.getSingleClause<OMPAcqRelClause>()) { |
6803 | AO = llvm::AtomicOrdering::AcquireRelease; |
6804 | MemOrderingSpecified = true; |
6805 | } else if (S.getSingleClause<OMPAcquireClause>()) { |
6806 | AO = llvm::AtomicOrdering::Acquire; |
6807 | MemOrderingSpecified = true; |
6808 | } else if (S.getSingleClause<OMPReleaseClause>()) { |
6809 | AO = llvm::AtomicOrdering::Release; |
6810 | MemOrderingSpecified = true; |
6811 | } else if (S.getSingleClause<OMPRelaxedClause>()) { |
6812 | AO = llvm::AtomicOrdering::Monotonic; |
6813 | MemOrderingSpecified = true; |
6814 | } |
6815 | llvm::SmallSet<OpenMPClauseKind, 2> KindsEncountered; |
6816 | OpenMPClauseKind Kind = OMPC_unknown; |
6817 | for (const OMPClause *C : S.clauses()) { |
6818 | // Find first clause (skip seq_cst|acq_rel|aqcuire|release|relaxed clause, |
6819 | // if it is first). |
6820 | OpenMPClauseKind K = C->getClauseKind(); |
6821 | // TBD |
6822 | if (K == OMPC_weak) |
6823 | return; |
6824 | if (K == OMPC_seq_cst || K == OMPC_acq_rel || K == OMPC_acquire || |
6825 | K == OMPC_release || K == OMPC_relaxed || K == OMPC_hint) |
6826 | continue; |
6827 | Kind = K; |
6828 | KindsEncountered.insert(V: K); |
6829 | } |
6830 | // We just need to correct Kind here. No need to set a bool saying it is |
6831 | // actually compare capture because we can tell from whether V and R are |
6832 | // nullptr. |
6833 | if (KindsEncountered.contains(V: OMPC_compare) && |
6834 | KindsEncountered.contains(V: OMPC_capture)) |
6835 | Kind = OMPC_compare; |
6836 | if (!MemOrderingSpecified) { |
6837 | llvm::AtomicOrdering DefaultOrder = |
6838 | CGM.getOpenMPRuntime().getDefaultMemoryOrdering(); |
6839 | if (DefaultOrder == llvm::AtomicOrdering::Monotonic || |
6840 | DefaultOrder == llvm::AtomicOrdering::SequentiallyConsistent || |
6841 | (DefaultOrder == llvm::AtomicOrdering::AcquireRelease && |
6842 | Kind == OMPC_capture)) { |
6843 | AO = DefaultOrder; |
6844 | } else if (DefaultOrder == llvm::AtomicOrdering::AcquireRelease) { |
6845 | if (Kind == OMPC_unknown || Kind == OMPC_update || Kind == OMPC_write) { |
6846 | AO = llvm::AtomicOrdering::Release; |
6847 | } else if (Kind == OMPC_read) { |
6848 | assert(Kind == OMPC_read && "Unexpected atomic kind." ); |
6849 | AO = llvm::AtomicOrdering::Acquire; |
6850 | } |
6851 | } |
6852 | } |
6853 | |
6854 | if (KindsEncountered.contains(V: OMPC_compare) && |
6855 | KindsEncountered.contains(V: OMPC_fail)) { |
6856 | Kind = OMPC_compare; |
6857 | const auto *FailClause = S.getSingleClause<OMPFailClause>(); |
6858 | if (FailClause) { |
6859 | OpenMPClauseKind FailParameter = FailClause->getFailParameter(); |
6860 | if (FailParameter == llvm::omp::OMPC_relaxed) |
6861 | FailAO = llvm::AtomicOrdering::Monotonic; |
6862 | else if (FailParameter == llvm::omp::OMPC_acquire) |
6863 | FailAO = llvm::AtomicOrdering::Acquire; |
6864 | else if (FailParameter == llvm::omp::OMPC_seq_cst) |
6865 | FailAO = llvm::AtomicOrdering::SequentiallyConsistent; |
6866 | } |
6867 | } |
6868 | |
6869 | LexicalScope Scope(*this, S.getSourceRange()); |
6870 | EmitStopPoint(S: S.getAssociatedStmt()); |
6871 | emitOMPAtomicExpr(CGF&: *this, Kind, AO, FailAO, IsPostfixUpdate: S.isPostfixUpdate(), X: S.getX(), |
6872 | V: S.getV(), R: S.getR(), E: S.getExpr(), UE: S.getUpdateExpr(), |
6873 | D: S.getD(), CE: S.getCondExpr(), IsXLHSInRHSPart: S.isXLHSInRHSPart(), |
6874 | IsFailOnly: S.isFailOnly(), Loc: S.getBeginLoc()); |
6875 | } |
6876 | |
6877 | static void emitCommonOMPTargetDirective(CodeGenFunction &CGF, |
6878 | const OMPExecutableDirective &S, |
6879 | const RegionCodeGenTy &CodeGen) { |
6880 | assert(isOpenMPTargetExecutionDirective(S.getDirectiveKind())); |
6881 | CodeGenModule &CGM = CGF.CGM; |
6882 | |
6883 | // On device emit this construct as inlined code. |
6884 | if (CGM.getLangOpts().OpenMPIsTargetDevice) { |
6885 | OMPLexicalScope Scope(CGF, S, OMPD_target); |
6886 | CGM.getOpenMPRuntime().emitInlinedDirective( |
6887 | CGF, InnermostKind: OMPD_target, CodeGen: [&S](CodeGenFunction &CGF, PrePostActionTy &) { |
6888 | CGF.EmitStmt(S: S.getInnermostCapturedStmt()->getCapturedStmt()); |
6889 | }); |
6890 | return; |
6891 | } |
6892 | |
6893 | auto LPCRegion = CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF, S); |
6894 | llvm::Function *Fn = nullptr; |
6895 | llvm::Constant *FnID = nullptr; |
6896 | |
6897 | const Expr *IfCond = nullptr; |
6898 | // Check for the at most one if clause associated with the target region. |
6899 | for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { |
6900 | if (C->getNameModifier() == OMPD_unknown || |
6901 | C->getNameModifier() == OMPD_target) { |
6902 | IfCond = C->getCondition(); |
6903 | break; |
6904 | } |
6905 | } |
6906 | |
6907 | // Check if we have any device clause associated with the directive. |
6908 | llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device( |
6909 | nullptr, OMPC_DEVICE_unknown); |
6910 | if (auto *C = S.getSingleClause<OMPDeviceClause>()) |
6911 | Device.setPointerAndInt(PtrVal: C->getDevice(), IntVal: C->getModifier()); |
6912 | |
6913 | // Check if we have an if clause whose conditional always evaluates to false |
6914 | // or if we do not have any targets specified. If so the target region is not |
6915 | // an offload entry point. |
6916 | bool IsOffloadEntry = true; |
6917 | if (IfCond) { |
6918 | bool Val; |
6919 | if (CGF.ConstantFoldsToSimpleInteger(Cond: IfCond, Result&: Val) && !Val) |
6920 | IsOffloadEntry = false; |
6921 | } |
6922 | if (CGM.getLangOpts().OMPTargetTriples.empty()) |
6923 | IsOffloadEntry = false; |
6924 | |
6925 | if (CGM.getLangOpts().OpenMPOffloadMandatory && !IsOffloadEntry) { |
6926 | unsigned DiagID = CGM.getDiags().getCustomDiagID( |
6927 | L: DiagnosticsEngine::Error, |
6928 | FormatString: "No offloading entry generated while offloading is mandatory." ); |
6929 | CGM.getDiags().Report(DiagID); |
6930 | } |
6931 | |
6932 | assert(CGF.CurFuncDecl && "No parent declaration for target region!" ); |
6933 | StringRef ParentName; |
6934 | // In case we have Ctors/Dtors we use the complete type variant to produce |
6935 | // the mangling of the device outlined kernel. |
6936 | if (const auto *D = dyn_cast<CXXConstructorDecl>(Val: CGF.CurFuncDecl)) |
6937 | ParentName = CGM.getMangledName(GD: GlobalDecl(D, Ctor_Complete)); |
6938 | else if (const auto *D = dyn_cast<CXXDestructorDecl>(Val: CGF.CurFuncDecl)) |
6939 | ParentName = CGM.getMangledName(GD: GlobalDecl(D, Dtor_Complete)); |
6940 | else |
6941 | ParentName = |
6942 | CGM.getMangledName(GD: GlobalDecl(cast<FunctionDecl>(Val: CGF.CurFuncDecl))); |
6943 | |
6944 | // Emit target region as a standalone region. |
6945 | CGM.getOpenMPRuntime().emitTargetOutlinedFunction(D: S, ParentName, OutlinedFn&: Fn, OutlinedFnID&: FnID, |
6946 | IsOffloadEntry, CodeGen); |
6947 | OMPLexicalScope Scope(CGF, S, OMPD_task); |
6948 | auto &&SizeEmitter = |
6949 | [IsOffloadEntry](CodeGenFunction &CGF, |
6950 | const OMPLoopDirective &D) -> llvm::Value * { |
6951 | if (IsOffloadEntry) { |
6952 | OMPLoopScope(CGF, D); |
6953 | // Emit calculation of the iterations count. |
6954 | llvm::Value *NumIterations = CGF.EmitScalarExpr(E: D.getNumIterations()); |
6955 | NumIterations = CGF.Builder.CreateIntCast(V: NumIterations, DestTy: CGF.Int64Ty, |
6956 | /*isSigned=*/false); |
6957 | return NumIterations; |
6958 | } |
6959 | return nullptr; |
6960 | }; |
6961 | CGM.getOpenMPRuntime().emitTargetCall(CGF, D: S, OutlinedFn: Fn, OutlinedFnID: FnID, IfCond, Device, |
6962 | SizeEmitter); |
6963 | } |
6964 | |
6965 | static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S, |
6966 | PrePostActionTy &Action) { |
6967 | Action.Enter(CGF); |
6968 | CodeGenFunction::OMPPrivateScope PrivateScope(CGF); |
6969 | (void)CGF.EmitOMPFirstprivateClause(D: S, PrivateScope); |
6970 | CGF.EmitOMPPrivateClause(D: S, PrivateScope); |
6971 | (void)PrivateScope.Privatize(); |
6972 | if (isOpenMPTargetExecutionDirective(DKind: S.getDirectiveKind())) |
6973 | CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, D: S); |
6974 | |
6975 | CGF.EmitStmt(S: S.getCapturedStmt(RegionKind: OMPD_target)->getCapturedStmt()); |
6976 | CGF.EnsureInsertPoint(); |
6977 | } |
6978 | |
6979 | void CodeGenFunction::EmitOMPTargetDeviceFunction(CodeGenModule &CGM, |
6980 | StringRef ParentName, |
6981 | const OMPTargetDirective &S) { |
6982 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
6983 | emitTargetRegion(CGF, S, Action); |
6984 | }; |
6985 | llvm::Function *Fn; |
6986 | llvm::Constant *Addr; |
6987 | // Emit target region as a standalone region. |
6988 | CGM.getOpenMPRuntime().emitTargetOutlinedFunction( |
6989 | D: S, ParentName, OutlinedFn&: Fn, OutlinedFnID&: Addr, /*IsOffloadEntry=*/true, CodeGen); |
6990 | assert(Fn && Addr && "Target device function emission failed." ); |
6991 | } |
6992 | |
6993 | void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) { |
6994 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
6995 | emitTargetRegion(CGF, S, Action); |
6996 | }; |
6997 | emitCommonOMPTargetDirective(CGF&: *this, S, CodeGen); |
6998 | } |
6999 | |
7000 | static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF, |
7001 | const OMPExecutableDirective &S, |
7002 | OpenMPDirectiveKind InnermostKind, |
7003 | const RegionCodeGenTy &CodeGen) { |
7004 | const CapturedStmt *CS = S.getCapturedStmt(RegionKind: OMPD_teams); |
7005 | llvm::Function *OutlinedFn = |
7006 | CGF.CGM.getOpenMPRuntime().emitTeamsOutlinedFunction( |
7007 | CGF, D: S, ThreadIDVar: *CS->getCapturedDecl()->param_begin(), InnermostKind, |
7008 | CodeGen); |
7009 | |
7010 | const auto *NT = S.getSingleClause<OMPNumTeamsClause>(); |
7011 | const auto *TL = S.getSingleClause<OMPThreadLimitClause>(); |
7012 | if (NT || TL) { |
7013 | const Expr *NumTeams = NT ? NT->getNumTeams().front() : nullptr; |
7014 | const Expr *ThreadLimit = TL ? TL->getThreadLimit().front() : nullptr; |
7015 | |
7016 | CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeams, ThreadLimit, |
7017 | Loc: S.getBeginLoc()); |
7018 | } |
7019 | |
7020 | OMPTeamsScope Scope(CGF, S); |
7021 | llvm::SmallVector<llvm::Value *, 16> CapturedVars; |
7022 | CGF.GenerateOpenMPCapturedVars(S: *CS, CapturedVars); |
7023 | CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, D: S, Loc: S.getBeginLoc(), OutlinedFn, |
7024 | CapturedVars); |
7025 | } |
7026 | |
7027 | void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) { |
7028 | // Emit teams region as a standalone region. |
7029 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
7030 | Action.Enter(CGF); |
7031 | OMPPrivateScope PrivateScope(CGF); |
7032 | (void)CGF.EmitOMPFirstprivateClause(D: S, PrivateScope); |
7033 | CGF.EmitOMPPrivateClause(D: S, PrivateScope); |
7034 | CGF.EmitOMPReductionClauseInit(D: S, PrivateScope); |
7035 | (void)PrivateScope.Privatize(); |
7036 | CGF.EmitStmt(S: S.getCapturedStmt(RegionKind: OMPD_teams)->getCapturedStmt()); |
7037 | CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_teams); |
7038 | }; |
7039 | emitCommonOMPTeamsDirective(CGF&: *this, S, InnermostKind: OMPD_distribute, CodeGen); |
7040 | emitPostUpdateForReductionClause(CGF&: *this, D: S, |
7041 | CondGen: [](CodeGenFunction &) { return nullptr; }); |
7042 | } |
7043 | |
7044 | static void emitTargetTeamsRegion(CodeGenFunction &CGF, PrePostActionTy &Action, |
7045 | const OMPTargetTeamsDirective &S) { |
7046 | auto *CS = S.getCapturedStmt(RegionKind: OMPD_teams); |
7047 | Action.Enter(CGF); |
7048 | // Emit teams region as a standalone region. |
7049 | auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) { |
7050 | Action.Enter(CGF); |
7051 | CodeGenFunction::OMPPrivateScope PrivateScope(CGF); |
7052 | (void)CGF.EmitOMPFirstprivateClause(D: S, PrivateScope); |
7053 | CGF.EmitOMPPrivateClause(D: S, PrivateScope); |
7054 | CGF.EmitOMPReductionClauseInit(D: S, PrivateScope); |
7055 | (void)PrivateScope.Privatize(); |
7056 | if (isOpenMPTargetExecutionDirective(DKind: S.getDirectiveKind())) |
7057 | CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, D: S); |
7058 | CGF.EmitStmt(S: CS->getCapturedStmt()); |
7059 | CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_teams); |
7060 | }; |
7061 | emitCommonOMPTeamsDirective(CGF, S, InnermostKind: OMPD_teams, CodeGen); |
7062 | emitPostUpdateForReductionClause(CGF, D: S, |
7063 | CondGen: [](CodeGenFunction &) { return nullptr; }); |
7064 | } |
7065 | |
7066 | void CodeGenFunction::EmitOMPTargetTeamsDeviceFunction( |
7067 | CodeGenModule &CGM, StringRef ParentName, |
7068 | const OMPTargetTeamsDirective &S) { |
7069 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
7070 | emitTargetTeamsRegion(CGF, Action, S); |
7071 | }; |
7072 | llvm::Function *Fn; |
7073 | llvm::Constant *Addr; |
7074 | // Emit target region as a standalone region. |
7075 | CGM.getOpenMPRuntime().emitTargetOutlinedFunction( |
7076 | D: S, ParentName, OutlinedFn&: Fn, OutlinedFnID&: Addr, /*IsOffloadEntry=*/true, CodeGen); |
7077 | assert(Fn && Addr && "Target device function emission failed." ); |
7078 | } |
7079 | |
7080 | void CodeGenFunction::EmitOMPTargetTeamsDirective( |
7081 | const OMPTargetTeamsDirective &S) { |
7082 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
7083 | emitTargetTeamsRegion(CGF, Action, S); |
7084 | }; |
7085 | emitCommonOMPTargetDirective(CGF&: *this, S, CodeGen); |
7086 | } |
7087 | |
7088 | static void |
7089 | emitTargetTeamsDistributeRegion(CodeGenFunction &CGF, PrePostActionTy &Action, |
7090 | const OMPTargetTeamsDistributeDirective &S) { |
7091 | Action.Enter(CGF); |
7092 | auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { |
7093 | CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitOMPLoopBodyWithStopPoint, IncExpr: S.getInc()); |
7094 | }; |
7095 | |
7096 | // Emit teams region as a standalone region. |
7097 | auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF, |
7098 | PrePostActionTy &Action) { |
7099 | Action.Enter(CGF); |
7100 | CodeGenFunction::OMPPrivateScope PrivateScope(CGF); |
7101 | CGF.EmitOMPReductionClauseInit(D: S, PrivateScope); |
7102 | (void)PrivateScope.Privatize(); |
7103 | CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, InnermostKind: OMPD_distribute, |
7104 | CodeGen: CodeGenDistribute); |
7105 | CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_teams); |
7106 | }; |
7107 | emitCommonOMPTeamsDirective(CGF, S, InnermostKind: OMPD_distribute, CodeGen); |
7108 | emitPostUpdateForReductionClause(CGF, D: S, |
7109 | CondGen: [](CodeGenFunction &) { return nullptr; }); |
7110 | } |
7111 | |
7112 | void CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction( |
7113 | CodeGenModule &CGM, StringRef ParentName, |
7114 | const OMPTargetTeamsDistributeDirective &S) { |
7115 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
7116 | emitTargetTeamsDistributeRegion(CGF, Action, S); |
7117 | }; |
7118 | llvm::Function *Fn; |
7119 | llvm::Constant *Addr; |
7120 | // Emit target region as a standalone region. |
7121 | CGM.getOpenMPRuntime().emitTargetOutlinedFunction( |
7122 | D: S, ParentName, OutlinedFn&: Fn, OutlinedFnID&: Addr, /*IsOffloadEntry=*/true, CodeGen); |
7123 | assert(Fn && Addr && "Target device function emission failed." ); |
7124 | } |
7125 | |
7126 | void CodeGenFunction::EmitOMPTargetTeamsDistributeDirective( |
7127 | const OMPTargetTeamsDistributeDirective &S) { |
7128 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
7129 | emitTargetTeamsDistributeRegion(CGF, Action, S); |
7130 | }; |
7131 | emitCommonOMPTargetDirective(CGF&: *this, S, CodeGen); |
7132 | } |
7133 | |
7134 | static void emitTargetTeamsDistributeSimdRegion( |
7135 | CodeGenFunction &CGF, PrePostActionTy &Action, |
7136 | const OMPTargetTeamsDistributeSimdDirective &S) { |
7137 | Action.Enter(CGF); |
7138 | auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { |
7139 | CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitOMPLoopBodyWithStopPoint, IncExpr: S.getInc()); |
7140 | }; |
7141 | |
7142 | // Emit teams region as a standalone region. |
7143 | auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF, |
7144 | PrePostActionTy &Action) { |
7145 | Action.Enter(CGF); |
7146 | CodeGenFunction::OMPPrivateScope PrivateScope(CGF); |
7147 | CGF.EmitOMPReductionClauseInit(D: S, PrivateScope); |
7148 | (void)PrivateScope.Privatize(); |
7149 | CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, InnermostKind: OMPD_distribute, |
7150 | CodeGen: CodeGenDistribute); |
7151 | CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_teams); |
7152 | }; |
7153 | emitCommonOMPTeamsDirective(CGF, S, InnermostKind: OMPD_distribute_simd, CodeGen); |
7154 | emitPostUpdateForReductionClause(CGF, D: S, |
7155 | CondGen: [](CodeGenFunction &) { return nullptr; }); |
7156 | } |
7157 | |
7158 | void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction( |
7159 | CodeGenModule &CGM, StringRef ParentName, |
7160 | const OMPTargetTeamsDistributeSimdDirective &S) { |
7161 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
7162 | emitTargetTeamsDistributeSimdRegion(CGF, Action, S); |
7163 | }; |
7164 | llvm::Function *Fn; |
7165 | llvm::Constant *Addr; |
7166 | // Emit target region as a standalone region. |
7167 | CGM.getOpenMPRuntime().emitTargetOutlinedFunction( |
7168 | D: S, ParentName, OutlinedFn&: Fn, OutlinedFnID&: Addr, /*IsOffloadEntry=*/true, CodeGen); |
7169 | assert(Fn && Addr && "Target device function emission failed." ); |
7170 | } |
7171 | |
7172 | void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDirective( |
7173 | const OMPTargetTeamsDistributeSimdDirective &S) { |
7174 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
7175 | emitTargetTeamsDistributeSimdRegion(CGF, Action, S); |
7176 | }; |
7177 | emitCommonOMPTargetDirective(CGF&: *this, S, CodeGen); |
7178 | } |
7179 | |
7180 | void CodeGenFunction::EmitOMPTeamsDistributeDirective( |
7181 | const OMPTeamsDistributeDirective &S) { |
7182 | |
7183 | auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { |
7184 | CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitOMPLoopBodyWithStopPoint, IncExpr: S.getInc()); |
7185 | }; |
7186 | |
7187 | // Emit teams region as a standalone region. |
7188 | auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF, |
7189 | PrePostActionTy &Action) { |
7190 | Action.Enter(CGF); |
7191 | OMPPrivateScope PrivateScope(CGF); |
7192 | CGF.EmitOMPReductionClauseInit(D: S, PrivateScope); |
7193 | (void)PrivateScope.Privatize(); |
7194 | CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, InnermostKind: OMPD_distribute, |
7195 | CodeGen: CodeGenDistribute); |
7196 | CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_teams); |
7197 | }; |
7198 | emitCommonOMPTeamsDirective(CGF&: *this, S, InnermostKind: OMPD_distribute, CodeGen); |
7199 | emitPostUpdateForReductionClause(CGF&: *this, D: S, |
7200 | CondGen: [](CodeGenFunction &) { return nullptr; }); |
7201 | } |
7202 | |
7203 | void CodeGenFunction::EmitOMPTeamsDistributeSimdDirective( |
7204 | const OMPTeamsDistributeSimdDirective &S) { |
7205 | auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { |
7206 | CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitOMPLoopBodyWithStopPoint, IncExpr: S.getInc()); |
7207 | }; |
7208 | |
7209 | // Emit teams region as a standalone region. |
7210 | auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF, |
7211 | PrePostActionTy &Action) { |
7212 | Action.Enter(CGF); |
7213 | OMPPrivateScope PrivateScope(CGF); |
7214 | CGF.EmitOMPReductionClauseInit(D: S, PrivateScope); |
7215 | (void)PrivateScope.Privatize(); |
7216 | CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, InnermostKind: OMPD_simd, |
7217 | CodeGen: CodeGenDistribute); |
7218 | CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_teams); |
7219 | }; |
7220 | emitCommonOMPTeamsDirective(CGF&: *this, S, InnermostKind: OMPD_distribute_simd, CodeGen); |
7221 | emitPostUpdateForReductionClause(CGF&: *this, D: S, |
7222 | CondGen: [](CodeGenFunction &) { return nullptr; }); |
7223 | } |
7224 | |
7225 | void CodeGenFunction::EmitOMPTeamsDistributeParallelForDirective( |
7226 | const OMPTeamsDistributeParallelForDirective &S) { |
7227 | auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { |
7228 | CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitInnerParallelForWhenCombined, |
7229 | IncExpr: S.getDistInc()); |
7230 | }; |
7231 | |
7232 | // Emit teams region as a standalone region. |
7233 | auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF, |
7234 | PrePostActionTy &Action) { |
7235 | Action.Enter(CGF); |
7236 | OMPPrivateScope PrivateScope(CGF); |
7237 | CGF.EmitOMPReductionClauseInit(D: S, PrivateScope); |
7238 | (void)PrivateScope.Privatize(); |
7239 | CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, InnermostKind: OMPD_distribute, |
7240 | CodeGen: CodeGenDistribute); |
7241 | CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_teams); |
7242 | }; |
7243 | emitCommonOMPTeamsDirective(CGF&: *this, S, InnermostKind: OMPD_distribute_parallel_for, CodeGen); |
7244 | emitPostUpdateForReductionClause(CGF&: *this, D: S, |
7245 | CondGen: [](CodeGenFunction &) { return nullptr; }); |
7246 | } |
7247 | |
7248 | void CodeGenFunction::EmitOMPTeamsDistributeParallelForSimdDirective( |
7249 | const OMPTeamsDistributeParallelForSimdDirective &S) { |
7250 | auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { |
7251 | CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitInnerParallelForWhenCombined, |
7252 | IncExpr: S.getDistInc()); |
7253 | }; |
7254 | |
7255 | // Emit teams region as a standalone region. |
7256 | auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF, |
7257 | PrePostActionTy &Action) { |
7258 | Action.Enter(CGF); |
7259 | OMPPrivateScope PrivateScope(CGF); |
7260 | CGF.EmitOMPReductionClauseInit(D: S, PrivateScope); |
7261 | (void)PrivateScope.Privatize(); |
7262 | CGF.CGM.getOpenMPRuntime().emitInlinedDirective( |
7263 | CGF, InnermostKind: OMPD_distribute, CodeGen: CodeGenDistribute, /*HasCancel=*/false); |
7264 | CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_teams); |
7265 | }; |
7266 | emitCommonOMPTeamsDirective(CGF&: *this, S, InnermostKind: OMPD_distribute_parallel_for_simd, |
7267 | CodeGen); |
7268 | emitPostUpdateForReductionClause(CGF&: *this, D: S, |
7269 | CondGen: [](CodeGenFunction &) { return nullptr; }); |
7270 | } |
7271 | |
7272 | void CodeGenFunction::EmitOMPInteropDirective(const OMPInteropDirective &S) { |
7273 | llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); |
7274 | llvm::Value *Device = nullptr; |
7275 | llvm::Value *NumDependences = nullptr; |
7276 | llvm::Value *DependenceList = nullptr; |
7277 | |
7278 | if (const auto *C = S.getSingleClause<OMPDeviceClause>()) |
7279 | Device = EmitScalarExpr(E: C->getDevice()); |
7280 | |
7281 | // Build list and emit dependences |
7282 | OMPTaskDataTy Data; |
7283 | buildDependences(S, Data); |
7284 | if (!Data.Dependences.empty()) { |
7285 | Address DependenciesArray = Address::invalid(); |
7286 | std::tie(args&: NumDependences, args&: DependenciesArray) = |
7287 | CGM.getOpenMPRuntime().emitDependClause(CGF&: *this, Dependencies: Data.Dependences, |
7288 | Loc: S.getBeginLoc()); |
7289 | DependenceList = DependenciesArray.emitRawPointer(CGF&: *this); |
7290 | } |
7291 | Data.HasNowaitClause = S.hasClausesOfKind<OMPNowaitClause>(); |
7292 | |
7293 | assert(!(Data.HasNowaitClause && !(S.getSingleClause<OMPInitClause>() || |
7294 | S.getSingleClause<OMPDestroyClause>() || |
7295 | S.getSingleClause<OMPUseClause>())) && |
7296 | "OMPNowaitClause clause is used separately in OMPInteropDirective." ); |
7297 | |
7298 | auto ItOMPInitClause = S.getClausesOfKind<OMPInitClause>(); |
7299 | if (!ItOMPInitClause.empty()) { |
7300 | // Look at the multiple init clauses |
7301 | for (const OMPInitClause *C : ItOMPInitClause) { |
7302 | llvm::Value *InteropvarPtr = |
7303 | EmitLValue(E: C->getInteropVar()).getPointer(CGF&: *this); |
7304 | llvm::omp::OMPInteropType InteropType = |
7305 | llvm::omp::OMPInteropType::Unknown; |
7306 | if (C->getIsTarget()) { |
7307 | InteropType = llvm::omp::OMPInteropType::Target; |
7308 | } else { |
7309 | assert(C->getIsTargetSync() && |
7310 | "Expected interop-type target/targetsync" ); |
7311 | InteropType = llvm::omp::OMPInteropType::TargetSync; |
7312 | } |
7313 | OMPBuilder.createOMPInteropInit(Loc: Builder, InteropVar: InteropvarPtr, InteropType, |
7314 | Device, NumDependences, DependenceAddress: DependenceList, |
7315 | HaveNowaitClause: Data.HasNowaitClause); |
7316 | } |
7317 | } |
7318 | auto ItOMPDestroyClause = S.getClausesOfKind<OMPDestroyClause>(); |
7319 | if (!ItOMPDestroyClause.empty()) { |
7320 | // Look at the multiple destroy clauses |
7321 | for (const OMPDestroyClause *C : ItOMPDestroyClause) { |
7322 | llvm::Value *InteropvarPtr = |
7323 | EmitLValue(E: C->getInteropVar()).getPointer(CGF&: *this); |
7324 | OMPBuilder.createOMPInteropDestroy(Loc: Builder, InteropVar: InteropvarPtr, Device, |
7325 | NumDependences, DependenceAddress: DependenceList, |
7326 | HaveNowaitClause: Data.HasNowaitClause); |
7327 | } |
7328 | } |
7329 | auto ItOMPUseClause = S.getClausesOfKind<OMPUseClause>(); |
7330 | if (!ItOMPUseClause.empty()) { |
7331 | // Look at the multiple use clauses |
7332 | for (const OMPUseClause *C : ItOMPUseClause) { |
7333 | llvm::Value *InteropvarPtr = |
7334 | EmitLValue(E: C->getInteropVar()).getPointer(CGF&: *this); |
7335 | OMPBuilder.createOMPInteropUse(Loc: Builder, InteropVar: InteropvarPtr, Device, |
7336 | NumDependences, DependenceAddress: DependenceList, |
7337 | HaveNowaitClause: Data.HasNowaitClause); |
7338 | } |
7339 | } |
7340 | } |
7341 | |
7342 | static void emitTargetTeamsDistributeParallelForRegion( |
7343 | CodeGenFunction &CGF, const OMPTargetTeamsDistributeParallelForDirective &S, |
7344 | PrePostActionTy &Action) { |
7345 | Action.Enter(CGF); |
7346 | auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { |
7347 | CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitInnerParallelForWhenCombined, |
7348 | IncExpr: S.getDistInc()); |
7349 | }; |
7350 | |
7351 | // Emit teams region as a standalone region. |
7352 | auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF, |
7353 | PrePostActionTy &Action) { |
7354 | Action.Enter(CGF); |
7355 | CodeGenFunction::OMPPrivateScope PrivateScope(CGF); |
7356 | CGF.EmitOMPReductionClauseInit(D: S, PrivateScope); |
7357 | (void)PrivateScope.Privatize(); |
7358 | CGF.CGM.getOpenMPRuntime().emitInlinedDirective( |
7359 | CGF, InnermostKind: OMPD_distribute, CodeGen: CodeGenDistribute, /*HasCancel=*/false); |
7360 | CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_teams); |
7361 | }; |
7362 | |
7363 | emitCommonOMPTeamsDirective(CGF, S, InnermostKind: OMPD_distribute_parallel_for, |
7364 | CodeGen: CodeGenTeams); |
7365 | emitPostUpdateForReductionClause(CGF, D: S, |
7366 | CondGen: [](CodeGenFunction &) { return nullptr; }); |
7367 | } |
7368 | |
7369 | void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction( |
7370 | CodeGenModule &CGM, StringRef ParentName, |
7371 | const OMPTargetTeamsDistributeParallelForDirective &S) { |
7372 | // Emit SPMD target teams distribute parallel for region as a standalone |
7373 | // region. |
7374 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
7375 | emitTargetTeamsDistributeParallelForRegion(CGF, S, Action); |
7376 | }; |
7377 | llvm::Function *Fn; |
7378 | llvm::Constant *Addr; |
7379 | // Emit target region as a standalone region. |
7380 | CGM.getOpenMPRuntime().emitTargetOutlinedFunction( |
7381 | D: S, ParentName, OutlinedFn&: Fn, OutlinedFnID&: Addr, /*IsOffloadEntry=*/true, CodeGen); |
7382 | assert(Fn && Addr && "Target device function emission failed." ); |
7383 | } |
7384 | |
7385 | void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDirective( |
7386 | const OMPTargetTeamsDistributeParallelForDirective &S) { |
7387 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
7388 | emitTargetTeamsDistributeParallelForRegion(CGF, S, Action); |
7389 | }; |
7390 | emitCommonOMPTargetDirective(CGF&: *this, S, CodeGen); |
7391 | } |
7392 | |
7393 | static void emitTargetTeamsDistributeParallelForSimdRegion( |
7394 | CodeGenFunction &CGF, |
7395 | const OMPTargetTeamsDistributeParallelForSimdDirective &S, |
7396 | PrePostActionTy &Action) { |
7397 | Action.Enter(CGF); |
7398 | auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { |
7399 | CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitInnerParallelForWhenCombined, |
7400 | IncExpr: S.getDistInc()); |
7401 | }; |
7402 | |
7403 | // Emit teams region as a standalone region. |
7404 | auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF, |
7405 | PrePostActionTy &Action) { |
7406 | Action.Enter(CGF); |
7407 | CodeGenFunction::OMPPrivateScope PrivateScope(CGF); |
7408 | CGF.EmitOMPReductionClauseInit(D: S, PrivateScope); |
7409 | (void)PrivateScope.Privatize(); |
7410 | CGF.CGM.getOpenMPRuntime().emitInlinedDirective( |
7411 | CGF, InnermostKind: OMPD_distribute, CodeGen: CodeGenDistribute, /*HasCancel=*/false); |
7412 | CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_teams); |
7413 | }; |
7414 | |
7415 | emitCommonOMPTeamsDirective(CGF, S, InnermostKind: OMPD_distribute_parallel_for_simd, |
7416 | CodeGen: CodeGenTeams); |
7417 | emitPostUpdateForReductionClause(CGF, D: S, |
7418 | CondGen: [](CodeGenFunction &) { return nullptr; }); |
7419 | } |
7420 | |
7421 | void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction( |
7422 | CodeGenModule &CGM, StringRef ParentName, |
7423 | const OMPTargetTeamsDistributeParallelForSimdDirective &S) { |
7424 | // Emit SPMD target teams distribute parallel for simd region as a standalone |
7425 | // region. |
7426 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
7427 | emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action); |
7428 | }; |
7429 | llvm::Function *Fn; |
7430 | llvm::Constant *Addr; |
7431 | // Emit target region as a standalone region. |
7432 | CGM.getOpenMPRuntime().emitTargetOutlinedFunction( |
7433 | D: S, ParentName, OutlinedFn&: Fn, OutlinedFnID&: Addr, /*IsOffloadEntry=*/true, CodeGen); |
7434 | assert(Fn && Addr && "Target device function emission failed." ); |
7435 | } |
7436 | |
7437 | void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDirective( |
7438 | const OMPTargetTeamsDistributeParallelForSimdDirective &S) { |
7439 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
7440 | emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action); |
7441 | }; |
7442 | emitCommonOMPTargetDirective(CGF&: *this, S, CodeGen); |
7443 | } |
7444 | |
7445 | void CodeGenFunction::EmitOMPCancellationPointDirective( |
7446 | const OMPCancellationPointDirective &S) { |
7447 | CGM.getOpenMPRuntime().emitCancellationPointCall(CGF&: *this, Loc: S.getBeginLoc(), |
7448 | CancelRegion: S.getCancelRegion()); |
7449 | } |
7450 | |
7451 | void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) { |
7452 | const Expr *IfCond = nullptr; |
7453 | for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { |
7454 | if (C->getNameModifier() == OMPD_unknown || |
7455 | C->getNameModifier() == OMPD_cancel) { |
7456 | IfCond = C->getCondition(); |
7457 | break; |
7458 | } |
7459 | } |
7460 | if (CGM.getLangOpts().OpenMPIRBuilder) { |
7461 | llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); |
7462 | // TODO: This check is necessary as we only generate `omp parallel` through |
7463 | // the OpenMPIRBuilder for now. |
7464 | if (S.getCancelRegion() == OMPD_parallel || |
7465 | S.getCancelRegion() == OMPD_sections || |
7466 | S.getCancelRegion() == OMPD_section) { |
7467 | llvm::Value *IfCondition = nullptr; |
7468 | if (IfCond) |
7469 | IfCondition = EmitScalarExpr(E: IfCond, |
7470 | /*IgnoreResultAssign=*/true); |
7471 | llvm::OpenMPIRBuilder::InsertPointTy AfterIP = cantFail( |
7472 | ValOrErr: OMPBuilder.createCancel(Loc: Builder, IfCondition, CanceledDirective: S.getCancelRegion())); |
7473 | return Builder.restoreIP(IP: AfterIP); |
7474 | } |
7475 | } |
7476 | |
7477 | CGM.getOpenMPRuntime().emitCancelCall(CGF&: *this, Loc: S.getBeginLoc(), IfCond, |
7478 | CancelRegion: S.getCancelRegion()); |
7479 | } |
7480 | |
7481 | CodeGenFunction::JumpDest |
7482 | CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) { |
7483 | if (Kind == OMPD_parallel || Kind == OMPD_task || |
7484 | Kind == OMPD_target_parallel || Kind == OMPD_taskloop || |
7485 | Kind == OMPD_master_taskloop || Kind == OMPD_parallel_master_taskloop) |
7486 | return ReturnBlock; |
7487 | assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections || |
7488 | Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for || |
7489 | Kind == OMPD_distribute_parallel_for || |
7490 | Kind == OMPD_target_parallel_for || |
7491 | Kind == OMPD_teams_distribute_parallel_for || |
7492 | Kind == OMPD_target_teams_distribute_parallel_for); |
7493 | return OMPCancelStack.getExitBlock(); |
7494 | } |
7495 | |
7496 | void CodeGenFunction::EmitOMPUseDevicePtrClause( |
7497 | const OMPUseDevicePtrClause &C, OMPPrivateScope &PrivateScope, |
7498 | const llvm::DenseMap<const ValueDecl *, llvm::Value *> |
7499 | CaptureDeviceAddrMap) { |
7500 | llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed; |
7501 | for (const Expr *OrigVarIt : C.varlist()) { |
7502 | const auto *OrigVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: OrigVarIt)->getDecl()); |
7503 | if (!Processed.insert(V: OrigVD).second) |
7504 | continue; |
7505 | |
7506 | // In order to identify the right initializer we need to match the |
7507 | // declaration used by the mapping logic. In some cases we may get |
7508 | // OMPCapturedExprDecl that refers to the original declaration. |
7509 | const ValueDecl *MatchingVD = OrigVD; |
7510 | if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(Val: MatchingVD)) { |
7511 | // OMPCapturedExprDecl are used to privative fields of the current |
7512 | // structure. |
7513 | const auto *ME = cast<MemberExpr>(Val: OED->getInit()); |
7514 | assert(isa<CXXThisExpr>(ME->getBase()->IgnoreImpCasts()) && |
7515 | "Base should be the current struct!" ); |
7516 | MatchingVD = ME->getMemberDecl(); |
7517 | } |
7518 | |
7519 | // If we don't have information about the current list item, move on to |
7520 | // the next one. |
7521 | auto InitAddrIt = CaptureDeviceAddrMap.find(Val: MatchingVD); |
7522 | if (InitAddrIt == CaptureDeviceAddrMap.end()) |
7523 | continue; |
7524 | |
7525 | llvm::Type *Ty = ConvertTypeForMem(T: OrigVD->getType().getNonReferenceType()); |
7526 | |
7527 | // Return the address of the private variable. |
7528 | bool IsRegistered = PrivateScope.addPrivate( |
7529 | LocalVD: OrigVD, |
7530 | Addr: Address(InitAddrIt->second, Ty, |
7531 | getContext().getTypeAlignInChars(T: getContext().VoidPtrTy))); |
7532 | assert(IsRegistered && "firstprivate var already registered as private" ); |
7533 | // Silence the warning about unused variable. |
7534 | (void)IsRegistered; |
7535 | } |
7536 | } |
7537 | |
7538 | static const VarDecl *getBaseDecl(const Expr *Ref) { |
7539 | const Expr *Base = Ref->IgnoreParenImpCasts(); |
7540 | while (const auto *OASE = dyn_cast<ArraySectionExpr>(Val: Base)) |
7541 | Base = OASE->getBase()->IgnoreParenImpCasts(); |
7542 | while (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Val: Base)) |
7543 | Base = ASE->getBase()->IgnoreParenImpCasts(); |
7544 | return cast<VarDecl>(Val: cast<DeclRefExpr>(Val: Base)->getDecl()); |
7545 | } |
7546 | |
7547 | void CodeGenFunction::EmitOMPUseDeviceAddrClause( |
7548 | const OMPUseDeviceAddrClause &C, OMPPrivateScope &PrivateScope, |
7549 | const llvm::DenseMap<const ValueDecl *, llvm::Value *> |
7550 | CaptureDeviceAddrMap) { |
7551 | llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed; |
7552 | for (const Expr *Ref : C.varlist()) { |
7553 | const VarDecl *OrigVD = getBaseDecl(Ref); |
7554 | if (!Processed.insert(V: OrigVD).second) |
7555 | continue; |
7556 | // In order to identify the right initializer we need to match the |
7557 | // declaration used by the mapping logic. In some cases we may get |
7558 | // OMPCapturedExprDecl that refers to the original declaration. |
7559 | const ValueDecl *MatchingVD = OrigVD; |
7560 | if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(Val: MatchingVD)) { |
7561 | // OMPCapturedExprDecl are used to privative fields of the current |
7562 | // structure. |
7563 | const auto *ME = cast<MemberExpr>(Val: OED->getInit()); |
7564 | assert(isa<CXXThisExpr>(ME->getBase()) && |
7565 | "Base should be the current struct!" ); |
7566 | MatchingVD = ME->getMemberDecl(); |
7567 | } |
7568 | |
7569 | // If we don't have information about the current list item, move on to |
7570 | // the next one. |
7571 | auto InitAddrIt = CaptureDeviceAddrMap.find(Val: MatchingVD); |
7572 | if (InitAddrIt == CaptureDeviceAddrMap.end()) |
7573 | continue; |
7574 | |
7575 | llvm::Type *Ty = ConvertTypeForMem(T: OrigVD->getType().getNonReferenceType()); |
7576 | |
7577 | Address PrivAddr = |
7578 | Address(InitAddrIt->second, Ty, |
7579 | getContext().getTypeAlignInChars(T: getContext().VoidPtrTy)); |
7580 | // For declrefs and variable length array need to load the pointer for |
7581 | // correct mapping, since the pointer to the data was passed to the runtime. |
7582 | if (isa<DeclRefExpr>(Val: Ref->IgnoreParenImpCasts()) || |
7583 | MatchingVD->getType()->isArrayType()) { |
7584 | QualType PtrTy = getContext().getPointerType( |
7585 | T: OrigVD->getType().getNonReferenceType()); |
7586 | PrivAddr = |
7587 | EmitLoadOfPointer(Ptr: PrivAddr.withElementType(ElemTy: ConvertTypeForMem(T: PtrTy)), |
7588 | PtrTy: PtrTy->castAs<PointerType>()); |
7589 | } |
7590 | |
7591 | (void)PrivateScope.addPrivate(LocalVD: OrigVD, Addr: PrivAddr); |
7592 | } |
7593 | } |
7594 | |
7595 | // Generate the instructions for '#pragma omp target data' directive. |
//
// Overview of what this function does:
//  * The region body (the innermost captured statement) is always emitted
//    through emitInlinedDirective with OMPD_target_data.
//  * When no offload target triples are configured, the region generator is
//    run directly and no data mapping calls are emitted.
//  * Otherwise the 'if' and 'device' clause expressions (if present) are
//    collected and the runtime's emitTargetDataCalls is invoked with a region
//    generator whose pre-action (PrivAction) requests privatization of the
//    use_device_ptr / use_device_addr list items.
7596 | void CodeGenFunction::EmitOMPTargetDataDirective( |
7597 | const OMPTargetDataDirective &S) { |
7598 | CGOpenMPRuntime::TargetDataInfo Info(/*RequiresDevicePointerInfo=*/true, |
7599 | /*SeparateBeginEndCalls=*/true); |
7600 | |
7601 | // Create a pre/post action to signal the privatization of the device pointer. |
7602 | // This action can be replaced by the OpenMP runtime code generation to |
7603 | // deactivate privatization. |
7604 | bool PrivatizeDevicePointers = false; |
// Action whose Enter hook sets the referenced flag to true; it does nothing
// else.
7605 | class DevicePointerPrivActionTy : public PrePostActionTy { |
7606 | bool &PrivatizeDevicePointers; |
7607 | |
7608 | public: |
7609 | explicit DevicePointerPrivActionTy(bool &PrivatizeDevicePointers) |
7610 | : PrivatizeDevicePointers(PrivatizeDevicePointers) {} |
7611 | void Enter(CodeGenFunction &CGF) override { |
7612 | PrivatizeDevicePointers = true; |
7613 | } |
7614 | }; |
7615 | DevicePointerPrivActionTy PrivAction(PrivatizeDevicePointers); |
7616 | |
// Region generator for the whole directive. It captures by reference, so it
// observes PrivatizeDevicePointers and Info as they are at the time it runs.
7617 | auto &&CodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) { |
// Emits the statement captured by the innermost captured region.
7618 | auto &&InnermostCodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { |
7619 | CGF.EmitStmt(S: S.getInnermostCapturedStmt()->getCapturedStmt()); |
7620 | }; |
7621 | |
7622 | // Codegen that selects whether to generate the privatization code or not. |
7623 | auto &&PrivCodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) { |
7624 | RegionCodeGenTy RCG(InnermostCodeGen); |
// Reset the flag first so that only the Enter call below can turn it on.
7625 | PrivatizeDevicePointers = false; |
7626 | |
7627 | // Call the pre-action to change the status of PrivatizeDevicePointers if |
7628 | // needed. |
7629 | Action.Enter(CGF); |
7630 | |
7631 | if (PrivatizeDevicePointers) { |
7632 | OMPPrivateScope PrivateScope(CGF); |
7633 | // Emit all instances of the use_device_ptr clause. |
7634 | for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>()) |
7635 | CGF.EmitOMPUseDevicePtrClause(C: *C, PrivateScope, |
7636 | CaptureDeviceAddrMap: Info.CaptureDeviceAddrMap); |
// Likewise for all instances of the use_device_addr clause.
7637 | for (const auto *C : S.getClausesOfKind<OMPUseDeviceAddrClause>()) |
7638 | CGF.EmitOMPUseDeviceAddrClause(C: *C, PrivateScope, |
7639 | CaptureDeviceAddrMap: Info.CaptureDeviceAddrMap); |
7640 | (void)PrivateScope.Privatize(); |
7641 | RCG(CGF); |
7642 | } else { |
7643 | // If we don't have target devices, don't bother emitting the data |
7644 | // mapping code. |
// CaptureRegion stays empty when there are no target triples and is set to
// OMPD_unknown otherwise; it is passed to the OMPLexicalScope below.
7645 | std::optional<OpenMPDirectiveKind> CaptureRegion; |
7646 | if (CGM.getLangOpts().OMPTargetTriples.empty()) { |
7647 | // Emit helper decls of the use_device_ptr/use_device_addr clauses. |
7648 | for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>()) |
7649 | for (const Expr *E : C->varlist()) { |
7650 | const Decl *D = cast<DeclRefExpr>(Val: E)->getDecl(); |
7651 | if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(Val: D)) |
7652 | CGF.EmitVarDecl(D: *OED); |
7653 | } |
7654 | for (const auto *C : S.getClausesOfKind<OMPUseDeviceAddrClause>()) |
7655 | for (const Expr *E : C->varlist()) { |
7656 | const Decl *D = getBaseDecl(Ref: E); |
7657 | if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(Val: D)) |
7658 | CGF.EmitVarDecl(D: *OED); |
7659 | } |
7660 | } else { |
7661 | CaptureRegion = OMPD_unknown; |
7662 | } |
7663 | |
7664 | OMPLexicalScope Scope(CGF, S, CaptureRegion); |
7665 | RCG(CGF); |
7666 | } |
7667 | }; |
7668 | |
7669 | // Forward the provided action to the privatization codegen. |
7670 | RegionCodeGenTy PrivRCG(PrivCodeGen); |
7671 | PrivRCG.setAction(Action); |
7672 | |
7673 | // Notwithstanding the body of the region is emitted as inlined directive, |
7674 | // we don't use an inline scope as changes in the references inside the |
7675 | // region are expected to be visible outside, so we do not privatize them. |
7676 | OMPLexicalScope Scope(CGF, S); |
7677 | CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, InnermostKind: OMPD_target_data, |
7678 | CodeGen: PrivRCG); |
7679 | }; |
7680 | |
7681 | RegionCodeGenTy RCG(CodeGen); |
7682 | |
7683 | // If we don't have target devices, don't bother emitting the data mapping |
7684 | // code. |
// On this path PrivAction is never attached to RCG.
7685 | if (CGM.getLangOpts().OMPTargetTriples.empty()) { |
7686 | RCG(*this); |
7687 | return; |
7688 | } |
7689 | |
7690 | // Check if we have any if clause associated with the directive. |
7691 | const Expr *IfCond = nullptr; |
7692 | if (const auto *C = S.getSingleClause<OMPIfClause>()) |
7693 | IfCond = C->getCondition(); |
7694 | |
7695 | // Check if we have any device clause associated with the directive. |
7696 | const Expr *Device = nullptr; |
7697 | if (const auto *C = S.getSingleClause<OMPDeviceClause>()) |
7698 | Device = C->getDevice(); |
7699 | |
7700 | // Set the action to signal privatization of device pointers. |
7701 | RCG.setAction(PrivAction); |
7702 | |
7703 | // Emit region code. |
7704 | CGM.getOpenMPRuntime().emitTargetDataCalls(CGF&: *this, D: S, IfCond, Device, CodeGen: RCG, |
7705 | Info); |
7706 | } |
7707 | |
7708 | void CodeGenFunction::EmitOMPTargetEnterDataDirective( |
7709 | const OMPTargetEnterDataDirective &S) { |
7710 | // If we don't have target devices, don't bother emitting the data mapping |
7711 | // code. |
7712 | if (CGM.getLangOpts().OMPTargetTriples.empty()) |
7713 | return; |
7714 | |
7715 | // Check if we have any if clause associated with the directive. |
7716 | const Expr *IfCond = nullptr; |
7717 | if (const auto *C = S.getSingleClause<OMPIfClause>()) |
7718 | IfCond = C->getCondition(); |
7719 | |
7720 | // Check if we have any device clause associated with the directive. |
7721 | const Expr *Device = nullptr; |
7722 | if (const auto *C = S.getSingleClause<OMPDeviceClause>()) |
7723 | Device = C->getDevice(); |
7724 | |
7725 | OMPLexicalScope Scope(*this, S, OMPD_task); |
7726 | CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(CGF&: *this, D: S, IfCond, Device); |
7727 | } |
7728 | |
7729 | void CodeGenFunction::EmitOMPTargetExitDataDirective( |
7730 | const OMPTargetExitDataDirective &S) { |
7731 | // If we don't have target devices, don't bother emitting the data mapping |
7732 | // code. |
7733 | if (CGM.getLangOpts().OMPTargetTriples.empty()) |
7734 | return; |
7735 | |
7736 | // Check if we have any if clause associated with the directive. |
7737 | const Expr *IfCond = nullptr; |
7738 | if (const auto *C = S.getSingleClause<OMPIfClause>()) |
7739 | IfCond = C->getCondition(); |
7740 | |
7741 | // Check if we have any device clause associated with the directive. |
7742 | const Expr *Device = nullptr; |
7743 | if (const auto *C = S.getSingleClause<OMPDeviceClause>()) |
7744 | Device = C->getDevice(); |
7745 | |
7746 | OMPLexicalScope Scope(*this, S, OMPD_task); |
7747 | CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(CGF&: *this, D: S, IfCond, Device); |
7748 | } |
7749 | |
7750 | static void emitTargetParallelRegion(CodeGenFunction &CGF, |
7751 | const OMPTargetParallelDirective &S, |
7752 | PrePostActionTy &Action) { |
7753 | // Get the captured statement associated with the 'parallel' region. |
7754 | const CapturedStmt *CS = S.getCapturedStmt(RegionKind: OMPD_parallel); |
7755 | Action.Enter(CGF); |
7756 | auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) { |
7757 | Action.Enter(CGF); |
7758 | CodeGenFunction::OMPPrivateScope PrivateScope(CGF); |
7759 | (void)CGF.EmitOMPFirstprivateClause(D: S, PrivateScope); |
7760 | CGF.EmitOMPPrivateClause(D: S, PrivateScope); |
7761 | CGF.EmitOMPReductionClauseInit(D: S, PrivateScope); |
7762 | (void)PrivateScope.Privatize(); |
7763 | if (isOpenMPTargetExecutionDirective(DKind: S.getDirectiveKind())) |
7764 | CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, D: S); |
7765 | // TODO: Add support for clauses. |
7766 | CGF.EmitStmt(S: CS->getCapturedStmt()); |
7767 | CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_parallel); |
7768 | }; |
7769 | emitCommonOMPParallelDirective(CGF, S, InnermostKind: OMPD_parallel, CodeGen, |
7770 | CodeGenBoundParameters: emitEmptyBoundParameters); |
7771 | emitPostUpdateForReductionClause(CGF, D: S, |
7772 | CondGen: [](CodeGenFunction &) { return nullptr; }); |
7773 | } |
7774 | |
7775 | void CodeGenFunction::EmitOMPTargetParallelDeviceFunction( |
7776 | CodeGenModule &CGM, StringRef ParentName, |
7777 | const OMPTargetParallelDirective &S) { |
7778 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
7779 | emitTargetParallelRegion(CGF, S, Action); |
7780 | }; |
7781 | llvm::Function *Fn; |
7782 | llvm::Constant *Addr; |
7783 | // Emit target region as a standalone region. |
7784 | CGM.getOpenMPRuntime().emitTargetOutlinedFunction( |
7785 | D: S, ParentName, OutlinedFn&: Fn, OutlinedFnID&: Addr, /*IsOffloadEntry=*/true, CodeGen); |
7786 | assert(Fn && Addr && "Target device function emission failed." ); |
7787 | } |
7788 | |
7789 | void CodeGenFunction::EmitOMPTargetParallelDirective( |
7790 | const OMPTargetParallelDirective &S) { |
7791 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
7792 | emitTargetParallelRegion(CGF, S, Action); |
7793 | }; |
7794 | emitCommonOMPTargetDirective(CGF&: *this, S, CodeGen); |
7795 | } |
7796 | |
7797 | static void emitTargetParallelForRegion(CodeGenFunction &CGF, |
7798 | const OMPTargetParallelForDirective &S, |
7799 | PrePostActionTy &Action) { |
7800 | Action.Enter(CGF); |
7801 | // Emit directive as a combined directive that consists of two implicit |
7802 | // directives: 'parallel' with 'for' directive. |
7803 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
7804 | Action.Enter(CGF); |
7805 | CodeGenFunction::OMPCancelStackRAII CancelRegion( |
7806 | CGF, OMPD_target_parallel_for, S.hasCancel()); |
7807 | CGF.EmitOMPWorksharingLoop(S, EUB: S.getEnsureUpperBound(), CodeGenLoopBounds: emitForLoopBounds, |
7808 | CGDispatchBounds: emitDispatchForLoopBounds); |
7809 | }; |
7810 | emitCommonOMPParallelDirective(CGF, S, InnermostKind: OMPD_for, CodeGen, |
7811 | CodeGenBoundParameters: emitEmptyBoundParameters); |
7812 | } |
7813 | |
7814 | void CodeGenFunction::EmitOMPTargetParallelForDeviceFunction( |
7815 | CodeGenModule &CGM, StringRef ParentName, |
7816 | const OMPTargetParallelForDirective &S) { |
7817 | // Emit SPMD target parallel for region as a standalone region. |
7818 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
7819 | emitTargetParallelForRegion(CGF, S, Action); |
7820 | }; |
7821 | llvm::Function *Fn; |
7822 | llvm::Constant *Addr; |
7823 | // Emit target region as a standalone region. |
7824 | CGM.getOpenMPRuntime().emitTargetOutlinedFunction( |
7825 | D: S, ParentName, OutlinedFn&: Fn, OutlinedFnID&: Addr, /*IsOffloadEntry=*/true, CodeGen); |
7826 | assert(Fn && Addr && "Target device function emission failed." ); |
7827 | } |
7828 | |
7829 | void CodeGenFunction::EmitOMPTargetParallelForDirective( |
7830 | const OMPTargetParallelForDirective &S) { |
7831 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
7832 | emitTargetParallelForRegion(CGF, S, Action); |
7833 | }; |
7834 | emitCommonOMPTargetDirective(CGF&: *this, S, CodeGen); |
7835 | } |
7836 | |
7837 | static void |
7838 | emitTargetParallelForSimdRegion(CodeGenFunction &CGF, |
7839 | const OMPTargetParallelForSimdDirective &S, |
7840 | PrePostActionTy &Action) { |
7841 | Action.Enter(CGF); |
7842 | // Emit directive as a combined directive that consists of two implicit |
7843 | // directives: 'parallel' with 'for' directive. |
7844 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
7845 | Action.Enter(CGF); |
7846 | CGF.EmitOMPWorksharingLoop(S, EUB: S.getEnsureUpperBound(), CodeGenLoopBounds: emitForLoopBounds, |
7847 | CGDispatchBounds: emitDispatchForLoopBounds); |
7848 | }; |
7849 | emitCommonOMPParallelDirective(CGF, S, InnermostKind: OMPD_simd, CodeGen, |
7850 | CodeGenBoundParameters: emitEmptyBoundParameters); |
7851 | } |
7852 | |
7853 | void CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction( |
7854 | CodeGenModule &CGM, StringRef ParentName, |
7855 | const OMPTargetParallelForSimdDirective &S) { |
7856 | // Emit SPMD target parallel for region as a standalone region. |
7857 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
7858 | emitTargetParallelForSimdRegion(CGF, S, Action); |
7859 | }; |
7860 | llvm::Function *Fn; |
7861 | llvm::Constant *Addr; |
7862 | // Emit target region as a standalone region. |
7863 | CGM.getOpenMPRuntime().emitTargetOutlinedFunction( |
7864 | D: S, ParentName, OutlinedFn&: Fn, OutlinedFnID&: Addr, /*IsOffloadEntry=*/true, CodeGen); |
7865 | assert(Fn && Addr && "Target device function emission failed." ); |
7866 | } |
7867 | |
7868 | void CodeGenFunction::EmitOMPTargetParallelForSimdDirective( |
7869 | const OMPTargetParallelForSimdDirective &S) { |
7870 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
7871 | emitTargetParallelForSimdRegion(CGF, S, Action); |
7872 | }; |
7873 | emitCommonOMPTargetDirective(CGF&: *this, S, CodeGen); |
7874 | } |
7875 | |
7876 | /// Emit a helper variable and return corresponding lvalue. |
7877 | static void mapParam(CodeGenFunction &CGF, const DeclRefExpr *Helper, |
7878 | const ImplicitParamDecl *PVD, |
7879 | CodeGenFunction::OMPPrivateScope &Privates) { |
7880 | const auto *VDecl = cast<VarDecl>(Val: Helper->getDecl()); |
7881 | Privates.addPrivate(LocalVD: VDecl, Addr: CGF.GetAddrOfLocalVar(VD: PVD)); |
7882 | } |
7883 | |
/// Emits a taskloop-based loop directive (the kind is asserted with
/// isOpenMPTaskLoopDirective).
///
/// The function builds two generators, BodyGen for the loop executed by the
/// task and TaskGen for the taskloop runtime call, and passes both to
/// EmitOMPTaskBasedDirective. That call is made directly when Data.Nogroup is
/// set and inside emitTaskgroupRegion otherwise.
7884 | void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) { |
7885 | assert(isOpenMPTaskLoopDirective(S.getDirectiveKind())); |
7886 | // Emit outlined function for task construct. |
7887 | const CapturedStmt *CS = S.getCapturedStmt(RegionKind: OMPD_taskloop); |
7888 | Address CapturedStruct = Address::invalid(); |
// The captured-statement argument is generated inside its own lexical scope,
// constructed with EmitPreInitStmt=false.
7889 | { |
7890 | OMPLexicalScope Scope(*this, S, OMPD_taskloop, /*EmitPreInitStmt=*/false); |
7891 | CapturedStruct = GenerateCapturedStmtArgument(S: *CS); |
7892 | } |
7893 | QualType SharedsTy = getContext().getRecordType(Decl: CS->getCapturedRecordDecl()); |
// Use the condition of the first 'if' clause that has either no
// directive-name modifier or the 'taskloop' modifier.
7894 | const Expr *IfCond = nullptr; |
7895 | for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { |
7896 | if (C->getNameModifier() == OMPD_unknown || |
7897 | C->getNameModifier() == OMPD_taskloop) { |
7898 | IfCond = C->getCondition(); |
7899 | break; |
7900 | } |
7901 | } |
7902 | |
7903 | OMPTaskDataTy Data; |
7904 | // Check if taskloop must be emitted without taskgroup. |
7905 | Data.Nogroup = S.getSingleClause<OMPNogroupClause>(); |
7906 | // TODO: Check if we should emit tied or untied task. |
7907 | Data.Tied = true; |
7908 | // Set scheduling for taskloop |
// Data.Schedule carries the emitted clause value as its pointer and a flag as
// its int: false for 'grainsize', true for 'num_tasks'. Data.HasModifier
// records whether the clause uses the 'strict' modifier. Neither field is
// touched when the directive has neither clause.
7909 | if (const auto *Clause = S.getSingleClause<OMPGrainsizeClause>()) { |
7910 | // grainsize clause |
7911 | Data.Schedule.setInt(/*IntVal=*/false); |
7912 | Data.Schedule.setPointer(EmitScalarExpr(E: Clause->getGrainsize())); |
7913 | Data.HasModifier = |
7914 | (Clause->getModifier() == OMPC_GRAINSIZE_strict) ? true : false; |
7915 | } else if (const auto *Clause = S.getSingleClause<OMPNumTasksClause>()) { |
7916 | // num_tasks clause |
7917 | Data.Schedule.setInt(/*IntVal=*/true); |
7918 | Data.Schedule.setPointer(EmitScalarExpr(E: Clause->getNumTasks())); |
7919 | Data.HasModifier = |
7920 | (Clause->getModifier() == OMPC_NUMTASKS_strict) ? true : false; |
7921 | } |
7922 | |
// Generator for the loop executed by the task; see the sketch that follows.
7923 | auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) { |
7924 | // if (PreCond) { |
7925 | // for (IV in 0..LastIteration) BODY; |
7926 | // <Final counter/linear vars updates>; |
7927 | // } |
7928 | // |
7929 | |
7930 | // Emit: if (PreCond) - begin. |
7931 | // If the condition constant folds and can be elided, avoid emitting the |
7932 | // whole loop. |
7933 | bool CondConstant; |
7934 | llvm::BasicBlock *ContBlock = nullptr; |
7935 | OMPLoopScope PreInitScope(CGF, S); |
7936 | if (CGF.ConstantFoldsToSimpleInteger(Cond: S.getPreCond(), Result&: CondConstant)) { |
7937 | if (!CondConstant) |
7938 | return; |
7939 | } else { |
7940 | llvm::BasicBlock *ThenBlock = CGF.createBasicBlock(name: "taskloop.if.then" ); |
7941 | ContBlock = CGF.createBasicBlock(name: "taskloop.if.end" ); |
7942 | emitPreCond(CGF, S, Cond: S.getPreCond(), TrueBlock: ThenBlock, FalseBlock: ContBlock, |
7943 | TrueCount: CGF.getProfileCount(S: &S)); |
7944 | CGF.EmitBlock(BB: ThenBlock); |
7945 | CGF.incrementProfileCounter(S: &S); |
7946 | } |
7947 | |
7948 | (void)CGF.EmitOMPLinearClauseInit(D: S); |
7949 | |
7950 | OMPPrivateScope LoopScope(CGF); |
7951 | // Emit helper vars inits. |
// The enumerators are offsets into the captured declaration's parameter list
// (starting at 5) at which the lower bound, upper bound, stride and
// last-iteration parameters are read below.
7952 | enum { LowerBound = 5, UpperBound, Stride, LastIter }; |
7953 | auto *I = CS->getCapturedDecl()->param_begin(); |
7954 | auto *LBP = std::next(x: I, n: LowerBound); |
7955 | auto *UBP = std::next(x: I, n: UpperBound); |
7956 | auto *STP = std::next(x: I, n: Stride); |
7957 | auto *LIP = std::next(x: I, n: LastIter); |
// Map each loop helper variable onto the matching captured parameter.
7958 | mapParam(CGF, Helper: cast<DeclRefExpr>(Val: S.getLowerBoundVariable()), PVD: *LBP, |
7959 | Privates&: LoopScope); |
7960 | mapParam(CGF, Helper: cast<DeclRefExpr>(Val: S.getUpperBoundVariable()), PVD: *UBP, |
7961 | Privates&: LoopScope); |
7962 | mapParam(CGF, Helper: cast<DeclRefExpr>(Val: S.getStrideVariable()), PVD: *STP, Privates&: LoopScope); |
7963 | mapParam(CGF, Helper: cast<DeclRefExpr>(Val: S.getIsLastIterVariable()), PVD: *LIP, |
7964 | Privates&: LoopScope); |
7965 | CGF.EmitOMPPrivateLoopCounters(S, LoopScope); |
7966 | CGF.EmitOMPLinearClause(D: S, PrivateScope&: LoopScope); |
7967 | bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(D: S, PrivateScope&: LoopScope); |
7968 | (void)LoopScope.Privatize(); |
7969 | // Emit the loop iteration variable. |
7970 | const Expr *IVExpr = S.getIterationVariable(); |
7971 | const auto *IVDecl = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: IVExpr)->getDecl()); |
7972 | CGF.EmitVarDecl(D: *IVDecl); |
7973 | CGF.EmitIgnoredExpr(E: S.getInit()); |
7974 | |
7975 | // Emit the iterations count variable. |
7976 | // If it is not a variable, Sema decided to calculate iterations count on |
7977 | // each iteration (e.g., it is foldable into a constant). |
7978 | if (const auto *LIExpr = dyn_cast<DeclRefExpr>(Val: S.getLastIteration())) { |
7979 | CGF.EmitVarDecl(D: *cast<VarDecl>(Val: LIExpr->getDecl())); |
7980 | // Emit calculation of the iterations count. |
7981 | CGF.EmitIgnoredExpr(E: S.getCalcLastIteration()); |
7982 | } |
7983 | |
// Emit the loop itself; simd initialization is emitted only for simd
// directive kinds.
7984 | { |
7985 | OMPLexicalScope Scope(CGF, S, OMPD_taskloop, /*EmitPreInitStmt=*/false); |
7986 | emitCommonSimdLoop( |
7987 | CGF, S, |
7988 | SimdInitGen: [&S](CodeGenFunction &CGF, PrePostActionTy &) { |
7989 | if (isOpenMPSimdDirective(DKind: S.getDirectiveKind())) |
7990 | CGF.EmitOMPSimdInit(D: S); |
7991 | }, |
7992 | BodyCodeGen: [&S, &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) { |
7993 | CGF.EmitOMPInnerLoop( |
7994 | S, RequiresCleanup: LoopScope.requiresCleanups(), LoopCond: S.getCond(), IncExpr: S.getInc(), |
7995 | BodyGen: [&S](CodeGenFunction &CGF) { |
7996 | emitOMPLoopBodyWithStopPoint(CGF, S, |
7997 | LoopExit: CodeGenFunction::JumpDest()); |
7998 | }, |
7999 | PostIncGen: [](CodeGenFunction &) {}); |
8000 | }); |
8001 | } |
8002 | // Emit: if (PreCond) - end. |
8003 | if (ContBlock) { |
8004 | CGF.EmitBranch(Block: ContBlock); |
8005 | CGF.EmitBlock(BB: ContBlock, IsFinished: true); |
8006 | } |
8007 | // Emit final copy of the lastprivate variables if IsLastIter != 0. |
8008 | if (HasLastprivateClause) { |
8009 | CGF.EmitOMPLastprivateClauseFinal( |
8010 | D: S, NoFinals: isOpenMPSimdDirective(DKind: S.getDirectiveKind()), |
8011 | IsLastIterCond: CGF.Builder.CreateIsNotNull(Arg: CGF.EmitLoadOfScalar( |
8012 | Addr: CGF.GetAddrOfLocalVar(VD: *LIP), /*Volatile=*/false, |
8013 | Ty: (*LIP)->getType(), Loc: S.getBeginLoc()))); |
8014 | } |
8015 | LoopScope.restoreMap(); |
// The linear-clause finalization is given a condition generator that tests
// the last-iteration parameter for a non-null value.
8016 | CGF.EmitOMPLinearClauseFinal(D: S, CondGen: [LIP, &S](CodeGenFunction &CGF) { |
8017 | return CGF.Builder.CreateIsNotNull( |
8018 | Arg: CGF.EmitLoadOfScalar(Addr: CGF.GetAddrOfLocalVar(VD: *LIP), /*Volatile=*/false, |
8019 | Ty: (*LIP)->getType(), Loc: S.getBeginLoc())); |
8020 | }); |
8021 | }; |
// Generator for the task creation: emits the runtime's taskloop call for the
// outlined function, wrapped in an inlined OMPD_taskloop directive.
8022 | auto &&TaskGen = [&S, SharedsTy, CapturedStruct, |
8023 | IfCond](CodeGenFunction &CGF, llvm::Function *OutlinedFn, |
8024 | const OMPTaskDataTy &Data) { |
8025 | auto &&CodeGen = [&S, OutlinedFn, SharedsTy, CapturedStruct, IfCond, |
8026 | &Data](CodeGenFunction &CGF, PrePostActionTy &) { |
8027 | OMPLoopScope PreInitScope(CGF, S); |
8028 | CGF.CGM.getOpenMPRuntime().emitTaskLoopCall(CGF, Loc: S.getBeginLoc(), D: S, |
8029 | TaskFunction: OutlinedFn, SharedsTy, |
8030 | Shareds: CapturedStruct, IfCond, Data); |
8031 | }; |
8032 | CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, InnermostKind: OMPD_taskloop, |
8033 | CodeGen); |
8034 | }; |
// With Data.Nogroup set the task-based directive is emitted directly;
// otherwise the same emission is wrapped in a taskgroup region.
8035 | if (Data.Nogroup) { |
8036 | EmitOMPTaskBasedDirective(S, CapturedRegion: OMPD_taskloop, BodyGen, TaskGen, Data); |
8037 | } else { |
8038 | CGM.getOpenMPRuntime().emitTaskgroupRegion( |
8039 | CGF&: *this, |
8040 | TaskgroupOpGen: [&S, &BodyGen, &TaskGen, &Data](CodeGenFunction &CGF, |
8041 | PrePostActionTy &Action) { |
8042 | Action.Enter(CGF); |
8043 | CGF.EmitOMPTaskBasedDirective(S, CapturedRegion: OMPD_taskloop, BodyGen, TaskGen, |
8044 | Data); |
8045 | }, |
8046 | Loc: S.getBeginLoc()); |
8047 | } |
8048 | } |
8049 | |
8050 | void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) { |
8051 | auto LPCRegion = |
8052 | CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S); |
8053 | EmitOMPTaskLoopBasedDirective(S); |
8054 | } |
8055 | |
8056 | void CodeGenFunction::EmitOMPTaskLoopSimdDirective( |
8057 | const OMPTaskLoopSimdDirective &S) { |
8058 | auto LPCRegion = |
8059 | CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S); |
8060 | OMPLexicalScope Scope(*this, S); |
8061 | EmitOMPTaskLoopBasedDirective(S); |
8062 | } |
8063 | |
8064 | void CodeGenFunction::EmitOMPMasterTaskLoopDirective( |
8065 | const OMPMasterTaskLoopDirective &S) { |
8066 | auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
8067 | Action.Enter(CGF); |
8068 | EmitOMPTaskLoopBasedDirective(S); |
8069 | }; |
8070 | auto LPCRegion = |
8071 | CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S); |
8072 | OMPLexicalScope Scope(*this, S, std::nullopt, /*EmitPreInitStmt=*/false); |
8073 | CGM.getOpenMPRuntime().emitMasterRegion(CGF&: *this, MasterOpGen: CodeGen, Loc: S.getBeginLoc()); |
8074 | } |
8075 | |
8076 | void CodeGenFunction::EmitOMPMaskedTaskLoopDirective( |
8077 | const OMPMaskedTaskLoopDirective &S) { |
8078 | auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
8079 | Action.Enter(CGF); |
8080 | EmitOMPTaskLoopBasedDirective(S); |
8081 | }; |
8082 | auto LPCRegion = |
8083 | CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S); |
8084 | OMPLexicalScope Scope(*this, S, std::nullopt, /*EmitPreInitStmt=*/false); |
8085 | CGM.getOpenMPRuntime().emitMaskedRegion(CGF&: *this, MaskedOpGen: CodeGen, Loc: S.getBeginLoc()); |
8086 | } |
8087 | |
8088 | void CodeGenFunction::EmitOMPMasterTaskLoopSimdDirective( |
8089 | const OMPMasterTaskLoopSimdDirective &S) { |
8090 | auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
8091 | Action.Enter(CGF); |
8092 | EmitOMPTaskLoopBasedDirective(S); |
8093 | }; |
8094 | auto LPCRegion = |
8095 | CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S); |
8096 | OMPLexicalScope Scope(*this, S); |
8097 | CGM.getOpenMPRuntime().emitMasterRegion(CGF&: *this, MasterOpGen: CodeGen, Loc: S.getBeginLoc()); |
8098 | } |
8099 | |
8100 | void CodeGenFunction::EmitOMPMaskedTaskLoopSimdDirective( |
8101 | const OMPMaskedTaskLoopSimdDirective &S) { |
8102 | auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
8103 | Action.Enter(CGF); |
8104 | EmitOMPTaskLoopBasedDirective(S); |
8105 | }; |
8106 | auto LPCRegion = |
8107 | CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S); |
8108 | OMPLexicalScope Scope(*this, S); |
8109 | CGM.getOpenMPRuntime().emitMaskedRegion(CGF&: *this, MaskedOpGen: CodeGen, Loc: S.getBeginLoc()); |
8110 | } |
8111 | |
8112 | void CodeGenFunction::EmitOMPParallelMasterTaskLoopDirective( |
8113 | const OMPParallelMasterTaskLoopDirective &S) { |
8114 | auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
8115 | auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF, |
8116 | PrePostActionTy &Action) { |
8117 | Action.Enter(CGF); |
8118 | CGF.EmitOMPTaskLoopBasedDirective(S); |
8119 | }; |
8120 | OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false); |
8121 | CGM.getOpenMPRuntime().emitMasterRegion(CGF, MasterOpGen: TaskLoopCodeGen, |
8122 | Loc: S.getBeginLoc()); |
8123 | }; |
8124 | auto LPCRegion = |
8125 | CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S); |
8126 | emitCommonOMPParallelDirective(CGF&: *this, S, InnermostKind: OMPD_master_taskloop, CodeGen, |
8127 | CodeGenBoundParameters: emitEmptyBoundParameters); |
8128 | } |
8129 | |
8130 | void CodeGenFunction::EmitOMPParallelMaskedTaskLoopDirective( |
8131 | const OMPParallelMaskedTaskLoopDirective &S) { |
8132 | auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
8133 | auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF, |
8134 | PrePostActionTy &Action) { |
8135 | Action.Enter(CGF); |
8136 | CGF.EmitOMPTaskLoopBasedDirective(S); |
8137 | }; |
8138 | OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false); |
8139 | CGM.getOpenMPRuntime().emitMaskedRegion(CGF, MaskedOpGen: TaskLoopCodeGen, |
8140 | Loc: S.getBeginLoc()); |
8141 | }; |
8142 | auto LPCRegion = |
8143 | CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S); |
8144 | emitCommonOMPParallelDirective(CGF&: *this, S, InnermostKind: OMPD_masked_taskloop, CodeGen, |
8145 | CodeGenBoundParameters: emitEmptyBoundParameters); |
8146 | } |
8147 | |
8148 | void CodeGenFunction::EmitOMPParallelMasterTaskLoopSimdDirective( |
8149 | const OMPParallelMasterTaskLoopSimdDirective &S) { |
8150 | auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
8151 | auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF, |
8152 | PrePostActionTy &Action) { |
8153 | Action.Enter(CGF); |
8154 | CGF.EmitOMPTaskLoopBasedDirective(S); |
8155 | }; |
8156 | OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false); |
8157 | CGM.getOpenMPRuntime().emitMasterRegion(CGF, MasterOpGen: TaskLoopCodeGen, |
8158 | Loc: S.getBeginLoc()); |
8159 | }; |
8160 | auto LPCRegion = |
8161 | CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S); |
8162 | emitCommonOMPParallelDirective(CGF&: *this, S, InnermostKind: OMPD_master_taskloop_simd, CodeGen, |
8163 | CodeGenBoundParameters: emitEmptyBoundParameters); |
8164 | } |
8165 | |
8166 | void CodeGenFunction::EmitOMPParallelMaskedTaskLoopSimdDirective( |
8167 | const OMPParallelMaskedTaskLoopSimdDirective &S) { |
8168 | auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
8169 | auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF, |
8170 | PrePostActionTy &Action) { |
8171 | Action.Enter(CGF); |
8172 | CGF.EmitOMPTaskLoopBasedDirective(S); |
8173 | }; |
8174 | OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false); |
8175 | CGM.getOpenMPRuntime().emitMaskedRegion(CGF, MaskedOpGen: TaskLoopCodeGen, |
8176 | Loc: S.getBeginLoc()); |
8177 | }; |
8178 | auto LPCRegion = |
8179 | CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S); |
8180 | emitCommonOMPParallelDirective(CGF&: *this, S, InnermostKind: OMPD_masked_taskloop_simd, CodeGen, |
8181 | CodeGenBoundParameters: emitEmptyBoundParameters); |
8182 | } |
8183 | |
8184 | // Generate the instructions for '#pragma omp target update' directive. |
8185 | void CodeGenFunction::EmitOMPTargetUpdateDirective( |
8186 | const OMPTargetUpdateDirective &S) { |
8187 | // If we don't have target devices, don't bother emitting the data mapping |
8188 | // code. |
8189 | if (CGM.getLangOpts().OMPTargetTriples.empty()) |
8190 | return; |
8191 | |
8192 | // Check if we have any if clause associated with the directive. |
8193 | const Expr *IfCond = nullptr; |
8194 | if (const auto *C = S.getSingleClause<OMPIfClause>()) |
8195 | IfCond = C->getCondition(); |
8196 | |
8197 | // Check if we have any device clause associated with the directive. |
8198 | const Expr *Device = nullptr; |
8199 | if (const auto *C = S.getSingleClause<OMPDeviceClause>()) |
8200 | Device = C->getDevice(); |
8201 | |
8202 | OMPLexicalScope Scope(*this, S, OMPD_task); |
8203 | CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(CGF&: *this, D: S, IfCond, Device); |
8204 | } |
8205 | |
8206 | void CodeGenFunction::EmitOMPGenericLoopDirective( |
8207 | const OMPGenericLoopDirective &S) { |
8208 | // Always expect a bind clause on the loop directive. It it wasn't |
8209 | // in the source, it should have been added in sema. |
8210 | |
8211 | OpenMPBindClauseKind BindKind = OMPC_BIND_unknown; |
8212 | if (const auto *C = S.getSingleClause<OMPBindClause>()) |
8213 | BindKind = C->getBindKind(); |
8214 | |
8215 | switch (BindKind) { |
8216 | case OMPC_BIND_parallel: // for |
8217 | return emitOMPForDirective(S, CGF&: *this, CGM, /*HasCancel=*/false); |
8218 | case OMPC_BIND_teams: // distribute |
8219 | return emitOMPDistributeDirective(S, CGF&: *this, CGM); |
8220 | case OMPC_BIND_thread: // simd |
8221 | return emitOMPSimdDirective(S, CGF&: *this, CGM); |
8222 | case OMPC_BIND_unknown: |
8223 | break; |
8224 | } |
8225 | |
8226 | // Unimplemented, just inline the underlying statement for now. |
8227 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
8228 | // Emit the loop iteration variable. |
8229 | const Stmt *CS = |
8230 | cast<CapturedStmt>(Val: S.getAssociatedStmt())->getCapturedStmt(); |
8231 | const auto *ForS = dyn_cast<ForStmt>(Val: CS); |
8232 | if (ForS && !isa<DeclStmt>(Val: ForS->getInit())) { |
8233 | OMPPrivateScope LoopScope(CGF); |
8234 | CGF.EmitOMPPrivateLoopCounters(S, LoopScope); |
8235 | (void)LoopScope.Privatize(); |
8236 | CGF.EmitStmt(S: CS); |
8237 | LoopScope.restoreMap(); |
8238 | } else { |
8239 | CGF.EmitStmt(S: CS); |
8240 | } |
8241 | }; |
8242 | OMPLexicalScope Scope(*this, S, OMPD_unknown); |
8243 | CGM.getOpenMPRuntime().emitInlinedDirective(CGF&: *this, InnermostKind: OMPD_loop, CodeGen); |
8244 | } |
8245 | |
8246 | void CodeGenFunction::EmitOMPParallelGenericLoopDirective( |
8247 | const OMPLoopDirective &S) { |
8248 | // Emit combined directive as if its constituent constructs are 'parallel' |
8249 | // and 'for'. |
8250 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
8251 | Action.Enter(CGF); |
8252 | emitOMPCopyinClause(CGF, S); |
8253 | (void)emitWorksharingDirective(CGF, S, /*HasCancel=*/false); |
8254 | }; |
8255 | { |
8256 | auto LPCRegion = |
8257 | CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S); |
8258 | emitCommonOMPParallelDirective(CGF&: *this, S, InnermostKind: OMPD_for, CodeGen, |
8259 | CodeGenBoundParameters: emitEmptyBoundParameters); |
8260 | } |
8261 | // Check for outer lastprivate conditional update. |
8262 | checkForLastprivateConditionalUpdate(CGF&: *this, S); |
8263 | } |
8264 | |
8265 | void CodeGenFunction::EmitOMPTeamsGenericLoopDirective( |
8266 | const OMPTeamsGenericLoopDirective &S) { |
8267 | // To be consistent with current behavior of 'target teams loop', emit |
8268 | // 'teams loop' as if its constituent constructs are 'teams' and 'distribute'. |
8269 | auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { |
8270 | CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitOMPLoopBodyWithStopPoint, IncExpr: S.getInc()); |
8271 | }; |
8272 | |
8273 | // Emit teams region as a standalone region. |
8274 | auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF, |
8275 | PrePostActionTy &Action) { |
8276 | Action.Enter(CGF); |
8277 | OMPPrivateScope PrivateScope(CGF); |
8278 | CGF.EmitOMPReductionClauseInit(D: S, PrivateScope); |
8279 | (void)PrivateScope.Privatize(); |
8280 | CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, InnermostKind: OMPD_distribute, |
8281 | CodeGen: CodeGenDistribute); |
8282 | CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_teams); |
8283 | }; |
8284 | emitCommonOMPTeamsDirective(CGF&: *this, S, InnermostKind: OMPD_distribute, CodeGen); |
8285 | emitPostUpdateForReductionClause(CGF&: *this, D: S, |
8286 | CondGen: [](CodeGenFunction &) { return nullptr; }); |
8287 | } |
8288 | |
8289 | #ifndef NDEBUG |
8290 | static void emitTargetTeamsLoopCodegenStatus(CodeGenFunction &CGF, |
8291 | std::string StatusMsg, |
8292 | const OMPExecutableDirective &D) { |
8293 | bool IsDevice = CGF.CGM.getLangOpts().OpenMPIsTargetDevice; |
8294 | if (IsDevice) |
8295 | StatusMsg += ": DEVICE" ; |
8296 | else |
8297 | StatusMsg += ": HOST" ; |
8298 | SourceLocation L = D.getBeginLoc(); |
8299 | auto &SM = CGF.getContext().getSourceManager(); |
8300 | PresumedLoc PLoc = SM.getPresumedLoc(L); |
8301 | const char *FileName = PLoc.isValid() ? PLoc.getFilename() : nullptr; |
8302 | unsigned LineNo = |
8303 | PLoc.isValid() ? PLoc.getLine() : SM.getExpansionLineNumber(L); |
8304 | llvm::dbgs() << StatusMsg << ": " << FileName << ": " << LineNo << "\n" ; |
8305 | } |
8306 | #endif |
8307 | |
8308 | static void emitTargetTeamsGenericLoopRegionAsParallel( |
8309 | CodeGenFunction &CGF, PrePostActionTy &Action, |
8310 | const OMPTargetTeamsGenericLoopDirective &S) { |
8311 | Action.Enter(CGF); |
8312 | // Emit 'teams loop' as if its constituent constructs are 'distribute, |
8313 | // 'parallel, and 'for'. |
8314 | auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { |
8315 | CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitInnerParallelForWhenCombined, |
8316 | IncExpr: S.getDistInc()); |
8317 | }; |
8318 | |
8319 | // Emit teams region as a standalone region. |
8320 | auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF, |
8321 | PrePostActionTy &Action) { |
8322 | Action.Enter(CGF); |
8323 | CodeGenFunction::OMPPrivateScope PrivateScope(CGF); |
8324 | CGF.EmitOMPReductionClauseInit(D: S, PrivateScope); |
8325 | (void)PrivateScope.Privatize(); |
8326 | CGF.CGM.getOpenMPRuntime().emitInlinedDirective( |
8327 | CGF, InnermostKind: OMPD_distribute, CodeGen: CodeGenDistribute, /*HasCancel=*/false); |
8328 | CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_teams); |
8329 | }; |
8330 | DEBUG_WITH_TYPE(TTL_CODEGEN_TYPE, |
8331 | emitTargetTeamsLoopCodegenStatus( |
8332 | CGF, TTL_CODEGEN_TYPE " as parallel for" , S)); |
8333 | emitCommonOMPTeamsDirective(CGF, S, InnermostKind: OMPD_distribute_parallel_for, |
8334 | CodeGen: CodeGenTeams); |
8335 | emitPostUpdateForReductionClause(CGF, D: S, |
8336 | CondGen: [](CodeGenFunction &) { return nullptr; }); |
8337 | } |
8338 | |
8339 | static void emitTargetTeamsGenericLoopRegionAsDistribute( |
8340 | CodeGenFunction &CGF, PrePostActionTy &Action, |
8341 | const OMPTargetTeamsGenericLoopDirective &S) { |
8342 | Action.Enter(CGF); |
8343 | // Emit 'teams loop' as if its constituent construct is 'distribute'. |
8344 | auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { |
8345 | CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitOMPLoopBodyWithStopPoint, IncExpr: S.getInc()); |
8346 | }; |
8347 | |
8348 | // Emit teams region as a standalone region. |
8349 | auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF, |
8350 | PrePostActionTy &Action) { |
8351 | Action.Enter(CGF); |
8352 | CodeGenFunction::OMPPrivateScope PrivateScope(CGF); |
8353 | CGF.EmitOMPReductionClauseInit(D: S, PrivateScope); |
8354 | (void)PrivateScope.Privatize(); |
8355 | CGF.CGM.getOpenMPRuntime().emitInlinedDirective( |
8356 | CGF, InnermostKind: OMPD_distribute, CodeGen: CodeGenDistribute, /*HasCancel=*/false); |
8357 | CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_teams); |
8358 | }; |
8359 | DEBUG_WITH_TYPE(TTL_CODEGEN_TYPE, |
8360 | emitTargetTeamsLoopCodegenStatus( |
8361 | CGF, TTL_CODEGEN_TYPE " as distribute" , S)); |
8362 | emitCommonOMPTeamsDirective(CGF, S, InnermostKind: OMPD_distribute, CodeGen); |
8363 | emitPostUpdateForReductionClause(CGF, D: S, |
8364 | CondGen: [](CodeGenFunction &) { return nullptr; }); |
8365 | } |
8366 | |
8367 | void CodeGenFunction::EmitOMPTargetTeamsGenericLoopDirective( |
8368 | const OMPTargetTeamsGenericLoopDirective &S) { |
8369 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
8370 | if (S.canBeParallelFor()) |
8371 | emitTargetTeamsGenericLoopRegionAsParallel(CGF, Action, S); |
8372 | else |
8373 | emitTargetTeamsGenericLoopRegionAsDistribute(CGF, Action, S); |
8374 | }; |
8375 | emitCommonOMPTargetDirective(CGF&: *this, S, CodeGen); |
8376 | } |
8377 | |
8378 | void CodeGenFunction::EmitOMPTargetTeamsGenericLoopDeviceFunction( |
8379 | CodeGenModule &CGM, StringRef ParentName, |
8380 | const OMPTargetTeamsGenericLoopDirective &S) { |
8381 | // Emit SPMD target parallel loop region as a standalone region. |
8382 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
8383 | if (S.canBeParallelFor()) |
8384 | emitTargetTeamsGenericLoopRegionAsParallel(CGF, Action, S); |
8385 | else |
8386 | emitTargetTeamsGenericLoopRegionAsDistribute(CGF, Action, S); |
8387 | }; |
8388 | llvm::Function *Fn; |
8389 | llvm::Constant *Addr; |
8390 | // Emit target region as a standalone region. |
8391 | CGM.getOpenMPRuntime().emitTargetOutlinedFunction( |
8392 | D: S, ParentName, OutlinedFn&: Fn, OutlinedFnID&: Addr, /*IsOffloadEntry=*/true, CodeGen); |
8393 | assert(Fn && Addr && |
8394 | "Target device function emission failed for 'target teams loop'." ); |
8395 | } |
8396 | |
8397 | static void emitTargetParallelGenericLoopRegion( |
8398 | CodeGenFunction &CGF, const OMPTargetParallelGenericLoopDirective &S, |
8399 | PrePostActionTy &Action) { |
8400 | Action.Enter(CGF); |
8401 | // Emit as 'parallel for'. |
8402 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
8403 | Action.Enter(CGF); |
8404 | CodeGenFunction::OMPCancelStackRAII CancelRegion( |
8405 | CGF, OMPD_target_parallel_loop, /*hasCancel=*/false); |
8406 | CGF.EmitOMPWorksharingLoop(S, EUB: S.getEnsureUpperBound(), CodeGenLoopBounds: emitForLoopBounds, |
8407 | CGDispatchBounds: emitDispatchForLoopBounds); |
8408 | }; |
8409 | emitCommonOMPParallelDirective(CGF, S, InnermostKind: OMPD_for, CodeGen, |
8410 | CodeGenBoundParameters: emitEmptyBoundParameters); |
8411 | } |
8412 | |
8413 | void CodeGenFunction::EmitOMPTargetParallelGenericLoopDeviceFunction( |
8414 | CodeGenModule &CGM, StringRef ParentName, |
8415 | const OMPTargetParallelGenericLoopDirective &S) { |
8416 | // Emit target parallel loop region as a standalone region. |
8417 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
8418 | emitTargetParallelGenericLoopRegion(CGF, S, Action); |
8419 | }; |
8420 | llvm::Function *Fn; |
8421 | llvm::Constant *Addr; |
8422 | // Emit target region as a standalone region. |
8423 | CGM.getOpenMPRuntime().emitTargetOutlinedFunction( |
8424 | D: S, ParentName, OutlinedFn&: Fn, OutlinedFnID&: Addr, /*IsOffloadEntry=*/true, CodeGen); |
8425 | assert(Fn && Addr && "Target device function emission failed." ); |
8426 | } |
8427 | |
8428 | /// Emit combined directive 'target parallel loop' as if its constituent |
8429 | /// constructs are 'target', 'parallel', and 'for'. |
8430 | void CodeGenFunction::EmitOMPTargetParallelGenericLoopDirective( |
8431 | const OMPTargetParallelGenericLoopDirective &S) { |
8432 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
8433 | emitTargetParallelGenericLoopRegion(CGF, S, Action); |
8434 | }; |
8435 | emitCommonOMPTargetDirective(CGF&: *this, S, CodeGen); |
8436 | } |
8437 | |
8438 | void CodeGenFunction::EmitSimpleOMPExecutableDirective( |
8439 | const OMPExecutableDirective &D) { |
8440 | if (const auto *SD = dyn_cast<OMPScanDirective>(Val: &D)) { |
8441 | EmitOMPScanDirective(S: *SD); |
8442 | return; |
8443 | } |
8444 | if (!D.hasAssociatedStmt() || !D.getAssociatedStmt()) |
8445 | return; |
8446 | auto &&CodeGen = [&D](CodeGenFunction &CGF, PrePostActionTy &Action) { |
8447 | OMPPrivateScope GlobalsScope(CGF); |
8448 | if (isOpenMPTaskingDirective(Kind: D.getDirectiveKind())) { |
8449 | // Capture global firstprivates to avoid crash. |
8450 | for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) { |
8451 | for (const Expr *Ref : C->varlist()) { |
8452 | const auto *DRE = cast<DeclRefExpr>(Val: Ref->IgnoreParenImpCasts()); |
8453 | if (!DRE) |
8454 | continue; |
8455 | const auto *VD = dyn_cast<VarDecl>(Val: DRE->getDecl()); |
8456 | if (!VD || VD->hasLocalStorage()) |
8457 | continue; |
8458 | if (!CGF.LocalDeclMap.count(Val: VD)) { |
8459 | LValue GlobLVal = CGF.EmitLValue(E: Ref); |
8460 | GlobalsScope.addPrivate(LocalVD: VD, Addr: GlobLVal.getAddress()); |
8461 | } |
8462 | } |
8463 | } |
8464 | } |
8465 | if (isOpenMPSimdDirective(DKind: D.getDirectiveKind())) { |
8466 | (void)GlobalsScope.Privatize(); |
8467 | ParentLoopDirectiveForScanRegion ScanRegion(CGF, D); |
8468 | emitOMPSimdRegion(CGF, S: cast<OMPLoopDirective>(Val: D), Action); |
8469 | } else { |
8470 | if (const auto *LD = dyn_cast<OMPLoopDirective>(Val: &D)) { |
8471 | for (const Expr *E : LD->counters()) { |
8472 | const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl()); |
8473 | if (!VD->hasLocalStorage() && !CGF.LocalDeclMap.count(Val: VD)) { |
8474 | LValue GlobLVal = CGF.EmitLValue(E); |
8475 | GlobalsScope.addPrivate(LocalVD: VD, Addr: GlobLVal.getAddress()); |
8476 | } |
8477 | if (isa<OMPCapturedExprDecl>(Val: VD)) { |
8478 | // Emit only those that were not explicitly referenced in clauses. |
8479 | if (!CGF.LocalDeclMap.count(Val: VD)) |
8480 | CGF.EmitVarDecl(D: *VD); |
8481 | } |
8482 | } |
8483 | for (const auto *C : D.getClausesOfKind<OMPOrderedClause>()) { |
8484 | if (!C->getNumForLoops()) |
8485 | continue; |
8486 | for (unsigned I = LD->getLoopsNumber(), |
8487 | E = C->getLoopNumIterations().size(); |
8488 | I < E; ++I) { |
8489 | if (const auto *VD = dyn_cast<OMPCapturedExprDecl>( |
8490 | Val: cast<DeclRefExpr>(Val: C->getLoopCounter(NumLoop: I))->getDecl())) { |
8491 | // Emit only those that were not explicitly referenced in clauses. |
8492 | if (!CGF.LocalDeclMap.count(Val: VD)) |
8493 | CGF.EmitVarDecl(D: *VD); |
8494 | } |
8495 | } |
8496 | } |
8497 | } |
8498 | (void)GlobalsScope.Privatize(); |
8499 | CGF.EmitStmt(S: D.getInnermostCapturedStmt()->getCapturedStmt()); |
8500 | } |
8501 | }; |
8502 | if (D.getDirectiveKind() == OMPD_atomic || |
8503 | D.getDirectiveKind() == OMPD_critical || |
8504 | D.getDirectiveKind() == OMPD_section || |
8505 | D.getDirectiveKind() == OMPD_master || |
8506 | D.getDirectiveKind() == OMPD_masked || |
8507 | D.getDirectiveKind() == OMPD_unroll || |
8508 | D.getDirectiveKind() == OMPD_assume) { |
8509 | EmitStmt(S: D.getAssociatedStmt()); |
8510 | } else { |
8511 | auto LPCRegion = |
8512 | CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S: D); |
8513 | OMPSimdLexicalScope Scope(*this, D); |
8514 | CGM.getOpenMPRuntime().emitInlinedDirective( |
8515 | CGF&: *this, |
8516 | InnermostKind: isOpenMPSimdDirective(DKind: D.getDirectiveKind()) ? OMPD_simd |
8517 | : D.getDirectiveKind(), |
8518 | CodeGen); |
8519 | } |
8520 | // Check for outer lastprivate conditional update. |
8521 | checkForLastprivateConditionalUpdate(CGF&: *this, S: D); |
8522 | } |
8523 | |
8524 | void CodeGenFunction::EmitOMPAssumeDirective(const OMPAssumeDirective &S) { |
8525 | EmitStmt(S: S.getAssociatedStmt()); |
8526 | } |
8527 | |