//===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This contains code to emit OpenMP nodes as LLVM code.
//
//===----------------------------------------------------------------------===//

#include "CGCleanup.h"
#include "CGOpenMPRuntime.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "TargetInfo.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Attr.h"
#include "clang/AST/DeclOpenMP.h"
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/Stmt.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/PrettyStackTrace.h"
#include "clang/Basic/SourceManager.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/Frontend/OpenMP/OMPConstants.h"
#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Metadata.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Debug.h"
#include <optional>
using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;

#define TTL_CODEGEN_TYPE "target-teams-loop-codegen"

static const VarDecl *getBaseDecl(const Expr *Ref);

namespace {
/// Lexical scope for OpenMP executable constructs that handles correct codegen
/// for captured expressions.
class OMPLexicalScope : public CodeGenFunction::LexicalScope {
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
    for (const auto *C : S.clauses()) {
      if (const auto *CPI = OMPClauseWithPreInit::get(C)) {
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
      }
    }
  }
  CodeGenFunction::OMPPrivateScope InlinedShareds;

  static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
    return CGF.LambdaCaptureFields.lookup(VD) ||
           (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
           (isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl) &&
            cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD));
  }

public:
  OMPLexicalScope(
      CodeGenFunction &CGF, const OMPExecutableDirective &S,
      const std::optional<OpenMPDirectiveKind> CapturedRegion = std::nullopt,
      const bool EmitPreInitStmt = true)
      : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
        InlinedShareds(CGF) {
    if (EmitPreInitStmt)
      emitPreInitStmt(CGF, S);
    if (!CapturedRegion)
      return;
    assert(S.hasAssociatedStmt() &&
           "Expected associated statement for inlined directive.");
    const CapturedStmt *CS = S.getCapturedStmt(*CapturedRegion);
    for (const auto &C : CS->captures()) {
      if (C.capturesVariable() || C.capturesVariableByCopy()) {
        auto *VD = C.getCapturedVar();
        assert(VD == VD->getCanonicalDecl() &&
               "Canonical decl must be captured.");
        DeclRefExpr DRE(
            CGF.getContext(), const_cast<VarDecl *>(VD),
            isCapturedVar(CGF, VD) || (CGF.CapturedStmtInfo &&
                                       InlinedShareds.isGlobalVarCaptured(VD)),
            VD->getType().getNonReferenceType(), VK_LValue, C.getLocation());
        InlinedShareds.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress());
      }
    }
    (void)InlinedShareds.Privatize();
  }
};

/// Lexical scope for the OpenMP parallel construct that handles correct
/// codegen for captured expressions.
class OMPParallelScope final : public OMPLexicalScope {
  bool EmitPreInitStmt(const OMPExecutableDirective &S) {
    OpenMPDirectiveKind Kind = S.getDirectiveKind();
    return !(isOpenMPTargetExecutionDirective(Kind) ||
             isOpenMPLoopBoundSharingDirective(Kind)) &&
           isOpenMPParallelDirective(Kind);
  }

public:
  OMPParallelScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : OMPLexicalScope(CGF, S, /*CapturedRegion=*/std::nullopt,
                        EmitPreInitStmt(S)) {}
};

/// Lexical scope for the OpenMP teams construct that handles correct codegen
/// for captured expressions.
class OMPTeamsScope final : public OMPLexicalScope {
  bool EmitPreInitStmt(const OMPExecutableDirective &S) {
    OpenMPDirectiveKind Kind = S.getDirectiveKind();
    return !isOpenMPTargetExecutionDirective(Kind) &&
           isOpenMPTeamsDirective(Kind);
  }

public:
  OMPTeamsScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : OMPLexicalScope(CGF, S, /*CapturedRegion=*/std::nullopt,
                        EmitPreInitStmt(S)) {}
};

/// Private scope for OpenMP loop-based directives that supports capturing
/// of expressions used in the loop statement.
class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopBasedDirective &S) {
    const Stmt *PreInits;
    CodeGenFunction::OMPMapVars PreCondVars;
    if (auto *LD = dyn_cast<OMPLoopDirective>(&S)) {
      llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
      for (const auto *E : LD->counters()) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        EmittedAsPrivate.insert(VD->getCanonicalDecl());
        (void)PreCondVars.setVarAddr(
            CGF, VD, CGF.CreateMemTemp(VD->getType().getNonReferenceType()));
      }
      // Mark private vars as undefs.
      for (const auto *C : LD->getClausesOfKind<OMPPrivateClause>()) {
        for (const Expr *IRef : C->varlists()) {
          const auto *OrigVD =
              cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
          if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
            QualType OrigVDTy = OrigVD->getType().getNonReferenceType();
            (void)PreCondVars.setVarAddr(
                CGF, OrigVD,
                Address(llvm::UndefValue::get(CGF.ConvertTypeForMem(
                            CGF.getContext().getPointerType(OrigVDTy))),
                        CGF.ConvertTypeForMem(OrigVDTy),
                        CGF.getContext().getDeclAlign(OrigVD)));
          }
        }
      }
      (void)PreCondVars.apply(CGF);
      // Emit init, __range and __end variables for C++ range loops.
      (void)OMPLoopBasedDirective::doForAllLoops(
          LD->getInnermostCapturedStmt()->getCapturedStmt(),
          /*TryImperfectlyNestedLoops=*/true, LD->getLoopsNumber(),
          [&CGF](unsigned Cnt, const Stmt *CurStmt) {
            if (const auto *CXXFor = dyn_cast<CXXForRangeStmt>(CurStmt)) {
              if (const Stmt *Init = CXXFor->getInit())
                CGF.EmitStmt(Init);
              CGF.EmitStmt(CXXFor->getRangeStmt());
              CGF.EmitStmt(CXXFor->getEndStmt());
            }
            return false;
          });
      PreInits = LD->getPreInits();
    } else if (const auto *Tile = dyn_cast<OMPTileDirective>(&S)) {
      PreInits = Tile->getPreInits();
    } else if (const auto *Unroll = dyn_cast<OMPUnrollDirective>(&S)) {
      PreInits = Unroll->getPreInits();
    } else if (const auto *Reverse = dyn_cast<OMPReverseDirective>(&S)) {
      PreInits = Reverse->getPreInits();
    } else if (const auto *Interchange =
                   dyn_cast<OMPInterchangeDirective>(&S)) {
      PreInits = Interchange->getPreInits();
    } else {
      llvm_unreachable("Unknown loop-based directive kind.");
    }
    if (PreInits) {
      // CompoundStmts and DeclStmts are used as lists of PreInit statements
      // and declarations. Since declarations must be visible in the scope of
      // the statements that follow them, unpack the CompoundStmt they are
      // nested in.
      SmallVector<const Stmt *> PreInitStmts;
      if (auto *PreInitCompound = dyn_cast<CompoundStmt>(PreInits))
        llvm::append_range(PreInitStmts, PreInitCompound->body());
      else
        PreInitStmts.push_back(PreInits);

      for (const Stmt *S : PreInitStmts) {
        // EmitStmt skips any OMPCapturedExprDecls, but they need to be
        // emitted here.
        if (auto *PreInitDecl = dyn_cast<DeclStmt>(S)) {
          for (Decl *I : PreInitDecl->decls())
            CGF.EmitVarDecl(cast<VarDecl>(*I));
          continue;
        }
        CGF.EmitStmt(S);
      }
    }
    PreCondVars.restore(CGF);
  }

public:
  OMPLoopScope(CodeGenFunction &CGF, const OMPLoopBasedDirective &S)
      : CodeGenFunction::RunCleanupsScope(CGF) {
    emitPreInitStmt(CGF, S);
  }
};

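/// Lexical scope for OpenMP simd-mode codegen: emits the pre-init statements
/// of the clauses and privatizes the variables captured by the associated
/// statement so that captured expressions are evaluated correctly when the
/// construct is emitted inlined.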
class OMPSimdLexicalScope : public CodeGenFunction::LexicalScope {
  CodeGenFunction::OMPPrivateScope InlinedShareds;

  static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
    return CGF.LambdaCaptureFields.lookup(VD) ||
           (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
           (isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl) &&
            cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD));
  }

public:
  OMPSimdLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
        InlinedShareds(CGF) {
    for (const auto *C : S.clauses()) {
      if (const auto *CPI = OMPClauseWithPreInit::get(C)) {
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
      } else if (const auto *UDP = dyn_cast<OMPUseDevicePtrClause>(C)) {
        for (const Expr *E : UDP->varlists()) {
          const Decl *D = cast<DeclRefExpr>(E)->getDecl();
          if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
            CGF.EmitVarDecl(*OED);
        }
      } else if (const auto *UDP = dyn_cast<OMPUseDeviceAddrClause>(C)) {
        for (const Expr *E : UDP->varlists()) {
          const Decl *D = getBaseDecl(E);
          if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
            CGF.EmitVarDecl(*OED);
        }
      }
    }
    if (!isOpenMPSimdDirective(S.getDirectiveKind()))
      CGF.EmitOMPPrivateClause(S, InlinedShareds);
    if (const auto *TG = dyn_cast<OMPTaskgroupDirective>(&S)) {
      if (const Expr *E = TG->getReductionRef())
        CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()));
    }
    // Temp copy arrays for inscan reductions should not be emitted as they are
    // not used in simd-only mode.
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> CopyArrayTemps;
    for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
      if (C->getModifier() != OMPC_REDUCTION_inscan)
        continue;
      for (const Expr *E : C->copy_array_temps())
        CopyArrayTemps.insert(cast<DeclRefExpr>(E)->getDecl());
    }
    const auto *CS = cast_or_null<CapturedStmt>(S.getAssociatedStmt());
    while (CS) {
      for (auto &C : CS->captures()) {
        if (C.capturesVariable() || C.capturesVariableByCopy()) {
          auto *VD = C.getCapturedVar();
          if (CopyArrayTemps.contains(VD))
            continue;
          assert(VD == VD->getCanonicalDecl() &&
                 "Canonical decl must be captured.");
          DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                          isCapturedVar(CGF, VD) ||
                              (CGF.CapturedStmtInfo &&
                               InlinedShareds.isGlobalVarCaptured(VD)),
                          VD->getType().getNonReferenceType(), VK_LValue,
                          C.getLocation());
          InlinedShareds.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress());
        }
      }
      CS = dyn_cast<CapturedStmt>(CS->getCapturedStmt());
    }
    (void)InlinedShareds.Privatize();
  }
};

} // namespace

static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
                                         const OMPExecutableDirective &S,
                                         const RegionCodeGenTy &CodeGen);

LValue CodeGenFunction::EmitOMPSharedLValue(const Expr *E) {
  if (const auto *OrigDRE = dyn_cast<DeclRefExpr>(E)) {
    if (const auto *OrigVD = dyn_cast<VarDecl>(OrigDRE->getDecl())) {
      OrigVD = OrigVD->getCanonicalDecl();
      bool IsCaptured =
          LambdaCaptureFields.lookup(OrigVD) ||
          (CapturedStmtInfo && CapturedStmtInfo->lookup(OrigVD)) ||
          (isa_and_nonnull<BlockDecl>(CurCodeDecl));
      DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD), IsCaptured,
                      OrigDRE->getType(), VK_LValue, OrigDRE->getExprLoc());
      return EmitLValue(&DRE);
    }
  }
  return EmitLValue(E);
}

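/// Compute the runtime size in bytes of \p Ty, multiplying in the dynamic
/// extents of any variable-length array dimensions.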
llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) {
  ASTContext &C = getContext();
  llvm::Value *Size = nullptr;
  auto SizeInChars = C.getTypeSizeInChars(Ty);
  if (SizeInChars.isZero()) {
    // getTypeSizeInChars() returns 0 for a VLA.
    while (const VariableArrayType *VAT = C.getAsVariableArrayType(Ty)) {
      VlaSizePair VlaSize = getVLASize(VAT);
      Ty = VlaSize.Type;
      Size =
          Size ? Builder.CreateNUWMul(Size, VlaSize.NumElts) : VlaSize.NumElts;
    }
    SizeInChars = C.getTypeSizeInChars(Ty);
    if (SizeInChars.isZero())
      return llvm::ConstantInt::get(SizeTy, /*V=*/0);
    return Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars));
  }
  return CGM.getSize(SizeInChars);
}

void CodeGenFunction::GenerateOpenMPCapturedVars(
    const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) {
  const RecordDecl *RD = S.getCapturedRecordDecl();
  auto CurField = RD->field_begin();
  auto CurCap = S.captures().begin();
  for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(),
                                                 E = S.capture_init_end();
       I != E; ++I, ++CurField, ++CurCap) {
    if (CurField->hasCapturedVLAType()) {
      const VariableArrayType *VAT = CurField->getCapturedVLAType();
      llvm::Value *Val = VLASizeMap[VAT->getSizeExpr()];
      CapturedVars.push_back(Val);
    } else if (CurCap->capturesThis()) {
      CapturedVars.push_back(CXXThisValue);
    } else if (CurCap->capturesVariableByCopy()) {
      llvm::Value *CV = EmitLoadOfScalar(EmitLValue(*I), CurCap->getLocation());

      // If the field is not a pointer, we need to save the actual value
      // and load it as a void pointer.
      if (!CurField->getType()->isAnyPointerType()) {
        ASTContext &Ctx = getContext();
        Address DstAddr = CreateMemTemp(
            Ctx.getUIntPtrType(),
            Twine(CurCap->getCapturedVar()->getName(), ".casted"));
        LValue DstLV = MakeAddrLValue(DstAddr, Ctx.getUIntPtrType());

        llvm::Value *SrcAddrVal = EmitScalarConversion(
            DstAddr.emitRawPointer(*this),
            Ctx.getPointerType(Ctx.getUIntPtrType()),
            Ctx.getPointerType(CurField->getType()), CurCap->getLocation());
        LValue SrcLV =
            MakeNaturalAlignAddrLValue(SrcAddrVal, CurField->getType());

        // Store the value using the source type pointer.
        EmitStoreThroughLValue(RValue::get(CV), SrcLV);

        // Load the value using the destination type pointer.
        CV = EmitLoadOfScalar(DstLV, CurCap->getLocation());
      }
      CapturedVars.push_back(CV);
    } else {
      assert(CurCap->capturesVariable() && "Expected capture by reference.");
      CapturedVars.push_back(
          EmitLValue(*I).getAddress().emitRawPointer(*this));
    }
  }
}

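/// Reinterpret an lvalue holding a value that was passed as a uintptr back to
/// an address of the original type \p DstType.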
static Address castValueFromUintptr(CodeGenFunction &CGF, SourceLocation Loc,
                                    QualType DstType, StringRef Name,
                                    LValue AddrLV) {
  ASTContext &Ctx = CGF.getContext();

  llvm::Value *CastedPtr = CGF.EmitScalarConversion(
      AddrLV.getAddress().emitRawPointer(CGF), Ctx.getUIntPtrType(),
      Ctx.getPointerType(DstType), Loc);
  // FIXME: should the pointee type (DstType) be passed?
  Address TmpAddr =
      CGF.MakeNaturalAlignAddrLValue(CastedPtr, DstType).getAddress();
  return TmpAddr;
}

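/// Compute a canonical parameter type for an outlined function argument of
/// type \p T, looking through references, pointers, and variably-modified
/// array types.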
static QualType getCanonicalParamType(ASTContext &C, QualType T) {
  if (T->isLValueReferenceType())
    return C.getLValueReferenceType(
        getCanonicalParamType(C, T.getNonReferenceType()),
        /*SpelledAsLValue=*/false);
  if (T->isPointerType())
    return C.getPointerType(getCanonicalParamType(C, T->getPointeeType()));
  if (const ArrayType *A = T->getAsArrayTypeUnsafe()) {
    if (const auto *VLA = dyn_cast<VariableArrayType>(A))
      return getCanonicalParamType(C, VLA->getElementType());
    if (!A->isVariablyModifiedType())
      return C.getCanonicalType(T);
  }
  return C.getCanonicalParamType(T);
}

namespace {
/// Contains required data for proper outlined function codegen.
struct FunctionOptions {
  /// Captured statement for which the function is generated.
  const CapturedStmt *S = nullptr;
  /// true if cast to/from UIntPtr is required for variables captured by
  /// value.
  const bool UIntPtrCastRequired = true;
  /// true if only casted arguments must be registered as local args or VLA
  /// sizes.
  const bool RegisterCastedArgsOnly = false;
  /// Name of the generated function.
  const StringRef FunctionName;
  /// Location of the non-debug version of the outlined function.
  SourceLocation Loc;
  explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired,
                           bool RegisterCastedArgsOnly, StringRef FunctionName,
                           SourceLocation Loc)
      : S(S), UIntPtrCastRequired(UIntPtrCastRequired),
        RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly),
        FunctionName(FunctionName), Loc(Loc) {}
};
} // namespace

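/// Emit the prologue of the outlined function for a captured statement: build
/// the argument list from the captured record, create the llvm::Function,
/// start its body, and record the local address and VLA size of every
/// captured entity in \p LocalAddrs and \p VLASizes.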
static llvm::Function *emitOutlinedFunctionPrologue(
    CodeGenFunction &CGF, FunctionArgList &Args,
    llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>>
        &LocalAddrs,
    llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>>
        &VLASizes,
    llvm::Value *&CXXThisValue, const FunctionOptions &FO) {
  const CapturedDecl *CD = FO.S->getCapturedDecl();
  const RecordDecl *RD = FO.S->getCapturedRecordDecl();
  assert(CD->hasBody() && "missing CapturedDecl body");

  CXXThisValue = nullptr;
  // Build the argument list.
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGM.getContext();
  FunctionArgList TargetArgs;
  Args.append(CD->param_begin(),
              std::next(CD->param_begin(), CD->getContextParamPosition()));
  TargetArgs.append(
      CD->param_begin(),
      std::next(CD->param_begin(), CD->getContextParamPosition()));
  auto I = FO.S->captures().begin();
  FunctionDecl *DebugFunctionDecl = nullptr;
  if (!FO.UIntPtrCastRequired) {
    FunctionProtoType::ExtProtoInfo EPI;
    QualType FunctionTy = Ctx.getFunctionType(Ctx.VoidTy, std::nullopt, EPI);
    DebugFunctionDecl = FunctionDecl::Create(
        Ctx, Ctx.getTranslationUnitDecl(), FO.S->getBeginLoc(),
        SourceLocation(), DeclarationName(), FunctionTy,
        Ctx.getTrivialTypeSourceInfo(FunctionTy), SC_Static,
        /*UsesFPIntrin=*/false, /*isInlineSpecified=*/false,
        /*hasWrittenPrototype=*/false);
  }
  for (const FieldDecl *FD : RD->fields()) {
    QualType ArgType = FD->getType();
    IdentifierInfo *II = nullptr;
    VarDecl *CapVar = nullptr;

    // If this is a capture by copy and the type is not a pointer, the outlined
    // function argument type should be uintptr and the value properly cast to
    // uintptr. This is necessary given that the runtime library is only able
    // to deal with pointers. VLA type sizes are passed to the outlined
    // function in the same way.
    if (FO.UIntPtrCastRequired &&
        ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) ||
         I->capturesVariableArrayType()))
      ArgType = Ctx.getUIntPtrType();

    if (I->capturesVariable() || I->capturesVariableByCopy()) {
      CapVar = I->getCapturedVar();
      II = CapVar->getIdentifier();
    } else if (I->capturesThis()) {
      II = &Ctx.Idents.get("this");
    } else {
      assert(I->capturesVariableArrayType());
      II = &Ctx.Idents.get("vla");
    }
    if (ArgType->isVariablyModifiedType())
      ArgType = getCanonicalParamType(Ctx, ArgType);
    VarDecl *Arg;
    if (CapVar && (CapVar->getTLSKind() != clang::VarDecl::TLS_None)) {
      Arg = ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(),
                                      II, ArgType,
                                      ImplicitParamKind::ThreadPrivateVar);
    } else if (DebugFunctionDecl && (CapVar || I->capturesThis())) {
      Arg = ParmVarDecl::Create(
          Ctx, DebugFunctionDecl,
          CapVar ? CapVar->getBeginLoc() : FD->getBeginLoc(),
          CapVar ? CapVar->getLocation() : FD->getLocation(), II, ArgType,
          /*TInfo=*/nullptr, SC_None, /*DefArg=*/nullptr);
    } else {
      Arg = ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(),
                                      II, ArgType, ImplicitParamKind::Other);
    }
    Args.emplace_back(Arg);
    // Do not cast arguments if we emit the function with non-original types.
    TargetArgs.emplace_back(
        FO.UIntPtrCastRequired
            ? Arg
            : CGM.getOpenMPRuntime().translateParameter(FD, Arg));
    ++I;
  }
  Args.append(std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
              CD->param_end());
  TargetArgs.append(
      std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
      CD->param_end());

  // Create the function declaration.
  const CGFunctionInfo &FuncInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, TargetArgs);
  llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);

  auto *F =
      llvm::Function::Create(FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
                             FO.FunctionName, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
  if (CD->isNothrow())
    F->setDoesNotThrow();
  F->setDoesNotRecurse();

  // Always inline the outlined function if optimizations are enabled.
  if (CGM.getCodeGenOpts().OptimizationLevel != 0) {
    F->removeFnAttr(llvm::Attribute::NoInline);
    F->addFnAttr(llvm::Attribute::AlwaysInline);
  }

  // Generate the function.
  CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, TargetArgs,
                    FO.UIntPtrCastRequired ? FO.Loc : FO.S->getBeginLoc(),
                    FO.UIntPtrCastRequired ? FO.Loc
                                           : CD->getBody()->getBeginLoc());
  unsigned Cnt = CD->getContextParamPosition();
  I = FO.S->captures().begin();
  for (const FieldDecl *FD : RD->fields()) {
    // Do not map arguments if we emit the function with non-original types.
    Address LocalAddr(Address::invalid());
    if (!FO.UIntPtrCastRequired && Args[Cnt] != TargetArgs[Cnt]) {
      LocalAddr = CGM.getOpenMPRuntime().getParameterAddress(CGF, Args[Cnt],
                                                             TargetArgs[Cnt]);
    } else {
      LocalAddr = CGF.GetAddrOfLocalVar(Args[Cnt]);
    }
    // If we are capturing a pointer by copy we don't need to do anything, just
    // use the value that we get from the arguments.
    if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
      const VarDecl *CurVD = I->getCapturedVar();
      if (!FO.RegisterCastedArgsOnly)
        LocalAddrs.insert({Args[Cnt], {CurVD, LocalAddr}});
      ++Cnt;
      ++I;
      continue;
    }

    LValue ArgLVal = CGF.MakeAddrLValue(LocalAddr, Args[Cnt]->getType(),
                                        AlignmentSource::Decl);
    if (FD->hasCapturedVLAType()) {
      if (FO.UIntPtrCastRequired) {
        ArgLVal = CGF.MakeAddrLValue(
            castValueFromUintptr(CGF, I->getLocation(), FD->getType(),
                                 Args[Cnt]->getName(), ArgLVal),
            FD->getType(), AlignmentSource::Decl);
      }
      llvm::Value *ExprArg = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
      const VariableArrayType *VAT = FD->getCapturedVLAType();
      VLASizes.try_emplace(Args[Cnt], VAT->getSizeExpr(), ExprArg);
    } else if (I->capturesVariable()) {
      const VarDecl *Var = I->getCapturedVar();
      QualType VarTy = Var->getType();
      Address ArgAddr = ArgLVal.getAddress();
      if (ArgLVal.getType()->isLValueReferenceType()) {
        ArgAddr = CGF.EmitLoadOfReference(ArgLVal);
      } else if (!VarTy->isVariablyModifiedType() || !VarTy->isPointerType()) {
        assert(ArgLVal.getType()->isPointerType());
        ArgAddr = CGF.EmitLoadOfPointer(
            ArgAddr, ArgLVal.getType()->castAs<PointerType>());
      }
      if (!FO.RegisterCastedArgsOnly) {
        LocalAddrs.insert(
            {Args[Cnt], {Var, ArgAddr.withAlignment(Ctx.getDeclAlign(Var))}});
      }
    } else if (I->capturesVariableByCopy()) {
      assert(!FD->getType()->isAnyPointerType() &&
             "Not expecting a captured pointer.");
      const VarDecl *Var = I->getCapturedVar();
      LocalAddrs.insert({Args[Cnt],
                         {Var, FO.UIntPtrCastRequired
                                   ? castValueFromUintptr(
                                         CGF, I->getLocation(), FD->getType(),
                                         Args[Cnt]->getName(), ArgLVal)
                                   : ArgLVal.getAddress()}});
    } else {
      // If 'this' is captured, load it into CXXThisValue.
      assert(I->capturesThis());
      CXXThisValue = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
      LocalAddrs.insert({Args[Cnt], {nullptr, ArgLVal.getAddress()}});
    }
    ++Cnt;
    ++I;
  }

  return F;
}

llvm::Function *
CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S,
                                                    SourceLocation Loc) {
  assert(
      CapturedStmtInfo &&
      "CapturedStmtInfo should be set when generating the captured function");
  const CapturedDecl *CD = S.getCapturedDecl();
  // Build the argument list.
  bool NeedWrapperFunction =
      getDebugInfo() && CGM.getCodeGenOpts().hasReducedDebugInfo();
  FunctionArgList Args;
  llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs;
  llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes;
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << CapturedStmtInfo->getHelperName();
  if (NeedWrapperFunction)
    Out << "_debug__";
  FunctionOptions FO(&S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false,
                     Out.str(), Loc);
  llvm::Function *F = emitOutlinedFunctionPrologue(*this, Args, LocalAddrs,
                                                   VLASizes, CXXThisValue, FO);
  CodeGenFunction::OMPPrivateScope LocalScope(*this);
  for (const auto &LocalAddrPair : LocalAddrs) {
    if (LocalAddrPair.second.first) {
      LocalScope.addPrivate(LocalAddrPair.second.first,
                            LocalAddrPair.second.second);
    }
  }
  (void)LocalScope.Privatize();
  for (const auto &VLASizePair : VLASizes)
    VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second;
  PGO.assignRegionCounters(GlobalDecl(CD), F);
  CapturedStmtInfo->EmitBody(*this, CD->getBody());
  (void)LocalScope.ForceCleanup();
  FinishFunction(CD->getBodyRBrace());
  if (!NeedWrapperFunction)
    return F;

  FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true,
                            /*RegisterCastedArgsOnly=*/true,
                            CapturedStmtInfo->getHelperName(), Loc);
  CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true);
  WrapperCGF.CapturedStmtInfo = CapturedStmtInfo;
  Args.clear();
  LocalAddrs.clear();
  VLASizes.clear();
  llvm::Function *WrapperF =
      emitOutlinedFunctionPrologue(WrapperCGF, Args, LocalAddrs, VLASizes,
                                   WrapperCGF.CXXThisValue, WrapperFO);
  llvm::SmallVector<llvm::Value *, 4> CallArgs;
  auto *PI = F->arg_begin();
  for (const auto *Arg : Args) {
    llvm::Value *CallArg;
    auto I = LocalAddrs.find(Arg);
    if (I != LocalAddrs.end()) {
      LValue LV = WrapperCGF.MakeAddrLValue(
          I->second.second,
          I->second.first ? I->second.first->getType() : Arg->getType(),
          AlignmentSource::Decl);
      if (LV.getType()->isAnyComplexType())
        LV.setAddress(LV.getAddress().withElementType(PI->getType()));
      CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc());
    } else {
      auto EI = VLASizes.find(Arg);
      if (EI != VLASizes.end()) {
        CallArg = EI->second.second;
      } else {
        LValue LV =
            WrapperCGF.MakeAddrLValue(WrapperCGF.GetAddrOfLocalVar(Arg),
                                      Arg->getType(), AlignmentSource::Decl);
        CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc());
      }
    }
    CallArgs.emplace_back(WrapperCGF.EmitFromMemory(CallArg, Arg->getType()));
    ++PI;
  }
  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(WrapperCGF, Loc, F, CallArgs);
  WrapperCGF.FinishFunction();
  return WrapperF;
}

//===----------------------------------------------------------------------===//
// OpenMP Directive Emission
//===----------------------------------------------------------------------===//
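/// Emit an element-by-element copy of the array at \p SrcAddr into the array
/// at \p DestAddr, invoking \p CopyGen on each pair of element addresses.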
void CodeGenFunction::EmitOMPAggregateAssign(
    Address DestAddr, Address SrcAddr, QualType OriginalType,
    const llvm::function_ref<void(Address, Address)> CopyGen) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = OriginalType->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr);
  SrcAddr = SrcAddr.withElementType(DestAddr.getElementType());

  llvm::Value *SrcBegin = SrcAddr.emitRawPointer(*this);
  llvm::Value *DestBegin = DestAddr.emitRawPointer(*this);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd = Builder.CreateInBoundsGEP(DestAddr.getElementType(),
                                                   DestBegin, NumElements);

  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = createBasicBlock("omp.arraycpy.done");
  llvm::Value *IsEmpty =
      Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
  Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = Builder.GetInsertBlock();
  EmitBlock(BodyBB);

  CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI =
      Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  SrcElementPHI->addIncoming(SrcBegin, EntryBB);
  Address SrcElementCurrent =
      Address(SrcElementPHI, SrcAddr.getElementType(),
              SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *DestElementPHI = Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI, DestAddr.getElementType(),
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  CopyGen(DestElementCurrent, SrcElementCurrent);

  // Shift the address forward by one element.
  llvm::Value *DestElementNext =
      Builder.CreateConstGEP1_32(DestAddr.getElementType(), DestElementPHI,
                                 /*Idx0=*/1, "omp.arraycpy.dest.element");
  llvm::Value *SrcElementNext =
      Builder.CreateConstGEP1_32(SrcAddr.getElementType(), SrcElementPHI,
                                 /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock());
  SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock());

  // Done.
  EmitBlock(DoneBB, /*IsFinished=*/true);
}

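/// Emit a copy from \p SrcAddr to \p DestAddr using the copy expression
/// \p Copy. Arrays copied with a plain assignment use an aggregate assign;
/// other arrays are copied element by element with \p SrcVD and \p DestVD
/// remapped to the current elements.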
void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr,
                                  Address SrcAddr, const VarDecl *DestVD,
                                  const VarDecl *SrcVD, const Expr *Copy) {
  if (OriginalType->isArrayType()) {
    const auto *BO = dyn_cast<BinaryOperator>(Copy);
    if (BO && BO->getOpcode() == BO_Assign) {
      // Perform simple memcpy for simple copying.
      LValue Dest = MakeAddrLValue(DestAddr, OriginalType);
      LValue Src = MakeAddrLValue(SrcAddr, OriginalType);
      EmitAggregateAssign(Dest, Src, OriginalType);
    } else {
      // For arrays with complex element types perform element-by-element
      // copying.
      EmitOMPAggregateAssign(
          DestAddr, SrcAddr, OriginalType,
          [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) {
            // Working with the single array element, so have to remap
            // destination and source variables to corresponding array
            // elements.
            CodeGenFunction::OMPPrivateScope Remap(*this);
            Remap.addPrivate(DestVD, DestElement);
            Remap.addPrivate(SrcVD, SrcElement);
            (void)Remap.Privatize();
            EmitIgnoredExpr(Copy);
          });
    }
  } else {
    // Remap pseudo source variable to private copy.
    CodeGenFunction::OMPPrivateScope Remap(*this);
    Remap.addPrivate(SrcVD, SrcAddr);
    Remap.addPrivate(DestVD, DestAddr);
    (void)Remap.Privatize();
    // Emit copying of the whole variable.
    EmitIgnoredExpr(Copy);
  }
}

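/// Emit initialization of the private copies for the 'firstprivate' clauses
/// of the given directive. Returns true if any emitted firstprivate variable
/// also appears in a 'lastprivate' clause, so its final value must be copied
/// back later.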
bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
                                                OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool DeviceConstTarget =
      getLangOpts().OpenMPIsTargetDevice &&
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  bool FirstprivateIsLastprivate = false;
  llvm::DenseMap<const VarDecl *, OpenMPLastprivateModifier> Lastprivates;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    for (const auto *D : C->varlists())
      Lastprivates.try_emplace(
          cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl(),
          C->getKind());
  }
  llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
  llvm::SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
  getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
  // Force emission of the firstprivate copy if the directive does not emit an
  // outlined function, like omp for, omp simd, omp distribute etc.
  bool MustEmitFirstprivateCopy =
      CaptureRegions.size() == 1 && CaptureRegions.back() == OMPD_unknown;
  for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
    const auto *IRef = C->varlist_begin();
    const auto *InitsRef = C->inits().begin();
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      bool ThisFirstprivateIsLastprivate =
          Lastprivates.count(OrigVD->getCanonicalDecl()) > 0;
      const FieldDecl *FD = CapturedStmtInfo->lookup(OrigVD);
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
      if (!MustEmitFirstprivateCopy && !ThisFirstprivateIsLastprivate && FD &&
          !FD->getType()->isReferenceType() &&
          (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) {
        EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
        ++IRef;
        ++InitsRef;
        continue;
      }
      // Do not emit a copy for firstprivate constant variables in target
      // regions that are captured by reference.
      if (DeviceConstTarget && OrigVD->getType().isConstant(getContext()) &&
          FD && FD->getType()->isReferenceType() &&
          (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) {
        EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
        ++IRef;
        ++InitsRef;
        continue;
      }
      FirstprivateIsLastprivate =
          FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate;
      if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) {
        const auto *VDInit =
            cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
        bool IsRegistered;
        DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
                        /*RefersToEnclosingVariableOrCapture=*/FD != nullptr,
                        (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
        LValue OriginalLVal;
        if (!FD) {
          // Check if the firstprivate variable is just a constant value.
          ConstantEmission CE = tryEmitAsConstant(&DRE);
          if (CE && !CE.isReference()) {
            // Constant value, no need to create a copy.
            ++IRef;
            ++InitsRef;
            continue;
          }
          if (CE && CE.isReference()) {
            OriginalLVal = CE.getReferenceLValue(*this, &DRE);
          } else {
            assert(!CE && "Expected non-constant firstprivate.");
            OriginalLVal = EmitLValue(&DRE);
          }
        } else {
          OriginalLVal = EmitLValue(&DRE);
        }
        QualType Type = VD->getType();
        if (Type->isArrayType()) {
          // Emit VarDecl with copy init for arrays.
          // Get the address of the original variable captured in the current
          // captured region.
          AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
          const Expr *Init = VD->getInit();
          if (!isa<CXXConstructExpr>(Init) || isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            LValue Dest = MakeAddrLValue(Emission.getAllocatedAddress(), Type);
            EmitAggregateAssign(Dest, OriginalLVal, Type);
          } else {
            EmitOMPAggregateAssign(
                Emission.getAllocatedAddress(), OriginalLVal.getAddress(), Type,
                [this, VDInit, Init](Address DestElement, Address SrcElement) {
                  // Clean up any temporaries needed by the
                  // initialization.
                  RunCleanupsScope InitScope(*this);
                  // Emit initialization for single element.
                  setAddrOfLocalVar(VDInit, SrcElement);
                  EmitAnyExprToMem(Init, DestElement,
                                   Init->getType().getQualifiers(),
                                   /*IsInitializer*/ false);
                  LocalDeclMap.erase(VDInit);
                });
          }
          EmitAutoVarCleanups(Emission);
          IsRegistered =
              PrivateScope.addPrivate(OrigVD, Emission.getAllocatedAddress());
        } else {
          Address OriginalAddr = OriginalLVal.getAddress();
          // Emit private VarDecl with copy init.
          // Remap temp VDInit variable to the address of the original
          // variable (for proper handling of captured global variables).
          setAddrOfLocalVar(VDInit, OriginalAddr);
          EmitDecl(*VD);
          LocalDeclMap.erase(VDInit);
          Address VDAddr = GetAddrOfLocalVar(VD);
          if (ThisFirstprivateIsLastprivate &&
              Lastprivates[OrigVD->getCanonicalDecl()] ==
                  OMPC_LASTPRIVATE_conditional) {
            // Create/init special variable for lastprivate conditionals.
            llvm::Value *V =
                EmitLoadOfScalar(MakeAddrLValue(VDAddr, (*IRef)->getType(),
                                                AlignmentSource::Decl),
                                 (*IRef)->getExprLoc());
            VDAddr = CGM.getOpenMPRuntime().emitLastprivateConditionalInit(
                *this, OrigVD);
            EmitStoreOfScalar(V, MakeAddrLValue(VDAddr, (*IRef)->getType(),
                                                AlignmentSource::Decl));
            LocalDeclMap.erase(VD);
            setAddrOfLocalVar(VD, VDAddr);
          }
          IsRegistered = PrivateScope.addPrivate(OrigVD, VDAddr);
        }
        assert(IsRegistered &&
               "firstprivate var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
      ++InitsRef;
    }
  }
  return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty();
}

void CodeGenFunction::EmitOMPPrivateClause(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
  for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) {
    auto IRef = C->varlist_begin();
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
        EmitDecl(*VD);
        // Emit private VarDecl with copy init.
        bool IsRegistered =
            PrivateScope.addPrivate(OrigVD, GetAddrOfLocalVar(VD));
        assert(IsRegistered && "private var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
    }
  }
}

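/// Emit copying of 'copyin' threadprivate variables from the master thread's
/// copies to the current thread's copies. Returns true if at least one
/// variable was copied.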
bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
  if (!HaveInsertPoint())
    return false;
  // threadprivate_var1 = master_threadprivate_var1;
  // operator=(threadprivate_var2, master_threadprivate_var2);
  // ...
  // __kmpc_barrier(&loc, global_tid);
  llvm::DenseSet<const VarDecl *> CopiedVars;
  llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
  for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (const Expr *AssignOp : C->assignment_ops()) {
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = VD->getType();
      if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
        // Get the address of the master variable. If we are emitting code with
        // TLS support, the address is passed from the master as a field in the
        // captured declaration.
        Address MasterAddr = Address::invalid();
        if (getLangOpts().OpenMPUseTLS &&
            getContext().getTargetInfo().isTLSSupported()) {
          assert(CapturedStmtInfo->lookup(VD) &&
                 "Copyin threadprivates should have been captured!");
          DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD), true,
                          (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
          MasterAddr = EmitLValue(&DRE).getAddress();
          LocalDeclMap.erase(VD);
        } else {
          MasterAddr =
              Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD)
                                          : CGM.GetAddrOfGlobal(VD),
                      CGM.getTypes().ConvertTypeForMem(VD->getType()),
                      getContext().getDeclAlign(VD));
        }
        // Get the address of the threadprivate variable.
        Address PrivateAddr = EmitLValue(*IRef).getAddress();
        if (CopiedVars.size() == 1) {
          // First check whether the current thread is the master thread. If it
          // is, there is no need to copy the data.
          CopyBegin = createBasicBlock("copyin.not.master");
          CopyEnd = createBasicBlock("copyin.not.master.end");
          // TODO: Avoid ptrtoint conversion.
          auto *MasterAddrInt = Builder.CreatePtrToInt(
              MasterAddr.emitRawPointer(*this), CGM.IntPtrTy);
          auto *PrivateAddrInt = Builder.CreatePtrToInt(
              PrivateAddr.emitRawPointer(*this), CGM.IntPtrTy);
          Builder.CreateCondBr(
              Builder.CreateICmpNE(MasterAddrInt, PrivateAddrInt), CopyBegin,
              CopyEnd);
          EmitBlock(CopyBegin);
        }
        const auto *SrcVD =
            cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        const auto *DestVD =
            cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
  }
  if (CopyEnd) {
    // Exit out of copying procedure for non-master thread.
    EmitBlock(CopyEnd, /*IsFinished=*/true);
    return true;
  }
  return false;
}

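/// Emit initialization of the private copies for the 'lastprivate' clauses of
/// the given directive and remember the addresses of the original variables
/// for the final copy-back. Returns true if the directive has at least one
/// 'lastprivate' clause.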
bool CodeGenFunction::EmitOMPLastprivateClauseInit(
    const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool HasAtLeastOneLastprivate = false;
  llvm::DenseSet<const VarDecl *> SIMDLCVs;
  if (isOpenMPSimdDirective(D.getDirectiveKind())) {
    const auto *LoopDirective = cast<OMPLoopDirective>(&D);
    for (const Expr *C : LoopDirective->counters()) {
      SIMDLCVs.insert(
          cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
    }
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    HasAtLeastOneLastprivate = true;
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        !getLangOpts().OpenMPSimd)
      break;
    const auto *IRef = C->varlist_begin();
    const auto *IDestRef = C->destination_exprs().begin();
    for (const Expr *IInit : C->private_copies()) {
      // Keep the address of the original variable for future update at the end
      // of the loop.
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      // Taskloops do not require additional initialization; it is done in the
      // runtime support library.
      if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) {
        const auto *DestVD =
            cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
                        /*RefersToEnclosingVariableOrCapture=*/
                        CapturedStmtInfo->lookup(OrigVD) != nullptr,
                        (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
        PrivateScope.addPrivate(DestVD, EmitLValue(&DRE).getAddress());
        // Check if the variable is also a firstprivate: in this case IInit is
        // not generated. Initialization of this variable will happen in
        // codegen for the 'firstprivate' clause.
        if (IInit && !SIMDLCVs.count(OrigVD->getCanonicalDecl())) {
          const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
          Address VDAddr = Address::invalid();
          if (C->getKind() == OMPC_LASTPRIVATE_conditional) {
            VDAddr = CGM.getOpenMPRuntime().emitLastprivateConditionalInit(
                *this, OrigVD);
            setAddrOfLocalVar(VD, VDAddr);
          } else {
            // Emit private VarDecl with copy init.
            EmitDecl(*VD);
            VDAddr = GetAddrOfLocalVar(VD);
          }
          bool IsRegistered = PrivateScope.addPrivate(OrigVD, VDAddr);
          assert(IsRegistered &&
                 "lastprivate var already registered as private");
          (void)IsRegistered;
        }
      }
      ++IRef;
      ++IDestRef;
    }
  }
  return HasAtLeastOneLastprivate;
}

void CodeGenFunction::EmitOMPLastprivateClauseFinal(
    const OMPExecutableDirective &D, bool NoFinals,
    llvm::Value *IsLastIterCond) {
  if (!HaveInsertPoint())
    return;
  // Emit following code:
  // if (<IsLastIterCond>) {
  //   orig_var1 = private_orig_var1;
  //   ...
  //   orig_varn = private_orig_varn;
  // }
  llvm::BasicBlock *ThenBB = nullptr;
  llvm::BasicBlock *DoneBB = nullptr;
  if (IsLastIterCond) {
    // Emit implicit barrier if at least one lastprivate conditional is found
    // and this is not simd mode.
    if (!getLangOpts().OpenMPSimd &&
        llvm::any_of(D.getClausesOfKind<OMPLastprivateClause>(),
                     [](const OMPLastprivateClause *C) {
                       return C->getKind() == OMPC_LASTPRIVATE_conditional;
                     })) {
      CGM.getOpenMPRuntime().emitBarrierCall(*this, D.getBeginLoc(),
                                             OMPD_unknown,
                                             /*EmitChecks=*/false,
                                             /*ForceSimpleCall=*/true);
    }
    ThenBB = createBasicBlock(".omp.lastprivate.then");
    DoneBB = createBasicBlock(".omp.lastprivate.done");
    Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
    EmitBlock(ThenBB);
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  llvm::DenseMap<const VarDecl *, const Expr *> LoopCountersAndUpdates;
  if (const auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) {
    auto IC = LoopDirective->counters().begin();
    for (const Expr *F : LoopDirective->finals()) {
      const auto *D =
          cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl())->getCanonicalDecl();
      if (NoFinals)
        AlreadyEmittedVars.insert(D);
      else
        LoopCountersAndUpdates[D] = F;
      ++IC;
    }
  }
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (const Expr *AssignOp : C->assignment_ops()) {
      const auto *PrivateVD =
          cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = PrivateVD->getType();
      const auto *CanonicalVD = PrivateVD->getCanonicalDecl();
      if (AlreadyEmittedVars.insert(CanonicalVD).second) {
        // If the lastprivate variable is a loop control variable for a
        // loop-based directive, update its value before copying it back to
        // the original variable.
        if (const Expr *FinalExpr = LoopCountersAndUpdates.lookup(CanonicalVD))
          EmitIgnoredExpr(FinalExpr);
        const auto *SrcVD =
            cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        const auto *DestVD =
            cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        // Get the address of the private variable.
        Address PrivateAddr = GetAddrOfLocalVar(PrivateVD);
        if (const auto *RefTy = PrivateVD->getType()->getAs<ReferenceType>())
          PrivateAddr = Address(
              Builder.CreateLoad(PrivateAddr),
              CGM.getTypes().ConvertTypeForMem(RefTy->getPointeeType()),
              CGM.getNaturalTypeAlignment(RefTy->getPointeeType()));
        // Store the last value to the private copy in the last iteration.
        if (C->getKind() == OMPC_LASTPRIVATE_conditional)
          CGM.getOpenMPRuntime().emitLastprivateConditionalFinalUpdate(
              *this, MakeAddrLValue(PrivateAddr, (*IRef)->getType()), PrivateVD,
              (*IRef)->getExprLoc());
        // Get the address of the original variable.
        Address OriginalAddr = GetAddrOfLocalVar(DestVD);
        EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
    if (const Expr *PostUpdate = C->getPostUpdateExpr())
      EmitIgnoredExpr(PostUpdate);
  }
  if (IsLastIterCond)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}

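/// Emit initialization of the private copies for the reduction variables of
/// the given directive. Only clauses whose inscan modifier matches
/// \p ForInscan are handled in this invocation.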
void CodeGenFunction::EmitOMPReductionClauseInit(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope, bool ForInscan) {
  if (!HaveInsertPoint())
    return;
  SmallVector<const Expr *, 4> Shareds;
  SmallVector<const Expr *, 4> Privates;
  SmallVector<const Expr *, 4> ReductionOps;
  SmallVector<const Expr *, 4> LHSs;
  SmallVector<const Expr *, 4> RHSs;
  OMPTaskDataTy Data;
  SmallVector<const Expr *, 4> TaskLHSs;
  SmallVector<const Expr *, 4> TaskRHSs;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    if (ForInscan != (C->getModifier() == OMPC_REDUCTION_inscan))
      continue;
    Shareds.append(C->varlist_begin(), C->varlist_end());
    Privates.append(C->privates().begin(), C->privates().end());
    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
    LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
    RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    if (C->getModifier() == OMPC_REDUCTION_task) {
      Data.ReductionVars.append(C->privates().begin(), C->privates().end());
      Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
      Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
      Data.ReductionOps.append(C->reduction_ops().begin(),
                               C->reduction_ops().end());
      TaskLHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
      TaskRHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    }
  }
  ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps);
  unsigned Count = 0;
  auto *ILHS = LHSs.begin();
  auto *IRHS = RHSs.begin();
  auto *IPriv = Privates.begin();
  for (const Expr *IRef : Shareds) {
    const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());
    // Emit private VarDecl with reduction init.
    RedCG.emitSharedOrigLValue(*this, Count);
    RedCG.emitAggregateType(*this, Count);
    AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD);
    RedCG.emitInitialization(*this, Count, Emission.getAllocatedAddress(),
                             RedCG.getSharedLValue(Count).getAddress(),
                             [&Emission](CodeGenFunction &CGF) {
                               CGF.EmitAutoVarInit(Emission);
                               return true;
                             });
    EmitAutoVarCleanups(Emission);
    Address BaseAddr = RedCG.adjustPrivateAddress(
        *this, Count, Emission.getAllocatedAddress());
    bool IsRegistered =
        PrivateScope.addPrivate(RedCG.getBaseDecl(Count), BaseAddr);
    assert(IsRegistered && "private var already registered as private");
    // Silence the warning about unused variable.
    (void)IsRegistered;

    const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
    const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
    QualType Type = PrivateVD->getType();
    bool isaOMPArraySectionExpr = isa<ArraySectionExpr>(IRef);
    if (isaOMPArraySectionExpr && Type->isVariablyModifiedType()) {
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      PrivateScope.addPrivate(LHSVD, RedCG.getSharedLValue(Count).getAddress());
      PrivateScope.addPrivate(RHSVD, GetAddrOfLocalVar(PrivateVD));
    } else if ((isaOMPArraySectionExpr && Type->isScalarType()) ||
               isa<ArraySubscriptExpr>(IRef)) {
      // Store the address of the original variable associated with the LHS
1288 | // implicit variable. |
1289 | PrivateScope.addPrivate(LocalVD: LHSVD, Addr: RedCG.getSharedLValue(N: Count).getAddress()); |
1290 | PrivateScope.addPrivate(LocalVD: RHSVD, |
1291 | Addr: GetAddrOfLocalVar(VD: PrivateVD).withElementType( |
1292 | ElemTy: ConvertTypeForMem(T: RHSVD->getType()))); |
1293 | } else { |
1294 | QualType Type = PrivateVD->getType(); |
1295 | bool IsArray = getContext().getAsArrayType(T: Type) != nullptr; |
1296 | Address OriginalAddr = RedCG.getSharedLValue(N: Count).getAddress(); |
1297 | // Store the address of the original variable associated with the LHS |
1298 | // implicit variable. |
1299 | if (IsArray) { |
1300 | OriginalAddr = |
1301 | OriginalAddr.withElementType(ElemTy: ConvertTypeForMem(T: LHSVD->getType())); |
1302 | } |
1303 | PrivateScope.addPrivate(LocalVD: LHSVD, Addr: OriginalAddr); |
1304 | PrivateScope.addPrivate( |
1305 | LocalVD: RHSVD, Addr: IsArray ? GetAddrOfLocalVar(VD: PrivateVD).withElementType( |
1306 | ElemTy: ConvertTypeForMem(T: RHSVD->getType())) |
1307 | : GetAddrOfLocalVar(VD: PrivateVD)); |
1308 | } |
1309 | ++ILHS; |
1310 | ++IRHS; |
1311 | ++IPriv; |
1312 | ++Count; |
1313 | } |
1314 | if (!Data.ReductionVars.empty()) { |
1315 | Data.IsReductionWithTaskMod = true; |
1316 | Data.IsWorksharingReduction = |
1317 | isOpenMPWorksharingDirective(DKind: D.getDirectiveKind()); |
1318 | llvm::Value *ReductionDesc = CGM.getOpenMPRuntime().emitTaskReductionInit( |
1319 | CGF&: *this, Loc: D.getBeginLoc(), LHSExprs: TaskLHSs, RHSExprs: TaskRHSs, Data); |
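    // Illustrative: for '#pragma omp parallel reduction(task, + : x)' the
    // runtime call above returns a task-reduction descriptor; the switch below
    // looks up the Sema-created helper variable that the descriptor is stored
    // into.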
1320 | const Expr *TaskRedRef = nullptr; |
1321 | switch (D.getDirectiveKind()) { |
1322 | case OMPD_parallel: |
1323 | TaskRedRef = cast<OMPParallelDirective>(Val: D).getTaskReductionRefExpr(); |
1324 | break; |
1325 | case OMPD_for: |
1326 | TaskRedRef = cast<OMPForDirective>(Val: D).getTaskReductionRefExpr(); |
1327 | break; |
1328 | case OMPD_sections: |
1329 | TaskRedRef = cast<OMPSectionsDirective>(Val: D).getTaskReductionRefExpr(); |
1330 | break; |
1331 | case OMPD_parallel_for: |
1332 | TaskRedRef = cast<OMPParallelForDirective>(Val: D).getTaskReductionRefExpr(); |
1333 | break; |
1334 | case OMPD_parallel_master: |
1335 | TaskRedRef = |
1336 | cast<OMPParallelMasterDirective>(Val: D).getTaskReductionRefExpr(); |
1337 | break; |
1338 | case OMPD_parallel_sections: |
1339 | TaskRedRef = |
1340 | cast<OMPParallelSectionsDirective>(Val: D).getTaskReductionRefExpr(); |
1341 | break; |
1342 | case OMPD_target_parallel: |
1343 | TaskRedRef = |
1344 | cast<OMPTargetParallelDirective>(Val: D).getTaskReductionRefExpr(); |
1345 | break; |
1346 | case OMPD_target_parallel_for: |
1347 | TaskRedRef = |
1348 | cast<OMPTargetParallelForDirective>(Val: D).getTaskReductionRefExpr(); |
1349 | break; |
1350 | case OMPD_distribute_parallel_for: |
1351 | TaskRedRef = |
1352 | cast<OMPDistributeParallelForDirective>(Val: D).getTaskReductionRefExpr(); |
1353 | break; |
1354 | case OMPD_teams_distribute_parallel_for: |
1355 | TaskRedRef = cast<OMPTeamsDistributeParallelForDirective>(Val: D) |
1356 | .getTaskReductionRefExpr(); |
1357 | break; |
1358 | case OMPD_target_teams_distribute_parallel_for: |
1359 | TaskRedRef = cast<OMPTargetTeamsDistributeParallelForDirective>(Val: D) |
1360 | .getTaskReductionRefExpr(); |
1361 | break; |
1362 | case OMPD_simd: |
1363 | case OMPD_for_simd: |
1364 | case OMPD_section: |
1365 | case OMPD_single: |
1366 | case OMPD_master: |
1367 | case OMPD_critical: |
1368 | case OMPD_parallel_for_simd: |
1369 | case OMPD_task: |
1370 | case OMPD_taskyield: |
1371 | case OMPD_error: |
1372 | case OMPD_barrier: |
1373 | case OMPD_taskwait: |
1374 | case OMPD_taskgroup: |
1375 | case OMPD_flush: |
1376 | case OMPD_depobj: |
1377 | case OMPD_scan: |
1378 | case OMPD_ordered: |
1379 | case OMPD_atomic: |
1380 | case OMPD_teams: |
1381 | case OMPD_target: |
1382 | case OMPD_cancellation_point: |
1383 | case OMPD_cancel: |
1384 | case OMPD_target_data: |
1385 | case OMPD_target_enter_data: |
1386 | case OMPD_target_exit_data: |
1387 | case OMPD_taskloop: |
1388 | case OMPD_taskloop_simd: |
1389 | case OMPD_master_taskloop: |
1390 | case OMPD_master_taskloop_simd: |
1391 | case OMPD_parallel_master_taskloop: |
1392 | case OMPD_parallel_master_taskloop_simd: |
1393 | case OMPD_distribute: |
1394 | case OMPD_target_update: |
1395 | case OMPD_distribute_parallel_for_simd: |
1396 | case OMPD_distribute_simd: |
1397 | case OMPD_target_parallel_for_simd: |
1398 | case OMPD_target_simd: |
1399 | case OMPD_teams_distribute: |
1400 | case OMPD_teams_distribute_simd: |
1401 | case OMPD_teams_distribute_parallel_for_simd: |
1402 | case OMPD_target_teams: |
1403 | case OMPD_target_teams_distribute: |
1404 | case OMPD_target_teams_distribute_parallel_for_simd: |
1405 | case OMPD_target_teams_distribute_simd: |
1406 | case OMPD_declare_target: |
1407 | case OMPD_end_declare_target: |
1408 | case OMPD_threadprivate: |
1409 | case OMPD_allocate: |
1410 | case OMPD_declare_reduction: |
1411 | case OMPD_declare_mapper: |
1412 | case OMPD_declare_simd: |
1413 | case OMPD_requires: |
1414 | case OMPD_declare_variant: |
1415 | case OMPD_begin_declare_variant: |
1416 | case OMPD_end_declare_variant: |
1417 | case OMPD_unknown: |
1418 | default: |
1419 | llvm_unreachable("Unexpected directive with task reductions." ); |
1420 | } |
1421 | |
1422 | const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: TaskRedRef)->getDecl()); |
1423 | EmitVarDecl(D: *VD); |
1424 | EmitStoreOfScalar(Value: ReductionDesc, Addr: GetAddrOfLocalVar(VD), |
1425 | /*Volatile=*/false, Ty: TaskRedRef->getType()); |
1426 | } |
1427 | } |
1428 | |
1429 | void CodeGenFunction::EmitOMPReductionClauseFinal( |
1430 | const OMPExecutableDirective &D, const OpenMPDirectiveKind ReductionKind) { |
1431 | if (!HaveInsertPoint()) |
1432 | return; |
1433 | llvm::SmallVector<const Expr *, 8> Privates; |
1434 | llvm::SmallVector<const Expr *, 8> LHSExprs; |
1435 | llvm::SmallVector<const Expr *, 8> RHSExprs; |
1436 | llvm::SmallVector<const Expr *, 8> ReductionOps; |
1437 | bool HasAtLeastOneReduction = false; |
1438 | bool IsReductionWithTaskMod = false; |
1439 | for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) { |
1440 | // Do not emit for inscan reductions. |
1441 | if (C->getModifier() == OMPC_REDUCTION_inscan) |
1442 | continue; |
1443 | HasAtLeastOneReduction = true; |
1444 | Privates.append(in_start: C->privates().begin(), in_end: C->privates().end()); |
1445 | LHSExprs.append(in_start: C->lhs_exprs().begin(), in_end: C->lhs_exprs().end()); |
1446 | RHSExprs.append(in_start: C->rhs_exprs().begin(), in_end: C->rhs_exprs().end()); |
1447 | ReductionOps.append(in_start: C->reduction_ops().begin(), in_end: C->reduction_ops().end()); |
1448 | IsReductionWithTaskMod = |
1449 | IsReductionWithTaskMod || C->getModifier() == OMPC_REDUCTION_task; |
1450 | } |
1451 | if (HasAtLeastOneReduction) { |
1452 | if (IsReductionWithTaskMod) { |
1453 | CGM.getOpenMPRuntime().emitTaskReductionFini( |
1454 | CGF&: *this, Loc: D.getBeginLoc(), |
1455 | IsWorksharingReduction: isOpenMPWorksharingDirective(DKind: D.getDirectiveKind())); |
1456 | } |
1457 | bool TeamsLoopCanBeParallel = false; |
1458 | if (auto *TTLD = dyn_cast<OMPTargetTeamsGenericLoopDirective>(Val: &D)) |
1459 | TeamsLoopCanBeParallel = TTLD->canBeParallelFor(); |
1460 | bool WithNowait = D.getSingleClause<OMPNowaitClause>() || |
1461 | isOpenMPParallelDirective(DKind: D.getDirectiveKind()) || |
1462 | TeamsLoopCanBeParallel || ReductionKind == OMPD_simd; |
1463 | bool SimpleReduction = ReductionKind == OMPD_simd; |
    // Emit nowait reduction if the nowait clause is present or the directive
    // is a parallel directive (it always has an implicit barrier).
1466 | CGM.getOpenMPRuntime().emitReduction( |
1467 | CGF&: *this, Loc: D.getEndLoc(), Privates, LHSExprs, RHSExprs, ReductionOps, |
1468 | Options: {.WithNowait: WithNowait, .SimpleReduction: SimpleReduction, .ReductionKind: ReductionKind}); |
1469 | } |
1470 | } |
1471 | |
1472 | static void emitPostUpdateForReductionClause( |
1473 | CodeGenFunction &CGF, const OMPExecutableDirective &D, |
1474 | const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) { |
1475 | if (!CGF.HaveInsertPoint()) |
1476 | return; |
1477 | llvm::BasicBlock *DoneBB = nullptr; |
1478 | for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) { |
1479 | if (const Expr *PostUpdate = C->getPostUpdateExpr()) { |
1480 | if (!DoneBB) { |
1481 | if (llvm::Value *Cond = CondGen(CGF)) { |
1482 | // If the first post-update expression is found, emit conditional |
1483 | // block if it was requested. |
1484 | llvm::BasicBlock *ThenBB = CGF.createBasicBlock(name: ".omp.reduction.pu" ); |
1485 | DoneBB = CGF.createBasicBlock(name: ".omp.reduction.pu.done" ); |
1486 | CGF.Builder.CreateCondBr(Cond, True: ThenBB, False: DoneBB); |
1487 | CGF.EmitBlock(BB: ThenBB); |
1488 | } |
1489 | } |
1490 | CGF.EmitIgnoredExpr(E: PostUpdate); |
1491 | } |
1492 | } |
1493 | if (DoneBB) |
1494 | CGF.EmitBlock(BB: DoneBB, /*IsFinished=*/true); |
1495 | } |
1496 | |
1497 | namespace { |
/// Codegen lambda for appending distribute lower and upper bounds to the
/// outlined parallel function. This is necessary for combined constructs such
/// as 'distribute parallel for'.
1501 | typedef llvm::function_ref<void(CodeGenFunction &, |
1502 | const OMPExecutableDirective &, |
1503 | llvm::SmallVectorImpl<llvm::Value *> &)> |
1504 | CodeGenBoundParametersTy; |
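// Illustrative: for '#pragma omp distribute parallel for' the callback appends
// the distribute chunk's lower and upper bound values to the outlined
// function's arguments, so that the inner 'for' worksharing loop covers only
// the chunk assigned to this team by 'distribute'.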
1505 | } // anonymous namespace |
1506 | |
1507 | static void |
1508 | checkForLastprivateConditionalUpdate(CodeGenFunction &CGF, |
1509 | const OMPExecutableDirective &S) { |
1510 | if (CGF.getLangOpts().OpenMP < 50) |
1511 | return; |
1512 | llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> PrivateDecls; |
1513 | for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) { |
1514 | for (const Expr *Ref : C->varlists()) { |
1515 | if (!Ref->getType()->isScalarType()) |
1516 | continue; |
1517 | const auto *DRE = dyn_cast<DeclRefExpr>(Val: Ref->IgnoreParenImpCasts()); |
1518 | if (!DRE) |
1519 | continue; |
1520 | PrivateDecls.insert(V: cast<VarDecl>(Val: DRE->getDecl())); |
1521 | CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, LHS: Ref); |
1522 | } |
1523 | } |
1524 | for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { |
1525 | for (const Expr *Ref : C->varlists()) { |
1526 | if (!Ref->getType()->isScalarType()) |
1527 | continue; |
1528 | const auto *DRE = dyn_cast<DeclRefExpr>(Val: Ref->IgnoreParenImpCasts()); |
1529 | if (!DRE) |
1530 | continue; |
1531 | PrivateDecls.insert(V: cast<VarDecl>(Val: DRE->getDecl())); |
1532 | CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, LHS: Ref); |
1533 | } |
1534 | } |
1535 | for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) { |
1536 | for (const Expr *Ref : C->varlists()) { |
1537 | if (!Ref->getType()->isScalarType()) |
1538 | continue; |
1539 | const auto *DRE = dyn_cast<DeclRefExpr>(Val: Ref->IgnoreParenImpCasts()); |
1540 | if (!DRE) |
1541 | continue; |
1542 | PrivateDecls.insert(V: cast<VarDecl>(Val: DRE->getDecl())); |
1543 | CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, LHS: Ref); |
1544 | } |
1545 | } |
  // Privates should not be analyzed since they are not captured at all.
  // Task reductions may be skipped - tasks are ignored.
  // Firstprivates do not return a value but may be passed by reference - no
  // need to check for updated lastprivate conditional.
1550 | for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) { |
1551 | for (const Expr *Ref : C->varlists()) { |
1552 | if (!Ref->getType()->isScalarType()) |
1553 | continue; |
1554 | const auto *DRE = dyn_cast<DeclRefExpr>(Val: Ref->IgnoreParenImpCasts()); |
1555 | if (!DRE) |
1556 | continue; |
1557 | PrivateDecls.insert(V: cast<VarDecl>(Val: DRE->getDecl())); |
1558 | } |
1559 | } |
1560 | CGF.CGM.getOpenMPRuntime().checkAndEmitSharedLastprivateConditional( |
1561 | CGF, D: S, IgnoredDecls: PrivateDecls); |
1562 | } |
1563 | |
1564 | static void emitCommonOMPParallelDirective( |
1565 | CodeGenFunction &CGF, const OMPExecutableDirective &S, |
1566 | OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, |
1567 | const CodeGenBoundParametersTy &CodeGenBoundParameters) { |
1568 | const CapturedStmt *CS = S.getCapturedStmt(RegionKind: OMPD_parallel); |
1569 | llvm::Value *NumThreads = nullptr; |
1570 | llvm::Function *OutlinedFn = |
1571 | CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction( |
1572 | CGF, D: S, ThreadIDVar: *CS->getCapturedDecl()->param_begin(), InnermostKind, |
1573 | CodeGen); |
1574 | if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) { |
1575 | CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF); |
1576 | NumThreads = CGF.EmitScalarExpr(E: NumThreadsClause->getNumThreads(), |
1577 | /*IgnoreResultAssign=*/true); |
1578 | CGF.CGM.getOpenMPRuntime().emitNumThreadsClause( |
1579 | CGF, NumThreads, Loc: NumThreadsClause->getBeginLoc()); |
1580 | } |
1581 | if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) { |
1582 | CodeGenFunction::RunCleanupsScope ProcBindScope(CGF); |
1583 | CGF.CGM.getOpenMPRuntime().emitProcBindClause( |
1584 | CGF, ProcBind: ProcBindClause->getProcBindKind(), Loc: ProcBindClause->getBeginLoc()); |
1585 | } |
1586 | const Expr *IfCond = nullptr; |
1587 | for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { |
1588 | if (C->getNameModifier() == OMPD_unknown || |
1589 | C->getNameModifier() == OMPD_parallel) { |
1590 | IfCond = C->getCondition(); |
1591 | break; |
1592 | } |
1593 | } |
1594 | |
1595 | OMPParallelScope Scope(CGF, S); |
1596 | llvm::SmallVector<llvm::Value *, 16> CapturedVars; |
  // Combining 'distribute' with 'for' requires sharing each 'distribute'
  // chunk's lower and upper bounds with the 'for' chunking mechanism.
  // The following lambda takes care of appending the lower and upper bound
  // parameters when necessary.
1601 | CodeGenBoundParameters(CGF, S, CapturedVars); |
1602 | CGF.GenerateOpenMPCapturedVars(S: *CS, CapturedVars); |
1603 | CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, Loc: S.getBeginLoc(), OutlinedFn, |
1604 | CapturedVars, IfCond, NumThreads); |
1605 | } |
1606 | |
1607 | static bool isAllocatableDecl(const VarDecl *VD) { |
1608 | const VarDecl *CVD = VD->getCanonicalDecl(); |
1609 | if (!CVD->hasAttr<OMPAllocateDeclAttr>()) |
1610 | return false; |
1611 | const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>(); |
  // The default allocators (omp_default_mem_alloc / omp_null_mem_alloc) with
  // no allocator expression use the default allocation path and need no
  // special handling here.
1613 | return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc || |
1614 | AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) && |
1615 | !AA->getAllocator()); |
1616 | } |
1617 | |
1618 | static void emitEmptyBoundParameters(CodeGenFunction &, |
1619 | const OMPExecutableDirective &, |
1620 | llvm::SmallVectorImpl<llvm::Value *> &) {} |
1621 | |
1622 | static void emitOMPCopyinClause(CodeGenFunction &CGF, |
1623 | const OMPExecutableDirective &S) { |
1624 | bool Copyins = CGF.EmitOMPCopyinClause(D: S); |
1625 | if (Copyins) { |
    // Emit an implicit barrier to synchronize threads and avoid data races
    // when propagating the master thread's values of threadprivate variables
    // to the local instances of those variables in all other implicit threads.
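    // Illustrative example of a construct that reaches this barrier:
    //   int tp;
    //   #pragma omp threadprivate(tp)
    //   #pragma omp parallel copyin(tp)
    // Each thread's copy of 'tp' is initialized from the master thread's value
    // before any thread may read its copy.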
1629 | CGF.CGM.getOpenMPRuntime().emitBarrierCall( |
1630 | CGF, Loc: S.getBeginLoc(), Kind: OMPD_unknown, /*EmitChecks=*/false, |
1631 | /*ForceSimpleCall=*/true); |
1632 | } |
1633 | } |
1634 | |
1635 | Address CodeGenFunction::OMPBuilderCBHelpers::getAddressOfLocalVariable( |
1636 | CodeGenFunction &CGF, const VarDecl *VD) { |
1637 | CodeGenModule &CGM = CGF.CGM; |
1638 | auto &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); |
1639 | |
1640 | if (!VD) |
1641 | return Address::invalid(); |
1642 | const VarDecl *CVD = VD->getCanonicalDecl(); |
1643 | if (!isAllocatableDecl(VD: CVD)) |
1644 | return Address::invalid(); |
1645 | llvm::Value *Size; |
1646 | CharUnits Align = CGM.getContext().getDeclAlign(D: CVD); |
1647 | if (CVD->getType()->isVariablyModifiedType()) { |
1648 | Size = CGF.getTypeSize(Ty: CVD->getType()); |
1649 | // Align the size: ((size + align - 1) / align) * align |
1650 | Size = CGF.Builder.CreateNUWAdd( |
1651 | LHS: Size, RHS: CGM.getSize(numChars: Align - CharUnits::fromQuantity(Quantity: 1))); |
1652 | Size = CGF.Builder.CreateUDiv(LHS: Size, RHS: CGM.getSize(numChars: Align)); |
1653 | Size = CGF.Builder.CreateNUWMul(LHS: Size, RHS: CGM.getSize(numChars: Align)); |
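    // E.g. (illustrative), with size = 10 and align = 8 the sequence computes
    // (10 + 7) / 8 * 8 = 16, the smallest multiple of 8 that can hold 10
    // bytes.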
1654 | } else { |
1655 | CharUnits Sz = CGM.getContext().getTypeSizeInChars(T: CVD->getType()); |
1656 | Size = CGM.getSize(numChars: Sz.alignTo(Align)); |
1657 | } |
1658 | |
1659 | const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>(); |
1660 | assert(AA->getAllocator() && |
1661 | "Expected allocator expression for non-default allocator." ); |
1662 | llvm::Value *Allocator = CGF.EmitScalarExpr(E: AA->getAllocator()); |
  // According to the standard, the original allocator type is an enum
  // (integer). Convert it to a pointer type, if required.
1665 | if (Allocator->getType()->isIntegerTy()) |
1666 | Allocator = CGF.Builder.CreateIntToPtr(V: Allocator, DestTy: CGM.VoidPtrTy); |
1667 | else if (Allocator->getType()->isPointerTy()) |
1668 | Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(V: Allocator, |
1669 | DestTy: CGM.VoidPtrTy); |
1670 | |
1671 | llvm::Value *Addr = OMPBuilder.createOMPAlloc( |
1672 | Loc: CGF.Builder, Size, Allocator, |
1673 | Name: getNameWithSeparators(Parts: {CVD->getName(), ".void.addr" }, FirstSeparator: "." , Separator: "." )); |
1674 | llvm::CallInst *FreeCI = |
1675 | OMPBuilder.createOMPFree(Loc: CGF.Builder, Addr, Allocator); |
1676 | |
1677 | CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(Kind: NormalAndEHCleanup, A: FreeCI); |
1678 | Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( |
1679 | V: Addr, |
1680 | DestTy: CGF.ConvertTypeForMem(T: CGM.getContext().getPointerType(T: CVD->getType())), |
1681 | Name: getNameWithSeparators(Parts: {CVD->getName(), ".addr" }, FirstSeparator: "." , Separator: "." )); |
1682 | return Address(Addr, CGF.ConvertTypeForMem(T: CVD->getType()), Align); |
1683 | } |
1684 | |
1685 | Address CodeGenFunction::OMPBuilderCBHelpers::getAddrOfThreadPrivate( |
1686 | CodeGenFunction &CGF, const VarDecl *VD, Address VDAddr, |
1687 | SourceLocation Loc) { |
1688 | CodeGenModule &CGM = CGF.CGM; |
1689 | if (CGM.getLangOpts().OpenMPUseTLS && |
1690 | CGM.getContext().getTargetInfo().isTLSSupported()) |
1691 | return VDAddr; |
1692 | |
1693 | llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); |
1694 | |
1695 | llvm::Type *VarTy = VDAddr.getElementType(); |
1696 | llvm::Value *Data = |
1697 | CGF.Builder.CreatePointerCast(V: VDAddr.emitRawPointer(CGF), DestTy: CGM.Int8PtrTy); |
1698 | llvm::ConstantInt *Size = CGM.getSize(numChars: CGM.GetTargetTypeStoreSize(Ty: VarTy)); |
1699 | std::string Suffix = getNameWithSeparators(Parts: {"cache" , "" }); |
1700 | llvm::Twine CacheName = Twine(CGM.getMangledName(GD: VD)).concat(Suffix); |
1701 | |
1702 | llvm::CallInst *ThreadPrivateCacheCall = |
1703 | OMPBuilder.createCachedThreadPrivate(Loc: CGF.Builder, Pointer: Data, Size, Name: CacheName); |
1704 | |
1705 | return Address(ThreadPrivateCacheCall, CGM.Int8Ty, VDAddr.getAlignment()); |
1706 | } |
1707 | |
1708 | std::string CodeGenFunction::OMPBuilderCBHelpers::getNameWithSeparators( |
1709 | ArrayRef<StringRef> Parts, StringRef FirstSeparator, StringRef Separator) { |
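  // E.g. (illustrative) Parts = {"a", "b"}, FirstSeparator = "_" and
  // Separator = "." produce "_a.b": the first part is prefixed with
  // FirstSeparator, all later parts with Separator.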
1710 | SmallString<128> Buffer; |
1711 | llvm::raw_svector_ostream OS(Buffer); |
1712 | StringRef Sep = FirstSeparator; |
1713 | for (StringRef Part : Parts) { |
1714 | OS << Sep << Part; |
1715 | Sep = Separator; |
1716 | } |
1717 | return OS.str().str(); |
1718 | } |
1719 | |
1720 | void CodeGenFunction::OMPBuilderCBHelpers::EmitOMPInlinedRegionBody( |
1721 | CodeGenFunction &CGF, const Stmt *RegionBodyStmt, InsertPointTy AllocaIP, |
1722 | InsertPointTy CodeGenIP, Twine RegionName) { |
1723 | CGBuilderTy &Builder = CGF.Builder; |
1724 | Builder.restoreIP(IP: CodeGenIP); |
1725 | llvm::BasicBlock *FiniBB = splitBBWithSuffix(Builder, /*CreateBranch=*/false, |
1726 | Suffix: "." + RegionName + ".after" ); |
1727 | |
1728 | { |
1729 | OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(CGF, AllocaIP, *FiniBB); |
1730 | CGF.EmitStmt(S: RegionBodyStmt); |
1731 | } |
1732 | |
1733 | if (Builder.saveIP().isSet()) |
1734 | Builder.CreateBr(Dest: FiniBB); |
1735 | } |
1736 | |
1737 | void CodeGenFunction::OMPBuilderCBHelpers::EmitOMPOutlinedRegionBody( |
1738 | CodeGenFunction &CGF, const Stmt *RegionBodyStmt, InsertPointTy AllocaIP, |
1739 | InsertPointTy CodeGenIP, Twine RegionName) { |
1740 | CGBuilderTy &Builder = CGF.Builder; |
1741 | Builder.restoreIP(IP: CodeGenIP); |
1742 | llvm::BasicBlock *FiniBB = splitBBWithSuffix(Builder, /*CreateBranch=*/false, |
1743 | Suffix: "." + RegionName + ".after" ); |
1744 | |
1745 | { |
1746 | OMPBuilderCBHelpers::OutlinedRegionBodyRAII IRB(CGF, AllocaIP, *FiniBB); |
1747 | CGF.EmitStmt(S: RegionBodyStmt); |
1748 | } |
1749 | |
1750 | if (Builder.saveIP().isSet()) |
1751 | Builder.CreateBr(Dest: FiniBB); |
1752 | } |
1753 | |
1754 | void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { |
1755 | if (CGM.getLangOpts().OpenMPIRBuilder) { |
1756 | llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); |
1757 | // Check if we have any if clause associated with the directive. |
1758 | llvm::Value *IfCond = nullptr; |
1759 | if (const auto *C = S.getSingleClause<OMPIfClause>()) |
1760 | IfCond = EmitScalarExpr(E: C->getCondition(), |
1761 | /*IgnoreResultAssign=*/true); |
1762 | |
1763 | llvm::Value *NumThreads = nullptr; |
1764 | if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) |
1765 | NumThreads = EmitScalarExpr(E: NumThreadsClause->getNumThreads(), |
1766 | /*IgnoreResultAssign=*/true); |
1767 | |
1768 | ProcBindKind ProcBind = OMP_PROC_BIND_default; |
1769 | if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) |
1770 | ProcBind = ProcBindClause->getProcBindKind(); |
1771 | |
1772 | using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; |
1773 | |
    // The cleanup callback that finalizes all variables at the given location,
    // i.e., calls destructors etc.
1776 | auto FiniCB = [this](InsertPointTy IP) { |
1777 | OMPBuilderCBHelpers::FinalizeOMPRegion(CGF&: *this, IP); |
1778 | }; |
1779 | |
1780 | // Privatization callback that performs appropriate action for |
1781 | // shared/private/firstprivate/lastprivate/copyin/... variables. |
1782 | // |
1783 | // TODO: This defaults to shared right now. |
1784 | auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, |
1785 | llvm::Value &, llvm::Value &Val, llvm::Value *&ReplVal) { |
1786 | // The next line is appropriate only for variables (Val) with the |
1787 | // data-sharing attribute "shared". |
1788 | ReplVal = &Val; |
1789 | |
1790 | return CodeGenIP; |
1791 | }; |
1792 | |
1793 | const CapturedStmt *CS = S.getCapturedStmt(RegionKind: OMPD_parallel); |
1794 | const Stmt *ParallelRegionBodyStmt = CS->getCapturedStmt(); |
1795 | |
1796 | auto BodyGenCB = [&, this](InsertPointTy AllocaIP, |
1797 | InsertPointTy CodeGenIP) { |
1798 | OMPBuilderCBHelpers::EmitOMPOutlinedRegionBody( |
1799 | CGF&: *this, RegionBodyStmt: ParallelRegionBodyStmt, AllocaIP, CodeGenIP, RegionName: "parallel" ); |
1800 | }; |
1801 | |
1802 | CGCapturedStmtInfo CGSI(*CS, CR_OpenMP); |
1803 | CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI); |
1804 | llvm::OpenMPIRBuilder::InsertPointTy AllocaIP( |
1805 | AllocaInsertPt->getParent(), AllocaInsertPt->getIterator()); |
1806 | Builder.restoreIP( |
1807 | IP: OMPBuilder.createParallel(Loc: Builder, AllocaIP, BodyGenCB, PrivCB, FiniCB, |
1808 | IfCondition: IfCond, NumThreads, ProcBind, IsCancellable: S.hasCancel())); |
1809 | return; |
1810 | } |
1811 | |
1812 | // Emit parallel region as a standalone region. |
1813 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
1814 | Action.Enter(CGF); |
1815 | OMPPrivateScope PrivateScope(CGF); |
1816 | emitOMPCopyinClause(CGF, S); |
1817 | (void)CGF.EmitOMPFirstprivateClause(D: S, PrivateScope); |
1818 | CGF.EmitOMPPrivateClause(D: S, PrivateScope); |
1819 | CGF.EmitOMPReductionClauseInit(D: S, PrivateScope); |
1820 | (void)PrivateScope.Privatize(); |
1821 | CGF.EmitStmt(S: S.getCapturedStmt(RegionKind: OMPD_parallel)->getCapturedStmt()); |
1822 | CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_parallel); |
1823 | }; |
1824 | { |
1825 | auto LPCRegion = |
1826 | CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S); |
1827 | emitCommonOMPParallelDirective(CGF&: *this, S, InnermostKind: OMPD_parallel, CodeGen, |
1828 | CodeGenBoundParameters: emitEmptyBoundParameters); |
1829 | emitPostUpdateForReductionClause(CGF&: *this, D: S, |
1830 | CondGen: [](CodeGenFunction &) { return nullptr; }); |
1831 | } |
1832 | // Check for outer lastprivate conditional update. |
1833 | checkForLastprivateConditionalUpdate(CGF&: *this, S); |
1834 | } |
1835 | |
1836 | void CodeGenFunction::EmitOMPMetaDirective(const OMPMetaDirective &S) { |
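  // Illustrative: for a metadirective such as
  //   #pragma omp metadirective when(user = {condition(c)} : parallel)
  // Sema has already resolved the selection, so emitting the statement
  // returned by getIfStmt() is all that is left to do here.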
1837 | EmitStmt(S: S.getIfStmt()); |
1838 | } |
1839 | |
1840 | namespace { |
1841 | /// RAII to handle scopes for loop transformation directives. |
1842 | class OMPTransformDirectiveScopeRAII { |
1843 | OMPLoopScope *Scope = nullptr; |
1844 | CodeGenFunction::CGCapturedStmtInfo *CGSI = nullptr; |
1845 | CodeGenFunction::CGCapturedStmtRAII *CapInfoRAII = nullptr; |
1846 | |
1847 | OMPTransformDirectiveScopeRAII(const OMPTransformDirectiveScopeRAII &) = |
1848 | delete; |
1849 | OMPTransformDirectiveScopeRAII & |
1850 | operator=(const OMPTransformDirectiveScopeRAII &) = delete; |
1851 | |
1852 | public: |
1853 | OMPTransformDirectiveScopeRAII(CodeGenFunction &CGF, const Stmt *S) { |
1854 | if (const auto *Dir = dyn_cast<OMPLoopBasedDirective>(Val: S)) { |
1855 | Scope = new OMPLoopScope(CGF, *Dir); |
1856 | CGSI = new CodeGenFunction::CGCapturedStmtInfo(CR_OpenMP); |
1857 | CapInfoRAII = new CodeGenFunction::CGCapturedStmtRAII(CGF, CGSI); |
1858 | } |
1859 | } |
1860 | ~OMPTransformDirectiveScopeRAII() { |
1861 | if (!Scope) |
1862 | return; |
1863 | delete CapInfoRAII; |
1864 | delete CGSI; |
1865 | delete Scope; |
1866 | } |
1867 | }; |
1868 | } // namespace |
1869 | |
1870 | static void emitBody(CodeGenFunction &CGF, const Stmt *S, const Stmt *NextLoop, |
1871 | int MaxLevel, int Level = 0) { |
1872 | assert(Level < MaxLevel && "Too deep lookup during loop body codegen." ); |
1873 | const Stmt *SimplifiedS = S->IgnoreContainers(); |
1874 | if (const auto *CS = dyn_cast<CompoundStmt>(Val: SimplifiedS)) { |
1875 | PrettyStackTraceLoc CrashInfo( |
1876 | CGF.getContext().getSourceManager(), CS->getLBracLoc(), |
1877 | "LLVM IR generation of compound statement ('{}')" ); |
1878 | |
1879 | // Keep track of the current cleanup stack depth, including debug scopes. |
1880 | CodeGenFunction::LexicalScope Scope(CGF, S->getSourceRange()); |
1881 | for (const Stmt *CurStmt : CS->body()) |
1882 | emitBody(CGF, S: CurStmt, NextLoop, MaxLevel, Level); |
1883 | return; |
1884 | } |
1885 | if (SimplifiedS == NextLoop) { |
1886 | if (auto *Dir = dyn_cast<OMPLoopTransformationDirective>(Val: SimplifiedS)) |
1887 | SimplifiedS = Dir->getTransformedStmt(); |
1888 | if (const auto *CanonLoop = dyn_cast<OMPCanonicalLoop>(Val: SimplifiedS)) |
1889 | SimplifiedS = CanonLoop->getLoopStmt(); |
1890 | if (const auto *For = dyn_cast<ForStmt>(Val: SimplifiedS)) { |
1891 | S = For->getBody(); |
1892 | } else { |
1893 | assert(isa<CXXForRangeStmt>(SimplifiedS) && |
1894 | "Expected canonical for loop or range-based for loop." ); |
1895 | const auto *CXXFor = cast<CXXForRangeStmt>(Val: SimplifiedS); |
1896 | CGF.EmitStmt(S: CXXFor->getLoopVarStmt()); |
1897 | S = CXXFor->getBody(); |
1898 | } |
1899 | if (Level + 1 < MaxLevel) { |
1900 | NextLoop = OMPLoopDirective::tryToFindNextInnerLoop( |
1901 | CurStmt: S, /*TryImperfectlyNestedLoops=*/true); |
1902 | emitBody(CGF, S, NextLoop, MaxLevel, Level: Level + 1); |
1903 | return; |
1904 | } |
1905 | } |
1906 | CGF.EmitStmt(S); |
1907 | } |
1908 | |
1909 | void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D, |
1910 | JumpDest LoopExit) { |
1911 | RunCleanupsScope BodyScope(*this); |
  // Update the counter values on the current iteration.
1913 | for (const Expr *UE : D.updates()) |
1914 | EmitIgnoredExpr(E: UE); |
1915 | // Update the linear variables. |
  // In distribute directives only loop counters may be marked as linear; no
  // need to generate the code for them.
1918 | if (!isOpenMPDistributeDirective(DKind: D.getDirectiveKind())) { |
1919 | for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) { |
1920 | for (const Expr *UE : C->updates()) |
1921 | EmitIgnoredExpr(E: UE); |
1922 | } |
1923 | } |
1924 | |
1925 | // On a continue in the body, jump to the end. |
1926 | JumpDest Continue = getJumpDestInCurrentScope(Name: "omp.body.continue" ); |
1927 | BreakContinueStack.push_back(Elt: BreakContinue(LoopExit, Continue)); |
1928 | for (const Expr *E : D.finals_conditions()) { |
1929 | if (!E) |
1930 | continue; |
    // Check that the loop counter in a non-rectangular nest fits into the
    // iteration space.
1933 | llvm::BasicBlock *NextBB = createBasicBlock(name: "omp.body.next" ); |
1934 | EmitBranchOnBoolExpr(Cond: E, TrueBlock: NextBB, FalseBlock: Continue.getBlock(), |
1935 | TrueCount: getProfileCount(S: D.getBody())); |
1936 | EmitBlock(BB: NextBB); |
1937 | } |
1938 | |
1939 | OMPPrivateScope InscanScope(*this); |
1940 | EmitOMPReductionClauseInit(D, PrivateScope&: InscanScope, /*ForInscan=*/true); |
1941 | bool IsInscanRegion = InscanScope.Privatize(); |
1942 | if (IsInscanRegion) { |
    // Need to remember the blocks before and after the scan directive to
    // dispatch them correctly depending on the clause used in this directive,
    // inclusive or exclusive. For the inclusive scan the natural order of the
    // blocks is used; for the exclusive clause the blocks must be executed in
    // reverse order.
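    // Illustrative sketch, for
    //   #pragma omp simd reduction(inscan, + : x)
    //   for (...) { <input phase> #pragma omp scan inclusive(x) <scan phase> }
    // inclusive scan: dispatch -> omp.before.scan.bb -> omp.after.scan.bb
    // exclusive scan: dispatch -> omp.after.scan.bb -> omp.before.scan.bb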
1948 | OMPBeforeScanBlock = createBasicBlock(name: "omp.before.scan.bb" ); |
1949 | OMPAfterScanBlock = createBasicBlock(name: "omp.after.scan.bb" ); |
    // No need to allocate an inscan exit block; in simd mode it is selected in
    // the codegen for the scan directive.
1952 | if (D.getDirectiveKind() != OMPD_simd && !getLangOpts().OpenMPSimd) |
1953 | OMPScanExitBlock = createBasicBlock(name: "omp.exit.inscan.bb" ); |
1954 | OMPScanDispatch = createBasicBlock(name: "omp.inscan.dispatch" ); |
1955 | EmitBranch(Block: OMPScanDispatch); |
1956 | EmitBlock(BB: OMPBeforeScanBlock); |
1957 | } |
1958 | |
1959 | // Emit loop variables for C++ range loops. |
1960 | const Stmt *Body = |
1961 | D.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers(); |
1962 | // Emit loop body. |
1963 | emitBody(CGF&: *this, S: Body, |
1964 | NextLoop: OMPLoopBasedDirective::tryToFindNextInnerLoop( |
1965 | CurStmt: Body, /*TryImperfectlyNestedLoops=*/true), |
1966 | MaxLevel: D.getLoopsNumber()); |
1967 | |
1968 | // Jump to the dispatcher at the end of the loop body. |
1969 | if (IsInscanRegion) |
1970 | EmitBranch(Block: OMPScanExitBlock); |
1971 | |
1972 | // The end (updates/cleanups). |
1973 | EmitBlock(BB: Continue.getBlock()); |
1974 | BreakContinueStack.pop_back(); |
1975 | } |
1976 | |
1977 | using EmittedClosureTy = std::pair<llvm::Function *, llvm::Value *>; |
1978 | |
1979 | /// Emit a captured statement and return the function as well as its captured |
1980 | /// closure context. |
1981 | static EmittedClosureTy emitCapturedStmtFunc(CodeGenFunction &ParentCGF, |
1982 | const CapturedStmt *S) { |
1983 | LValue CapStruct = ParentCGF.InitCapturedStruct(S: *S); |
1984 | CodeGenFunction CGF(ParentCGF.CGM, /*suppressNewContext=*/true); |
1985 | std::unique_ptr<CodeGenFunction::CGCapturedStmtInfo> CSI = |
1986 | std::make_unique<CodeGenFunction::CGCapturedStmtInfo>(args: *S); |
1987 | CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, CSI.get()); |
1988 | llvm::Function *F = CGF.GenerateCapturedStmtFunction(S: *S); |
1989 | |
1990 | return {F, CapStruct.getPointer(CGF&: ParentCGF)}; |
1991 | } |
1992 | |
1993 | /// Emit a call to a previously captured closure. |
1994 | static llvm::CallInst * |
1995 | emitCapturedStmtCall(CodeGenFunction &ParentCGF, EmittedClosureTy Cap, |
1996 | llvm::ArrayRef<llvm::Value *> Args) { |
  // Append the closure context to the argument list.
1998 | SmallVector<llvm::Value *> EffectiveArgs; |
1999 | EffectiveArgs.reserve(N: Args.size() + 1); |
2000 | llvm::append_range(C&: EffectiveArgs, R&: Args); |
2001 | EffectiveArgs.push_back(Elt: Cap.second); |
2002 | |
2003 | return ParentCGF.Builder.CreateCall(Callee: Cap.first, Args: EffectiveArgs); |
2004 | } |
2005 | |
2006 | llvm::CanonicalLoopInfo * |
2007 | CodeGenFunction::EmitOMPCollapsedCanonicalLoopNest(const Stmt *S, int Depth) { |
2008 | assert(Depth == 1 && "Nested loops with OpenMPIRBuilder not yet implemented" ); |
2009 | |
  // The caller is processing a loop-associated directive that applies to the
  // \p Depth loops nested in \p S. Put the previous pending loop-associated
  // directive onto the stack. If the current loop-associated directive is a
  // loop transformation directive, it will push its generated loops onto the
  // stack such that, together with the loops left here, they form the combined
  // loop nest for the parent loop-associated directive.
2016 | int ParentExpectedOMPLoopDepth = ExpectedOMPLoopDepth; |
2017 | ExpectedOMPLoopDepth = Depth; |
2018 | |
2019 | EmitStmt(S); |
2020 | assert(OMPLoopNestStack.size() >= (size_t)Depth && "Found too few loops" ); |
2021 | |
2022 | // The last added loop is the outermost one. |
2023 | llvm::CanonicalLoopInfo *Result = OMPLoopNestStack.back(); |
2024 | |
2025 | // Pop the \p Depth loops requested by the call from that stack and restore |
2026 | // the previous context. |
2027 | OMPLoopNestStack.pop_back_n(NumItems: Depth); |
2028 | ExpectedOMPLoopDepth = ParentExpectedOMPLoopDepth; |
2029 | |
2030 | return Result; |
2031 | } |
2032 | |
2033 | void CodeGenFunction::EmitOMPCanonicalLoop(const OMPCanonicalLoop *S) { |
2034 | const Stmt *SyntacticalLoop = S->getLoopStmt(); |
2035 | if (!getLangOpts().OpenMPIRBuilder) { |
    // If the OpenMPIRBuilder is not enabled, emit the loop as a regular
    // statement.
2037 | EmitStmt(S: SyntacticalLoop); |
2038 | return; |
2039 | } |
2040 | |
2041 | LexicalScope ForScope(*this, S->getSourceRange()); |
2042 | |
  // Emit init statements. The Distance/LoopVar functions may reference the
  // variable declarations these statements introduce.
2045 | const Stmt *BodyStmt; |
2046 | if (const auto *For = dyn_cast<ForStmt>(Val: SyntacticalLoop)) { |
2047 | if (const Stmt *InitStmt = For->getInit()) |
2048 | EmitStmt(S: InitStmt); |
2049 | BodyStmt = For->getBody(); |
2050 | } else if (const auto *RangeFor = |
2051 | dyn_cast<CXXForRangeStmt>(Val: SyntacticalLoop)) { |
2052 | if (const DeclStmt *RangeStmt = RangeFor->getRangeStmt()) |
2053 | EmitStmt(S: RangeStmt); |
2054 | if (const DeclStmt *BeginStmt = RangeFor->getBeginStmt()) |
2055 | EmitStmt(S: BeginStmt); |
2056 | if (const DeclStmt *EndStmt = RangeFor->getEndStmt()) |
2057 | EmitStmt(S: EndStmt); |
2058 | if (const DeclStmt *LoopVarStmt = RangeFor->getLoopVarStmt()) |
2059 | EmitStmt(S: LoopVarStmt); |
2060 | BodyStmt = RangeFor->getBody(); |
2061 | } else |
2062 | llvm_unreachable("Expected for-stmt or range-based for-stmt" ); |
2063 | |
2064 | // Emit closure for later use. By-value captures will be captured here. |
2065 | const CapturedStmt *DistanceFunc = S->getDistanceFunc(); |
2066 | EmittedClosureTy DistanceClosure = emitCapturedStmtFunc(ParentCGF&: *this, S: DistanceFunc); |
2067 | const CapturedStmt *LoopVarFunc = S->getLoopVarFunc(); |
2068 | EmittedClosureTy LoopVarClosure = emitCapturedStmtFunc(ParentCGF&: *this, S: LoopVarFunc); |
2069 | |
2070 | // Call the distance function to get the number of iterations of the loop to |
2071 | // come. |
2072 | QualType LogicalTy = DistanceFunc->getCapturedDecl() |
2073 | ->getParam(i: 0) |
2074 | ->getType() |
2075 | .getNonReferenceType(); |
2076 | RawAddress CountAddr = CreateMemTemp(T: LogicalTy, Name: ".count.addr" ); |
2077 | emitCapturedStmtCall(ParentCGF&: *this, Cap: DistanceClosure, Args: {CountAddr.getPointer()}); |
2078 | llvm::Value *DistVal = Builder.CreateLoad(Addr: CountAddr, Name: ".count" ); |
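  // Illustrative: for 'for (int i = 0; i < n; ++i)' the distance closure
  // conceptually stores 'n' into CountAddr, so DistVal is the trip count; the
  // actual computation was built by Sema and may be more involved.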
2079 | |
2080 | // Emit the loop structure. |
2081 | llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); |
2082 | auto BodyGen = [&, this](llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP, |
2083 | llvm::Value *IndVar) { |
2084 | Builder.restoreIP(IP: CodeGenIP); |
2085 | |
2086 | // Emit the loop body: Convert the logical iteration number to the loop |
2087 | // variable and emit the body. |
2088 | const DeclRefExpr *LoopVarRef = S->getLoopVarRef(); |
2089 | LValue LCVal = EmitLValue(E: LoopVarRef); |
2090 | Address LoopVarAddress = LCVal.getAddress(); |
2091 | emitCapturedStmtCall(ParentCGF&: *this, Cap: LoopVarClosure, |
2092 | Args: {LoopVarAddress.emitRawPointer(CGF&: *this), IndVar}); |
2093 | |
2094 | RunCleanupsScope BodyScope(*this); |
2095 | EmitStmt(S: BodyStmt); |
2096 | }; |
2097 | llvm::CanonicalLoopInfo *CL = |
2098 | OMPBuilder.createCanonicalLoop(Loc: Builder, BodyGenCB: BodyGen, TripCount: DistVal); |
2099 | |
2100 | // Finish up the loop. |
2101 | Builder.restoreIP(IP: CL->getAfterIP()); |
2102 | ForScope.ForceCleanup(); |
2103 | |
2104 | // Remember the CanonicalLoopInfo for parent AST nodes consuming it. |
2105 | OMPLoopNestStack.push_back(Elt: CL); |
2106 | } |
2107 | |
2108 | void CodeGenFunction::EmitOMPInnerLoop( |
2109 | const OMPExecutableDirective &S, bool RequiresCleanup, const Expr *LoopCond, |
2110 | const Expr *IncExpr, |
2111 | const llvm::function_ref<void(CodeGenFunction &)> BodyGen, |
2112 | const llvm::function_ref<void(CodeGenFunction &)> PostIncGen) { |
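  // Illustrative shape of the loop emitted below:
  //   omp.inner.for.cond: if (LoopCond) goto body; else goto end;
  //   omp.inner.for.body: <BodyGen>
  //   omp.inner.for.inc:  IV = IV + 1; goto cond;
  //   omp.inner.for.end: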
2113 | auto LoopExit = getJumpDestInCurrentScope(Name: "omp.inner.for.end" ); |
2114 | |
2115 | // Start the loop with a block that tests the condition. |
2116 | auto CondBlock = createBasicBlock(name: "omp.inner.for.cond" ); |
2117 | EmitBlock(BB: CondBlock); |
2118 | const SourceRange R = S.getSourceRange(); |
2119 | |
  // If attributes are attached, push the basic block together with them onto
  // the loop stack.
2121 | const auto &OMPED = cast<OMPExecutableDirective>(Val: S); |
2122 | const CapturedStmt *ICS = OMPED.getInnermostCapturedStmt(); |
2123 | const Stmt *SS = ICS->getCapturedStmt(); |
2124 | const AttributedStmt *AS = dyn_cast_or_null<AttributedStmt>(Val: SS); |
2125 | OMPLoopNestStack.clear(); |
2126 | if (AS) |
2127 | LoopStack.push(Header: CondBlock, Ctx&: CGM.getContext(), CGOpts: CGM.getCodeGenOpts(), |
2128 | Attrs: AS->getAttrs(), StartLoc: SourceLocToDebugLoc(Location: R.getBegin()), |
2129 | EndLoc: SourceLocToDebugLoc(Location: R.getEnd())); |
2130 | else |
2131 | LoopStack.push(Header: CondBlock, StartLoc: SourceLocToDebugLoc(Location: R.getBegin()), |
2132 | EndLoc: SourceLocToDebugLoc(Location: R.getEnd())); |
2133 | |
2134 | // If there are any cleanups between here and the loop-exit scope, |
2135 | // create a block to stage a loop exit along. |
2136 | llvm::BasicBlock *ExitBlock = LoopExit.getBlock(); |
2137 | if (RequiresCleanup) |
2138 | ExitBlock = createBasicBlock(name: "omp.inner.for.cond.cleanup" ); |
2139 | |
2140 | llvm::BasicBlock *LoopBody = createBasicBlock(name: "omp.inner.for.body" ); |
2141 | |
2142 | // Emit condition. |
2143 | EmitBranchOnBoolExpr(Cond: LoopCond, TrueBlock: LoopBody, FalseBlock: ExitBlock, TrueCount: getProfileCount(S: &S)); |
2144 | if (ExitBlock != LoopExit.getBlock()) { |
2145 | EmitBlock(BB: ExitBlock); |
2146 | EmitBranchThroughCleanup(Dest: LoopExit); |
2147 | } |
2148 | |
2149 | EmitBlock(BB: LoopBody); |
2150 | incrementProfileCounter(S: &S); |
2151 | |
2152 | // Create a block for the increment. |
2153 | JumpDest Continue = getJumpDestInCurrentScope(Name: "omp.inner.for.inc" ); |
2154 | BreakContinueStack.push_back(Elt: BreakContinue(LoopExit, Continue)); |
2155 | |
2156 | BodyGen(*this); |
2157 | |
2158 | // Emit "IV = IV + 1" and a back-edge to the condition block. |
2159 | EmitBlock(BB: Continue.getBlock()); |
2160 | EmitIgnoredExpr(E: IncExpr); |
2161 | PostIncGen(*this); |
2162 | BreakContinueStack.pop_back(); |
2163 | EmitBranch(Block: CondBlock); |
2164 | LoopStack.pop(); |
2165 | // Emit the fall-through block. |
2166 | EmitBlock(BB: LoopExit.getBlock()); |
2167 | } |
2168 | |
2169 | bool CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) { |
2170 | if (!HaveInsertPoint()) |
2171 | return false; |
2172 | // Emit inits for the linear variables. |
2173 | bool HasLinears = false; |
2174 | for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) { |
2175 | for (const Expr *Init : C->inits()) { |
2176 | HasLinears = true; |
2177 | const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: Init)->getDecl()); |
2178 | if (const auto *Ref = |
2179 | dyn_cast<DeclRefExpr>(Val: VD->getInit()->IgnoreImpCasts())) { |
2180 | AutoVarEmission Emission = EmitAutoVarAlloca(var: *VD); |
2181 | const auto *OrigVD = cast<VarDecl>(Val: Ref->getDecl()); |
2182 | DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD), |
2183 | CapturedStmtInfo->lookup(VD: OrigVD) != nullptr, |
2184 | VD->getInit()->getType(), VK_LValue, |
2185 | VD->getInit()->getExprLoc()); |
2186 | EmitExprAsInit( |
2187 | init: &DRE, D: VD, |
2188 | lvalue: MakeAddrLValue(Addr: Emission.getAllocatedAddress(), T: VD->getType()), |
2189 | /*capturedByInit=*/false); |
2190 | EmitAutoVarCleanups(emission: Emission); |
2191 | } else { |
2192 | EmitVarDecl(D: *VD); |
2193 | } |
2194 | } |
2195 | // Emit the linear steps for the linear clauses. |
2196 | // If a step is not constant, it is pre-calculated before the loop. |
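    // E.g. (illustrative) for 'linear(x : a * b)' the non-constant step
    // 'a * b' is computed once into a Sema-created helper variable here and
    // reused by every iteration's update.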
2197 | if (const auto *CS = cast_or_null<BinaryOperator>(Val: C->getCalcStep())) |
2198 | if (const auto *SaveRef = cast<DeclRefExpr>(Val: CS->getLHS())) { |
2199 | EmitVarDecl(D: *cast<VarDecl>(Val: SaveRef->getDecl())); |
2200 | // Emit calculation of the linear step. |
2201 | EmitIgnoredExpr(E: CS); |
2202 | } |
2203 | } |
2204 | return HasLinears; |
2205 | } |
2206 | |
2207 | void CodeGenFunction::EmitOMPLinearClauseFinal( |
2208 | const OMPLoopDirective &D, |
2209 | const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) { |
2210 | if (!HaveInsertPoint()) |
2211 | return; |
2212 | llvm::BasicBlock *DoneBB = nullptr; |
2213 | // Emit the final values of the linear variables. |
2214 | for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) { |
2215 | auto IC = C->varlist_begin(); |
2216 | for (const Expr *F : C->finals()) { |
2217 | if (!DoneBB) { |
2218 | if (llvm::Value *Cond = CondGen(*this)) { |
2219 | // If the first post-update expression is found, emit conditional |
2220 | // block if it was requested. |
2221 | llvm::BasicBlock *ThenBB = createBasicBlock(name: ".omp.linear.pu" ); |
2222 | DoneBB = createBasicBlock(name: ".omp.linear.pu.done" ); |
2223 | Builder.CreateCondBr(Cond, True: ThenBB, False: DoneBB); |
2224 | EmitBlock(BB: ThenBB); |
2225 | } |
2226 | } |
2227 | const auto *OrigVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IC)->getDecl()); |
2228 | DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD), |
2229 | CapturedStmtInfo->lookup(VD: OrigVD) != nullptr, |
2230 | (*IC)->getType(), VK_LValue, (*IC)->getExprLoc()); |
2231 | Address OrigAddr = EmitLValue(E: &DRE).getAddress(); |
2232 | CodeGenFunction::OMPPrivateScope VarScope(*this); |
2233 | VarScope.addPrivate(LocalVD: OrigVD, Addr: OrigAddr); |
2234 | (void)VarScope.Privatize(); |
2235 | EmitIgnoredExpr(E: F); |
2236 | ++IC; |
2237 | } |
2238 | if (const Expr *PostUpdate = C->getPostUpdateExpr()) |
2239 | EmitIgnoredExpr(E: PostUpdate); |
2240 | } |
2241 | if (DoneBB) |
2242 | EmitBlock(BB: DoneBB, /*IsFinished=*/true); |
2243 | } |
2244 | |
2245 | static void emitAlignedClause(CodeGenFunction &CGF, |
2246 | const OMPExecutableDirective &D) { |
2247 | if (!CGF.HaveInsertPoint()) |
2248 | return; |
2249 | for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) { |
2250 | llvm::APInt ClauseAlignment(64, 0); |
2251 | if (const Expr *AlignmentExpr = Clause->getAlignment()) { |
2252 | auto *AlignmentCI = |
2253 | cast<llvm::ConstantInt>(Val: CGF.EmitScalarExpr(E: AlignmentExpr)); |
2254 | ClauseAlignment = AlignmentCI->getValue(); |
2255 | } |
2256 | for (const Expr *E : Clause->varlists()) { |
2257 | llvm::APInt Alignment(ClauseAlignment); |
2258 | if (Alignment == 0) { |
2259 | // OpenMP [2.8.1, Description] |
2260 | // If no optional parameter is specified, implementation-defined default |
2261 | // alignments for SIMD instructions on the target platforms are assumed. |
2262 | Alignment = |
2263 | CGF.getContext() |
2264 | .toCharUnitsFromBits(BitSize: CGF.getContext().getOpenMPDefaultSimdAlign( |
2265 | T: E->getType()->getPointeeType())) |
2266 | .getQuantity(); |
2267 | } |
2268 | assert((Alignment == 0 || Alignment.isPowerOf2()) && |
2269 | "alignment is not power of 2" ); |
2270 | if (Alignment != 0) { |
2271 | llvm::Value *PtrValue = CGF.EmitScalarExpr(E); |
2272 | CGF.emitAlignmentAssumption( |
2273 | PtrValue, E, /*No second loc needed*/ AssumptionLoc: SourceLocation(), |
2274 | Alignment: llvm::ConstantInt::get(Context&: CGF.getLLVMContext(), V: Alignment)); |
2275 | } |
2276 | } |
2277 | } |
2278 | } |
2279 | |
2280 | void CodeGenFunction::EmitOMPPrivateLoopCounters( |
2281 | const OMPLoopDirective &S, CodeGenFunction::OMPPrivateScope &LoopScope) { |
2282 | if (!HaveInsertPoint()) |
2283 | return; |
2284 | auto I = S.private_counters().begin(); |
2285 | for (const Expr *E : S.counters()) { |
2286 | const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl()); |
2287 | const auto *PrivateVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *I)->getDecl()); |
2288 | // Emit var without initialization. |
2289 | AutoVarEmission VarEmission = EmitAutoVarAlloca(var: *PrivateVD); |
2290 | EmitAutoVarCleanups(emission: VarEmission); |
2291 | LocalDeclMap.erase(Val: PrivateVD); |
2292 | (void)LoopScope.addPrivate(LocalVD: VD, Addr: VarEmission.getAllocatedAddress()); |
2293 | if (LocalDeclMap.count(Val: VD) || CapturedStmtInfo->lookup(VD) || |
2294 | VD->hasGlobalStorage()) { |
2295 | DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD), |
2296 | LocalDeclMap.count(Val: VD) || CapturedStmtInfo->lookup(VD), |
2297 | E->getType(), VK_LValue, E->getExprLoc()); |
2298 | (void)LoopScope.addPrivate(LocalVD: PrivateVD, Addr: EmitLValue(E: &DRE).getAddress()); |
2299 | } else { |
2300 | (void)LoopScope.addPrivate(LocalVD: PrivateVD, Addr: VarEmission.getAllocatedAddress()); |
2301 | } |
2302 | ++I; |
2303 | } |
2304 | // Privatize extra loop counters used in loops for ordered(n) clauses. |
2305 | for (const auto *C : S.getClausesOfKind<OMPOrderedClause>()) { |
2306 | if (!C->getNumForLoops()) |
2307 | continue; |
2308 | for (unsigned I = S.getLoopsNumber(), E = C->getLoopNumIterations().size(); |
2309 | I < E; ++I) { |
2310 | const auto *DRE = cast<DeclRefExpr>(Val: C->getLoopCounter(NumLoop: I)); |
2311 | const auto *VD = cast<VarDecl>(Val: DRE->getDecl()); |
2312 | // Override only those variables that can be captured to avoid re-emission |
2313 | // of the variables declared within the loops. |
2314 | if (DRE->refersToEnclosingVariableOrCapture()) { |
2315 | (void)LoopScope.addPrivate( |
2316 | LocalVD: VD, Addr: CreateMemTemp(T: DRE->getType(), Name: VD->getName())); |
2317 | } |
2318 | } |
2319 | } |
2320 | } |
2321 | |
2322 | static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S, |
2323 | const Expr *Cond, llvm::BasicBlock *TrueBlock, |
2324 | llvm::BasicBlock *FalseBlock, uint64_t TrueCount) { |
2325 | if (!CGF.HaveInsertPoint()) |
2326 | return; |
2327 | { |
2328 | CodeGenFunction::OMPPrivateScope PreCondScope(CGF); |
2329 | CGF.EmitOMPPrivateLoopCounters(S, LoopScope&: PreCondScope); |
2330 | (void)PreCondScope.Privatize(); |
2331 | // Get initial values of real counters. |
2332 | for (const Expr *I : S.inits()) { |
2333 | CGF.EmitIgnoredExpr(E: I); |
2334 | } |
2335 | } |
2336 | // Create temp loop control variables with their init values to support |
2337 | // non-rectangular loops. |
2338 | CodeGenFunction::OMPMapVars PreCondVars; |
2339 | for (const Expr *E : S.dependent_counters()) { |
2340 | if (!E) |
2341 | continue; |
2342 | assert(!E->getType().getNonReferenceType()->isRecordType() && |
2343 | "dependent counter must not be an iterator." ); |
2344 | const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl()); |
2345 | Address CounterAddr = |
2346 | CGF.CreateMemTemp(T: VD->getType().getNonReferenceType()); |
2347 | (void)PreCondVars.setVarAddr(CGF, LocalVD: VD, TempAddr: CounterAddr); |
2348 | } |
2349 | (void)PreCondVars.apply(CGF); |
2350 | for (const Expr *E : S.dependent_inits()) { |
2351 | if (!E) |
2352 | continue; |
2353 | CGF.EmitIgnoredExpr(E); |
2354 | } |
  // Check that the loop is executed at least once.
2356 | CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount); |
2357 | PreCondVars.restore(CGF); |
2358 | } |
2359 | |
2360 | void CodeGenFunction::EmitOMPLinearClause( |
2361 | const OMPLoopDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) { |
2362 | if (!HaveInsertPoint()) |
2363 | return; |
2364 | llvm::DenseSet<const VarDecl *> SIMDLCVs; |
2365 | if (isOpenMPSimdDirective(DKind: D.getDirectiveKind())) { |
2366 | const auto *LoopDirective = cast<OMPLoopDirective>(Val: &D); |
2367 | for (const Expr *C : LoopDirective->counters()) { |
2368 | SIMDLCVs.insert( |
2369 | V: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: C)->getDecl())->getCanonicalDecl()); |
2370 | } |
2371 | } |
2372 | for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) { |
2373 | auto CurPrivate = C->privates().begin(); |
2374 | for (const Expr *E : C->varlists()) { |
2375 | const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl()); |
2376 | const auto *PrivateVD = |
2377 | cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *CurPrivate)->getDecl()); |
2378 | if (!SIMDLCVs.count(V: VD->getCanonicalDecl())) { |
2379 | // Emit private VarDecl with copy init. |
2380 | EmitVarDecl(D: *PrivateVD); |
2381 | bool IsRegistered = |
2382 | PrivateScope.addPrivate(LocalVD: VD, Addr: GetAddrOfLocalVar(VD: PrivateVD)); |
2383 | assert(IsRegistered && "linear var already registered as private" ); |
2384 | // Silence the warning about unused variable. |
2385 | (void)IsRegistered; |
2386 | } else { |
2387 | EmitVarDecl(D: *PrivateVD); |
2388 | } |
2389 | ++CurPrivate; |
2390 | } |
2391 | } |
2392 | } |
2393 | |
2394 | static void emitSimdlenSafelenClause(CodeGenFunction &CGF, |
2395 | const OMPExecutableDirective &D) { |
2396 | if (!CGF.HaveInsertPoint()) |
2397 | return; |
2398 | if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) { |
2399 | RValue Len = CGF.EmitAnyExpr(E: C->getSimdlen(), aggSlot: AggValueSlot::ignored(), |
2400 | /*ignoreResult=*/true); |
2401 | auto *Val = cast<llvm::ConstantInt>(Val: Len.getScalarVal()); |
2402 | CGF.LoopStack.setVectorizeWidth(Val->getZExtValue()); |
    // In the presence of a finite 'safelen', it may be unsafe to mark all
    // the memory instructions parallel, because loop-carried
    // dependences at a distance of 'safelen' iterations are possible.
2406 | CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>()); |
2407 | } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) { |
2408 | RValue Len = CGF.EmitAnyExpr(E: C->getSafelen(), aggSlot: AggValueSlot::ignored(), |
2409 | /*ignoreResult=*/true); |
2410 | auto *Val = cast<llvm::ConstantInt>(Val: Len.getScalarVal()); |
2411 | CGF.LoopStack.setVectorizeWidth(Val->getZExtValue()); |
    // In the presence of a finite 'safelen', it may be unsafe to mark all
    // the memory instructions parallel, because loop-carried
    // dependences at a distance of 'safelen' iterations are possible.
2415 | CGF.LoopStack.setParallel(/*Enable=*/false); |
2416 | } |
2417 | } |
2418 | |
2419 | void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D) { |
  // Walk the clauses and process safelen/simdlen, order, and inscan
  // reductions.
2421 | LoopStack.setParallel(/*Enable=*/true); |
2422 | LoopStack.setVectorizeEnable(); |
2423 | emitSimdlenSafelenClause(CGF&: *this, D); |
2424 | if (const auto *C = D.getSingleClause<OMPOrderClause>()) |
2425 | if (C->getKind() == OMPC_ORDER_concurrent) |
2426 | LoopStack.setParallel(/*Enable=*/true); |
2427 | if ((D.getDirectiveKind() == OMPD_simd || |
2428 | (getLangOpts().OpenMPSimd && |
2429 | isOpenMPSimdDirective(DKind: D.getDirectiveKind()))) && |
2430 | llvm::any_of(Range: D.getClausesOfKind<OMPReductionClause>(), |
2431 | P: [](const OMPReductionClause *C) { |
2432 | return C->getModifier() == OMPC_REDUCTION_inscan; |
2433 | })) |
2434 | // Disable parallel access in case of prefix sum. |
2435 | LoopStack.setParallel(/*Enable=*/false); |
2436 | } |
2437 | |
2438 | void CodeGenFunction::EmitOMPSimdFinal( |
2439 | const OMPLoopDirective &D, |
2440 | const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) { |
2441 | if (!HaveInsertPoint()) |
2442 | return; |
2443 | llvm::BasicBlock *DoneBB = nullptr; |
2444 | auto IC = D.counters().begin(); |
2445 | auto IPC = D.private_counters().begin(); |
2446 | for (const Expr *F : D.finals()) { |
2447 | const auto *OrigVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: (*IC))->getDecl()); |
2448 | const auto *PrivateVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: (*IPC))->getDecl()); |
2449 | const auto *CED = dyn_cast<OMPCapturedExprDecl>(Val: OrigVD); |
2450 | if (LocalDeclMap.count(Val: OrigVD) || CapturedStmtInfo->lookup(VD: OrigVD) || |
2451 | OrigVD->hasGlobalStorage() || CED) { |
2452 | if (!DoneBB) { |
2453 | if (llvm::Value *Cond = CondGen(*this)) { |
2454 | // If the first post-update expression is found, emit conditional |
2455 | // block if it was requested. |
llvm::BasicBlock *ThenBB = createBasicBlock(name: ".omp.final.then");
DoneBB = createBasicBlock(name: ".omp.final.done");
2458 | Builder.CreateCondBr(Cond, True: ThenBB, False: DoneBB); |
2459 | EmitBlock(BB: ThenBB); |
2460 | } |
2461 | } |
2462 | Address OrigAddr = Address::invalid(); |
2463 | if (CED) { |
2464 | OrigAddr = EmitLValue(E: CED->getInit()->IgnoreImpCasts()).getAddress(); |
2465 | } else { |
2466 | DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(PrivateVD), |
2467 | /*RefersToEnclosingVariableOrCapture=*/false, |
2468 | (*IPC)->getType(), VK_LValue, (*IPC)->getExprLoc()); |
2469 | OrigAddr = EmitLValue(E: &DRE).getAddress(); |
2470 | } |
2471 | OMPPrivateScope VarScope(*this); |
2472 | VarScope.addPrivate(LocalVD: OrigVD, Addr: OrigAddr); |
2473 | (void)VarScope.Privatize(); |
2474 | EmitIgnoredExpr(E: F); |
2475 | } |
2476 | ++IC; |
2477 | ++IPC; |
2478 | } |
2479 | if (DoneBB) |
2480 | EmitBlock(BB: DoneBB, /*IsFinished=*/true); |
2481 | } |
2482 | |
2483 | static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF, |
2484 | const OMPLoopDirective &S, |
2485 | CodeGenFunction::JumpDest LoopExit) { |
2486 | CGF.EmitOMPLoopBody(D: S, LoopExit); |
2487 | CGF.EmitStopPoint(S: &S); |
2488 | } |
2489 | |
/// Emit a helper variable and return the corresponding lvalue.
2491 | static LValue EmitOMPHelperVar(CodeGenFunction &CGF, |
2492 | const DeclRefExpr *Helper) { |
2493 | auto VDecl = cast<VarDecl>(Val: Helper->getDecl()); |
2494 | CGF.EmitVarDecl(D: *VDecl); |
2495 | return CGF.EmitLValue(E: Helper); |
2496 | } |
2497 | |
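/// Emits a simd loop, versioned by an applicable 'if' clause when one is
/// present (OpenMP >= 5.0 name modifiers). Roughly:
/// \code
/// if (C) { /* loop with simd annotations */ }
/// else   { /* same loop with vectorization disabled */ }
/// \endcode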
2498 | static void emitCommonSimdLoop(CodeGenFunction &CGF, const OMPLoopDirective &S, |
2499 | const RegionCodeGenTy &SimdInitGen, |
2500 | const RegionCodeGenTy &BodyCodeGen) { |
2501 | auto &&ThenGen = [&S, &SimdInitGen, &BodyCodeGen](CodeGenFunction &CGF, |
2502 | PrePostActionTy &) { |
2503 | CGOpenMPRuntime::NontemporalDeclsRAII NontemporalsRegion(CGF.CGM, S); |
2504 | CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF); |
2505 | SimdInitGen(CGF); |
2506 | |
2507 | BodyCodeGen(CGF); |
2508 | }; |
2509 | auto &&ElseGen = [&BodyCodeGen](CodeGenFunction &CGF, PrePostActionTy &) { |
2510 | CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF); |
2511 | CGF.LoopStack.setVectorizeEnable(/*Enable=*/false); |
2512 | |
2513 | BodyCodeGen(CGF); |
2514 | }; |
2515 | const Expr *IfCond = nullptr; |
2516 | if (isOpenMPSimdDirective(DKind: S.getDirectiveKind())) { |
2517 | for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { |
2518 | if (CGF.getLangOpts().OpenMP >= 50 && |
2519 | (C->getNameModifier() == OMPD_unknown || |
2520 | C->getNameModifier() == OMPD_simd)) { |
2521 | IfCond = C->getCondition(); |
2522 | break; |
2523 | } |
2524 | } |
2525 | } |
2526 | if (IfCond) { |
2527 | CGF.CGM.getOpenMPRuntime().emitIfClause(CGF, Cond: IfCond, ThenGen, ElseGen); |
2528 | } else { |
2529 | RegionCodeGenTy ThenRCG(ThenGen); |
2530 | ThenRCG(CGF); |
2531 | } |
2532 | } |
2533 | |
2534 | static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S, |
2535 | PrePostActionTy &Action) { |
2536 | Action.Enter(CGF); |
2537 | assert(isOpenMPSimdDirective(S.getDirectiveKind()) && |
2538 | "Expected simd directive" ); |
2539 | OMPLoopScope PreInitScope(CGF, S); |
2540 | // if (PreCond) { |
2541 | // for (IV in 0..LastIteration) BODY; |
2542 | // <Final counter/linear vars updates>; |
2543 | // } |
2544 | // |
2545 | if (isOpenMPDistributeDirective(DKind: S.getDirectiveKind()) || |
2546 | isOpenMPWorksharingDirective(DKind: S.getDirectiveKind()) || |
2547 | isOpenMPTaskLoopDirective(DKind: S.getDirectiveKind())) { |
2548 | (void)EmitOMPHelperVar(CGF, Helper: cast<DeclRefExpr>(Val: S.getLowerBoundVariable())); |
2549 | (void)EmitOMPHelperVar(CGF, Helper: cast<DeclRefExpr>(Val: S.getUpperBoundVariable())); |
2550 | } |
2551 | |
2552 | // Emit: if (PreCond) - begin. |
2553 | // If the condition constant folds and can be elided, avoid emitting the |
2554 | // whole loop. |
2555 | bool CondConstant; |
2556 | llvm::BasicBlock *ContBlock = nullptr; |
2557 | if (CGF.ConstantFoldsToSimpleInteger(Cond: S.getPreCond(), Result&: CondConstant)) { |
2558 | if (!CondConstant) |
2559 | return; |
2560 | } else { |
llvm::BasicBlock *ThenBlock = CGF.createBasicBlock(name: "simd.if.then");
ContBlock = CGF.createBasicBlock(name: "simd.if.end");
2563 | emitPreCond(CGF, S, Cond: S.getPreCond(), TrueBlock: ThenBlock, FalseBlock: ContBlock, |
2564 | TrueCount: CGF.getProfileCount(S: &S)); |
2565 | CGF.EmitBlock(BB: ThenBlock); |
2566 | CGF.incrementProfileCounter(S: &S); |
2567 | } |
2568 | |
2569 | // Emit the loop iteration variable. |
2570 | const Expr *IVExpr = S.getIterationVariable(); |
2571 | const auto *IVDecl = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: IVExpr)->getDecl()); |
2572 | CGF.EmitVarDecl(D: *IVDecl); |
2573 | CGF.EmitIgnoredExpr(E: S.getInit()); |
2574 | |
2575 | // Emit the iterations count variable. |
// If it is not a variable, Sema decided to calculate the iteration count on
// each iteration (e.g., it is foldable into a constant).
2578 | if (const auto *LIExpr = dyn_cast<DeclRefExpr>(Val: S.getLastIteration())) { |
2579 | CGF.EmitVarDecl(D: *cast<VarDecl>(Val: LIExpr->getDecl())); |
2580 | // Emit calculation of the iterations count. |
2581 | CGF.EmitIgnoredExpr(E: S.getCalcLastIteration()); |
2582 | } |
2583 | |
2584 | emitAlignedClause(CGF, D: S); |
2585 | (void)CGF.EmitOMPLinearClauseInit(D: S); |
2586 | { |
2587 | CodeGenFunction::OMPPrivateScope LoopScope(CGF); |
2588 | CGF.EmitOMPPrivateClause(D: S, PrivateScope&: LoopScope); |
2589 | CGF.EmitOMPPrivateLoopCounters(S, LoopScope); |
2590 | CGF.EmitOMPLinearClause(D: S, PrivateScope&: LoopScope); |
2591 | CGF.EmitOMPReductionClauseInit(D: S, PrivateScope&: LoopScope); |
2592 | CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion( |
2593 | CGF, S, CGF.EmitLValue(E: S.getIterationVariable())); |
2594 | bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(D: S, PrivateScope&: LoopScope); |
2595 | (void)LoopScope.Privatize(); |
2596 | if (isOpenMPTargetExecutionDirective(DKind: S.getDirectiveKind())) |
2597 | CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, D: S); |
2598 | |
2599 | emitCommonSimdLoop( |
2600 | CGF, S, |
2601 | SimdInitGen: [&S](CodeGenFunction &CGF, PrePostActionTy &) { |
2602 | CGF.EmitOMPSimdInit(D: S); |
2603 | }, |
2604 | BodyCodeGen: [&S, &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) { |
2605 | CGF.EmitOMPInnerLoop( |
2606 | S, RequiresCleanup: LoopScope.requiresCleanups(), LoopCond: S.getCond(), IncExpr: S.getInc(), |
2607 | BodyGen: [&S](CodeGenFunction &CGF) { |
2608 | emitOMPLoopBodyWithStopPoint(CGF, S, |
2609 | LoopExit: CodeGenFunction::JumpDest()); |
2610 | }, |
2611 | PostIncGen: [](CodeGenFunction &) {}); |
2612 | }); |
2613 | CGF.EmitOMPSimdFinal(D: S, CondGen: [](CodeGenFunction &) { return nullptr; }); |
2614 | // Emit final copy of the lastprivate variables at the end of loops. |
2615 | if (HasLastprivateClause) |
2616 | CGF.EmitOMPLastprivateClauseFinal(D: S, /*NoFinals=*/true); |
2617 | CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_simd); |
2618 | emitPostUpdateForReductionClause(CGF, D: S, |
2619 | CondGen: [](CodeGenFunction &) { return nullptr; }); |
2620 | LoopScope.restoreMap(); |
2621 | CGF.EmitOMPLinearClauseFinal(D: S, CondGen: [](CodeGenFunction &) { return nullptr; }); |
2622 | } |
2623 | // Emit: if (PreCond) - end. |
2624 | if (ContBlock) { |
2625 | CGF.EmitBranch(Block: ContBlock); |
2626 | CGF.EmitBlock(BB: ContBlock, IsFinished: true); |
2627 | } |
2628 | } |
2629 | |
2630 | static bool isSupportedByOpenMPIRBuilder(const OMPSimdDirective &S) { |
2631 | // Check for unsupported clauses |
2632 | for (OMPClause *C : S.clauses()) { |
// Currently only the order, simdlen, safelen and aligned clauses are supported
2634 | if (!(isa<OMPSimdlenClause>(Val: C) || isa<OMPSafelenClause>(Val: C) || |
2635 | isa<OMPOrderClause>(Val: C) || isa<OMPAlignedClause>(Val: C))) |
2636 | return false; |
2637 | } |
2638 | |
2639 | // Check if we have a statement with the ordered directive. |
2640 | // Visit the statement hierarchy to find a compound statement |
// with an ordered directive in it.
2642 | if (const auto *CanonLoop = dyn_cast<OMPCanonicalLoop>(Val: S.getRawStmt())) { |
2643 | if (const Stmt *SyntacticalLoop = CanonLoop->getLoopStmt()) { |
2644 | for (const Stmt *SubStmt : SyntacticalLoop->children()) { |
2645 | if (!SubStmt) |
2646 | continue; |
2647 | if (const CompoundStmt *CS = dyn_cast<CompoundStmt>(Val: SubStmt)) { |
2648 | for (const Stmt *CSSubStmt : CS->children()) { |
2649 | if (!CSSubStmt) |
2650 | continue; |
2651 | if (isa<OMPOrderedDirective>(Val: CSSubStmt)) { |
2652 | return false; |
2653 | } |
2654 | } |
2655 | } |
2656 | } |
2657 | } |
2658 | } |
2659 | return true; |
2660 | } |
2661 | static llvm::MapVector<llvm::Value *, llvm::Value *> |
2662 | GetAlignedMapping(const OMPSimdDirective &S, CodeGenFunction &CGF) { |
2663 | llvm::MapVector<llvm::Value *, llvm::Value *> AlignedVars; |
2664 | for (const auto *Clause : S.getClausesOfKind<OMPAlignedClause>()) { |
2665 | llvm::APInt ClauseAlignment(64, 0); |
2666 | if (const Expr *AlignmentExpr = Clause->getAlignment()) { |
2667 | auto *AlignmentCI = |
2668 | cast<llvm::ConstantInt>(Val: CGF.EmitScalarExpr(E: AlignmentExpr)); |
2669 | ClauseAlignment = AlignmentCI->getValue(); |
2670 | } |
2671 | for (const Expr *E : Clause->varlists()) { |
2672 | llvm::APInt Alignment(ClauseAlignment); |
2673 | if (Alignment == 0) { |
2674 | // OpenMP [2.8.1, Description] |
2675 | // If no optional parameter is specified, implementation-defined default |
2676 | // alignments for SIMD instructions on the target platforms are assumed. |
2677 | Alignment = |
2678 | CGF.getContext() |
2679 | .toCharUnitsFromBits(BitSize: CGF.getContext().getOpenMPDefaultSimdAlign( |
2680 | T: E->getType()->getPointeeType())) |
2681 | .getQuantity(); |
2682 | } |
2683 | assert((Alignment == 0 || Alignment.isPowerOf2()) && |
2684 | "alignment is not power of 2" ); |
2685 | llvm::Value *PtrValue = CGF.EmitScalarExpr(E); |
2686 | AlignedVars[PtrValue] = CGF.Builder.getInt64(C: Alignment.getSExtValue()); |
2687 | } |
2688 | } |
2689 | return AlignedVars; |
2690 | } |
2691 | |
2692 | void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { |
2693 | bool UseOMPIRBuilder = |
2694 | CGM.getLangOpts().OpenMPIRBuilder && isSupportedByOpenMPIRBuilder(S); |
2695 | if (UseOMPIRBuilder) { |
2696 | auto &&CodeGenIRBuilder = [this, &S, UseOMPIRBuilder](CodeGenFunction &CGF, |
2697 | PrePostActionTy &) { |
2698 | // Use the OpenMPIRBuilder if enabled. |
2699 | if (UseOMPIRBuilder) { |
2700 | llvm::MapVector<llvm::Value *, llvm::Value *> AlignedVars = |
2701 | GetAlignedMapping(S, CGF); |
2702 | // Emit the associated statement and get its loop representation. |
2703 | const Stmt *Inner = S.getRawStmt(); |
2704 | llvm::CanonicalLoopInfo *CLI = |
2705 | EmitOMPCollapsedCanonicalLoopNest(S: Inner, Depth: 1); |
2706 | |
2707 | llvm::OpenMPIRBuilder &OMPBuilder = |
2708 | CGM.getOpenMPRuntime().getOMPBuilder(); |
// Add SIMD-specific metadata.
2710 | llvm::ConstantInt *Simdlen = nullptr; |
2711 | if (const auto *C = S.getSingleClause<OMPSimdlenClause>()) { |
2712 | RValue Len = |
2713 | this->EmitAnyExpr(E: C->getSimdlen(), aggSlot: AggValueSlot::ignored(), |
2714 | /*ignoreResult=*/true); |
2715 | auto *Val = cast<llvm::ConstantInt>(Val: Len.getScalarVal()); |
2716 | Simdlen = Val; |
2717 | } |
2718 | llvm::ConstantInt *Safelen = nullptr; |
2719 | if (const auto *C = S.getSingleClause<OMPSafelenClause>()) { |
2720 | RValue Len = |
2721 | this->EmitAnyExpr(E: C->getSafelen(), aggSlot: AggValueSlot::ignored(), |
2722 | /*ignoreResult=*/true); |
2723 | auto *Val = cast<llvm::ConstantInt>(Val: Len.getScalarVal()); |
2724 | Safelen = Val; |
2725 | } |
2726 | llvm::omp::OrderKind Order = llvm::omp::OrderKind::OMP_ORDER_unknown; |
2727 | if (const auto *C = S.getSingleClause<OMPOrderClause>()) { |
if (C->getKind() == OpenMPOrderClauseKind::OMPC_ORDER_concurrent) {
2729 | Order = llvm::omp::OrderKind::OMP_ORDER_concurrent; |
2730 | } |
2731 | } |
// Add simd metadata to the collapsed loop. Do not generate
// another loop version for the 'if' clause; its support is handled earlier.
2734 | OMPBuilder.applySimd(Loop: CLI, AlignedVars, |
2735 | /*IfCond*/ nullptr, Order, Simdlen, Safelen); |
2736 | return; |
2737 | } |
2738 | }; |
2739 | { |
2740 | auto LPCRegion = |
2741 | CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S); |
2742 | OMPLexicalScope Scope(*this, S, OMPD_unknown); |
2743 | CGM.getOpenMPRuntime().emitInlinedDirective(CGF&: *this, InnermostKind: OMPD_simd, |
2744 | CodeGen: CodeGenIRBuilder); |
2745 | } |
2746 | return; |
2747 | } |
2748 | |
2749 | ParentLoopDirectiveForScanRegion ScanRegion(*this, S); |
2750 | OMPFirstScanLoop = true; |
2751 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
2752 | emitOMPSimdRegion(CGF, S, Action); |
2753 | }; |
2754 | { |
2755 | auto LPCRegion = |
2756 | CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S); |
2757 | OMPLexicalScope Scope(*this, S, OMPD_unknown); |
2758 | CGM.getOpenMPRuntime().emitInlinedDirective(CGF&: *this, InnermostKind: OMPD_simd, CodeGen); |
2759 | } |
2760 | // Check for outer lastprivate conditional update. |
2761 | checkForLastprivateConditionalUpdate(CGF&: *this, S); |
2762 | } |
2763 | |
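// Loop transformation directives such as 'tile', 'reverse' and
// 'interchange' are de-sugared by Sema; e.g. '#pragma omp tile sizes(4)'
// arrives here as an already-built nest of tile and element loops, so
// codegen only needs to emit the transformed statement.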
2764 | void CodeGenFunction::EmitOMPTileDirective(const OMPTileDirective &S) { |
2765 | // Emit the de-sugared statement. |
2766 | OMPTransformDirectiveScopeRAII TileScope(*this, &S); |
2767 | EmitStmt(S: S.getTransformedStmt()); |
2768 | } |
2769 | |
2770 | void CodeGenFunction::EmitOMPReverseDirective(const OMPReverseDirective &S) { |
2771 | // Emit the de-sugared statement. |
2772 | OMPTransformDirectiveScopeRAII ReverseScope(*this, &S); |
2773 | EmitStmt(S: S.getTransformedStmt()); |
2774 | } |
2775 | |
2776 | void CodeGenFunction::EmitOMPInterchangeDirective( |
2777 | const OMPInterchangeDirective &S) { |
2778 | // Emit the de-sugared statement. |
2779 | OMPTransformDirectiveScopeRAII InterchangeScope(*this, &S); |
2780 | EmitStmt(S: S.getTransformedStmt()); |
2781 | } |
2782 | |
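// As a sketch of the intent (not the exact IR): '#pragma omp unroll
// partial(4)' either asks the OpenMPIRBuilder to partially unroll the
// canonical loop by a factor of 4 (IRBuilder path below) or attaches an
// unroll count of 4 as loop metadata for LLVM's unroller (classic path).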
2783 | void CodeGenFunction::EmitOMPUnrollDirective(const OMPUnrollDirective &S) { |
2784 | bool UseOMPIRBuilder = CGM.getLangOpts().OpenMPIRBuilder; |
2785 | |
2786 | if (UseOMPIRBuilder) { |
2787 | auto DL = SourceLocToDebugLoc(Location: S.getBeginLoc()); |
2788 | const Stmt *Inner = S.getRawStmt(); |
2789 | |
2790 | // Consume nested loop. Clear the entire remaining loop stack because a |
2791 | // fully unrolled loop is non-transformable. For partial unrolling the |
2792 | // generated outer loop is pushed back to the stack. |
2793 | llvm::CanonicalLoopInfo *CLI = EmitOMPCollapsedCanonicalLoopNest(S: Inner, Depth: 1); |
2794 | OMPLoopNestStack.clear(); |
2795 | |
2796 | llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); |
2797 | |
2798 | bool NeedsUnrolledCLI = ExpectedOMPLoopDepth >= 1; |
2799 | llvm::CanonicalLoopInfo *UnrolledCLI = nullptr; |
2800 | |
2801 | if (S.hasClausesOfKind<OMPFullClause>()) { |
2802 | assert(ExpectedOMPLoopDepth == 0); |
2803 | OMPBuilder.unrollLoopFull(DL, Loop: CLI); |
2804 | } else if (auto *PartialClause = S.getSingleClause<OMPPartialClause>()) { |
2805 | uint64_t Factor = 0; |
2806 | if (Expr *FactorExpr = PartialClause->getFactor()) { |
2807 | Factor = FactorExpr->EvaluateKnownConstInt(Ctx: getContext()).getZExtValue(); |
assert(Factor >= 1 && "Only positive factors are valid");
2809 | } |
2810 | OMPBuilder.unrollLoopPartial(DL, Loop: CLI, Factor, |
2811 | UnrolledCLI: NeedsUnrolledCLI ? &UnrolledCLI : nullptr); |
2812 | } else { |
2813 | OMPBuilder.unrollLoopHeuristic(DL, Loop: CLI); |
2814 | } |
2815 | |
2816 | assert((!NeedsUnrolledCLI || UnrolledCLI) && |
2817 | "NeedsUnrolledCLI implies UnrolledCLI to be set" ); |
2818 | if (UnrolledCLI) |
2819 | OMPLoopNestStack.push_back(Elt: UnrolledCLI); |
2820 | |
2821 | return; |
2822 | } |
2823 | |
2824 | // This function is only called if the unrolled loop is not consumed by any |
2825 | // other loop-associated construct. Such a loop-associated construct will have |
2826 | // used the transformed AST. |
2827 | |
2828 | // Set the unroll metadata for the next emitted loop. |
2829 | LoopStack.setUnrollState(LoopAttributes::Enable); |
2830 | |
2831 | if (S.hasClausesOfKind<OMPFullClause>()) { |
2832 | LoopStack.setUnrollState(LoopAttributes::Full); |
2833 | } else if (auto *PartialClause = S.getSingleClause<OMPPartialClause>()) { |
2834 | if (Expr *FactorExpr = PartialClause->getFactor()) { |
2835 | uint64_t Factor = |
2836 | FactorExpr->EvaluateKnownConstInt(Ctx: getContext()).getZExtValue(); |
assert(Factor >= 1 && "Only positive factors are valid");
2838 | LoopStack.setUnrollCount(Factor); |
2839 | } |
2840 | } |
2841 | |
2842 | EmitStmt(S: S.getAssociatedStmt()); |
2843 | } |
2844 | |
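// Emits the outer dispatch loop shared by the worksharing and distribute
// codegen paths. For a dynamic schedule this is roughly:
//   while (__kmpc_dispatch_next(&LB, &UB)) {
//     IV = LB;
//     <inner loop over [LB, UB]>;
//   }
// while static schedules instead bump LB/UB by the stride on each trip; see
// the detailed pseudocode in EmitOMPForOuterLoop below.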
2845 | void CodeGenFunction::EmitOMPOuterLoop( |
2846 | bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S, |
2847 | CodeGenFunction::OMPPrivateScope &LoopScope, |
2848 | const CodeGenFunction::OMPLoopArguments &LoopArgs, |
2849 | const CodeGenFunction::CodeGenLoopTy &CodeGenLoop, |
2850 | const CodeGenFunction::CodeGenOrderedTy &CodeGenOrdered) { |
2851 | CGOpenMPRuntime &RT = CGM.getOpenMPRuntime(); |
2852 | |
2853 | const Expr *IVExpr = S.getIterationVariable(); |
2854 | const unsigned IVSize = getContext().getTypeSize(T: IVExpr->getType()); |
2855 | const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); |
2856 | |
JumpDest LoopExit = getJumpDestInCurrentScope(Name: "omp.dispatch.end");
2858 | |
2859 | // Start the loop with a block that tests the condition. |
llvm::BasicBlock *CondBlock = createBasicBlock(name: "omp.dispatch.cond");
2861 | EmitBlock(BB: CondBlock); |
2862 | const SourceRange R = S.getSourceRange(); |
2863 | OMPLoopNestStack.clear(); |
2864 | LoopStack.push(Header: CondBlock, StartLoc: SourceLocToDebugLoc(Location: R.getBegin()), |
2865 | EndLoc: SourceLocToDebugLoc(Location: R.getEnd())); |
2866 | |
2867 | llvm::Value *BoolCondVal = nullptr; |
2868 | if (!DynamicOrOrdered) { |
2869 | // UB = min(UB, GlobalUB) or |
2870 | // UB = min(UB, PrevUB) for combined loop sharing constructs (e.g. |
2871 | // 'distribute parallel for') |
2872 | EmitIgnoredExpr(E: LoopArgs.EUB); |
2873 | // IV = LB |
2874 | EmitIgnoredExpr(E: LoopArgs.Init); |
2875 | // IV < UB |
2876 | BoolCondVal = EvaluateExprAsBool(E: LoopArgs.Cond); |
2877 | } else { |
2878 | BoolCondVal = |
2879 | RT.emitForNext(CGF&: *this, Loc: S.getBeginLoc(), IVSize, IVSigned, IL: LoopArgs.IL, |
2880 | LB: LoopArgs.LB, UB: LoopArgs.UB, ST: LoopArgs.ST); |
2881 | } |
2882 | |
2883 | // If there are any cleanups between here and the loop-exit scope, |
2884 | // create a block to stage a loop exit along. |
2885 | llvm::BasicBlock *ExitBlock = LoopExit.getBlock(); |
2886 | if (LoopScope.requiresCleanups()) |
ExitBlock = createBasicBlock(name: "omp.dispatch.cleanup");
2888 | |
llvm::BasicBlock *LoopBody = createBasicBlock(name: "omp.dispatch.body");
2890 | Builder.CreateCondBr(Cond: BoolCondVal, True: LoopBody, False: ExitBlock); |
2891 | if (ExitBlock != LoopExit.getBlock()) { |
2892 | EmitBlock(BB: ExitBlock); |
2893 | EmitBranchThroughCleanup(Dest: LoopExit); |
2894 | } |
2895 | EmitBlock(BB: LoopBody); |
2896 | |
2897 | // Emit "IV = LB" (in case of static schedule, we have already calculated new |
2898 | // LB for loop condition and emitted it above). |
2899 | if (DynamicOrOrdered) |
2900 | EmitIgnoredExpr(E: LoopArgs.Init); |
2901 | |
2902 | // Create a block for the increment. |
JumpDest Continue = getJumpDestInCurrentScope(Name: "omp.dispatch.inc");
2904 | BreakContinueStack.push_back(Elt: BreakContinue(LoopExit, Continue)); |
2905 | |
2906 | emitCommonSimdLoop( |
2907 | CGF&: *this, S, |
2908 | SimdInitGen: [&S, IsMonotonic](CodeGenFunction &CGF, PrePostActionTy &) { |
2909 | // Generate !llvm.loop.parallel metadata for loads and stores for loops |
// with dynamic/guided scheduling and without an ordered clause.
2911 | if (!isOpenMPSimdDirective(DKind: S.getDirectiveKind())) { |
2912 | CGF.LoopStack.setParallel(!IsMonotonic); |
2913 | if (const auto *C = S.getSingleClause<OMPOrderClause>()) |
2914 | if (C->getKind() == OMPC_ORDER_concurrent) |
2915 | CGF.LoopStack.setParallel(/*Enable=*/true); |
2916 | } else { |
2917 | CGF.EmitOMPSimdInit(D: S); |
2918 | } |
2919 | }, |
2920 | BodyCodeGen: [&S, &LoopArgs, LoopExit, &CodeGenLoop, IVSize, IVSigned, &CodeGenOrdered, |
2921 | &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) { |
2922 | SourceLocation Loc = S.getBeginLoc(); |
2923 | // when 'distribute' is not combined with a 'for': |
2924 | // while (idx <= UB) { BODY; ++idx; } |
2925 | // when 'distribute' is combined with a 'for' |
2926 | // (e.g. 'distribute parallel for') |
2927 | // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; } |
2928 | CGF.EmitOMPInnerLoop( |
2929 | S, RequiresCleanup: LoopScope.requiresCleanups(), LoopCond: LoopArgs.Cond, IncExpr: LoopArgs.IncExpr, |
2930 | BodyGen: [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) { |
2931 | CodeGenLoop(CGF, S, LoopExit); |
2932 | }, |
2933 | PostIncGen: [IVSize, IVSigned, Loc, &CodeGenOrdered](CodeGenFunction &CGF) { |
2934 | CodeGenOrdered(CGF, Loc, IVSize, IVSigned); |
2935 | }); |
2936 | }); |
2937 | |
2938 | EmitBlock(BB: Continue.getBlock()); |
2939 | BreakContinueStack.pop_back(); |
2940 | if (!DynamicOrOrdered) { |
2941 | // Emit "LB = LB + Stride", "UB = UB + Stride". |
2942 | EmitIgnoredExpr(E: LoopArgs.NextLB); |
2943 | EmitIgnoredExpr(E: LoopArgs.NextUB); |
2944 | } |
2945 | |
2946 | EmitBranch(Block: CondBlock); |
2947 | OMPLoopNestStack.clear(); |
2948 | LoopStack.pop(); |
2949 | // Emit the fall-through block. |
2950 | EmitBlock(BB: LoopExit.getBlock()); |
2951 | |
2952 | // Tell the runtime we are done. |
2953 | auto &&CodeGen = [DynamicOrOrdered, &S, &LoopArgs](CodeGenFunction &CGF) { |
2954 | if (!DynamicOrOrdered) |
2955 | CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, Loc: S.getEndLoc(), |
2956 | DKind: LoopArgs.DKind); |
2957 | }; |
2958 | OMPCancelStack.emitExit(CGF&: *this, Kind: S.getDirectiveKind(), CodeGen); |
2959 | } |
2960 | |
2961 | void CodeGenFunction::EmitOMPForOuterLoop( |
2962 | const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic, |
2963 | const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered, |
2964 | const OMPLoopArguments &LoopArgs, |
2965 | const CodeGenDispatchBoundsTy &CGDispatchBounds) { |
2966 | CGOpenMPRuntime &RT = CGM.getOpenMPRuntime(); |
2967 | |
2968 | // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime). |
2969 | const bool DynamicOrOrdered = Ordered || RT.isDynamic(ScheduleKind: ScheduleKind.Schedule); |
2970 | |
2971 | assert((Ordered || !RT.isStaticNonchunked(ScheduleKind.Schedule, |
2972 | LoopArgs.Chunk != nullptr)) && |
2973 | "static non-chunked schedule does not need outer loop" ); |
2974 | |
2975 | // Emit outer loop. |
2976 | // |
2977 | // OpenMP [2.7.1, Loop Construct, Description, table 2-1] |
2978 | // When schedule(dynamic,chunk_size) is specified, the iterations are |
2979 | // distributed to threads in the team in chunks as the threads request them. |
2980 | // Each thread executes a chunk of iterations, then requests another chunk, |
2981 | // until no chunks remain to be distributed. Each chunk contains chunk_size |
2982 | // iterations, except for the last chunk to be distributed, which may have |
2983 | // fewer iterations. When no chunk_size is specified, it defaults to 1. |
2984 | // |
2985 | // When schedule(guided,chunk_size) is specified, the iterations are assigned |
2986 | // to threads in the team in chunks as the executing threads request them. |
2987 | // Each thread executes a chunk of iterations, then requests another chunk, |
2988 | // until no chunks remain to be assigned. For a chunk_size of 1, the size of |
2989 | // each chunk is proportional to the number of unassigned iterations divided |
2990 | // by the number of threads in the team, decreasing to 1. For a chunk_size |
2991 | // with value k (greater than 1), the size of each chunk is determined in the |
2992 | // same way, with the restriction that the chunks do not contain fewer than k |
2993 | // iterations (except for the last chunk to be assigned, which may have fewer |
2994 | // than k iterations). |
2995 | // |
2996 | // When schedule(auto) is specified, the decision regarding scheduling is |
2997 | // delegated to the compiler and/or runtime system. The programmer gives the |
2998 | // implementation the freedom to choose any possible mapping of iterations to |
2999 | // threads in the team. |
3000 | // |
3001 | // When schedule(runtime) is specified, the decision regarding scheduling is |
3002 | // deferred until run time, and the schedule and chunk size are taken from the |
3003 | // run-sched-var ICV. If the ICV is set to auto, the schedule is |
// implementation defined.
3005 | // |
3006 | // __kmpc_dispatch_init(); |
3007 | // while(__kmpc_dispatch_next(&LB, &UB)) { |
3008 | // idx = LB; |
3009 | // while (idx <= UB) { BODY; ++idx; |
3010 | // __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only. |
3011 | // } // inner loop |
3012 | // } |
3013 | // __kmpc_dispatch_deinit(); |
3014 | // |
3015 | // OpenMP [2.7.1, Loop Construct, Description, table 2-1] |
3016 | // When schedule(static, chunk_size) is specified, iterations are divided into |
3017 | // chunks of size chunk_size, and the chunks are assigned to the threads in |
3018 | // the team in a round-robin fashion in the order of the thread number. |
3019 | // |
3020 | // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) { |
3021 | // while (idx <= UB) { BODY; ++idx; } // inner loop |
3022 | // LB = LB + ST; |
3023 | // UB = UB + ST; |
3024 | // } |
3025 | // |
3026 | |
3027 | const Expr *IVExpr = S.getIterationVariable(); |
3028 | const unsigned IVSize = getContext().getTypeSize(T: IVExpr->getType()); |
3029 | const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); |
3030 | |
3031 | if (DynamicOrOrdered) { |
3032 | const std::pair<llvm::Value *, llvm::Value *> DispatchBounds = |
3033 | CGDispatchBounds(*this, S, LoopArgs.LB, LoopArgs.UB); |
3034 | llvm::Value *LBVal = DispatchBounds.first; |
3035 | llvm::Value *UBVal = DispatchBounds.second; |
CGOpenMPRuntime::DispatchRTInput DispatchRTInputValues = {LBVal, UBVal,
3037 | LoopArgs.Chunk}; |
3038 | RT.emitForDispatchInit(CGF&: *this, Loc: S.getBeginLoc(), ScheduleKind, IVSize, |
IVSigned, Ordered, DispatchValues: DispatchRTInputValues);
3040 | } else { |
3041 | CGOpenMPRuntime::StaticRTInput StaticInit( |
3042 | IVSize, IVSigned, Ordered, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB, |
3043 | LoopArgs.ST, LoopArgs.Chunk); |
3044 | RT.emitForStaticInit(CGF&: *this, Loc: S.getBeginLoc(), DKind: S.getDirectiveKind(), |
3045 | ScheduleKind, Values: StaticInit); |
3046 | } |
3047 | |
3048 | auto &&CodeGenOrdered = [Ordered](CodeGenFunction &CGF, SourceLocation Loc, |
3049 | const unsigned IVSize, |
3050 | const bool IVSigned) { |
3051 | if (Ordered) { |
3052 | CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(CGF, Loc, IVSize, |
3053 | IVSigned); |
3054 | } |
3055 | }; |
3056 | |
3057 | OMPLoopArguments OuterLoopArgs(LoopArgs.LB, LoopArgs.UB, LoopArgs.ST, |
3058 | LoopArgs.IL, LoopArgs.Chunk, LoopArgs.EUB); |
3059 | OuterLoopArgs.IncExpr = S.getInc(); |
3060 | OuterLoopArgs.Init = S.getInit(); |
3061 | OuterLoopArgs.Cond = S.getCond(); |
3062 | OuterLoopArgs.NextLB = S.getNextLowerBound(); |
3063 | OuterLoopArgs.NextUB = S.getNextUpperBound(); |
3064 | OuterLoopArgs.DKind = LoopArgs.DKind; |
3065 | EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, LoopArgs: OuterLoopArgs, |
3066 | CodeGenLoop: emitOMPLoopBodyWithStopPoint, CodeGenOrdered); |
3067 | if (DynamicOrOrdered) { |
3068 | RT.emitForDispatchDeinit(CGF&: *this, Loc: S.getBeginLoc()); |
3069 | } |
3070 | } |
3071 | |
3072 | static void emitEmptyOrdered(CodeGenFunction &, SourceLocation Loc, |
3073 | const unsigned IVSize, const bool IVSigned) {} |
3074 | |
3075 | void CodeGenFunction::EmitOMPDistributeOuterLoop( |
3076 | OpenMPDistScheduleClauseKind ScheduleKind, const OMPLoopDirective &S, |
3077 | OMPPrivateScope &LoopScope, const OMPLoopArguments &LoopArgs, |
3078 | const CodeGenLoopTy &CodeGenLoopContent) { |
3079 | |
3080 | CGOpenMPRuntime &RT = CGM.getOpenMPRuntime(); |
3081 | |
3082 | // Emit outer loop. |
// Same behavior as an OMPForOuterLoop, except that the schedule cannot be
// dynamic.
3085 | // |
3086 | |
3087 | const Expr *IVExpr = S.getIterationVariable(); |
3088 | const unsigned IVSize = getContext().getTypeSize(T: IVExpr->getType()); |
3089 | const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); |
3090 | |
3091 | CGOpenMPRuntime::StaticRTInput StaticInit( |
3092 | IVSize, IVSigned, /* Ordered = */ false, LoopArgs.IL, LoopArgs.LB, |
3093 | LoopArgs.UB, LoopArgs.ST, LoopArgs.Chunk); |
3094 | RT.emitDistributeStaticInit(CGF&: *this, Loc: S.getBeginLoc(), SchedKind: ScheduleKind, Values: StaticInit); |
3095 | |
// For combined 'distribute' and 'for', the increment expression of
// 'distribute' is stored in DistInc. For 'distribute' alone, it is in Inc.
3098 | Expr *IncExpr; |
3099 | if (isOpenMPLoopBoundSharingDirective(Kind: S.getDirectiveKind())) |
3100 | IncExpr = S.getDistInc(); |
3101 | else |
3102 | IncExpr = S.getInc(); |
3103 | |
// This routine is shared by 'omp distribute parallel for' and
// 'omp distribute': select the right EUB expression depending on the
// directive.
3107 | OMPLoopArguments OuterLoopArgs; |
3108 | OuterLoopArgs.LB = LoopArgs.LB; |
3109 | OuterLoopArgs.UB = LoopArgs.UB; |
3110 | OuterLoopArgs.ST = LoopArgs.ST; |
3111 | OuterLoopArgs.IL = LoopArgs.IL; |
3112 | OuterLoopArgs.Chunk = LoopArgs.Chunk; |
3113 | OuterLoopArgs.EUB = isOpenMPLoopBoundSharingDirective(Kind: S.getDirectiveKind()) |
3114 | ? S.getCombinedEnsureUpperBound() |
3115 | : S.getEnsureUpperBound(); |
3116 | OuterLoopArgs.IncExpr = IncExpr; |
3117 | OuterLoopArgs.Init = isOpenMPLoopBoundSharingDirective(Kind: S.getDirectiveKind()) |
3118 | ? S.getCombinedInit() |
3119 | : S.getInit(); |
3120 | OuterLoopArgs.Cond = isOpenMPLoopBoundSharingDirective(Kind: S.getDirectiveKind()) |
3121 | ? S.getCombinedCond() |
3122 | : S.getCond(); |
3123 | OuterLoopArgs.NextLB = isOpenMPLoopBoundSharingDirective(Kind: S.getDirectiveKind()) |
3124 | ? S.getCombinedNextLowerBound() |
3125 | : S.getNextLowerBound(); |
3126 | OuterLoopArgs.NextUB = isOpenMPLoopBoundSharingDirective(Kind: S.getDirectiveKind()) |
3127 | ? S.getCombinedNextUpperBound() |
3128 | : S.getNextUpperBound(); |
3129 | OuterLoopArgs.DKind = OMPD_distribute; |
3130 | |
3131 | EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, S, |
3132 | LoopScope, LoopArgs: OuterLoopArgs, CodeGenLoop: CodeGenLoopContent, |
3133 | CodeGenOrdered: emitEmptyOrdered); |
3134 | } |
3135 | |
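/// In a combined 'distribute parallel for', each inner 'for' covers only the
/// chunk its team received from 'distribute'. E.g., with 128 iterations and
/// 4 teams under an even static distribution (an illustrative assumption),
/// team 1's inner loop is seeded with PrevLB = 32 and PrevUB = 63 rather
/// than the full 0..127 iteration space.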
3136 | static std::pair<LValue, LValue> |
3137 | emitDistributeParallelForInnerBounds(CodeGenFunction &CGF, |
3138 | const OMPExecutableDirective &S) { |
3139 | const OMPLoopDirective &LS = cast<OMPLoopDirective>(Val: S); |
3140 | LValue LB = |
3141 | EmitOMPHelperVar(CGF, Helper: cast<DeclRefExpr>(Val: LS.getLowerBoundVariable())); |
3142 | LValue UB = |
3143 | EmitOMPHelperVar(CGF, Helper: cast<DeclRefExpr>(Val: LS.getUpperBoundVariable())); |
3144 | |
3145 | // When composing 'distribute' with 'for' (e.g. as in 'distribute |
3146 | // parallel for') we need to use the 'distribute' |
3147 | // chunk lower and upper bounds rather than the whole loop iteration |
3148 | // space. These are parameters to the outlined function for 'parallel' |
3149 | // and we copy the bounds of the previous schedule into the |
// current ones.
3151 | LValue PrevLB = CGF.EmitLValue(E: LS.getPrevLowerBoundVariable()); |
3152 | LValue PrevUB = CGF.EmitLValue(E: LS.getPrevUpperBoundVariable()); |
3153 | llvm::Value *PrevLBVal = CGF.EmitLoadOfScalar( |
3154 | lvalue: PrevLB, Loc: LS.getPrevLowerBoundVariable()->getExprLoc()); |
3155 | PrevLBVal = CGF.EmitScalarConversion( |
3156 | Src: PrevLBVal, SrcTy: LS.getPrevLowerBoundVariable()->getType(), |
3157 | DstTy: LS.getIterationVariable()->getType(), |
3158 | Loc: LS.getPrevLowerBoundVariable()->getExprLoc()); |
3159 | llvm::Value *PrevUBVal = CGF.EmitLoadOfScalar( |
3160 | lvalue: PrevUB, Loc: LS.getPrevUpperBoundVariable()->getExprLoc()); |
3161 | PrevUBVal = CGF.EmitScalarConversion( |
3162 | Src: PrevUBVal, SrcTy: LS.getPrevUpperBoundVariable()->getType(), |
3163 | DstTy: LS.getIterationVariable()->getType(), |
3164 | Loc: LS.getPrevUpperBoundVariable()->getExprLoc()); |
3165 | |
3166 | CGF.EmitStoreOfScalar(value: PrevLBVal, lvalue: LB); |
3167 | CGF.EmitStoreOfScalar(value: PrevUBVal, lvalue: UB); |
3168 | |
3169 | return {LB, UB}; |
3170 | } |
3171 | |
/// If the 'for' loop has a dispatch schedule (e.g. dynamic, guided), then
/// we need to use the LB and UB expressions generated by the worksharing
/// code generation support, whereas in non-combined situations we would
/// just emit 0 and the LastIteration expression.
/// This function is necessary due to the difference of the LB and UB
/// types for the RT emission routines for 'for_static_init' and
/// 'for_dispatch_init'.
3179 | static std::pair<llvm::Value *, llvm::Value *> |
3180 | emitDistributeParallelForDispatchBounds(CodeGenFunction &CGF, |
3181 | const OMPExecutableDirective &S, |
3182 | Address LB, Address UB) { |
3183 | const OMPLoopDirective &LS = cast<OMPLoopDirective>(Val: S); |
3184 | const Expr *IVExpr = LS.getIterationVariable(); |
// When implementing a dynamic schedule for a 'for' combined with a
// 'distribute' (e.g. 'distribute parallel for'), the 'for' loop
// is not normalized as each team only executes its own assigned
// distribute chunk.
3189 | QualType IteratorTy = IVExpr->getType(); |
3190 | llvm::Value *LBVal = |
3191 | CGF.EmitLoadOfScalar(Addr: LB, /*Volatile=*/false, Ty: IteratorTy, Loc: S.getBeginLoc()); |
3192 | llvm::Value *UBVal = |
3193 | CGF.EmitLoadOfScalar(Addr: UB, /*Volatile=*/false, Ty: IteratorTy, Loc: S.getBeginLoc()); |
3194 | return {LBVal, UBVal}; |
3195 | } |
3196 | |
3197 | static void emitDistributeParallelForDistributeInnerBoundParams( |
3198 | CodeGenFunction &CGF, const OMPExecutableDirective &S, |
3199 | llvm::SmallVectorImpl<llvm::Value *> &CapturedVars) { |
3200 | const auto &Dir = cast<OMPLoopDirective>(Val: S); |
3201 | LValue LB = |
3202 | CGF.EmitLValue(E: cast<DeclRefExpr>(Val: Dir.getCombinedLowerBoundVariable())); |
3203 | llvm::Value *LBCast = CGF.Builder.CreateIntCast( |
3204 | V: CGF.Builder.CreateLoad(Addr: LB.getAddress()), DestTy: CGF.SizeTy, /*isSigned=*/false); |
3205 | CapturedVars.push_back(Elt: LBCast); |
3206 | LValue UB = |
3207 | CGF.EmitLValue(E: cast<DeclRefExpr>(Val: Dir.getCombinedUpperBoundVariable())); |
3208 | |
3209 | llvm::Value *UBCast = CGF.Builder.CreateIntCast( |
3210 | V: CGF.Builder.CreateLoad(Addr: UB.getAddress()), DestTy: CGF.SizeTy, /*isSigned=*/false); |
3211 | CapturedVars.push_back(Elt: UBCast); |
3212 | } |
3213 | |
3214 | static void |
3215 | emitInnerParallelForWhenCombined(CodeGenFunction &CGF, |
3216 | const OMPLoopDirective &S, |
3217 | CodeGenFunction::JumpDest LoopExit) { |
3218 | auto &&CGInlinedWorksharingLoop = [&S](CodeGenFunction &CGF, |
3219 | PrePostActionTy &Action) { |
3220 | Action.Enter(CGF); |
3221 | bool HasCancel = false; |
3222 | if (!isOpenMPSimdDirective(DKind: S.getDirectiveKind())) { |
3223 | if (const auto *D = dyn_cast<OMPTeamsDistributeParallelForDirective>(Val: &S)) |
3224 | HasCancel = D->hasCancel(); |
3225 | else if (const auto *D = dyn_cast<OMPDistributeParallelForDirective>(Val: &S)) |
3226 | HasCancel = D->hasCancel(); |
3227 | else if (const auto *D = |
3228 | dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(Val: &S)) |
3229 | HasCancel = D->hasCancel(); |
3230 | } |
3231 | CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, S.getDirectiveKind(), |
3232 | HasCancel); |
3233 | CGF.EmitOMPWorksharingLoop(S, EUB: S.getPrevEnsureUpperBound(), |
3234 | CodeGenLoopBounds: emitDistributeParallelForInnerBounds, |
3235 | CGDispatchBounds: emitDistributeParallelForDispatchBounds); |
3236 | }; |
3237 | |
3238 | emitCommonOMPParallelDirective( |
3239 | CGF, S, |
3240 | InnermostKind: isOpenMPSimdDirective(DKind: S.getDirectiveKind()) ? OMPD_for_simd : OMPD_for, |
3241 | CodeGen: CGInlinedWorksharingLoop, |
3242 | CodeGenBoundParameters: emitDistributeParallelForDistributeInnerBoundParams); |
3243 | } |
3244 | |
3245 | void CodeGenFunction::EmitOMPDistributeParallelForDirective( |
3246 | const OMPDistributeParallelForDirective &S) { |
3247 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { |
3248 | CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitInnerParallelForWhenCombined, |
3249 | IncExpr: S.getDistInc()); |
3250 | }; |
3251 | OMPLexicalScope Scope(*this, S, OMPD_parallel); |
3252 | CGM.getOpenMPRuntime().emitInlinedDirective(CGF&: *this, InnermostKind: OMPD_distribute, CodeGen); |
3253 | } |
3254 | |
3255 | void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective( |
3256 | const OMPDistributeParallelForSimdDirective &S) { |
3257 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { |
3258 | CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitInnerParallelForWhenCombined, |
3259 | IncExpr: S.getDistInc()); |
3260 | }; |
3261 | OMPLexicalScope Scope(*this, S, OMPD_parallel); |
3262 | CGM.getOpenMPRuntime().emitInlinedDirective(CGF&: *this, InnermostKind: OMPD_distribute, CodeGen); |
3263 | } |
3264 | |
3265 | void CodeGenFunction::EmitOMPDistributeSimdDirective( |
3266 | const OMPDistributeSimdDirective &S) { |
3267 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { |
3268 | CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitOMPLoopBodyWithStopPoint, IncExpr: S.getInc()); |
3269 | }; |
3270 | OMPLexicalScope Scope(*this, S, OMPD_unknown); |
3271 | CGM.getOpenMPRuntime().emitInlinedDirective(CGF&: *this, InnermostKind: OMPD_simd, CodeGen); |
3272 | } |
3273 | |
3274 | void CodeGenFunction::EmitOMPTargetSimdDeviceFunction( |
3275 | CodeGenModule &CGM, StringRef ParentName, const OMPTargetSimdDirective &S) { |
// Emit SPMD target simd region as a standalone region.
3277 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
3278 | emitOMPSimdRegion(CGF, S, Action); |
3279 | }; |
3280 | llvm::Function *Fn; |
3281 | llvm::Constant *Addr; |
3282 | // Emit target region as a standalone region. |
3283 | CGM.getOpenMPRuntime().emitTargetOutlinedFunction( |
3284 | D: S, ParentName, OutlinedFn&: Fn, OutlinedFnID&: Addr, /*IsOffloadEntry=*/true, CodeGen); |
assert(Fn && Addr && "Target device function emission failed.");
3286 | } |
3287 | |
3288 | void CodeGenFunction::EmitOMPTargetSimdDirective( |
3289 | const OMPTargetSimdDirective &S) { |
3290 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
3291 | emitOMPSimdRegion(CGF, S, Action); |
3292 | }; |
3293 | emitCommonOMPTargetDirective(CGF&: *this, S, CodeGen); |
3294 | } |
3295 | |
3296 | namespace { |
3297 | struct ScheduleKindModifiersTy { |
3298 | OpenMPScheduleClauseKind Kind; |
3299 | OpenMPScheduleClauseModifier M1; |
3300 | OpenMPScheduleClauseModifier M2; |
3301 | ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind, |
3302 | OpenMPScheduleClauseModifier M1, |
3303 | OpenMPScheduleClauseModifier M2) |
3304 | : Kind(Kind), M1(M1), M2(M2) {} |
3305 | }; |
3306 | } // namespace |
3307 | |
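// A rough sketch of what this emits for '#pragma omp for' with a static,
// non-chunked schedule (dynamic schedules defer to EmitOMPForOuterLoop):
//   if (PreCond) {
//     __kmpc_for_static_init(...);
//     UB = min(UB, GlobalUB); IV = LB;
//     while (IV <= UB) { BODY; ++IV; }
//     __kmpc_for_static_fini(...);
//   }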
3308 | bool CodeGenFunction::EmitOMPWorksharingLoop( |
3309 | const OMPLoopDirective &S, Expr *EUB, |
3310 | const CodeGenLoopBoundsTy &CodeGenLoopBounds, |
3311 | const CodeGenDispatchBoundsTy &CGDispatchBounds) { |
3312 | // Emit the loop iteration variable. |
3313 | const auto *IVExpr = cast<DeclRefExpr>(Val: S.getIterationVariable()); |
3314 | const auto *IVDecl = cast<VarDecl>(Val: IVExpr->getDecl()); |
3315 | EmitVarDecl(D: *IVDecl); |
3316 | |
3317 | // Emit the iterations count variable. |
// If it is not a variable, Sema decided to calculate the iteration count on
// each iteration (e.g., it is foldable into a constant).
3320 | if (const auto *LIExpr = dyn_cast<DeclRefExpr>(Val: S.getLastIteration())) { |
3321 | EmitVarDecl(D: *cast<VarDecl>(Val: LIExpr->getDecl())); |
3322 | // Emit calculation of the iterations count. |
3323 | EmitIgnoredExpr(E: S.getCalcLastIteration()); |
3324 | } |
3325 | |
3326 | CGOpenMPRuntime &RT = CGM.getOpenMPRuntime(); |
3327 | |
3328 | bool HasLastprivateClause; |
3329 | // Check pre-condition. |
3330 | { |
3331 | OMPLoopScope PreInitScope(*this, S); |
3332 | // Skip the entire loop if we don't meet the precondition. |
3333 | // If the condition constant folds and can be elided, avoid emitting the |
3334 | // whole loop. |
3335 | bool CondConstant; |
3336 | llvm::BasicBlock *ContBlock = nullptr; |
3337 | if (ConstantFoldsToSimpleInteger(Cond: S.getPreCond(), Result&: CondConstant)) { |
3338 | if (!CondConstant) |
3339 | return false; |
3340 | } else { |
llvm::BasicBlock *ThenBlock = createBasicBlock(name: "omp.precond.then");
ContBlock = createBasicBlock(name: "omp.precond.end");
3343 | emitPreCond(CGF&: *this, S, Cond: S.getPreCond(), TrueBlock: ThenBlock, FalseBlock: ContBlock, |
3344 | TrueCount: getProfileCount(S: &S)); |
3345 | EmitBlock(BB: ThenBlock); |
3346 | incrementProfileCounter(S: &S); |
3347 | } |
3348 | |
3349 | RunCleanupsScope DoacrossCleanupScope(*this); |
3350 | bool Ordered = false; |
3351 | if (const auto *OrderedClause = S.getSingleClause<OMPOrderedClause>()) { |
3352 | if (OrderedClause->getNumForLoops()) |
3353 | RT.emitDoacrossInit(CGF&: *this, D: S, NumIterations: OrderedClause->getLoopNumIterations()); |
3354 | else |
3355 | Ordered = true; |
3356 | } |
3357 | |
3358 | llvm::DenseSet<const Expr *> EmittedFinals; |
3359 | emitAlignedClause(CGF&: *this, D: S); |
3360 | bool HasLinears = EmitOMPLinearClauseInit(D: S); |
// Emit helper variable initializations.
3362 | |
3363 | std::pair<LValue, LValue> Bounds = CodeGenLoopBounds(*this, S); |
3364 | LValue LB = Bounds.first; |
3365 | LValue UB = Bounds.second; |
3366 | LValue ST = |
3367 | EmitOMPHelperVar(CGF&: *this, Helper: cast<DeclRefExpr>(Val: S.getStrideVariable())); |
3368 | LValue IL = |
3369 | EmitOMPHelperVar(CGF&: *this, Helper: cast<DeclRefExpr>(Val: S.getIsLastIterVariable())); |
3370 | |
3371 | // Emit 'then' code. |
3372 | { |
3373 | OMPPrivateScope LoopScope(*this); |
3374 | if (EmitOMPFirstprivateClause(D: S, PrivateScope&: LoopScope) || HasLinears) { |
3375 | // Emit implicit barrier to synchronize threads and avoid data races on |
3376 | // initialization of firstprivate variables and post-update of |
3377 | // lastprivate variables. |
3378 | CGM.getOpenMPRuntime().emitBarrierCall( |
3379 | CGF&: *this, Loc: S.getBeginLoc(), Kind: OMPD_unknown, /*EmitChecks=*/false, |
3380 | /*ForceSimpleCall=*/true); |
3381 | } |
3382 | EmitOMPPrivateClause(D: S, PrivateScope&: LoopScope); |
3383 | CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion( |
3384 | *this, S, EmitLValue(E: S.getIterationVariable())); |
3385 | HasLastprivateClause = EmitOMPLastprivateClauseInit(D: S, PrivateScope&: LoopScope); |
3386 | EmitOMPReductionClauseInit(D: S, PrivateScope&: LoopScope); |
3387 | EmitOMPPrivateLoopCounters(S, LoopScope); |
3388 | EmitOMPLinearClause(D: S, PrivateScope&: LoopScope); |
3389 | (void)LoopScope.Privatize(); |
3390 | if (isOpenMPTargetExecutionDirective(DKind: S.getDirectiveKind())) |
3391 | CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF&: *this, D: S); |
3392 | |
3393 | // Detect the loop schedule kind and chunk. |
3394 | const Expr *ChunkExpr = nullptr; |
3395 | OpenMPScheduleTy ScheduleKind; |
3396 | if (const auto *C = S.getSingleClause<OMPScheduleClause>()) { |
3397 | ScheduleKind.Schedule = C->getScheduleKind(); |
3398 | ScheduleKind.M1 = C->getFirstScheduleModifier(); |
3399 | ScheduleKind.M2 = C->getSecondScheduleModifier(); |
3400 | ChunkExpr = C->getChunkSize(); |
3401 | } else { |
3402 | // Default behaviour for schedule clause. |
3403 | CGM.getOpenMPRuntime().getDefaultScheduleAndChunk( |
3404 | CGF&: *this, S, ScheduleKind&: ScheduleKind.Schedule, ChunkExpr); |
3405 | } |
3406 | bool HasChunkSizeOne = false; |
3407 | llvm::Value *Chunk = nullptr; |
3408 | if (ChunkExpr) { |
3409 | Chunk = EmitScalarExpr(E: ChunkExpr); |
3410 | Chunk = EmitScalarConversion(Src: Chunk, SrcTy: ChunkExpr->getType(), |
3411 | DstTy: S.getIterationVariable()->getType(), |
3412 | Loc: S.getBeginLoc()); |
3413 | Expr::EvalResult Result; |
3414 | if (ChunkExpr->EvaluateAsInt(Result, Ctx: getContext())) { |
3415 | llvm::APSInt EvaluatedChunk = Result.Val.getInt(); |
3416 | HasChunkSizeOne = (EvaluatedChunk.getLimitedValue() == 1); |
3417 | } |
3418 | } |
3419 | const unsigned IVSize = getContext().getTypeSize(T: IVExpr->getType()); |
3420 | const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); |
3421 | // OpenMP 4.5, 2.7.1 Loop Construct, Description. |
3422 | // If the static schedule kind is specified or if the ordered clause is |
3423 | // specified, and if no monotonic modifier is specified, the effect will |
3424 | // be as if the monotonic modifier was specified. |
3425 | bool StaticChunkedOne = |
3426 | RT.isStaticChunked(ScheduleKind: ScheduleKind.Schedule, |
3427 | /* Chunked */ Chunk != nullptr) && |
3428 | HasChunkSizeOne && |
3429 | isOpenMPLoopBoundSharingDirective(Kind: S.getDirectiveKind()); |
3430 | bool IsMonotonic = |
3431 | Ordered || |
3432 | (ScheduleKind.Schedule == OMPC_SCHEDULE_static && |
3433 | !(ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_nonmonotonic || |
3434 | ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_nonmonotonic)) || |
3435 | ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic || |
3436 | ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic; |
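// E.g., 'ordered' and plain 'schedule(static)' loops are treated as
// monotonic unless a 'nonmonotonic' modifier is given, while
// 'schedule(dynamic)' without modifiers is not.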
3437 | if ((RT.isStaticNonchunked(ScheduleKind: ScheduleKind.Schedule, |
3438 | /* Chunked */ Chunk != nullptr) || |
3439 | StaticChunkedOne) && |
3440 | !Ordered) { |
3441 | JumpDest LoopExit = |
getJumpDestInCurrentScope(Target: createBasicBlock(name: "omp.loop.exit"));
3443 | emitCommonSimdLoop( |
3444 | CGF&: *this, S, |
3445 | SimdInitGen: [&S](CodeGenFunction &CGF, PrePostActionTy &) { |
3446 | if (isOpenMPSimdDirective(DKind: S.getDirectiveKind())) { |
3447 | CGF.EmitOMPSimdInit(D: S); |
3448 | } else if (const auto *C = S.getSingleClause<OMPOrderClause>()) { |
3449 | if (C->getKind() == OMPC_ORDER_concurrent) |
3450 | CGF.LoopStack.setParallel(/*Enable=*/true); |
3451 | } |
3452 | }, |
3453 | BodyCodeGen: [IVSize, IVSigned, Ordered, IL, LB, UB, ST, StaticChunkedOne, Chunk, |
3454 | &S, ScheduleKind, LoopExit, |
3455 | &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) { |
3456 | // OpenMP [2.7.1, Loop Construct, Description, table 2-1] |
3457 | // When no chunk_size is specified, the iteration space is divided |
3458 | // into chunks that are approximately equal in size, and at most |
3459 | // one chunk is distributed to each thread. Note that the size of |
3460 | // the chunks is unspecified in this case. |
3461 | CGOpenMPRuntime::StaticRTInput StaticInit( |
3462 | IVSize, IVSigned, Ordered, IL.getAddress(), LB.getAddress(), |
3463 | UB.getAddress(), ST.getAddress(), |
3464 | StaticChunkedOne ? Chunk : nullptr); |
3465 | CGF.CGM.getOpenMPRuntime().emitForStaticInit( |
3466 | CGF, Loc: S.getBeginLoc(), DKind: S.getDirectiveKind(), ScheduleKind, |
3467 | Values: StaticInit); |
3468 | // UB = min(UB, GlobalUB); |
3469 | if (!StaticChunkedOne) |
3470 | CGF.EmitIgnoredExpr(E: S.getEnsureUpperBound()); |
3471 | // IV = LB; |
3472 | CGF.EmitIgnoredExpr(E: S.getInit()); |
3473 | // For unchunked static schedule generate: |
3474 | // |
3475 | // while (idx <= UB) { |
3476 | // BODY; |
3477 | // ++idx; |
3478 | // } |
3479 | // |
3480 | // For static schedule with chunk one: |
3481 | // |
3482 | // while (IV <= PrevUB) { |
3483 | // BODY; |
3484 | // IV += ST; |
3485 | // } |
3486 | CGF.EmitOMPInnerLoop( |
3487 | S, RequiresCleanup: LoopScope.requiresCleanups(), |
3488 | LoopCond: StaticChunkedOne ? S.getCombinedParForInDistCond() |
3489 | : S.getCond(), |
3490 | IncExpr: StaticChunkedOne ? S.getDistInc() : S.getInc(), |
3491 | BodyGen: [&S, LoopExit](CodeGenFunction &CGF) { |
3492 | emitOMPLoopBodyWithStopPoint(CGF, S, LoopExit); |
3493 | }, |
3494 | PostIncGen: [](CodeGenFunction &) {}); |
3495 | }); |
3496 | EmitBlock(BB: LoopExit.getBlock()); |
3497 | // Tell the runtime we are done. |
3498 | auto &&CodeGen = [&S](CodeGenFunction &CGF) { |
3499 | CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, Loc: S.getEndLoc(), |
3500 | DKind: OMPD_for); |
3501 | }; |
3502 | OMPCancelStack.emitExit(CGF&: *this, Kind: S.getDirectiveKind(), CodeGen); |
3503 | } else { |
3504 | // Emit the outer loop, which requests its work chunk [LB..UB] from |
3505 | // runtime and runs the inner loop to process it. |
3506 | OMPLoopArguments LoopArguments(LB.getAddress(), UB.getAddress(), |
3507 | ST.getAddress(), IL.getAddress(), Chunk, |
3508 | EUB); |
3509 | LoopArguments.DKind = OMPD_for; |
3510 | EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered, |
3511 | LoopArgs: LoopArguments, CGDispatchBounds); |
3512 | } |
3513 | if (isOpenMPSimdDirective(DKind: S.getDirectiveKind())) { |
3514 | EmitOMPSimdFinal(D: S, CondGen: [IL, &S](CodeGenFunction &CGF) { |
3515 | return CGF.Builder.CreateIsNotNull( |
3516 | Arg: CGF.EmitLoadOfScalar(lvalue: IL, Loc: S.getBeginLoc())); |
3517 | }); |
3518 | } |
3519 | EmitOMPReductionClauseFinal( |
3520 | D: S, /*ReductionKind=*/isOpenMPSimdDirective(DKind: S.getDirectiveKind()) |
3521 | ? /*Parallel and Simd*/ OMPD_parallel_for_simd |
3522 | : /*Parallel only*/ OMPD_parallel); |
3523 | // Emit post-update of the reduction variables if IsLastIter != 0. |
3524 | emitPostUpdateForReductionClause( |
3525 | CGF&: *this, D: S, CondGen: [IL, &S](CodeGenFunction &CGF) { |
3526 | return CGF.Builder.CreateIsNotNull( |
3527 | Arg: CGF.EmitLoadOfScalar(lvalue: IL, Loc: S.getBeginLoc())); |
3528 | }); |
3529 | // Emit final copy of the lastprivate variables if IsLastIter != 0. |
3530 | if (HasLastprivateClause) |
3531 | EmitOMPLastprivateClauseFinal( |
3532 | D: S, NoFinals: isOpenMPSimdDirective(DKind: S.getDirectiveKind()), |
3533 | IsLastIterCond: Builder.CreateIsNotNull(Arg: EmitLoadOfScalar(lvalue: IL, Loc: S.getBeginLoc()))); |
3534 | LoopScope.restoreMap(); |
3535 | EmitOMPLinearClauseFinal(D: S, CondGen: [IL, &S](CodeGenFunction &CGF) { |
3536 | return CGF.Builder.CreateIsNotNull( |
3537 | Arg: CGF.EmitLoadOfScalar(lvalue: IL, Loc: S.getBeginLoc())); |
3538 | }); |
3539 | } |
3540 | DoacrossCleanupScope.ForceCleanup(); |
3541 | // We're now done with the loop, so jump to the continuation block. |
3542 | if (ContBlock) { |
3543 | EmitBranch(Block: ContBlock); |
3544 | EmitBlock(BB: ContBlock, /*IsFinished=*/true); |
3545 | } |
3546 | } |
3547 | return HasLastprivateClause; |
3548 | } |
3549 | |
3550 | /// The following two functions generate expressions for the loop lower |
/// and upper bounds in case of static and dynamic (dispatch) schedules
3552 | /// of the associated 'for' or 'distribute' loop. |
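/// For instance, for a static schedule emitForLoopBounds returns the
/// '.omp.lb'/'.omp.ub' helper variables themselves, while the dispatch
/// variant further below returns the plain values 0 and <last iteration>
/// expected by the __kmpc_dispatch_init runtime call.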
3553 | static std::pair<LValue, LValue> |
3554 | emitForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S) { |
3555 | const auto &LS = cast<OMPLoopDirective>(Val: S); |
3556 | LValue LB = |
3557 | EmitOMPHelperVar(CGF, Helper: cast<DeclRefExpr>(Val: LS.getLowerBoundVariable())); |
3558 | LValue UB = |
3559 | EmitOMPHelperVar(CGF, Helper: cast<DeclRefExpr>(Val: LS.getUpperBoundVariable())); |
3560 | return {LB, UB}; |
3561 | } |
3562 | |
3563 | /// When dealing with dispatch schedules (e.g. dynamic, guided) we do not |
3564 | /// consider the lower and upper bound expressions generated by the |
3565 | /// worksharing loop support, but we use 0 and the iteration space size as |
/// constants.
3567 | static std::pair<llvm::Value *, llvm::Value *> |
3568 | emitDispatchForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S, |
3569 | Address LB, Address UB) { |
3570 | const auto &LS = cast<OMPLoopDirective>(Val: S); |
3571 | const Expr *IVExpr = LS.getIterationVariable(); |
3572 | const unsigned IVSize = CGF.getContext().getTypeSize(T: IVExpr->getType()); |
3573 | llvm::Value *LBVal = CGF.Builder.getIntN(N: IVSize, C: 0); |
3574 | llvm::Value *UBVal = CGF.EmitScalarExpr(E: LS.getLastIteration()); |
3575 | return {LBVal, UBVal}; |
3576 | } |
3577 | |
3578 | /// Emits internal temp array declarations for the directive with inscan |
3579 | /// reductions. |
3580 | /// The code is the following: |
3581 | /// \code |
3582 | /// size num_iters = <num_iters>; |
3583 | /// <type> buffer[num_iters]; |
3584 | /// \endcode |
3585 | static void emitScanBasedDirectiveDecls( |
3586 | CodeGenFunction &CGF, const OMPLoopDirective &S, |
3587 | llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen) { |
3588 | llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast( |
3589 | V: NumIteratorsGen(CGF), DestTy: CGF.SizeTy, /*isSigned=*/false); |
3590 | SmallVector<const Expr *, 4> Shareds; |
3591 | SmallVector<const Expr *, 4> Privates; |
3592 | SmallVector<const Expr *, 4> ReductionOps; |
3593 | SmallVector<const Expr *, 4> CopyArrayTemps; |
3594 | for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) { |
3595 | assert(C->getModifier() == OMPC_REDUCTION_inscan && |
3596 | "Only inscan reductions are expected." ); |
3597 | Shareds.append(in_start: C->varlist_begin(), in_end: C->varlist_end()); |
3598 | Privates.append(in_start: C->privates().begin(), in_end: C->privates().end()); |
3599 | ReductionOps.append(in_start: C->reduction_ops().begin(), in_end: C->reduction_ops().end()); |
3600 | CopyArrayTemps.append(in_start: C->copy_array_temps().begin(), |
3601 | in_end: C->copy_array_temps().end()); |
3602 | } |
3603 | { |
// Emit buffers for each reduction variable.
3605 | // ReductionCodeGen is required to emit correctly the code for array |
3606 | // reductions. |
3607 | ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps); |
3608 | unsigned Count = 0; |
3609 | auto *ITA = CopyArrayTemps.begin(); |
3610 | for (const Expr *IRef : Privates) { |
3611 | const auto *PrivateVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: IRef)->getDecl()); |
3612 | // Emit variably modified arrays, used for arrays/array sections |
3613 | // reductions. |
3614 | if (PrivateVD->getType()->isVariablyModifiedType()) { |
3615 | RedCG.emitSharedOrigLValue(CGF, N: Count); |
3616 | RedCG.emitAggregateType(CGF, N: Count); |
3617 | } |
3618 | CodeGenFunction::OpaqueValueMapping DimMapping( |
3619 | CGF, |
3620 | cast<OpaqueValueExpr>( |
3621 | Val: cast<VariableArrayType>(Val: (*ITA)->getType()->getAsArrayTypeUnsafe()) |
3622 | ->getSizeExpr()), |
3623 | RValue::get(V: OMPScanNumIterations)); |
3624 | // Emit temp buffer. |
3625 | CGF.EmitVarDecl(D: *cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *ITA)->getDecl())); |
3626 | ++ITA; |
3627 | ++Count; |
3628 | } |
3629 | } |
3630 | } |
3631 | |
/// Copies the final inscan reduction values to the original variables.
3633 | /// The code is the following: |
3634 | /// \code |
3635 | /// <orig_var> = buffer[num_iters-1]; |
3636 | /// \endcode |
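/// For example, with a '+' inscan reduction over n iterations the original
/// variable receives buffer[n-1], i.e. the reduction over the whole
/// iteration space.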
3637 | static void emitScanBasedDirectiveFinals( |
3638 | CodeGenFunction &CGF, const OMPLoopDirective &S, |
3639 | llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen) { |
3640 | llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast( |
3641 | V: NumIteratorsGen(CGF), DestTy: CGF.SizeTy, /*isSigned=*/false); |
3642 | SmallVector<const Expr *, 4> Shareds; |
3643 | SmallVector<const Expr *, 4> LHSs; |
3644 | SmallVector<const Expr *, 4> RHSs; |
3645 | SmallVector<const Expr *, 4> Privates; |
3646 | SmallVector<const Expr *, 4> CopyOps; |
3647 | SmallVector<const Expr *, 4> CopyArrayElems; |
3648 | for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) { |
    assert(C->getModifier() == OMPC_REDUCTION_inscan &&
           "Only inscan reductions are expected.");
3651 | Shareds.append(in_start: C->varlist_begin(), in_end: C->varlist_end()); |
3652 | LHSs.append(in_start: C->lhs_exprs().begin(), in_end: C->lhs_exprs().end()); |
3653 | RHSs.append(in_start: C->rhs_exprs().begin(), in_end: C->rhs_exprs().end()); |
3654 | Privates.append(in_start: C->privates().begin(), in_end: C->privates().end()); |
3655 | CopyOps.append(in_start: C->copy_ops().begin(), in_end: C->copy_ops().end()); |
3656 | CopyArrayElems.append(in_start: C->copy_array_elems().begin(), |
3657 | in_end: C->copy_array_elems().end()); |
3658 | } |
  // Copy the last buffer element back to the original variable:
  // <orig_var> = buffer[num_iters - 1];
3661 | llvm::Value *OMPLast = CGF.Builder.CreateNSWSub( |
3662 | LHS: OMPScanNumIterations, |
3663 | RHS: llvm::ConstantInt::get(Ty: CGF.SizeTy, V: 1, /*isSigned=*/IsSigned: false)); |
3664 | for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) { |
3665 | const Expr *PrivateExpr = Privates[I]; |
3666 | const Expr *OrigExpr = Shareds[I]; |
3667 | const Expr *CopyArrayElem = CopyArrayElems[I]; |
3668 | CodeGenFunction::OpaqueValueMapping IdxMapping( |
3669 | CGF, |
3670 | cast<OpaqueValueExpr>( |
3671 | Val: cast<ArraySubscriptExpr>(Val: CopyArrayElem)->getIdx()), |
3672 | RValue::get(V: OMPLast)); |
3673 | LValue DestLVal = CGF.EmitLValue(E: OrigExpr); |
3674 | LValue SrcLVal = CGF.EmitLValue(E: CopyArrayElem); |
3675 | CGF.EmitOMPCopy( |
3676 | OriginalType: PrivateExpr->getType(), DestAddr: DestLVal.getAddress(), SrcAddr: SrcLVal.getAddress(), |
3677 | DestVD: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: LHSs[I])->getDecl()), |
3678 | SrcVD: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: RHSs[I])->getDecl()), Copy: CopyOps[I]); |
3679 | } |
3680 | } |
3681 | |
3682 | /// Emits the code for the directive with inscan reductions. |
3683 | /// The code is the following: |
3684 | /// \code |
3685 | /// #pragma omp ... |
3686 | /// for (i: 0..<num_iters>) { |
3687 | /// <input phase>; |
3688 | /// buffer[i] = red; |
3689 | /// } |
3690 | /// #pragma omp master // in parallel region |
3691 | /// for (int k = 0; k != ceil(log2(num_iters)); ++k) |
/// for (size cnt = last_iter; cnt >= pow(2, k); --cnt)
///   buffer[cnt] op= buffer[cnt-pow(2,k)];
3694 | /// #pragma omp barrier // in parallel region |
3695 | /// #pragma omp ... |
/// for (i: 0..<num_iters>) {
3697 | /// red = InclusiveScan ? buffer[i] : buffer[i-1]; |
3698 | /// <scan phase>; |
3699 | /// } |
3700 | /// \endcode |
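/// For example, with num_iters == 8 the prefix-reduction loop runs for
/// k = 0, 1, 2 (ceil(log2(8)) == 3) with pow(2,k) = 1, 2, 4; afterwards
/// buffer[i] holds the reduction of elements 0..i, so the second loop can
/// read either the inclusive (buffer[i]) or exclusive (buffer[i-1]) prefix.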
3701 | static void emitScanBasedDirective( |
3702 | CodeGenFunction &CGF, const OMPLoopDirective &S, |
3703 | llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen, |
3704 | llvm::function_ref<void(CodeGenFunction &)> FirstGen, |
3705 | llvm::function_ref<void(CodeGenFunction &)> SecondGen) { |
3706 | llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast( |
3707 | V: NumIteratorsGen(CGF), DestTy: CGF.SizeTy, /*isSigned=*/false); |
3708 | SmallVector<const Expr *, 4> Privates; |
3709 | SmallVector<const Expr *, 4> ReductionOps; |
3710 | SmallVector<const Expr *, 4> LHSs; |
3711 | SmallVector<const Expr *, 4> RHSs; |
3712 | SmallVector<const Expr *, 4> CopyArrayElems; |
3713 | for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) { |
    assert(C->getModifier() == OMPC_REDUCTION_inscan &&
           "Only inscan reductions are expected.");
3716 | Privates.append(in_start: C->privates().begin(), in_end: C->privates().end()); |
3717 | ReductionOps.append(in_start: C->reduction_ops().begin(), in_end: C->reduction_ops().end()); |
3718 | LHSs.append(in_start: C->lhs_exprs().begin(), in_end: C->lhs_exprs().end()); |
3719 | RHSs.append(in_start: C->rhs_exprs().begin(), in_end: C->rhs_exprs().end()); |
3720 | CopyArrayElems.append(in_start: C->copy_array_elems().begin(), |
3721 | in_end: C->copy_array_elems().end()); |
3722 | } |
3723 | CodeGenFunction::ParentLoopDirectiveForScanRegion ScanRegion(CGF, S); |
3724 | { |
3725 | // Emit loop with input phase: |
3726 | // #pragma omp ... |
3727 | // for (i: 0..<num_iters>) { |
3728 | // <input phase>; |
3729 | // buffer[i] = red; |
3730 | // } |
3731 | CGF.OMPFirstScanLoop = true; |
3732 | CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF); |
3733 | FirstGen(CGF); |
3734 | } |
3735 | // #pragma omp barrier // in parallel region |
3736 | auto &&CodeGen = [&S, OMPScanNumIterations, &LHSs, &RHSs, &CopyArrayElems, |
3737 | &ReductionOps, |
3738 | &Privates](CodeGenFunction &CGF, PrePostActionTy &Action) { |
3739 | Action.Enter(CGF); |
3740 | // Emit prefix reduction: |
3741 | // #pragma omp master // in parallel region |
    // for (int k = 0; k != ceil(log2(n)); ++k)
3743 | llvm::BasicBlock *InputBB = CGF.Builder.GetInsertBlock(); |
    llvm::BasicBlock *LoopBB = CGF.createBasicBlock(name: "omp.outer.log.scan.body");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock(name: "omp.outer.log.scan.exit");
3746 | llvm::Function *F = |
3747 | CGF.CGM.getIntrinsic(IID: llvm::Intrinsic::log2, Tys: CGF.DoubleTy); |
3748 | llvm::Value *Arg = |
3749 | CGF.Builder.CreateUIToFP(V: OMPScanNumIterations, DestTy: CGF.DoubleTy); |
3750 | llvm::Value *LogVal = CGF.EmitNounwindRuntimeCall(callee: F, args: Arg); |
3751 | F = CGF.CGM.getIntrinsic(IID: llvm::Intrinsic::ceil, Tys: CGF.DoubleTy); |
3752 | LogVal = CGF.EmitNounwindRuntimeCall(callee: F, args: LogVal); |
3753 | LogVal = CGF.Builder.CreateFPToUI(V: LogVal, DestTy: CGF.IntTy); |
3754 | llvm::Value *NMin1 = CGF.Builder.CreateNUWSub( |
3755 | LHS: OMPScanNumIterations, RHS: llvm::ConstantInt::get(Ty: CGF.SizeTy, V: 1)); |
3756 | auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, TemporaryLocation: S.getBeginLoc()); |
3757 | CGF.EmitBlock(BB: LoopBB); |
3758 | auto *Counter = CGF.Builder.CreatePHI(Ty: CGF.IntTy, NumReservedValues: 2); |
3759 | // size pow2k = 1; |
3760 | auto *Pow2K = CGF.Builder.CreatePHI(Ty: CGF.SizeTy, NumReservedValues: 2); |
3761 | Counter->addIncoming(V: llvm::ConstantInt::get(Ty: CGF.IntTy, V: 0), BB: InputBB); |
3762 | Pow2K->addIncoming(V: llvm::ConstantInt::get(Ty: CGF.SizeTy, V: 1), BB: InputBB); |
3763 | // for (size i = n - 1; i >= 2 ^ k; --i) |
3764 | // tmp[i] op= tmp[i-pow2k]; |
    llvm::BasicBlock *InnerLoopBB =
        CGF.createBasicBlock(name: "omp.inner.log.scan.body");
    llvm::BasicBlock *InnerExitBB =
        CGF.createBasicBlock(name: "omp.inner.log.scan.exit");
3769 | llvm::Value *CmpI = CGF.Builder.CreateICmpUGE(LHS: NMin1, RHS: Pow2K); |
3770 | CGF.Builder.CreateCondBr(Cond: CmpI, True: InnerLoopBB, False: InnerExitBB); |
3771 | CGF.EmitBlock(BB: InnerLoopBB); |
3772 | auto *IVal = CGF.Builder.CreatePHI(Ty: CGF.SizeTy, NumReservedValues: 2); |
3773 | IVal->addIncoming(V: NMin1, BB: LoopBB); |
3774 | { |
3775 | CodeGenFunction::OMPPrivateScope PrivScope(CGF); |
3776 | auto *ILHS = LHSs.begin(); |
3777 | auto *IRHS = RHSs.begin(); |
3778 | for (const Expr *CopyArrayElem : CopyArrayElems) { |
3779 | const auto *LHSVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *ILHS)->getDecl()); |
3780 | const auto *RHSVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IRHS)->getDecl()); |
3781 | Address LHSAddr = Address::invalid(); |
3782 | { |
3783 | CodeGenFunction::OpaqueValueMapping IdxMapping( |
3784 | CGF, |
3785 | cast<OpaqueValueExpr>( |
3786 | Val: cast<ArraySubscriptExpr>(Val: CopyArrayElem)->getIdx()), |
3787 | RValue::get(V: IVal)); |
3788 | LHSAddr = CGF.EmitLValue(E: CopyArrayElem).getAddress(); |
3789 | } |
3790 | PrivScope.addPrivate(LocalVD: LHSVD, Addr: LHSAddr); |
3791 | Address RHSAddr = Address::invalid(); |
3792 | { |
3793 | llvm::Value *OffsetIVal = CGF.Builder.CreateNUWSub(LHS: IVal, RHS: Pow2K); |
3794 | CodeGenFunction::OpaqueValueMapping IdxMapping( |
3795 | CGF, |
3796 | cast<OpaqueValueExpr>( |
3797 | Val: cast<ArraySubscriptExpr>(Val: CopyArrayElem)->getIdx()), |
3798 | RValue::get(V: OffsetIVal)); |
3799 | RHSAddr = CGF.EmitLValue(E: CopyArrayElem).getAddress(); |
3800 | } |
3801 | PrivScope.addPrivate(LocalVD: RHSVD, Addr: RHSAddr); |
3802 | ++ILHS; |
3803 | ++IRHS; |
3804 | } |
3805 | PrivScope.Privatize(); |
3806 | CGF.CGM.getOpenMPRuntime().emitReduction( |
3807 | CGF, Loc: S.getEndLoc(), Privates, LHSExprs: LHSs, RHSExprs: RHSs, ReductionOps, |
3808 | Options: {/*WithNowait=*/true, /*SimpleReduction=*/true, .ReductionKind: OMPD_unknown}); |
3809 | } |
3810 | llvm::Value *NextIVal = |
3811 | CGF.Builder.CreateNUWSub(LHS: IVal, RHS: llvm::ConstantInt::get(Ty: CGF.SizeTy, V: 1)); |
3812 | IVal->addIncoming(V: NextIVal, BB: CGF.Builder.GetInsertBlock()); |
3813 | CmpI = CGF.Builder.CreateICmpUGE(LHS: NextIVal, RHS: Pow2K); |
3814 | CGF.Builder.CreateCondBr(Cond: CmpI, True: InnerLoopBB, False: InnerExitBB); |
3815 | CGF.EmitBlock(BB: InnerExitBB); |
3816 | llvm::Value *Next = |
3817 | CGF.Builder.CreateNUWAdd(LHS: Counter, RHS: llvm::ConstantInt::get(Ty: CGF.IntTy, V: 1)); |
3818 | Counter->addIncoming(V: Next, BB: CGF.Builder.GetInsertBlock()); |
3819 | // pow2k <<= 1; |
    llvm::Value *NextPow2K =
        CGF.Builder.CreateShl(LHS: Pow2K, RHS: 1, Name: "", /*HasNUW=*/true);
3822 | Pow2K->addIncoming(V: NextPow2K, BB: CGF.Builder.GetInsertBlock()); |
3823 | llvm::Value *Cmp = CGF.Builder.CreateICmpNE(LHS: Next, RHS: LogVal); |
3824 | CGF.Builder.CreateCondBr(Cond: Cmp, True: LoopBB, False: ExitBB); |
3825 | auto DL1 = ApplyDebugLocation::CreateDefaultArtificial(CGF, TemporaryLocation: S.getEndLoc()); |
3826 | CGF.EmitBlock(BB: ExitBB); |
3827 | }; |
3828 | if (isOpenMPParallelDirective(DKind: S.getDirectiveKind())) { |
3829 | CGF.CGM.getOpenMPRuntime().emitMasterRegion(CGF, MasterOpGen: CodeGen, Loc: S.getBeginLoc()); |
3830 | CGF.CGM.getOpenMPRuntime().emitBarrierCall( |
3831 | CGF, Loc: S.getBeginLoc(), Kind: OMPD_unknown, /*EmitChecks=*/false, |
3832 | /*ForceSimpleCall=*/true); |
3833 | } else { |
3834 | RegionCodeGenTy RCG(CodeGen); |
3835 | RCG(CGF); |
3836 | } |
3837 | |
3838 | CGF.OMPFirstScanLoop = false; |
3839 | SecondGen(CGF); |
3840 | } |
3841 | |
3842 | static bool emitWorksharingDirective(CodeGenFunction &CGF, |
3843 | const OMPLoopDirective &S, |
3844 | bool HasCancel) { |
3845 | bool HasLastprivates; |
3846 | if (llvm::any_of(Range: S.getClausesOfKind<OMPReductionClause>(), |
3847 | P: [](const OMPReductionClause *C) { |
3848 | return C->getModifier() == OMPC_REDUCTION_inscan; |
3849 | })) { |
3850 | const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) { |
3851 | CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF); |
3852 | OMPLoopScope LoopScope(CGF, S); |
3853 | return CGF.EmitScalarExpr(E: S.getNumIterations()); |
3854 | }; |
3855 | const auto &&FirstGen = [&S, HasCancel](CodeGenFunction &CGF) { |
3856 | CodeGenFunction::OMPCancelStackRAII CancelRegion( |
3857 | CGF, S.getDirectiveKind(), HasCancel); |
3858 | (void)CGF.EmitOMPWorksharingLoop(S, EUB: S.getEnsureUpperBound(), |
3859 | CodeGenLoopBounds: emitForLoopBounds, |
3860 | CGDispatchBounds: emitDispatchForLoopBounds); |
3861 | // Emit an implicit barrier at the end. |
3862 | CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, Loc: S.getBeginLoc(), |
3863 | Kind: OMPD_for); |
3864 | }; |
3865 | const auto &&SecondGen = [&S, HasCancel, |
3866 | &HasLastprivates](CodeGenFunction &CGF) { |
3867 | CodeGenFunction::OMPCancelStackRAII CancelRegion( |
3868 | CGF, S.getDirectiveKind(), HasCancel); |
3869 | HasLastprivates = CGF.EmitOMPWorksharingLoop(S, EUB: S.getEnsureUpperBound(), |
3870 | CodeGenLoopBounds: emitForLoopBounds, |
3871 | CGDispatchBounds: emitDispatchForLoopBounds); |
3872 | }; |
3873 | if (!isOpenMPParallelDirective(DKind: S.getDirectiveKind())) |
3874 | emitScanBasedDirectiveDecls(CGF, S, NumIteratorsGen); |
3875 | emitScanBasedDirective(CGF, S, NumIteratorsGen, FirstGen, SecondGen); |
3876 | if (!isOpenMPParallelDirective(DKind: S.getDirectiveKind())) |
3877 | emitScanBasedDirectiveFinals(CGF, S, NumIteratorsGen); |
3878 | } else { |
3879 | CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, S.getDirectiveKind(), |
3880 | HasCancel); |
3881 | HasLastprivates = CGF.EmitOMPWorksharingLoop(S, EUB: S.getEnsureUpperBound(), |
3882 | CodeGenLoopBounds: emitForLoopBounds, |
3883 | CGDispatchBounds: emitDispatchForLoopBounds); |
3884 | } |
3885 | return HasLastprivates; |
3886 | } |
3887 | |
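/// Returns true if the OpenMPIRBuilder can emit code for this 'for'
/// directive. For illustration (hypothetical inputs): '#pragma omp for
/// nowait schedule(static)' is supported, while a directive with a schedule
/// modifier such as 'schedule(monotonic: dynamic)', any other clause, or an
/// associated 'cancel for' region is not.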
3888 | static bool isSupportedByOpenMPIRBuilder(const OMPForDirective &S) { |
3889 | if (S.hasCancel()) |
3890 | return false; |
3891 | for (OMPClause *C : S.clauses()) { |
3892 | if (isa<OMPNowaitClause>(Val: C)) |
3893 | continue; |
3894 | |
3895 | if (auto *SC = dyn_cast<OMPScheduleClause>(Val: C)) { |
3896 | if (SC->getFirstScheduleModifier() != OMPC_SCHEDULE_MODIFIER_unknown) |
3897 | return false; |
3898 | if (SC->getSecondScheduleModifier() != OMPC_SCHEDULE_MODIFIER_unknown) |
3899 | return false; |
3900 | switch (SC->getScheduleKind()) { |
3901 | case OMPC_SCHEDULE_auto: |
3902 | case OMPC_SCHEDULE_dynamic: |
3903 | case OMPC_SCHEDULE_runtime: |
3904 | case OMPC_SCHEDULE_guided: |
3905 | case OMPC_SCHEDULE_static: |
3906 | continue; |
3907 | case OMPC_SCHEDULE_unknown: |
3908 | return false; |
3909 | } |
3910 | } |
3911 | |
3912 | return false; |
3913 | } |
3914 | |
3915 | return true; |
3916 | } |
3917 | |
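/// Maps an AST schedule clause kind to the OpenMPIRBuilder's ScheduleKind,
/// e.g. 'schedule(guided)' becomes OMP_SCHEDULE_Guided; an absent clause
/// (OMPC_SCHEDULE_unknown) falls back to OMP_SCHEDULE_Default.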
3918 | static llvm::omp::ScheduleKind |
3919 | convertClauseKindToSchedKind(OpenMPScheduleClauseKind ScheduleClauseKind) { |
3920 | switch (ScheduleClauseKind) { |
3921 | case OMPC_SCHEDULE_unknown: |
3922 | return llvm::omp::OMP_SCHEDULE_Default; |
3923 | case OMPC_SCHEDULE_auto: |
3924 | return llvm::omp::OMP_SCHEDULE_Auto; |
3925 | case OMPC_SCHEDULE_dynamic: |
3926 | return llvm::omp::OMP_SCHEDULE_Dynamic; |
3927 | case OMPC_SCHEDULE_guided: |
3928 | return llvm::omp::OMP_SCHEDULE_Guided; |
3929 | case OMPC_SCHEDULE_runtime: |
3930 | return llvm::omp::OMP_SCHEDULE_Runtime; |
3931 | case OMPC_SCHEDULE_static: |
3932 | return llvm::omp::OMP_SCHEDULE_Static; |
3933 | } |
3934 | llvm_unreachable("Unhandled schedule kind" ); |
3935 | } |
3936 | |
3937 | void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) { |
3938 | bool HasLastprivates = false; |
3939 | bool UseOMPIRBuilder = |
3940 | CGM.getLangOpts().OpenMPIRBuilder && isSupportedByOpenMPIRBuilder(S); |
3941 | auto &&CodeGen = [this, &S, &HasLastprivates, |
3942 | UseOMPIRBuilder](CodeGenFunction &CGF, PrePostActionTy &) { |
3943 | // Use the OpenMPIRBuilder if enabled. |
3944 | if (UseOMPIRBuilder) { |
3945 | bool NeedsBarrier = !S.getSingleClause<OMPNowaitClause>(); |
3946 | |
3947 | llvm::omp::ScheduleKind SchedKind = llvm::omp::OMP_SCHEDULE_Default; |
3948 | llvm::Value *ChunkSize = nullptr; |
3949 | if (auto *SchedClause = S.getSingleClause<OMPScheduleClause>()) { |
3950 | SchedKind = |
3951 | convertClauseKindToSchedKind(ScheduleClauseKind: SchedClause->getScheduleKind()); |
3952 | if (const Expr *ChunkSizeExpr = SchedClause->getChunkSize()) |
3953 | ChunkSize = EmitScalarExpr(E: ChunkSizeExpr); |
3954 | } |
3955 | |
3956 | // Emit the associated statement and get its loop representation. |
3957 | const Stmt *Inner = S.getRawStmt(); |
3958 | llvm::CanonicalLoopInfo *CLI = |
3959 | EmitOMPCollapsedCanonicalLoopNest(S: Inner, Depth: 1); |
3960 | |
3961 | llvm::OpenMPIRBuilder &OMPBuilder = |
3962 | CGM.getOpenMPRuntime().getOMPBuilder(); |
3963 | llvm::OpenMPIRBuilder::InsertPointTy AllocaIP( |
3964 | AllocaInsertPt->getParent(), AllocaInsertPt->getIterator()); |
3965 | OMPBuilder.applyWorkshareLoop( |
3966 | DL: Builder.getCurrentDebugLocation(), CLI, AllocaIP, NeedsBarrier, |
3967 | SchedKind, ChunkSize, /*HasSimdModifier=*/false, |
3968 | /*HasMonotonicModifier=*/false, /*HasNonmonotonicModifier=*/false, |
3969 | /*HasOrderedClause=*/false); |
3970 | return; |
3971 | } |
3972 | |
3973 | HasLastprivates = emitWorksharingDirective(CGF, S, HasCancel: S.hasCancel()); |
3974 | }; |
3975 | { |
3976 | auto LPCRegion = |
3977 | CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S); |
3978 | OMPLexicalScope Scope(*this, S, OMPD_unknown); |
3979 | CGM.getOpenMPRuntime().emitInlinedDirective(CGF&: *this, InnermostKind: OMPD_for, CodeGen, |
3980 | HasCancel: S.hasCancel()); |
3981 | } |
3982 | |
3983 | if (!UseOMPIRBuilder) { |
3984 | // Emit an implicit barrier at the end. |
3985 | if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) |
3986 | CGM.getOpenMPRuntime().emitBarrierCall(CGF&: *this, Loc: S.getBeginLoc(), Kind: OMPD_for); |
3987 | } |
3988 | // Check for outer lastprivate conditional update. |
3989 | checkForLastprivateConditionalUpdate(CGF&: *this, S); |
3990 | } |
3991 | |
3992 | void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) { |
3993 | bool HasLastprivates = false; |
3994 | auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF, |
3995 | PrePostActionTy &) { |
3996 | HasLastprivates = emitWorksharingDirective(CGF, S, /*HasCancel=*/false); |
3997 | }; |
3998 | { |
3999 | auto LPCRegion = |
4000 | CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S); |
4001 | OMPLexicalScope Scope(*this, S, OMPD_unknown); |
4002 | CGM.getOpenMPRuntime().emitInlinedDirective(CGF&: *this, InnermostKind: OMPD_simd, CodeGen); |
4003 | } |
4004 | |
4005 | // Emit an implicit barrier at the end. |
4006 | if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) |
4007 | CGM.getOpenMPRuntime().emitBarrierCall(CGF&: *this, Loc: S.getBeginLoc(), Kind: OMPD_for); |
4008 | // Check for outer lastprivate conditional update. |
4009 | checkForLastprivateConditionalUpdate(CGF&: *this, S); |
4010 | } |
4011 | |
4012 | static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty, |
4013 | const Twine &Name, |
4014 | llvm::Value *Init = nullptr) { |
4015 | LValue LVal = CGF.MakeAddrLValue(Addr: CGF.CreateMemTemp(T: Ty, Name), T: Ty); |
4016 | if (Init) |
4017 | CGF.EmitStoreThroughLValue(Src: RValue::get(V: Init), Dst: LVal, /*isInit*/ true); |
4018 | return LVal; |
4019 | } |
4020 | |
4021 | void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) { |
4022 | const Stmt *CapturedStmt = S.getInnermostCapturedStmt()->getCapturedStmt(); |
4023 | const auto *CS = dyn_cast<CompoundStmt>(Val: CapturedStmt); |
4024 | bool HasLastprivates = false; |
4025 | auto &&CodeGen = [&S, CapturedStmt, CS, |
4026 | &HasLastprivates](CodeGenFunction &CGF, PrePostActionTy &) { |
4027 | const ASTContext &C = CGF.getContext(); |
4028 | QualType KmpInt32Ty = |
4029 | C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); |
4030 | // Emit helper vars inits. |
    LValue LB = createSectionLVal(CGF, Ty: KmpInt32Ty, Name: ".omp.sections.lb.",
                                  Init: CGF.Builder.getInt32(C: 0));
    llvm::ConstantInt *GlobalUBVal = CS != nullptr
                                         ? CGF.Builder.getInt32(C: CS->size() - 1)
                                         : CGF.Builder.getInt32(C: 0);
    LValue UB =
        createSectionLVal(CGF, Ty: KmpInt32Ty, Name: ".omp.sections.ub.", Init: GlobalUBVal);
    LValue ST = createSectionLVal(CGF, Ty: KmpInt32Ty, Name: ".omp.sections.st.",
                                  Init: CGF.Builder.getInt32(C: 1));
    LValue IL = createSectionLVal(CGF, Ty: KmpInt32Ty, Name: ".omp.sections.il.",
                                  Init: CGF.Builder.getInt32(C: 0));
    // Loop counter.
    LValue IV = createSectionLVal(CGF, Ty: KmpInt32Ty, Name: ".omp.sections.iv.");
4044 | OpaqueValueExpr IVRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue); |
4045 | CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV); |
4046 | OpaqueValueExpr UBRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue); |
4047 | CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB); |
4048 | // Generate condition for loop. |
4049 | BinaryOperator *Cond = BinaryOperator::Create( |
4050 | C, lhs: &IVRefExpr, rhs: &UBRefExpr, opc: BO_LE, ResTy: C.BoolTy, VK: VK_PRValue, OK: OK_Ordinary, |
4051 | opLoc: S.getBeginLoc(), FPFeatures: FPOptionsOverride()); |
4052 | // Increment for loop counter. |
4053 | UnaryOperator *Inc = UnaryOperator::Create( |
4054 | C, input: &IVRefExpr, opc: UO_PreInc, type: KmpInt32Ty, VK: VK_PRValue, OK: OK_Ordinary, |
4055 | l: S.getBeginLoc(), CanOverflow: true, FPFeatures: FPOptionsOverride()); |
4056 | auto &&BodyGen = [CapturedStmt, CS, &S, &IV](CodeGenFunction &CGF) { |
4057 | // Iterate through all sections and emit a switch construct: |
4058 | // switch (IV) { |
4059 | // case 0: |
4060 | // <SectionStmt[0]>; |
4061 | // break; |
4062 | // ... |
4063 | // case <NumSection> - 1: |
4064 | // <SectionStmt[<NumSection> - 1]>; |
4065 | // break; |
4066 | // } |
4067 | // .omp.sections.exit: |
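      // For illustration, a hypothetical input:
      //   #pragma omp sections
      //   {
      //     #pragma omp section
      //     a();
      //     #pragma omp section
      //     b();
      //   }
      // yields a switch over IV where case 0 calls a() and case 1 calls b().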
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(name: ".omp.sections.exit");
4069 | llvm::SwitchInst *SwitchStmt = |
4070 | CGF.Builder.CreateSwitch(V: CGF.EmitLoadOfScalar(lvalue: IV, Loc: S.getBeginLoc()), |
4071 | Dest: ExitBB, NumCases: CS == nullptr ? 1 : CS->size()); |
4072 | if (CS) { |
4073 | unsigned CaseNumber = 0; |
4074 | for (const Stmt *SubStmt : CS->children()) { |
          auto CaseBB = CGF.createBasicBlock(name: ".omp.sections.case");
4076 | CGF.EmitBlock(BB: CaseBB); |
4077 | SwitchStmt->addCase(OnVal: CGF.Builder.getInt32(C: CaseNumber), Dest: CaseBB); |
4078 | CGF.EmitStmt(S: SubStmt); |
4079 | CGF.EmitBranch(Block: ExitBB); |
4080 | ++CaseNumber; |
4081 | } |
4082 | } else { |
        llvm::BasicBlock *CaseBB = CGF.createBasicBlock(name: ".omp.sections.case");
4084 | CGF.EmitBlock(BB: CaseBB); |
4085 | SwitchStmt->addCase(OnVal: CGF.Builder.getInt32(C: 0), Dest: CaseBB); |
4086 | CGF.EmitStmt(S: CapturedStmt); |
4087 | CGF.EmitBranch(Block: ExitBB); |
4088 | } |
4089 | CGF.EmitBlock(BB: ExitBB, /*IsFinished=*/true); |
4090 | }; |
4091 | |
4092 | CodeGenFunction::OMPPrivateScope LoopScope(CGF); |
4093 | if (CGF.EmitOMPFirstprivateClause(D: S, PrivateScope&: LoopScope)) { |
4094 | // Emit implicit barrier to synchronize threads and avoid data races on |
4095 | // initialization of firstprivate variables and post-update of lastprivate |
4096 | // variables. |
4097 | CGF.CGM.getOpenMPRuntime().emitBarrierCall( |
4098 | CGF, Loc: S.getBeginLoc(), Kind: OMPD_unknown, /*EmitChecks=*/false, |
4099 | /*ForceSimpleCall=*/true); |
4100 | } |
4101 | CGF.EmitOMPPrivateClause(D: S, PrivateScope&: LoopScope); |
4102 | CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(CGF, S, IV); |
4103 | HasLastprivates = CGF.EmitOMPLastprivateClauseInit(D: S, PrivateScope&: LoopScope); |
4104 | CGF.EmitOMPReductionClauseInit(D: S, PrivateScope&: LoopScope); |
4105 | (void)LoopScope.Privatize(); |
4106 | if (isOpenMPTargetExecutionDirective(DKind: S.getDirectiveKind())) |
4107 | CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, D: S); |
4108 | |
4109 | // Emit static non-chunked loop. |
4110 | OpenMPScheduleTy ScheduleKind; |
4111 | ScheduleKind.Schedule = OMPC_SCHEDULE_static; |
4112 | CGOpenMPRuntime::StaticRTInput StaticInit( |
4113 | /*IVSize=*/32, /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(), |
4114 | LB.getAddress(), UB.getAddress(), ST.getAddress()); |
4115 | CGF.CGM.getOpenMPRuntime().emitForStaticInit( |
4116 | CGF, Loc: S.getBeginLoc(), DKind: S.getDirectiveKind(), ScheduleKind, Values: StaticInit); |
4117 | // UB = min(UB, GlobalUB); |
4118 | llvm::Value *UBVal = CGF.EmitLoadOfScalar(lvalue: UB, Loc: S.getBeginLoc()); |
4119 | llvm::Value *MinUBGlobalUB = CGF.Builder.CreateSelect( |
4120 | C: CGF.Builder.CreateICmpSLT(LHS: UBVal, RHS: GlobalUBVal), True: UBVal, False: GlobalUBVal); |
4121 | CGF.EmitStoreOfScalar(value: MinUBGlobalUB, lvalue: UB); |
4122 | // IV = LB; |
4123 | CGF.EmitStoreOfScalar(value: CGF.EmitLoadOfScalar(lvalue: LB, Loc: S.getBeginLoc()), lvalue: IV); |
4124 | // while (idx <= UB) { BODY; ++idx; } |
4125 | CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, LoopCond: Cond, IncExpr: Inc, BodyGen, |
4126 | PostIncGen: [](CodeGenFunction &) {}); |
4127 | // Tell the runtime we are done. |
4128 | auto &&CodeGen = [&S](CodeGenFunction &CGF) { |
4129 | CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, Loc: S.getEndLoc(), |
4130 | DKind: OMPD_sections); |
4131 | }; |
4132 | CGF.OMPCancelStack.emitExit(CGF, Kind: S.getDirectiveKind(), CodeGen); |
4133 | CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_parallel); |
4134 | // Emit post-update of the reduction variables if IsLastIter != 0. |
4135 | emitPostUpdateForReductionClause(CGF, D: S, CondGen: [IL, &S](CodeGenFunction &CGF) { |
4136 | return CGF.Builder.CreateIsNotNull( |
4137 | Arg: CGF.EmitLoadOfScalar(lvalue: IL, Loc: S.getBeginLoc())); |
4138 | }); |
4139 | |
4140 | // Emit final copy of the lastprivate variables if IsLastIter != 0. |
4141 | if (HasLastprivates) |
4142 | CGF.EmitOMPLastprivateClauseFinal( |
4143 | D: S, /*NoFinals=*/false, |
4144 | IsLastIterCond: CGF.Builder.CreateIsNotNull( |
4145 | Arg: CGF.EmitLoadOfScalar(lvalue: IL, Loc: S.getBeginLoc()))); |
4146 | }; |
4147 | |
4148 | bool HasCancel = false; |
4149 | if (auto *OSD = dyn_cast<OMPSectionsDirective>(Val: &S)) |
4150 | HasCancel = OSD->hasCancel(); |
4151 | else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(Val: &S)) |
4152 | HasCancel = OPSD->hasCancel(); |
4153 | OMPCancelStackRAII CancelRegion(*this, S.getDirectiveKind(), HasCancel); |
4154 | CGM.getOpenMPRuntime().emitInlinedDirective(CGF&: *this, InnermostKind: OMPD_sections, CodeGen, |
4155 | HasCancel); |
4156 | // Emit barrier for lastprivates only if 'sections' directive has 'nowait' |
4157 | // clause. Otherwise the barrier will be generated by the codegen for the |
4158 | // directive. |
4159 | if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) { |
4160 | // Emit implicit barrier to synchronize threads and avoid data races on |
4161 | // initialization of firstprivate variables. |
4162 | CGM.getOpenMPRuntime().emitBarrierCall(CGF&: *this, Loc: S.getBeginLoc(), |
4163 | Kind: OMPD_unknown); |
4164 | } |
4165 | } |
4166 | |
4167 | void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) { |
4168 | if (CGM.getLangOpts().OpenMPIRBuilder) { |
4169 | llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); |
4170 | using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; |
4171 | using BodyGenCallbackTy = llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy; |
4172 | |
4173 | auto FiniCB = [this](InsertPointTy IP) { |
4174 | OMPBuilderCBHelpers::FinalizeOMPRegion(CGF&: *this, IP); |
4175 | }; |
4176 | |
4177 | const CapturedStmt *ICS = S.getInnermostCapturedStmt(); |
4178 | const Stmt *CapturedStmt = S.getInnermostCapturedStmt()->getCapturedStmt(); |
4179 | const auto *CS = dyn_cast<CompoundStmt>(Val: CapturedStmt); |
4180 | llvm::SmallVector<BodyGenCallbackTy, 4> SectionCBVector; |
4181 | if (CS) { |
4182 | for (const Stmt *SubStmt : CS->children()) { |
4183 | auto SectionCB = [this, SubStmt](InsertPointTy AllocaIP, |
4184 | InsertPointTy CodeGenIP) { |
4185 | OMPBuilderCBHelpers::EmitOMPInlinedRegionBody( |
4186 | CGF&: *this, RegionBodyStmt: SubStmt, AllocaIP, CodeGenIP, RegionName: "section" ); |
4187 | }; |
4188 | SectionCBVector.push_back(Elt: SectionCB); |
4189 | } |
4190 | } else { |
4191 | auto SectionCB = [this, CapturedStmt](InsertPointTy AllocaIP, |
4192 | InsertPointTy CodeGenIP) { |
4193 | OMPBuilderCBHelpers::EmitOMPInlinedRegionBody( |
4194 | CGF&: *this, RegionBodyStmt: CapturedStmt, AllocaIP, CodeGenIP, RegionName: "section" ); |
4195 | }; |
4196 | SectionCBVector.push_back(Elt: SectionCB); |
4197 | } |
4198 | |
4199 | // Privatization callback that performs appropriate action for |
4200 | // shared/private/firstprivate/lastprivate/copyin/... variables. |
4201 | // |
4202 | // TODO: This defaults to shared right now. |
4203 | auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, |
4204 | llvm::Value &, llvm::Value &Val, llvm::Value *&ReplVal) { |
4205 | // The next line is appropriate only for variables (Val) with the |
4206 | // data-sharing attribute "shared". |
4207 | ReplVal = &Val; |
4208 | |
4209 | return CodeGenIP; |
4210 | }; |
4211 | |
4212 | CGCapturedStmtInfo CGSI(*ICS, CR_OpenMP); |
4213 | CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI); |
4214 | llvm::OpenMPIRBuilder::InsertPointTy AllocaIP( |
4215 | AllocaInsertPt->getParent(), AllocaInsertPt->getIterator()); |
4216 | Builder.restoreIP(IP: OMPBuilder.createSections( |
4217 | Loc: Builder, AllocaIP, SectionCBs: SectionCBVector, PrivCB, FiniCB, IsCancellable: S.hasCancel(), |
4218 | IsNowait: S.getSingleClause<OMPNowaitClause>())); |
4219 | return; |
4220 | } |
4221 | { |
4222 | auto LPCRegion = |
4223 | CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S); |
4224 | OMPLexicalScope Scope(*this, S, OMPD_unknown); |
4225 | EmitSections(S); |
4226 | } |
4227 | // Emit an implicit barrier at the end. |
4228 | if (!S.getSingleClause<OMPNowaitClause>()) { |
4229 | CGM.getOpenMPRuntime().emitBarrierCall(CGF&: *this, Loc: S.getBeginLoc(), |
4230 | Kind: OMPD_sections); |
4231 | } |
4232 | // Check for outer lastprivate conditional update. |
4233 | checkForLastprivateConditionalUpdate(CGF&: *this, S); |
4234 | } |
4235 | |
4236 | void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) { |
4237 | if (CGM.getLangOpts().OpenMPIRBuilder) { |
4238 | llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); |
4239 | using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; |
4240 | |
4241 | const Stmt *SectionRegionBodyStmt = S.getAssociatedStmt(); |
4242 | auto FiniCB = [this](InsertPointTy IP) { |
4243 | OMPBuilderCBHelpers::FinalizeOMPRegion(CGF&: *this, IP); |
4244 | }; |
4245 | |
4246 | auto BodyGenCB = [SectionRegionBodyStmt, this](InsertPointTy AllocaIP, |
4247 | InsertPointTy CodeGenIP) { |
4248 | OMPBuilderCBHelpers::EmitOMPInlinedRegionBody( |
4249 | CGF&: *this, RegionBodyStmt: SectionRegionBodyStmt, AllocaIP, CodeGenIP, RegionName: "section" ); |
4250 | }; |
4251 | |
4252 | LexicalScope Scope(*this, S.getSourceRange()); |
4253 | EmitStopPoint(S: &S); |
4254 | Builder.restoreIP(IP: OMPBuilder.createSection(Loc: Builder, BodyGenCB, FiniCB)); |
4255 | |
4256 | return; |
4257 | } |
4258 | LexicalScope Scope(*this, S.getSourceRange()); |
4259 | EmitStopPoint(S: &S); |
4260 | EmitStmt(S: S.getAssociatedStmt()); |
4261 | } |
4262 | |
4263 | void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) { |
4264 | llvm::SmallVector<const Expr *, 8> CopyprivateVars; |
4265 | llvm::SmallVector<const Expr *, 8> DestExprs; |
4266 | llvm::SmallVector<const Expr *, 8> SrcExprs; |
4267 | llvm::SmallVector<const Expr *, 8> AssignmentOps; |
4268 | // Check if there are any 'copyprivate' clauses associated with this |
4269 | // 'single' construct. |
4270 | // Build a list of copyprivate variables along with helper expressions |
4271 | // (<source>, <destination>, <destination>=<source> expressions) |
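  // For illustration, a hypothetical input:
  //   #pragma omp single copyprivate(x)
  //   x = init();
  // broadcasts the value of x computed by the single executing thread to the
  // copies of x in all other threads of the team.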
4272 | for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) { |
4273 | CopyprivateVars.append(in_start: C->varlists().begin(), in_end: C->varlists().end()); |
4274 | DestExprs.append(in_start: C->destination_exprs().begin(), |
4275 | in_end: C->destination_exprs().end()); |
4276 | SrcExprs.append(in_start: C->source_exprs().begin(), in_end: C->source_exprs().end()); |
4277 | AssignmentOps.append(in_start: C->assignment_ops().begin(), |
4278 | in_end: C->assignment_ops().end()); |
4279 | } |
4280 | // Emit code for 'single' region along with 'copyprivate' clauses |
4281 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
4282 | Action.Enter(CGF); |
4283 | OMPPrivateScope SingleScope(CGF); |
4284 | (void)CGF.EmitOMPFirstprivateClause(D: S, PrivateScope&: SingleScope); |
4285 | CGF.EmitOMPPrivateClause(D: S, PrivateScope&: SingleScope); |
4286 | (void)SingleScope.Privatize(); |
4287 | CGF.EmitStmt(S: S.getInnermostCapturedStmt()->getCapturedStmt()); |
4288 | }; |
4289 | { |
4290 | auto LPCRegion = |
4291 | CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S); |
4292 | OMPLexicalScope Scope(*this, S, OMPD_unknown); |
4293 | CGM.getOpenMPRuntime().emitSingleRegion(CGF&: *this, SingleOpGen: CodeGen, Loc: S.getBeginLoc(), |
4294 | CopyprivateVars, DestExprs, |
4295 | SrcExprs, AssignmentOps); |
4296 | } |
4297 | // Emit an implicit barrier at the end (to avoid data race on firstprivate |
4298 | // init or if no 'nowait' clause was specified and no 'copyprivate' clause). |
4299 | if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) { |
4300 | CGM.getOpenMPRuntime().emitBarrierCall( |
4301 | CGF&: *this, Loc: S.getBeginLoc(), |
4302 | Kind: S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single); |
4303 | } |
4304 | // Check for outer lastprivate conditional update. |
4305 | checkForLastprivateConditionalUpdate(CGF&: *this, S); |
4306 | } |
4307 | |
4308 | static void emitMaster(CodeGenFunction &CGF, const OMPExecutableDirective &S) { |
4309 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
4310 | Action.Enter(CGF); |
4311 | CGF.EmitStmt(S: S.getRawStmt()); |
4312 | }; |
4313 | CGF.CGM.getOpenMPRuntime().emitMasterRegion(CGF, MasterOpGen: CodeGen, Loc: S.getBeginLoc()); |
4314 | } |
4315 | |
4316 | void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) { |
4317 | if (CGM.getLangOpts().OpenMPIRBuilder) { |
4318 | llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); |
4319 | using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; |
4320 | |
4321 | const Stmt *MasterRegionBodyStmt = S.getAssociatedStmt(); |
4322 | |
4323 | auto FiniCB = [this](InsertPointTy IP) { |
4324 | OMPBuilderCBHelpers::FinalizeOMPRegion(CGF&: *this, IP); |
4325 | }; |
4326 | |
4327 | auto BodyGenCB = [MasterRegionBodyStmt, this](InsertPointTy AllocaIP, |
4328 | InsertPointTy CodeGenIP) { |
4329 | OMPBuilderCBHelpers::EmitOMPInlinedRegionBody( |
4330 | CGF&: *this, RegionBodyStmt: MasterRegionBodyStmt, AllocaIP, CodeGenIP, RegionName: "master" ); |
4331 | }; |
4332 | |
4333 | LexicalScope Scope(*this, S.getSourceRange()); |
4334 | EmitStopPoint(S: &S); |
4335 | Builder.restoreIP(IP: OMPBuilder.createMaster(Loc: Builder, BodyGenCB, FiniCB)); |
4336 | |
4337 | return; |
4338 | } |
4339 | LexicalScope Scope(*this, S.getSourceRange()); |
4340 | EmitStopPoint(S: &S); |
4341 | emitMaster(CGF&: *this, S); |
4342 | } |
4343 | |
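/// Emits a masked region. For illustration, a hypothetical input:
/// \code
/// #pragma omp masked filter(2)
/// work();
/// \endcode
/// Only the thread whose id matches the filter expression executes the
/// region; without a filter clause the primary thread (id 0) executes it.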
4344 | static void emitMasked(CodeGenFunction &CGF, const OMPExecutableDirective &S) { |
4345 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
4346 | Action.Enter(CGF); |
4347 | CGF.EmitStmt(S: S.getRawStmt()); |
4348 | }; |
4349 | Expr *Filter = nullptr; |
4350 | if (const auto *FilterClause = S.getSingleClause<OMPFilterClause>()) |
4351 | Filter = FilterClause->getThreadID(); |
4352 | CGF.CGM.getOpenMPRuntime().emitMaskedRegion(CGF, MaskedOpGen: CodeGen, Loc: S.getBeginLoc(), |
4353 | Filter); |
4354 | } |
4355 | |
4356 | void CodeGenFunction::EmitOMPMaskedDirective(const OMPMaskedDirective &S) { |
4357 | if (CGM.getLangOpts().OpenMPIRBuilder) { |
4358 | llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); |
4359 | using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; |
4360 | |
4361 | const Stmt *MaskedRegionBodyStmt = S.getAssociatedStmt(); |
4362 | const Expr *Filter = nullptr; |
4363 | if (const auto *FilterClause = S.getSingleClause<OMPFilterClause>()) |
4364 | Filter = FilterClause->getThreadID(); |
4365 | llvm::Value *FilterVal = Filter |
4366 | ? EmitScalarExpr(E: Filter, IgnoreResultAssign: CGM.Int32Ty) |
4367 | : llvm::ConstantInt::get(Ty: CGM.Int32Ty, /*V=*/0); |
4368 | |
4369 | auto FiniCB = [this](InsertPointTy IP) { |
4370 | OMPBuilderCBHelpers::FinalizeOMPRegion(CGF&: *this, IP); |
4371 | }; |
4372 | |
4373 | auto BodyGenCB = [MaskedRegionBodyStmt, this](InsertPointTy AllocaIP, |
4374 | InsertPointTy CodeGenIP) { |
4375 | OMPBuilderCBHelpers::EmitOMPInlinedRegionBody( |
4376 | CGF&: *this, RegionBodyStmt: MaskedRegionBodyStmt, AllocaIP, CodeGenIP, RegionName: "masked" ); |
4377 | }; |
4378 | |
4379 | LexicalScope Scope(*this, S.getSourceRange()); |
4380 | EmitStopPoint(S: &S); |
4381 | Builder.restoreIP( |
4382 | IP: OMPBuilder.createMasked(Loc: Builder, BodyGenCB, FiniCB, Filter: FilterVal)); |
4383 | |
4384 | return; |
4385 | } |
4386 | LexicalScope Scope(*this, S.getSourceRange()); |
4387 | EmitStopPoint(S: &S); |
4388 | emitMasked(CGF&: *this, S); |
4389 | } |
4390 | |
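/// For illustration, a hypothetical input:
/// \code
/// #pragma omp critical(update) hint(omp_sync_hint_contended)
/// counter += v;
/// \endcode
/// The directive name selects the named lock, and the optional hint
/// expression is evaluated and passed to the runtime (truncated to 32 bits
/// on the OpenMPIRBuilder path; see the TODO below).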
4391 | void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) { |
4392 | if (CGM.getLangOpts().OpenMPIRBuilder) { |
4393 | llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); |
4394 | using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; |
4395 | |
4396 | const Stmt *CriticalRegionBodyStmt = S.getAssociatedStmt(); |
4397 | const Expr *Hint = nullptr; |
4398 | if (const auto *HintClause = S.getSingleClause<OMPHintClause>()) |
4399 | Hint = HintClause->getHint(); |
4400 | |
4401 | // TODO: This is slightly different from what's currently being done in |
4402 | // clang. Fix the Int32Ty to IntPtrTy (pointer width size) when everything |
4403 | // about typing is final. |
4404 | llvm::Value *HintInst = nullptr; |
4405 | if (Hint) |
4406 | HintInst = |
4407 | Builder.CreateIntCast(V: EmitScalarExpr(E: Hint), DestTy: CGM.Int32Ty, isSigned: false); |
4408 | |
4409 | auto FiniCB = [this](InsertPointTy IP) { |
4410 | OMPBuilderCBHelpers::FinalizeOMPRegion(CGF&: *this, IP); |
4411 | }; |
4412 | |
4413 | auto BodyGenCB = [CriticalRegionBodyStmt, this](InsertPointTy AllocaIP, |
4414 | InsertPointTy CodeGenIP) { |
4415 | OMPBuilderCBHelpers::EmitOMPInlinedRegionBody( |
4416 | CGF&: *this, RegionBodyStmt: CriticalRegionBodyStmt, AllocaIP, CodeGenIP, RegionName: "critical" ); |
4417 | }; |
4418 | |
4419 | LexicalScope Scope(*this, S.getSourceRange()); |
4420 | EmitStopPoint(S: &S); |
4421 | Builder.restoreIP(IP: OMPBuilder.createCritical( |
4422 | Loc: Builder, BodyGenCB, FiniCB, CriticalName: S.getDirectiveName().getAsString(), |
4423 | HintInst)); |
4424 | |
4425 | return; |
4426 | } |
4427 | |
4428 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
4429 | Action.Enter(CGF); |
4430 | CGF.EmitStmt(S: S.getAssociatedStmt()); |
4431 | }; |
4432 | const Expr *Hint = nullptr; |
4433 | if (const auto *HintClause = S.getSingleClause<OMPHintClause>()) |
4434 | Hint = HintClause->getHint(); |
4435 | LexicalScope Scope(*this, S.getSourceRange()); |
4436 | EmitStopPoint(S: &S); |
4437 | CGM.getOpenMPRuntime().emitCriticalRegion(CGF&: *this, |
4438 | CriticalName: S.getDirectiveName().getAsString(), |
4439 | CriticalOpGen: CodeGen, Loc: S.getBeginLoc(), Hint); |
4440 | } |
4441 | |
4442 | void CodeGenFunction::EmitOMPParallelForDirective( |
4443 | const OMPParallelForDirective &S) { |
4444 | // Emit directive as a combined directive that consists of two implicit |
4445 | // directives: 'parallel' with 'for' directive. |
4446 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
4447 | Action.Enter(CGF); |
4448 | emitOMPCopyinClause(CGF, S); |
4449 | (void)emitWorksharingDirective(CGF, S, HasCancel: S.hasCancel()); |
4450 | }; |
4451 | { |
4452 | const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) { |
4453 | CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF); |
4454 | CGCapturedStmtInfo CGSI(CR_OpenMP); |
4455 | CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGSI); |
4456 | OMPLoopScope LoopScope(CGF, S); |
4457 | return CGF.EmitScalarExpr(E: S.getNumIterations()); |
4458 | }; |
4459 | bool IsInscan = llvm::any_of(Range: S.getClausesOfKind<OMPReductionClause>(), |
4460 | P: [](const OMPReductionClause *C) { |
4461 | return C->getModifier() == OMPC_REDUCTION_inscan; |
4462 | }); |
4463 | if (IsInscan) |
4464 | emitScanBasedDirectiveDecls(CGF&: *this, S, NumIteratorsGen); |
4465 | auto LPCRegion = |
4466 | CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S); |
4467 | emitCommonOMPParallelDirective(CGF&: *this, S, InnermostKind: OMPD_for, CodeGen, |
4468 | CodeGenBoundParameters: emitEmptyBoundParameters); |
4469 | if (IsInscan) |
4470 | emitScanBasedDirectiveFinals(CGF&: *this, S, NumIteratorsGen); |
4471 | } |
4472 | // Check for outer lastprivate conditional update. |
4473 | checkForLastprivateConditionalUpdate(CGF&: *this, S); |
4474 | } |
4475 | |
4476 | void CodeGenFunction::EmitOMPParallelForSimdDirective( |
4477 | const OMPParallelForSimdDirective &S) { |
4478 | // Emit directive as a combined directive that consists of two implicit |
4479 | // directives: 'parallel' with 'for' directive. |
4480 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
4481 | Action.Enter(CGF); |
4482 | emitOMPCopyinClause(CGF, S); |
4483 | (void)emitWorksharingDirective(CGF, S, /*HasCancel=*/false); |
4484 | }; |
4485 | { |
4486 | const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) { |
4487 | CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF); |
4488 | CGCapturedStmtInfo CGSI(CR_OpenMP); |
4489 | CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGSI); |
4490 | OMPLoopScope LoopScope(CGF, S); |
4491 | return CGF.EmitScalarExpr(E: S.getNumIterations()); |
4492 | }; |
4493 | bool IsInscan = llvm::any_of(Range: S.getClausesOfKind<OMPReductionClause>(), |
4494 | P: [](const OMPReductionClause *C) { |
4495 | return C->getModifier() == OMPC_REDUCTION_inscan; |
4496 | }); |
4497 | if (IsInscan) |
4498 | emitScanBasedDirectiveDecls(CGF&: *this, S, NumIteratorsGen); |
4499 | auto LPCRegion = |
4500 | CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S); |
4501 | emitCommonOMPParallelDirective(CGF&: *this, S, InnermostKind: OMPD_for_simd, CodeGen, |
4502 | CodeGenBoundParameters: emitEmptyBoundParameters); |
4503 | if (IsInscan) |
4504 | emitScanBasedDirectiveFinals(CGF&: *this, S, NumIteratorsGen); |
4505 | } |
4506 | // Check for outer lastprivate conditional update. |
4507 | checkForLastprivateConditionalUpdate(CGF&: *this, S); |
4508 | } |
4509 | |
4510 | void CodeGenFunction::EmitOMPParallelMasterDirective( |
4511 | const OMPParallelMasterDirective &S) { |
4512 | // Emit directive as a combined directive that consists of two implicit |
4513 | // directives: 'parallel' with 'master' directive. |
4514 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
4515 | Action.Enter(CGF); |
4516 | OMPPrivateScope PrivateScope(CGF); |
4517 | emitOMPCopyinClause(CGF, S); |
4518 | (void)CGF.EmitOMPFirstprivateClause(D: S, PrivateScope); |
4519 | CGF.EmitOMPPrivateClause(D: S, PrivateScope); |
4520 | CGF.EmitOMPReductionClauseInit(D: S, PrivateScope); |
4521 | (void)PrivateScope.Privatize(); |
4522 | emitMaster(CGF, S); |
4523 | CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_parallel); |
4524 | }; |
4525 | { |
4526 | auto LPCRegion = |
4527 | CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S); |
4528 | emitCommonOMPParallelDirective(CGF&: *this, S, InnermostKind: OMPD_master, CodeGen, |
4529 | CodeGenBoundParameters: emitEmptyBoundParameters); |
4530 | emitPostUpdateForReductionClause(CGF&: *this, D: S, |
4531 | CondGen: [](CodeGenFunction &) { return nullptr; }); |
4532 | } |
4533 | // Check for outer lastprivate conditional update. |
4534 | checkForLastprivateConditionalUpdate(CGF&: *this, S); |
4535 | } |
4536 | |
4537 | void CodeGenFunction::EmitOMPParallelMaskedDirective( |
4538 | const OMPParallelMaskedDirective &S) { |
4539 | // Emit directive as a combined directive that consists of two implicit |
4540 | // directives: 'parallel' with 'masked' directive. |
4541 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
4542 | Action.Enter(CGF); |
4543 | OMPPrivateScope PrivateScope(CGF); |
4544 | emitOMPCopyinClause(CGF, S); |
4545 | (void)CGF.EmitOMPFirstprivateClause(D: S, PrivateScope); |
4546 | CGF.EmitOMPPrivateClause(D: S, PrivateScope); |
4547 | CGF.EmitOMPReductionClauseInit(D: S, PrivateScope); |
4548 | (void)PrivateScope.Privatize(); |
4549 | emitMasked(CGF, S); |
4550 | CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_parallel); |
4551 | }; |
4552 | { |
4553 | auto LPCRegion = |
4554 | CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S); |
4555 | emitCommonOMPParallelDirective(CGF&: *this, S, InnermostKind: OMPD_masked, CodeGen, |
4556 | CodeGenBoundParameters: emitEmptyBoundParameters); |
4557 | emitPostUpdateForReductionClause(CGF&: *this, D: S, |
4558 | CondGen: [](CodeGenFunction &) { return nullptr; }); |
4559 | } |
4560 | // Check for outer lastprivate conditional update. |
4561 | checkForLastprivateConditionalUpdate(CGF&: *this, S); |
4562 | } |
4563 | |
4564 | void CodeGenFunction::EmitOMPParallelSectionsDirective( |
4565 | const OMPParallelSectionsDirective &S) { |
4566 | // Emit directive as a combined directive that consists of two implicit |
4567 | // directives: 'parallel' with 'sections' directive. |
4568 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
4569 | Action.Enter(CGF); |
4570 | emitOMPCopyinClause(CGF, S); |
4571 | CGF.EmitSections(S); |
4572 | }; |
4573 | { |
4574 | auto LPCRegion = |
4575 | CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S); |
4576 | emitCommonOMPParallelDirective(CGF&: *this, S, InnermostKind: OMPD_sections, CodeGen, |
4577 | CodeGenBoundParameters: emitEmptyBoundParameters); |
4578 | } |
4579 | // Check for outer lastprivate conditional update. |
4580 | checkForLastprivateConditionalUpdate(CGF&: *this, S); |
4581 | } |
4582 | |
4583 | namespace { |
4584 | /// Get the list of variables declared in the context of the untied tasks. |
4585 | class CheckVarsEscapingUntiedTaskDeclContext final |
4586 | : public ConstStmtVisitor<CheckVarsEscapingUntiedTaskDeclContext> { |
4587 | llvm::SmallVector<const VarDecl *, 4> PrivateDecls; |
4588 | |
4589 | public: |
4590 | explicit CheckVarsEscapingUntiedTaskDeclContext() = default; |
4591 | virtual ~CheckVarsEscapingUntiedTaskDeclContext() = default; |
4592 | void VisitDeclStmt(const DeclStmt *S) { |
4593 | if (!S) |
4594 | return; |
    // Need to privatize only local vars; static locals can be processed as is.
4596 | for (const Decl *D : S->decls()) { |
4597 | if (const auto *VD = dyn_cast_or_null<VarDecl>(Val: D)) |
4598 | if (VD->hasLocalStorage()) |
4599 | PrivateDecls.push_back(Elt: VD); |
4600 | } |
4601 | } |
4602 | void VisitOMPExecutableDirective(const OMPExecutableDirective *) {} |
4603 | void VisitCapturedStmt(const CapturedStmt *) {} |
4604 | void VisitLambdaExpr(const LambdaExpr *) {} |
4605 | void VisitBlockExpr(const BlockExpr *) {} |
4606 | void VisitStmt(const Stmt *S) { |
4607 | if (!S) |
4608 | return; |
4609 | for (const Stmt *Child : S->children()) |
4610 | if (Child) |
4611 | Visit(S: Child); |
4612 | } |
4613 | |
  /// Returns the list of local variables to be privatized.
4615 | ArrayRef<const VarDecl *> getPrivateDecls() const { return PrivateDecls; } |
4616 | }; |
4617 | } // anonymous namespace |
4618 | |
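/// Collects the dependences of a task-based directive into \p Data. For
/// illustration, given the hypothetical input
/// \code
/// #pragma omp task depend(in: a) depend(out: b) \
///                  depend(inout: omp_all_memory)
/// \endcode
/// the 'omp_all_memory' dependence is added first and the 'out' dependence
/// on b is dropped (it is subsumed), while the 'in' dependence on a is kept.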
4619 | static void buildDependences(const OMPExecutableDirective &S, |
4620 | OMPTaskDataTy &Data) { |
  // Look for 'omp_all_memory' and add it first.
4623 | bool OmpAllMemory = false; |
4624 | if (llvm::any_of( |
4625 | Range: S.getClausesOfKind<OMPDependClause>(), P: [](const OMPDependClause *C) { |
4626 | return C->getDependencyKind() == OMPC_DEPEND_outallmemory || |
4627 | C->getDependencyKind() == OMPC_DEPEND_inoutallmemory; |
4628 | })) { |
4629 | OmpAllMemory = true; |
4630 | // Since both OMPC_DEPEND_outallmemory and OMPC_DEPEND_inoutallmemory are |
4631 | // equivalent to the runtime, always use OMPC_DEPEND_outallmemory to |
4632 | // simplify. |
4633 | OMPTaskDataTy::DependData &DD = |
4634 | Data.Dependences.emplace_back(Args: OMPC_DEPEND_outallmemory, |
4635 | /*IteratorExpr=*/Args: nullptr); |
4636 | // Add a nullptr Expr to simplify the codegen in emitDependData. |
4637 | DD.DepExprs.push_back(Elt: nullptr); |
4638 | } |
  // Add the remaining dependences, skipping any 'out' or 'inout' dependences
  // overridden by 'omp_all_memory'.
4641 | for (const auto *C : S.getClausesOfKind<OMPDependClause>()) { |
4642 | OpenMPDependClauseKind Kind = C->getDependencyKind(); |
4643 | if (Kind == OMPC_DEPEND_outallmemory || Kind == OMPC_DEPEND_inoutallmemory) |
4644 | continue; |
4645 | if (OmpAllMemory && (Kind == OMPC_DEPEND_out || Kind == OMPC_DEPEND_inout)) |
4646 | continue; |
4647 | OMPTaskDataTy::DependData &DD = |
4648 | Data.Dependences.emplace_back(Args: C->getDependencyKind(), Args: C->getModifier()); |
4649 | DD.DepExprs.append(in_start: C->varlist_begin(), in_end: C->varlist_end()); |
4650 | } |
4651 | } |
4652 | |
4653 | void CodeGenFunction::EmitOMPTaskBasedDirective( |
4654 | const OMPExecutableDirective &S, const OpenMPDirectiveKind CapturedRegion, |
4655 | const RegionCodeGenTy &BodyGen, const TaskGenTy &TaskGen, |
4656 | OMPTaskDataTy &Data) { |
4657 | // Emit outlined function for task construct. |
4658 | const CapturedStmt *CS = S.getCapturedStmt(RegionKind: CapturedRegion); |
4659 | auto I = CS->getCapturedDecl()->param_begin(); |
4660 | auto PartId = std::next(x: I); |
4661 | auto TaskT = std::next(x: I, n: 4); |
4662 | // Check if the task is final |
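  // For illustration (hypothetical inputs): 'final(1)' constant-folds, so
  // only the flag is recorded, while 'final(n > 1000)' is emitted as a
  // runtime boolean condition.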
4663 | if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) { |
4664 | // If the condition constant folds and can be elided, try to avoid emitting |
4665 | // the condition and the dead arm of the if/else. |
4666 | const Expr *Cond = Clause->getCondition(); |
4667 | bool CondConstant; |
4668 | if (ConstantFoldsToSimpleInteger(Cond, Result&: CondConstant)) |
4669 | Data.Final.setInt(CondConstant); |
4670 | else |
4671 | Data.Final.setPointer(EvaluateExprAsBool(E: Cond)); |
4672 | } else { |
4673 | // By default the task is not final. |
4674 | Data.Final.setInt(/*IntVal=*/false); |
4675 | } |
4676 | // Check if the task has 'priority' clause. |
4677 | if (const auto *Clause = S.getSingleClause<OMPPriorityClause>()) { |
4678 | const Expr *Prio = Clause->getPriority(); |
4679 | Data.Priority.setInt(/*IntVal=*/true); |
4680 | Data.Priority.setPointer(EmitScalarConversion( |
4681 | Src: EmitScalarExpr(E: Prio), SrcTy: Prio->getType(), |
4682 | DstTy: getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1), |
4683 | Loc: Prio->getExprLoc())); |
4684 | } |
  // The first function argument for tasks is a thread id, the second one is a
  // part id (0 for tied tasks, >= 0 for untied tasks).
4687 | llvm::DenseSet<const VarDecl *> EmittedAsPrivate; |
4688 | // Get list of private variables. |
4689 | for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) { |
4690 | auto IRef = C->varlist_begin(); |
4691 | for (const Expr *IInit : C->private_copies()) { |
4692 | const auto *OrigVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IRef)->getDecl()); |
4693 | if (EmittedAsPrivate.insert(V: OrigVD->getCanonicalDecl()).second) { |
4694 | Data.PrivateVars.push_back(Elt: *IRef); |
4695 | Data.PrivateCopies.push_back(Elt: IInit); |
4696 | } |
4697 | ++IRef; |
4698 | } |
4699 | } |
4700 | EmittedAsPrivate.clear(); |
4701 | // Get list of firstprivate variables. |
4702 | for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) { |
4703 | auto IRef = C->varlist_begin(); |
4704 | auto IElemInitRef = C->inits().begin(); |
4705 | for (const Expr *IInit : C->private_copies()) { |
4706 | const auto *OrigVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IRef)->getDecl()); |
4707 | if (EmittedAsPrivate.insert(V: OrigVD->getCanonicalDecl()).second) { |
4708 | Data.FirstprivateVars.push_back(Elt: *IRef); |
4709 | Data.FirstprivateCopies.push_back(Elt: IInit); |
4710 | Data.FirstprivateInits.push_back(Elt: *IElemInitRef); |
4711 | } |
4712 | ++IRef; |
4713 | ++IElemInitRef; |
4714 | } |
4715 | } |
4716 | // Get list of lastprivate variables (for taskloops). |
4717 | llvm::MapVector<const VarDecl *, const DeclRefExpr *> LastprivateDstsOrigs; |
4718 | for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { |
4719 | auto IRef = C->varlist_begin(); |
4720 | auto ID = C->destination_exprs().begin(); |
4721 | for (const Expr *IInit : C->private_copies()) { |
4722 | const auto *OrigVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IRef)->getDecl()); |
4723 | if (EmittedAsPrivate.insert(V: OrigVD->getCanonicalDecl()).second) { |
4724 | Data.LastprivateVars.push_back(Elt: *IRef); |
4725 | Data.LastprivateCopies.push_back(Elt: IInit); |
4726 | } |
4727 | LastprivateDstsOrigs.insert( |
4728 | KV: std::make_pair(x: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *ID)->getDecl()), |
4729 | y: cast<DeclRefExpr>(Val: *IRef))); |
4730 | ++IRef; |
4731 | ++ID; |
4732 | } |
4733 | } |
4734 | SmallVector<const Expr *, 4> LHSs; |
4735 | SmallVector<const Expr *, 4> RHSs; |
4736 | for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) { |
4737 | Data.ReductionVars.append(in_start: C->varlist_begin(), in_end: C->varlist_end()); |
4738 | Data.ReductionOrigs.append(in_start: C->varlist_begin(), in_end: C->varlist_end()); |
4739 | Data.ReductionCopies.append(in_start: C->privates().begin(), in_end: C->privates().end()); |
4740 | Data.ReductionOps.append(in_start: C->reduction_ops().begin(), |
4741 | in_end: C->reduction_ops().end()); |
4742 | LHSs.append(in_start: C->lhs_exprs().begin(), in_end: C->lhs_exprs().end()); |
4743 | RHSs.append(in_start: C->rhs_exprs().begin(), in_end: C->rhs_exprs().end()); |
4744 | } |
4745 | Data.Reductions = CGM.getOpenMPRuntime().emitTaskReductionInit( |
4746 | CGF&: *this, Loc: S.getBeginLoc(), LHSExprs: LHSs, RHSExprs: RHSs, Data); |
4747 | // Build list of dependences. |
4748 | buildDependences(S, Data); |
4749 | // Get list of local vars for untied tasks. |
4750 | if (!Data.Tied) { |
4751 | CheckVarsEscapingUntiedTaskDeclContext Checker; |
4752 | Checker.Visit(S: S.getInnermostCapturedStmt()->getCapturedStmt()); |
4753 | Data.PrivateLocals.append(in_start: Checker.getPrivateDecls().begin(), |
4754 | in_end: Checker.getPrivateDecls().end()); |
4755 | } |
4756 | auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs, |
4757 | CapturedRegion](CodeGenFunction &CGF, |
4758 | PrePostActionTy &Action) { |
4759 | llvm::MapVector<CanonicalDeclPtr<const VarDecl>, |
4760 | std::pair<Address, Address>> |
4761 | UntiedLocalVars; |
4762 | // Set proper addresses for generated private copies. |
4763 | OMPPrivateScope Scope(CGF); |
4764 | // Generate debug info for variables present in shared clause. |
4765 | if (auto *DI = CGF.getDebugInfo()) { |
4766 | llvm::SmallDenseMap<const VarDecl *, FieldDecl *> CaptureFields = |
4767 | CGF.CapturedStmtInfo->getCaptureFields(); |
4768 | llvm::Value *ContextValue = CGF.CapturedStmtInfo->getContextValue(); |
4769 | if (CaptureFields.size() && ContextValue) { |
4770 | unsigned CharWidth = CGF.getContext().getCharWidth(); |
        // The shared variables are packed together as members of a structure,
        // so the address of each shared variable can be computed by adding its
        // offset within the record to the record's base address. For each
        // shared variable, the debug intrinsic llvm.dbg.declare is generated
        // with an appropriate expression (DIExpression).
4776 | // Ex: |
4777 | // %12 = load %struct.anon*, %struct.anon** %__context.addr.i |
4778 | // call void @llvm.dbg.declare(metadata %struct.anon* %12, |
4779 | // metadata !svar1, |
4780 | // metadata !DIExpression(DW_OP_deref)) |
4781 | // call void @llvm.dbg.declare(metadata %struct.anon* %12, |
4782 | // metadata !svar2, |
4783 | // metadata !DIExpression(DW_OP_plus_uconst, 8, DW_OP_deref)) |
4784 | for (auto It = CaptureFields.begin(); It != CaptureFields.end(); ++It) { |
4785 | const VarDecl *SharedVar = It->first; |
4786 | RecordDecl *CaptureRecord = It->second->getParent(); |
4787 | const ASTRecordLayout &Layout = |
4788 | CGF.getContext().getASTRecordLayout(D: CaptureRecord); |
4789 | unsigned Offset = |
4790 | Layout.getFieldOffset(FieldNo: It->second->getFieldIndex()) / CharWidth; |
4791 | if (CGF.CGM.getCodeGenOpts().hasReducedDebugInfo()) |
4792 | (void)DI->EmitDeclareOfAutoVariable(Decl: SharedVar, AI: ContextValue, |
4793 | Builder&: CGF.Builder, UsePointerValue: false); |
// Get the llvm.dbg.declare call we just created and update its
// DIExpression to add the offset to the base address.
4796 | auto UpdateExpr = [](llvm::LLVMContext &Ctx, auto *Declare, |
4797 | unsigned Offset) { |
4798 | SmallVector<uint64_t, 8> Ops; |
// Add the offset to the base address if it is non-zero.
4800 | if (Offset) { |
4801 | Ops.push_back(Elt: llvm::dwarf::DW_OP_plus_uconst); |
4802 | Ops.push_back(Elt: Offset); |
4803 | } |
4804 | Ops.push_back(Elt: llvm::dwarf::DW_OP_deref); |
4805 | Declare->setExpression(llvm::DIExpression::get(Context&: Ctx, Elements: Ops)); |
4806 | }; |
4807 | llvm::Instruction &Last = CGF.Builder.GetInsertBlock()->back(); |
4808 | if (auto DDI = dyn_cast<llvm::DbgVariableIntrinsic>(Val: &Last)) |
4809 | UpdateExpr(DDI->getContext(), DDI, Offset); |
4810 | // If we're emitting using the new debug info format into a block |
4811 | // without a terminator, the record will be "trailing". |
4812 | assert(!Last.isTerminator() && "unexpected terminator" ); |
4813 | if (auto *Marker = |
4814 | CGF.Builder.GetInsertBlock()->getTrailingDbgRecords()) { |
4815 | for (llvm::DbgVariableRecord &DVR : llvm::reverse( |
4816 | C: llvm::filterDbgVars(R: Marker->getDbgRecordRange()))) { |
4817 | UpdateExpr(Last.getContext(), &DVR, Offset); |
4818 | break; |
4819 | } |
4820 | } |
4821 | } |
4822 | } |
4823 | } |
4824 | llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> FirstprivatePtrs; |
4825 | if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() || |
4826 | !Data.LastprivateVars.empty() || !Data.PrivateLocals.empty()) { |
4827 | enum { PrivatesParam = 2, CopyFnParam = 3 }; |
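// The outlined task entry receives a pointer to the privates record as
// parameter 2 and a copy helper function as parameter 3; calling the helper
// below fills each ".priv.ptr.addr"-style temporary with the address of the
// corresponding private copy inside the task.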
4828 | llvm::Value *CopyFn = CGF.Builder.CreateLoad( |
4829 | Addr: CGF.GetAddrOfLocalVar(VD: CS->getCapturedDecl()->getParam(i: CopyFnParam))); |
4830 | llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(Addr: CGF.GetAddrOfLocalVar( |
4831 | VD: CS->getCapturedDecl()->getParam(i: PrivatesParam))); |
4832 | // Map privates. |
4833 | llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs; |
4834 | llvm::SmallVector<llvm::Value *, 16> CallArgs; |
4835 | llvm::SmallVector<llvm::Type *, 4> ParamTypes; |
4836 | CallArgs.push_back(Elt: PrivatesPtr); |
4837 | ParamTypes.push_back(Elt: PrivatesPtr->getType()); |
4838 | for (const Expr *E : Data.PrivateVars) { |
4839 | const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl()); |
4840 | RawAddress PrivatePtr = CGF.CreateMemTemp( |
4841 | T: CGF.getContext().getPointerType(T: E->getType()), Name: ".priv.ptr.addr" ); |
4842 | PrivatePtrs.emplace_back(Args&: VD, Args&: PrivatePtr); |
4843 | CallArgs.push_back(Elt: PrivatePtr.getPointer()); |
4844 | ParamTypes.push_back(Elt: PrivatePtr.getType()); |
4845 | } |
4846 | for (const Expr *E : Data.FirstprivateVars) { |
4847 | const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl()); |
4848 | RawAddress PrivatePtr = |
4849 | CGF.CreateMemTemp(T: CGF.getContext().getPointerType(T: E->getType()), |
4850 | Name: ".firstpriv.ptr.addr" ); |
4851 | PrivatePtrs.emplace_back(Args&: VD, Args&: PrivatePtr); |
4852 | FirstprivatePtrs.emplace_back(Args&: VD, Args&: PrivatePtr); |
4853 | CallArgs.push_back(Elt: PrivatePtr.getPointer()); |
4854 | ParamTypes.push_back(Elt: PrivatePtr.getType()); |
4855 | } |
4856 | for (const Expr *E : Data.LastprivateVars) { |
4857 | const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl()); |
4858 | RawAddress PrivatePtr = |
4859 | CGF.CreateMemTemp(T: CGF.getContext().getPointerType(T: E->getType()), |
4860 | Name: ".lastpriv.ptr.addr" ); |
4861 | PrivatePtrs.emplace_back(Args&: VD, Args&: PrivatePtr); |
4862 | CallArgs.push_back(Elt: PrivatePtr.getPointer()); |
4863 | ParamTypes.push_back(Elt: PrivatePtr.getType()); |
4864 | } |
4865 | for (const VarDecl *VD : Data.PrivateLocals) { |
4866 | QualType Ty = VD->getType().getNonReferenceType(); |
4867 | if (VD->getType()->isLValueReferenceType()) |
4868 | Ty = CGF.getContext().getPointerType(T: Ty); |
4869 | if (isAllocatableDecl(VD)) |
4870 | Ty = CGF.getContext().getPointerType(T: Ty); |
4871 | RawAddress PrivatePtr = CGF.CreateMemTemp( |
4872 | T: CGF.getContext().getPointerType(T: Ty), Name: ".local.ptr.addr" ); |
4873 | auto Result = UntiedLocalVars.insert( |
4874 | KV: std::make_pair(x&: VD, y: std::make_pair(x&: PrivatePtr, y: Address::invalid()))); |
// If the key already exists, update the entry in place.
if (!Result.second)
4877 | *Result.first = std::make_pair( |
4878 | x&: VD, y: std::make_pair(x&: PrivatePtr, y: Address::invalid())); |
4879 | CallArgs.push_back(Elt: PrivatePtr.getPointer()); |
4880 | ParamTypes.push_back(Elt: PrivatePtr.getType()); |
4881 | } |
4882 | auto *CopyFnTy = llvm::FunctionType::get(Result: CGF.Builder.getVoidTy(), |
4883 | Params: ParamTypes, /*isVarArg=*/false); |
4884 | CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall( |
4885 | CGF, Loc: S.getBeginLoc(), OutlinedFn: {CopyFnTy, CopyFn}, Args: CallArgs); |
4886 | for (const auto &Pair : LastprivateDstsOrigs) { |
4887 | const auto *OrigVD = cast<VarDecl>(Val: Pair.second->getDecl()); |
4888 | DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(OrigVD), |
4889 | /*RefersToEnclosingVariableOrCapture=*/ |
4890 | CGF.CapturedStmtInfo->lookup(VD: OrigVD) != nullptr, |
4891 | Pair.second->getType(), VK_LValue, |
4892 | Pair.second->getExprLoc()); |
4893 | Scope.addPrivate(LocalVD: Pair.first, Addr: CGF.EmitLValue(E: &DRE).getAddress()); |
4894 | } |
4895 | for (const auto &Pair : PrivatePtrs) { |
4896 | Address Replacement = Address( |
4897 | CGF.Builder.CreateLoad(Addr: Pair.second), |
4898 | CGF.ConvertTypeForMem(T: Pair.first->getType().getNonReferenceType()), |
4899 | CGF.getContext().getDeclAlign(D: Pair.first)); |
4900 | Scope.addPrivate(LocalVD: Pair.first, Addr: Replacement); |
4901 | if (auto *DI = CGF.getDebugInfo()) |
4902 | if (CGF.CGM.getCodeGenOpts().hasReducedDebugInfo()) |
4903 | (void)DI->EmitDeclareOfAutoVariable( |
4904 | Decl: Pair.first, AI: Pair.second.getBasePointer(), Builder&: CGF.Builder, |
4905 | /*UsePointerValue*/ true); |
4906 | } |
// Adjust the mapping for internal locals: map the actual memory rather than
// a pointer to that memory.
4909 | for (auto &Pair : UntiedLocalVars) { |
4910 | QualType VDType = Pair.first->getType().getNonReferenceType(); |
4911 | if (Pair.first->getType()->isLValueReferenceType()) |
4912 | VDType = CGF.getContext().getPointerType(T: VDType); |
4913 | if (isAllocatableDecl(VD: Pair.first)) { |
4914 | llvm::Value *Ptr = CGF.Builder.CreateLoad(Addr: Pair.second.first); |
4915 | Address Replacement( |
4916 | Ptr, |
4917 | CGF.ConvertTypeForMem(T: CGF.getContext().getPointerType(T: VDType)), |
4918 | CGF.getPointerAlign()); |
4919 | Pair.second.first = Replacement; |
4920 | Ptr = CGF.Builder.CreateLoad(Addr: Replacement); |
4921 | Replacement = Address(Ptr, CGF.ConvertTypeForMem(T: VDType), |
4922 | CGF.getContext().getDeclAlign(D: Pair.first)); |
4923 | Pair.second.second = Replacement; |
4924 | } else { |
4925 | llvm::Value *Ptr = CGF.Builder.CreateLoad(Addr: Pair.second.first); |
4926 | Address Replacement(Ptr, CGF.ConvertTypeForMem(T: VDType), |
4927 | CGF.getContext().getDeclAlign(D: Pair.first)); |
4928 | Pair.second.first = Replacement; |
4929 | } |
4930 | } |
4931 | } |
4932 | if (Data.Reductions) { |
4933 | OMPPrivateScope FirstprivateScope(CGF); |
4934 | for (const auto &Pair : FirstprivatePtrs) { |
4935 | Address Replacement( |
4936 | CGF.Builder.CreateLoad(Addr: Pair.second), |
4937 | CGF.ConvertTypeForMem(T: Pair.first->getType().getNonReferenceType()), |
4938 | CGF.getContext().getDeclAlign(D: Pair.first)); |
4939 | FirstprivateScope.addPrivate(LocalVD: Pair.first, Addr: Replacement); |
4940 | } |
4941 | (void)FirstprivateScope.Privatize(); |
4942 | OMPLexicalScope LexScope(CGF, S, CapturedRegion); |
4943 | ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionVars, |
4944 | Data.ReductionCopies, Data.ReductionOps); |
4945 | llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad( |
4946 | Addr: CGF.GetAddrOfLocalVar(VD: CS->getCapturedDecl()->getParam(i: 9))); |
4947 | for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) { |
4948 | RedCG.emitSharedOrigLValue(CGF, N: Cnt); |
4949 | RedCG.emitAggregateType(CGF, N: Cnt); |
// FIXME: This must be removed once the runtime library is fixed.
4951 | // Emit required threadprivate variables for |
4952 | // initializer/combiner/finalizer. |
4953 | CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, Loc: S.getBeginLoc(), |
4954 | RCG&: RedCG, N: Cnt); |
4955 | Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem( |
4956 | CGF, Loc: S.getBeginLoc(), ReductionsPtr, SharedLVal: RedCG.getSharedLValue(N: Cnt)); |
4957 | Replacement = Address( |
4958 | CGF.EmitScalarConversion(Src: Replacement.emitRawPointer(CGF), |
4959 | SrcTy: CGF.getContext().VoidPtrTy, |
4960 | DstTy: CGF.getContext().getPointerType( |
4961 | T: Data.ReductionCopies[Cnt]->getType()), |
4962 | Loc: Data.ReductionCopies[Cnt]->getExprLoc()), |
4963 | CGF.ConvertTypeForMem(T: Data.ReductionCopies[Cnt]->getType()), |
4964 | Replacement.getAlignment()); |
4965 | Replacement = RedCG.adjustPrivateAddress(CGF, N: Cnt, PrivateAddr: Replacement); |
4966 | Scope.addPrivate(LocalVD: RedCG.getBaseDecl(N: Cnt), Addr: Replacement); |
4967 | } |
4968 | } |
4969 | // Privatize all private variables except for in_reduction items. |
4970 | (void)Scope.Privatize(); |
4971 | SmallVector<const Expr *, 4> InRedVars; |
4972 | SmallVector<const Expr *, 4> InRedPrivs; |
4973 | SmallVector<const Expr *, 4> InRedOps; |
4974 | SmallVector<const Expr *, 4> TaskgroupDescriptors; |
4975 | for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) { |
4976 | auto IPriv = C->privates().begin(); |
4977 | auto IRed = C->reduction_ops().begin(); |
4978 | auto ITD = C->taskgroup_descriptors().begin(); |
4979 | for (const Expr *Ref : C->varlists()) { |
4980 | InRedVars.emplace_back(Args&: Ref); |
4981 | InRedPrivs.emplace_back(Args: *IPriv); |
4982 | InRedOps.emplace_back(Args: *IRed); |
4983 | TaskgroupDescriptors.emplace_back(Args: *ITD); |
4984 | std::advance(i&: IPriv, n: 1); |
4985 | std::advance(i&: IRed, n: 1); |
4986 | std::advance(i&: ITD, n: 1); |
4987 | } |
4988 | } |
4989 | // Privatize in_reduction items here, because taskgroup descriptors must be |
4990 | // privatized earlier. |
4991 | OMPPrivateScope InRedScope(CGF); |
4992 | if (!InRedVars.empty()) { |
4993 | ReductionCodeGen RedCG(InRedVars, InRedVars, InRedPrivs, InRedOps); |
4994 | for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) { |
4995 | RedCG.emitSharedOrigLValue(CGF, N: Cnt); |
4996 | RedCG.emitAggregateType(CGF, N: Cnt); |
// The taskgroup descriptor variable is always implicitly firstprivate and
// has already been privatized during processing of the firstprivates.
// FIXME: This must be removed once the runtime library is fixed.
5000 | // Emit required threadprivate variables for |
5001 | // initializer/combiner/finalizer. |
5002 | CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, Loc: S.getBeginLoc(), |
5003 | RCG&: RedCG, N: Cnt); |
5004 | llvm::Value *ReductionsPtr; |
5005 | if (const Expr *TRExpr = TaskgroupDescriptors[Cnt]) { |
5006 | ReductionsPtr = CGF.EmitLoadOfScalar(lvalue: CGF.EmitLValue(E: TRExpr), |
5007 | Loc: TRExpr->getExprLoc()); |
5008 | } else { |
5009 | ReductionsPtr = llvm::ConstantPointerNull::get(T: CGF.VoidPtrTy); |
5010 | } |
5011 | Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem( |
5012 | CGF, Loc: S.getBeginLoc(), ReductionsPtr, SharedLVal: RedCG.getSharedLValue(N: Cnt)); |
5013 | Replacement = Address( |
5014 | CGF.EmitScalarConversion( |
5015 | Src: Replacement.emitRawPointer(CGF), SrcTy: CGF.getContext().VoidPtrTy, |
5016 | DstTy: CGF.getContext().getPointerType(T: InRedPrivs[Cnt]->getType()), |
5017 | Loc: InRedPrivs[Cnt]->getExprLoc()), |
5018 | CGF.ConvertTypeForMem(T: InRedPrivs[Cnt]->getType()), |
5019 | Replacement.getAlignment()); |
5020 | Replacement = RedCG.adjustPrivateAddress(CGF, N: Cnt, PrivateAddr: Replacement); |
5021 | InRedScope.addPrivate(LocalVD: RedCG.getBaseDecl(N: Cnt), Addr: Replacement); |
5022 | } |
5023 | } |
5024 | (void)InRedScope.Privatize(); |
5025 | |
CGOpenMPRuntime::UntiedTaskLocalDeclsRAII LocalVarsScope(CGF, UntiedLocalVars);
5028 | Action.Enter(CGF); |
5029 | BodyGen(CGF); |
5030 | }; |
5031 | llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction( |
5032 | D: S, ThreadIDVar: *I, PartIDVar: *PartId, TaskTVar: *TaskT, InnermostKind: S.getDirectiveKind(), CodeGen, Tied: Data.Tied, |
5033 | NumberOfParts&: Data.NumberOfParts); |
5034 | OMPLexicalScope Scope(*this, S, std::nullopt, |
5035 | !isOpenMPParallelDirective(DKind: S.getDirectiveKind()) && |
5036 | !isOpenMPSimdDirective(DKind: S.getDirectiveKind())); |
5037 | TaskGen(*this, OutlinedFn, Data); |
5038 | } |
5039 | |
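/// Creates an implicit firstprivate variable of the given type for a
/// task-based target directive: builds the original and private declarations
/// together with the init expression, registers them in \p Data, and returns
/// the original declaration so the caller can bind it to a concrete address.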
5040 | static ImplicitParamDecl * |
5041 | createImplicitFirstprivateForType(ASTContext &C, OMPTaskDataTy &Data, |
5042 | QualType Ty, CapturedDecl *CD, |
5043 | SourceLocation Loc) { |
5044 | auto *OrigVD = ImplicitParamDecl::Create(C, DC: CD, IdLoc: Loc, /*Id=*/nullptr, T: Ty, |
5045 | ParamKind: ImplicitParamKind::Other); |
5046 | auto *OrigRef = DeclRefExpr::Create( |
5047 | Context: C, QualifierLoc: NestedNameSpecifierLoc(), TemplateKWLoc: SourceLocation(), D: OrigVD, |
5048 | /*RefersToEnclosingVariableOrCapture=*/false, NameLoc: Loc, T: Ty, VK: VK_LValue); |
5049 | auto *PrivateVD = ImplicitParamDecl::Create(C, DC: CD, IdLoc: Loc, /*Id=*/nullptr, T: Ty, |
5050 | ParamKind: ImplicitParamKind::Other); |
5051 | auto *PrivateRef = DeclRefExpr::Create( |
5052 | Context: C, QualifierLoc: NestedNameSpecifierLoc(), TemplateKWLoc: SourceLocation(), D: PrivateVD, |
5053 | /*RefersToEnclosingVariableOrCapture=*/false, NameLoc: Loc, T: Ty, VK: VK_LValue); |
5054 | QualType ElemType = C.getBaseElementType(QT: Ty); |
5055 | auto *InitVD = ImplicitParamDecl::Create(C, DC: CD, IdLoc: Loc, /*Id=*/nullptr, T: ElemType, |
5056 | ParamKind: ImplicitParamKind::Other); |
5057 | auto *InitRef = DeclRefExpr::Create( |
5058 | Context: C, QualifierLoc: NestedNameSpecifierLoc(), TemplateKWLoc: SourceLocation(), D: InitVD, |
5059 | /*RefersToEnclosingVariableOrCapture=*/false, NameLoc: Loc, T: ElemType, VK: VK_LValue); |
5060 | PrivateVD->setInitStyle(VarDecl::CInit); |
5061 | PrivateVD->setInit(ImplicitCastExpr::Create(Context: C, T: ElemType, Kind: CK_LValueToRValue, |
5062 | Operand: InitRef, /*BasePath=*/nullptr, |
5063 | Cat: VK_PRValue, FPO: FPOptionsOverride())); |
5064 | Data.FirstprivateVars.emplace_back(Args&: OrigRef); |
5065 | Data.FirstprivateCopies.emplace_back(Args&: PrivateRef); |
5066 | Data.FirstprivateInits.emplace_back(Args&: InitRef); |
5067 | return OrigVD; |
5068 | } |
5069 | |
5070 | void CodeGenFunction::EmitOMPTargetTaskBasedDirective( |
5071 | const OMPExecutableDirective &S, const RegionCodeGenTy &BodyGen, |
5072 | OMPTargetDataInfo &InputInfo) { |
5073 | // Emit outlined function for task construct. |
5074 | const CapturedStmt *CS = S.getCapturedStmt(RegionKind: OMPD_task); |
5075 | Address CapturedStruct = GenerateCapturedStmtArgument(S: *CS); |
5076 | QualType SharedsTy = getContext().getRecordType(Decl: CS->getCapturedRecordDecl()); |
5077 | auto I = CS->getCapturedDecl()->param_begin(); |
5078 | auto PartId = std::next(x: I); |
5079 | auto TaskT = std::next(x: I, n: 4); |
5080 | OMPTaskDataTy Data; |
5081 | // The task is not final. |
5082 | Data.Final.setInt(/*IntVal=*/false); |
5083 | // Get list of firstprivate variables. |
5084 | for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) { |
5085 | auto IRef = C->varlist_begin(); |
5086 | auto IElemInitRef = C->inits().begin(); |
5087 | for (auto *IInit : C->private_copies()) { |
5088 | Data.FirstprivateVars.push_back(Elt: *IRef); |
5089 | Data.FirstprivateCopies.push_back(Elt: IInit); |
5090 | Data.FirstprivateInits.push_back(Elt: *IElemInitRef); |
5091 | ++IRef; |
5092 | ++IElemInitRef; |
5093 | } |
5094 | } |
5095 | SmallVector<const Expr *, 4> LHSs; |
5096 | SmallVector<const Expr *, 4> RHSs; |
5097 | for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) { |
5098 | Data.ReductionVars.append(in_start: C->varlist_begin(), in_end: C->varlist_end()); |
5099 | Data.ReductionOrigs.append(in_start: C->varlist_begin(), in_end: C->varlist_end()); |
5100 | Data.ReductionCopies.append(in_start: C->privates().begin(), in_end: C->privates().end()); |
5101 | Data.ReductionOps.append(in_start: C->reduction_ops().begin(), |
5102 | in_end: C->reduction_ops().end()); |
5103 | LHSs.append(in_start: C->lhs_exprs().begin(), in_end: C->lhs_exprs().end()); |
5104 | RHSs.append(in_start: C->rhs_exprs().begin(), in_end: C->rhs_exprs().end()); |
5105 | } |
5106 | OMPPrivateScope TargetScope(*this); |
5107 | VarDecl *BPVD = nullptr; |
5108 | VarDecl *PVD = nullptr; |
5109 | VarDecl *SVD = nullptr; |
5110 | VarDecl *MVD = nullptr; |
5111 | if (InputInfo.NumberOfTargetItems > 0) { |
5112 | auto *CD = CapturedDecl::Create( |
5113 | C&: getContext(), DC: getContext().getTranslationUnitDecl(), /*NumParams=*/0); |
5114 | llvm::APInt ArrSize(/*numBits=*/32, InputInfo.NumberOfTargetItems); |
5115 | QualType BaseAndPointerAndMapperType = getContext().getConstantArrayType( |
5116 | EltTy: getContext().VoidPtrTy, ArySize: ArrSize, SizeExpr: nullptr, ASM: ArraySizeModifier::Normal, |
5117 | /*IndexTypeQuals=*/0); |
5118 | BPVD = createImplicitFirstprivateForType( |
5119 | C&: getContext(), Data, Ty: BaseAndPointerAndMapperType, CD, Loc: S.getBeginLoc()); |
5120 | PVD = createImplicitFirstprivateForType( |
5121 | C&: getContext(), Data, Ty: BaseAndPointerAndMapperType, CD, Loc: S.getBeginLoc()); |
5122 | QualType SizesType = getContext().getConstantArrayType( |
5123 | EltTy: getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1), |
5124 | ArySize: ArrSize, SizeExpr: nullptr, ASM: ArraySizeModifier::Normal, |
5125 | /*IndexTypeQuals=*/0); |
5126 | SVD = createImplicitFirstprivateForType(C&: getContext(), Data, Ty: SizesType, CD, |
5127 | Loc: S.getBeginLoc()); |
5128 | TargetScope.addPrivate(LocalVD: BPVD, Addr: InputInfo.BasePointersArray); |
5129 | TargetScope.addPrivate(LocalVD: PVD, Addr: InputInfo.PointersArray); |
5130 | TargetScope.addPrivate(LocalVD: SVD, Addr: InputInfo.SizesArray); |
5131 | // If there is no user-defined mapper, the mapper array will be nullptr. In |
5132 | // this case, we don't need to privatize it. |
5133 | if (!isa_and_nonnull<llvm::ConstantPointerNull>( |
5134 | Val: InputInfo.MappersArray.emitRawPointer(CGF&: *this))) { |
5135 | MVD = createImplicitFirstprivateForType( |
5136 | C&: getContext(), Data, Ty: BaseAndPointerAndMapperType, CD, Loc: S.getBeginLoc()); |
5137 | TargetScope.addPrivate(LocalVD: MVD, Addr: InputInfo.MappersArray); |
5138 | } |
5139 | } |
5140 | (void)TargetScope.Privatize(); |
5141 | buildDependences(S, Data); |
5142 | auto &&CodeGen = [&Data, &S, CS, &BodyGen, BPVD, PVD, SVD, MVD, |
5143 | &InputInfo](CodeGenFunction &CGF, PrePostActionTy &Action) { |
5144 | // Set proper addresses for generated private copies. |
5145 | OMPPrivateScope Scope(CGF); |
5146 | if (!Data.FirstprivateVars.empty()) { |
5147 | enum { PrivatesParam = 2, CopyFnParam = 3 }; |
5148 | llvm::Value *CopyFn = CGF.Builder.CreateLoad( |
5149 | Addr: CGF.GetAddrOfLocalVar(VD: CS->getCapturedDecl()->getParam(i: CopyFnParam))); |
5150 | llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(Addr: CGF.GetAddrOfLocalVar( |
5151 | VD: CS->getCapturedDecl()->getParam(i: PrivatesParam))); |
5152 | // Map privates. |
5153 | llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs; |
5154 | llvm::SmallVector<llvm::Value *, 16> CallArgs; |
5155 | llvm::SmallVector<llvm::Type *, 4> ParamTypes; |
5156 | CallArgs.push_back(Elt: PrivatesPtr); |
5157 | ParamTypes.push_back(Elt: PrivatesPtr->getType()); |
5158 | for (const Expr *E : Data.FirstprivateVars) { |
5159 | const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl()); |
5160 | RawAddress PrivatePtr = |
5161 | CGF.CreateMemTemp(T: CGF.getContext().getPointerType(T: E->getType()), |
5162 | Name: ".firstpriv.ptr.addr" ); |
5163 | PrivatePtrs.emplace_back(Args&: VD, Args&: PrivatePtr); |
5164 | CallArgs.push_back(Elt: PrivatePtr.getPointer()); |
5165 | ParamTypes.push_back(Elt: PrivatePtr.getType()); |
5166 | } |
5167 | auto *CopyFnTy = llvm::FunctionType::get(Result: CGF.Builder.getVoidTy(), |
5168 | Params: ParamTypes, /*isVarArg=*/false); |
5169 | CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall( |
5170 | CGF, Loc: S.getBeginLoc(), OutlinedFn: {CopyFnTy, CopyFn}, Args: CallArgs); |
5171 | for (const auto &Pair : PrivatePtrs) { |
5172 | Address Replacement( |
5173 | CGF.Builder.CreateLoad(Addr: Pair.second), |
5174 | CGF.ConvertTypeForMem(T: Pair.first->getType().getNonReferenceType()), |
5175 | CGF.getContext().getDeclAlign(D: Pair.first)); |
5176 | Scope.addPrivate(LocalVD: Pair.first, Addr: Replacement); |
5177 | } |
5178 | } |
5179 | CGF.processInReduction(S, Data, CGF, CS, Scope); |
5180 | if (InputInfo.NumberOfTargetItems > 0) { |
5181 | InputInfo.BasePointersArray = CGF.Builder.CreateConstArrayGEP( |
5182 | Addr: CGF.GetAddrOfLocalVar(VD: BPVD), /*Index=*/0); |
5183 | InputInfo.PointersArray = CGF.Builder.CreateConstArrayGEP( |
5184 | Addr: CGF.GetAddrOfLocalVar(VD: PVD), /*Index=*/0); |
5185 | InputInfo.SizesArray = CGF.Builder.CreateConstArrayGEP( |
5186 | Addr: CGF.GetAddrOfLocalVar(VD: SVD), /*Index=*/0); |
// If MVD is nullptr, the mapper array is not privatized.
5188 | if (MVD) |
5189 | InputInfo.MappersArray = CGF.Builder.CreateConstArrayGEP( |
5190 | Addr: CGF.GetAddrOfLocalVar(VD: MVD), /*Index=*/0); |
5191 | } |
5192 | |
5193 | Action.Enter(CGF); |
5194 | OMPLexicalScope LexScope(CGF, S, OMPD_task, /*EmitPreInitStmt=*/false); |
5195 | auto *TL = S.getSingleClause<OMPThreadLimitClause>(); |
5196 | if (CGF.CGM.getLangOpts().OpenMP >= 51 && |
5197 | needsTaskBasedThreadLimit(DKind: S.getDirectiveKind()) && TL) { |
// Emit __kmpc_set_thread_limit() to set the thread_limit for the task
// enclosing this target region. This will indirectly set the thread_limit
// for every applicable construct within the target region.
5201 | CGF.CGM.getOpenMPRuntime().emitThreadLimitClause( |
5202 | CGF, ThreadLimit: TL->getThreadLimit(), Loc: S.getBeginLoc()); |
5203 | } |
5204 | BodyGen(CGF); |
5205 | }; |
5206 | llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction( |
5207 | D: S, ThreadIDVar: *I, PartIDVar: *PartId, TaskTVar: *TaskT, InnermostKind: S.getDirectiveKind(), CodeGen, /*Tied=*/true, |
5208 | NumberOfParts&: Data.NumberOfParts); |
5209 | llvm::APInt TrueOrFalse(32, S.hasClausesOfKind<OMPNowaitClause>() ? 1 : 0); |
5210 | IntegerLiteral IfCond(getContext(), TrueOrFalse, |
5211 | getContext().getIntTypeForBitwidth(DestWidth: 32, /*Signed=*/0), |
5212 | SourceLocation()); |
5213 | CGM.getOpenMPRuntime().emitTaskCall(CGF&: *this, Loc: S.getBeginLoc(), D: S, TaskFunction: OutlinedFn, |
5214 | SharedsTy, Shareds: CapturedStruct, IfCond: &IfCond, Data); |
5215 | } |
5216 | |
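/// Remaps reduction and in_reduction items of a task-based directive to the
/// task-private copies obtained from the runtime through the reduction
/// descriptors, privatizing them in \p Scope before the task body is emitted.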
5217 | void CodeGenFunction::processInReduction(const OMPExecutableDirective &S, |
5218 | OMPTaskDataTy &Data, |
5219 | CodeGenFunction &CGF, |
5220 | const CapturedStmt *CS, |
5221 | OMPPrivateScope &Scope) { |
5222 | if (Data.Reductions) { |
5223 | OpenMPDirectiveKind CapturedRegion = S.getDirectiveKind(); |
5224 | OMPLexicalScope LexScope(CGF, S, CapturedRegion); |
5225 | ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionVars, |
5226 | Data.ReductionCopies, Data.ReductionOps); |
5227 | llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad( |
5228 | Addr: CGF.GetAddrOfLocalVar(VD: CS->getCapturedDecl()->getParam(i: 4))); |
5229 | for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) { |
5230 | RedCG.emitSharedOrigLValue(CGF, N: Cnt); |
5231 | RedCG.emitAggregateType(CGF, N: Cnt); |
// FIXME: This must be removed once the runtime library is fixed.
5233 | // Emit required threadprivate variables for |
5234 | // initializer/combiner/finalizer. |
5235 | CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, Loc: S.getBeginLoc(), |
5236 | RCG&: RedCG, N: Cnt); |
5237 | Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem( |
5238 | CGF, Loc: S.getBeginLoc(), ReductionsPtr, SharedLVal: RedCG.getSharedLValue(N: Cnt)); |
5239 | Replacement = Address( |
5240 | CGF.EmitScalarConversion(Src: Replacement.emitRawPointer(CGF), |
5241 | SrcTy: CGF.getContext().VoidPtrTy, |
5242 | DstTy: CGF.getContext().getPointerType( |
5243 | T: Data.ReductionCopies[Cnt]->getType()), |
5244 | Loc: Data.ReductionCopies[Cnt]->getExprLoc()), |
5245 | CGF.ConvertTypeForMem(T: Data.ReductionCopies[Cnt]->getType()), |
5246 | Replacement.getAlignment()); |
5247 | Replacement = RedCG.adjustPrivateAddress(CGF, N: Cnt, PrivateAddr: Replacement); |
5248 | Scope.addPrivate(LocalVD: RedCG.getBaseDecl(N: Cnt), Addr: Replacement); |
5249 | } |
5250 | } |
5251 | (void)Scope.Privatize(); |
5252 | SmallVector<const Expr *, 4> InRedVars; |
5253 | SmallVector<const Expr *, 4> InRedPrivs; |
5254 | SmallVector<const Expr *, 4> InRedOps; |
5255 | SmallVector<const Expr *, 4> TaskgroupDescriptors; |
5256 | for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) { |
5257 | auto IPriv = C->privates().begin(); |
5258 | auto IRed = C->reduction_ops().begin(); |
5259 | auto ITD = C->taskgroup_descriptors().begin(); |
5260 | for (const Expr *Ref : C->varlists()) { |
5261 | InRedVars.emplace_back(Args&: Ref); |
5262 | InRedPrivs.emplace_back(Args: *IPriv); |
5263 | InRedOps.emplace_back(Args: *IRed); |
5264 | TaskgroupDescriptors.emplace_back(Args: *ITD); |
5265 | std::advance(i&: IPriv, n: 1); |
5266 | std::advance(i&: IRed, n: 1); |
5267 | std::advance(i&: ITD, n: 1); |
5268 | } |
5269 | } |
5270 | OMPPrivateScope InRedScope(CGF); |
5271 | if (!InRedVars.empty()) { |
5272 | ReductionCodeGen RedCG(InRedVars, InRedVars, InRedPrivs, InRedOps); |
5273 | for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) { |
5274 | RedCG.emitSharedOrigLValue(CGF, N: Cnt); |
5275 | RedCG.emitAggregateType(CGF, N: Cnt); |
// FIXME: This must be removed once the runtime library is fixed.
5277 | // Emit required threadprivate variables for |
5278 | // initializer/combiner/finalizer. |
5279 | CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, Loc: S.getBeginLoc(), |
5280 | RCG&: RedCG, N: Cnt); |
5281 | llvm::Value *ReductionsPtr; |
5282 | if (const Expr *TRExpr = TaskgroupDescriptors[Cnt]) { |
5283 | ReductionsPtr = |
5284 | CGF.EmitLoadOfScalar(lvalue: CGF.EmitLValue(E: TRExpr), Loc: TRExpr->getExprLoc()); |
5285 | } else { |
5286 | ReductionsPtr = llvm::ConstantPointerNull::get(T: CGF.VoidPtrTy); |
5287 | } |
5288 | Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem( |
5289 | CGF, Loc: S.getBeginLoc(), ReductionsPtr, SharedLVal: RedCG.getSharedLValue(N: Cnt)); |
5290 | Replacement = Address( |
5291 | CGF.EmitScalarConversion( |
5292 | Src: Replacement.emitRawPointer(CGF), SrcTy: CGF.getContext().VoidPtrTy, |
5293 | DstTy: CGF.getContext().getPointerType(T: InRedPrivs[Cnt]->getType()), |
5294 | Loc: InRedPrivs[Cnt]->getExprLoc()), |
5295 | CGF.ConvertTypeForMem(T: InRedPrivs[Cnt]->getType()), |
5296 | Replacement.getAlignment()); |
5297 | Replacement = RedCG.adjustPrivateAddress(CGF, N: Cnt, PrivateAddr: Replacement); |
5298 | InRedScope.addPrivate(LocalVD: RedCG.getBaseDecl(N: Cnt), Addr: Replacement); |
5299 | } |
5300 | } |
5301 | (void)InRedScope.Privatize(); |
5302 | } |
5303 | |
5304 | void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) { |
5305 | // Emit outlined function for task construct. |
5306 | const CapturedStmt *CS = S.getCapturedStmt(RegionKind: OMPD_task); |
5307 | Address CapturedStruct = GenerateCapturedStmtArgument(S: *CS); |
5308 | QualType SharedsTy = getContext().getRecordType(Decl: CS->getCapturedRecordDecl()); |
5309 | const Expr *IfCond = nullptr; |
5310 | for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { |
5311 | if (C->getNameModifier() == OMPD_unknown || |
5312 | C->getNameModifier() == OMPD_task) { |
5313 | IfCond = C->getCondition(); |
5314 | break; |
5315 | } |
5316 | } |
5317 | |
5318 | OMPTaskDataTy Data; |
// Check whether we should emit a tied or an untied task.
5320 | Data.Tied = !S.getSingleClause<OMPUntiedClause>(); |
5321 | auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) { |
5322 | CGF.EmitStmt(S: CS->getCapturedStmt()); |
5323 | }; |
5324 | auto &&TaskGen = [&S, SharedsTy, CapturedStruct, |
5325 | IfCond](CodeGenFunction &CGF, llvm::Function *OutlinedFn, |
5326 | const OMPTaskDataTy &Data) { |
5327 | CGF.CGM.getOpenMPRuntime().emitTaskCall(CGF, Loc: S.getBeginLoc(), D: S, TaskFunction: OutlinedFn, |
5328 | SharedsTy, Shareds: CapturedStruct, IfCond, |
5329 | Data); |
5330 | }; |
5331 | auto LPCRegion = |
5332 | CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S); |
5333 | EmitOMPTaskBasedDirective(S, CapturedRegion: OMPD_task, BodyGen, TaskGen, Data); |
5334 | } |
5335 | |
5336 | void CodeGenFunction::EmitOMPTaskyieldDirective( |
5337 | const OMPTaskyieldDirective &S) { |
5338 | CGM.getOpenMPRuntime().emitTaskyieldCall(CGF&: *this, Loc: S.getBeginLoc()); |
5339 | } |
5340 | |
5341 | void CodeGenFunction::EmitOMPErrorDirective(const OMPErrorDirective &S) { |
5342 | const OMPMessageClause *MC = S.getSingleClause<OMPMessageClause>(); |
5343 | Expr *ME = MC ? MC->getMessageString() : nullptr; |
5344 | const OMPSeverityClause *SC = S.getSingleClause<OMPSeverityClause>(); |
5345 | bool IsFatal = false; |
5346 | if (!SC || SC->getSeverityKind() == OMPC_SEVERITY_fatal) |
5347 | IsFatal = true; |
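// E.g. '#pragma omp error severity(warning) message("...")' reports a
// non-fatal error at runtime; without a severity clause, fatal is assumed.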
5348 | CGM.getOpenMPRuntime().emitErrorCall(CGF&: *this, Loc: S.getBeginLoc(), ME, IsFatal); |
5349 | } |
5350 | |
5351 | void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) { |
5352 | CGM.getOpenMPRuntime().emitBarrierCall(CGF&: *this, Loc: S.getBeginLoc(), Kind: OMPD_barrier); |
5353 | } |
5354 | |
5355 | void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) { |
5356 | OMPTaskDataTy Data; |
// Build list of dependences.
5358 | buildDependences(S, Data); |
5359 | Data.HasNowaitClause = S.hasClausesOfKind<OMPNowaitClause>(); |
5360 | CGM.getOpenMPRuntime().emitTaskwaitCall(CGF&: *this, Loc: S.getBeginLoc(), Data); |
5361 | } |
5362 | |
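// The OpenMPIRBuilder path below only supports a bare 'taskgroup'; any
// clause (e.g. 'task_reduction' or 'allocate') falls back to the regular
// codegen path.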
static bool isSupportedByOpenMPIRBuilder(const OMPTaskgroupDirective &T) {
5364 | return T.clauses().empty(); |
5365 | } |
5366 | |
5367 | void CodeGenFunction::EmitOMPTaskgroupDirective( |
5368 | const OMPTaskgroupDirective &S) { |
5369 | OMPLexicalScope Scope(*this, S, OMPD_unknown); |
5370 | if (CGM.getLangOpts().OpenMPIRBuilder && isSupportedByOpenMPIRBuilder(T: S)) { |
5371 | llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); |
5372 | using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; |
5373 | InsertPointTy AllocaIP(AllocaInsertPt->getParent(), |
5374 | AllocaInsertPt->getIterator()); |
5375 | |
5376 | auto BodyGenCB = [&, this](InsertPointTy AllocaIP, |
5377 | InsertPointTy CodeGenIP) { |
5378 | Builder.restoreIP(IP: CodeGenIP); |
5379 | EmitStmt(S: S.getInnermostCapturedStmt()->getCapturedStmt()); |
5380 | }; |
5381 | CodeGenFunction::CGCapturedStmtInfo CapStmtInfo; |
5382 | if (!CapturedStmtInfo) |
5383 | CapturedStmtInfo = &CapStmtInfo; |
5384 | Builder.restoreIP(IP: OMPBuilder.createTaskgroup(Loc: Builder, AllocaIP, BodyGenCB)); |
5385 | return; |
5386 | } |
5387 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
5388 | Action.Enter(CGF); |
5389 | if (const Expr *E = S.getReductionRef()) { |
5390 | SmallVector<const Expr *, 4> LHSs; |
5391 | SmallVector<const Expr *, 4> RHSs; |
5392 | OMPTaskDataTy Data; |
5393 | for (const auto *C : S.getClausesOfKind<OMPTaskReductionClause>()) { |
5394 | Data.ReductionVars.append(in_start: C->varlist_begin(), in_end: C->varlist_end()); |
5395 | Data.ReductionOrigs.append(in_start: C->varlist_begin(), in_end: C->varlist_end()); |
5396 | Data.ReductionCopies.append(in_start: C->privates().begin(), in_end: C->privates().end()); |
5397 | Data.ReductionOps.append(in_start: C->reduction_ops().begin(), |
5398 | in_end: C->reduction_ops().end()); |
5399 | LHSs.append(in_start: C->lhs_exprs().begin(), in_end: C->lhs_exprs().end()); |
5400 | RHSs.append(in_start: C->rhs_exprs().begin(), in_end: C->rhs_exprs().end()); |
5401 | } |
5402 | llvm::Value *ReductionDesc = |
5403 | CGF.CGM.getOpenMPRuntime().emitTaskReductionInit(CGF, Loc: S.getBeginLoc(), |
5404 | LHSExprs: LHSs, RHSExprs: RHSs, Data); |
5405 | const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl()); |
5406 | CGF.EmitVarDecl(D: *VD); |
5407 | CGF.EmitStoreOfScalar(Value: ReductionDesc, Addr: CGF.GetAddrOfLocalVar(VD), |
5408 | /*Volatile=*/false, Ty: E->getType()); |
5409 | } |
5410 | CGF.EmitStmt(S: S.getInnermostCapturedStmt()->getCapturedStmt()); |
5411 | }; |
5412 | CGM.getOpenMPRuntime().emitTaskgroupRegion(CGF&: *this, TaskgroupOpGen: CodeGen, Loc: S.getBeginLoc()); |
5413 | } |
5414 | |
5415 | void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) { |
5416 | llvm::AtomicOrdering AO = S.getSingleClause<OMPFlushClause>() |
5417 | ? llvm::AtomicOrdering::NotAtomic |
5418 | : llvm::AtomicOrdering::AcquireRelease; |
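// A 'flush' with a variable list is handed to the runtime without an atomic
// ordering (NotAtomic); a bare 'flush' requests an acquire-release fence.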
5419 | CGM.getOpenMPRuntime().emitFlush( |
5420 | CGF&: *this, |
5421 | Vars: [&S]() -> ArrayRef<const Expr *> { |
5422 | if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>()) |
5423 | return llvm::ArrayRef(FlushClause->varlist_begin(), |
5424 | FlushClause->varlist_end()); |
5425 | return std::nullopt; |
5426 | }(), |
5427 | Loc: S.getBeginLoc(), AO); |
5428 | } |
5429 | |
5430 | void CodeGenFunction::EmitOMPDepobjDirective(const OMPDepobjDirective &S) { |
5431 | const auto *DO = S.getSingleClause<OMPDepobjClause>(); |
5432 | LValue DOLVal = EmitLValue(E: DO->getDepobj()); |
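// E.g. '#pragma omp depobj(d) depend(in: x)' initializes the depend object
// 'd'; 'depobj(d) destroy' and 'depobj(d) update(inout)' take the
// clause-specific paths below.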
5433 | if (const auto *DC = S.getSingleClause<OMPDependClause>()) { |
5434 | OMPTaskDataTy::DependData Dependencies(DC->getDependencyKind(), |
5435 | DC->getModifier()); |
5436 | Dependencies.DepExprs.append(in_start: DC->varlist_begin(), in_end: DC->varlist_end()); |
5437 | Address DepAddr = CGM.getOpenMPRuntime().emitDepobjDependClause( |
5438 | CGF&: *this, Dependencies, Loc: DC->getBeginLoc()); |
5439 | EmitStoreOfScalar(value: DepAddr.emitRawPointer(CGF&: *this), lvalue: DOLVal); |
5440 | return; |
5441 | } |
5442 | if (const auto *DC = S.getSingleClause<OMPDestroyClause>()) { |
5443 | CGM.getOpenMPRuntime().emitDestroyClause(CGF&: *this, DepobjLVal: DOLVal, Loc: DC->getBeginLoc()); |
5444 | return; |
5445 | } |
5446 | if (const auto *UC = S.getSingleClause<OMPUpdateClause>()) { |
5447 | CGM.getOpenMPRuntime().emitUpdateClause( |
5448 | CGF&: *this, DepobjLVal: DOLVal, NewDepKind: UC->getDependencyKind(), Loc: UC->getBeginLoc()); |
5449 | return; |
5450 | } |
5451 | } |
5452 | |
5453 | void CodeGenFunction::EmitOMPScanDirective(const OMPScanDirective &S) { |
5454 | if (!OMPParentLoopDirectiveForScan) |
5455 | return; |
5456 | const OMPExecutableDirective &ParentDir = *OMPParentLoopDirectiveForScan; |
5457 | bool IsInclusive = S.hasClausesOfKind<OMPInclusiveClause>(); |
5458 | SmallVector<const Expr *, 4> Shareds; |
5459 | SmallVector<const Expr *, 4> Privates; |
5460 | SmallVector<const Expr *, 4> LHSs; |
5461 | SmallVector<const Expr *, 4> RHSs; |
5462 | SmallVector<const Expr *, 4> ReductionOps; |
5463 | SmallVector<const Expr *, 4> CopyOps; |
5464 | SmallVector<const Expr *, 4> CopyArrayTemps; |
5465 | SmallVector<const Expr *, 4> CopyArrayElems; |
5466 | for (const auto *C : ParentDir.getClausesOfKind<OMPReductionClause>()) { |
5467 | if (C->getModifier() != OMPC_REDUCTION_inscan) |
5468 | continue; |
5469 | Shareds.append(in_start: C->varlist_begin(), in_end: C->varlist_end()); |
5470 | Privates.append(in_start: C->privates().begin(), in_end: C->privates().end()); |
5471 | LHSs.append(in_start: C->lhs_exprs().begin(), in_end: C->lhs_exprs().end()); |
5472 | RHSs.append(in_start: C->rhs_exprs().begin(), in_end: C->rhs_exprs().end()); |
5473 | ReductionOps.append(in_start: C->reduction_ops().begin(), in_end: C->reduction_ops().end()); |
5474 | CopyOps.append(in_start: C->copy_ops().begin(), in_end: C->copy_ops().end()); |
5475 | CopyArrayTemps.append(in_start: C->copy_array_temps().begin(), |
5476 | in_end: C->copy_array_temps().end()); |
5477 | CopyArrayElems.append(in_start: C->copy_array_elems().begin(), |
5478 | in_end: C->copy_array_elems().end()); |
5479 | } |
5480 | if (ParentDir.getDirectiveKind() == OMPD_simd || |
5481 | (getLangOpts().OpenMPSimd && |
5482 | isOpenMPSimdDirective(DKind: ParentDir.getDirectiveKind()))) { |
// For the simd directive and simd-based directives in simd-only mode, use
// the following codegen:
5485 | // int x = 0; |
5486 | // #pragma omp simd reduction(inscan, +: x) |
5487 | // for (..) { |
5488 | // <first part> |
5489 | // #pragma omp scan inclusive(x) |
5490 | // <second part> |
5491 | // } |
5492 | // is transformed to: |
5493 | // int x = 0; |
5494 | // for (..) { |
5495 | // int x_priv = 0; |
5496 | // <first part> |
5497 | // x = x_priv + x; |
5498 | // x_priv = x; |
5499 | // <second part> |
5500 | // } |
5501 | // and |
5502 | // int x = 0; |
5503 | // #pragma omp simd reduction(inscan, +: x) |
5504 | // for (..) { |
5505 | // <first part> |
5506 | // #pragma omp scan exclusive(x) |
5507 | // <second part> |
5508 | // } |
5509 | // to |
5510 | // int x = 0; |
5511 | // for (..) { |
5512 | // int x_priv = 0; |
5513 | // <second part> |
5514 | // int temp = x; |
5515 | // x = x_priv + x; |
5516 | // x_priv = temp; |
5517 | // <first part> |
5518 | // } |
5519 | llvm::BasicBlock *OMPScanReduce = createBasicBlock(name: "omp.inscan.reduce" ); |
5520 | EmitBranch(Block: IsInclusive |
5521 | ? OMPScanReduce |
5522 | : BreakContinueStack.back().ContinueBlock.getBlock()); |
5523 | EmitBlock(BB: OMPScanDispatch); |
5524 | { |
5525 | // New scope for correct construction/destruction of temp variables for |
5526 | // exclusive scan. |
5527 | LexicalScope Scope(*this, S.getSourceRange()); |
5528 | EmitBranch(Block: IsInclusive ? OMPBeforeScanBlock : OMPAfterScanBlock); |
5529 | EmitBlock(BB: OMPScanReduce); |
5530 | if (!IsInclusive) { |
5531 | // Create temp var and copy LHS value to this temp value. |
5532 | // TMP = LHS; |
5533 | for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) { |
5534 | const Expr *PrivateExpr = Privates[I]; |
5535 | const Expr *TempExpr = CopyArrayTemps[I]; |
5536 | EmitAutoVarDecl( |
5537 | D: *cast<VarDecl>(Val: cast<DeclRefExpr>(Val: TempExpr)->getDecl())); |
5538 | LValue DestLVal = EmitLValue(E: TempExpr); |
5539 | LValue SrcLVal = EmitLValue(E: LHSs[I]); |
5540 | EmitOMPCopy(OriginalType: PrivateExpr->getType(), DestAddr: DestLVal.getAddress(), |
5541 | SrcAddr: SrcLVal.getAddress(), |
5542 | DestVD: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: LHSs[I])->getDecl()), |
5543 | SrcVD: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: RHSs[I])->getDecl()), |
5544 | Copy: CopyOps[I]); |
5545 | } |
5546 | } |
5547 | CGM.getOpenMPRuntime().emitReduction( |
5548 | CGF&: *this, Loc: ParentDir.getEndLoc(), Privates, LHSExprs: LHSs, RHSExprs: RHSs, ReductionOps, |
Options: {/*WithNowait=*/true, /*SimpleReduction=*/true, ReductionKind: OMPD_simd});
5550 | for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) { |
5551 | const Expr *PrivateExpr = Privates[I]; |
5552 | LValue DestLVal; |
5553 | LValue SrcLVal; |
5554 | if (IsInclusive) { |
5555 | DestLVal = EmitLValue(E: RHSs[I]); |
5556 | SrcLVal = EmitLValue(E: LHSs[I]); |
5557 | } else { |
5558 | const Expr *TempExpr = CopyArrayTemps[I]; |
5559 | DestLVal = EmitLValue(E: RHSs[I]); |
5560 | SrcLVal = EmitLValue(E: TempExpr); |
5561 | } |
5562 | EmitOMPCopy( |
5563 | OriginalType: PrivateExpr->getType(), DestAddr: DestLVal.getAddress(), SrcAddr: SrcLVal.getAddress(), |
5564 | DestVD: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: LHSs[I])->getDecl()), |
5565 | SrcVD: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: RHSs[I])->getDecl()), Copy: CopyOps[I]); |
5566 | } |
5567 | } |
5568 | EmitBranch(Block: IsInclusive ? OMPAfterScanBlock : OMPBeforeScanBlock); |
5569 | OMPScanExitBlock = IsInclusive |
5570 | ? BreakContinueStack.back().ContinueBlock.getBlock() |
5571 | : OMPScanReduce; |
5572 | EmitBlock(BB: OMPAfterScanBlock); |
5573 | return; |
5574 | } |
5575 | if (!IsInclusive) { |
5576 | EmitBranch(Block: BreakContinueStack.back().ContinueBlock.getBlock()); |
5577 | EmitBlock(BB: OMPScanExitBlock); |
5578 | } |
5579 | if (OMPFirstScanLoop) { |
5580 | // Emit buffer[i] = red; at the end of the input phase. |
5581 | const auto *IVExpr = cast<OMPLoopDirective>(Val: ParentDir) |
5582 | .getIterationVariable() |
5583 | ->IgnoreParenImpCasts(); |
5584 | LValue IdxLVal = EmitLValue(E: IVExpr); |
5585 | llvm::Value *IdxVal = EmitLoadOfScalar(lvalue: IdxLVal, Loc: IVExpr->getExprLoc()); |
5586 | IdxVal = Builder.CreateIntCast(V: IdxVal, DestTy: SizeTy, /*isSigned=*/false); |
5587 | for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) { |
5588 | const Expr *PrivateExpr = Privates[I]; |
5589 | const Expr *OrigExpr = Shareds[I]; |
5590 | const Expr *CopyArrayElem = CopyArrayElems[I]; |
5591 | OpaqueValueMapping IdxMapping( |
5592 | *this, |
5593 | cast<OpaqueValueExpr>( |
5594 | Val: cast<ArraySubscriptExpr>(Val: CopyArrayElem)->getIdx()), |
5595 | RValue::get(V: IdxVal)); |
5596 | LValue DestLVal = EmitLValue(E: CopyArrayElem); |
5597 | LValue SrcLVal = EmitLValue(E: OrigExpr); |
5598 | EmitOMPCopy( |
5599 | OriginalType: PrivateExpr->getType(), DestAddr: DestLVal.getAddress(), SrcAddr: SrcLVal.getAddress(), |
5600 | DestVD: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: LHSs[I])->getDecl()), |
5601 | SrcVD: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: RHSs[I])->getDecl()), Copy: CopyOps[I]); |
5602 | } |
5603 | } |
5604 | EmitBranch(Block: BreakContinueStack.back().ContinueBlock.getBlock()); |
5605 | if (IsInclusive) { |
5606 | EmitBlock(BB: OMPScanExitBlock); |
5607 | EmitBranch(Block: BreakContinueStack.back().ContinueBlock.getBlock()); |
5608 | } |
5609 | EmitBlock(BB: OMPScanDispatch); |
5610 | if (!OMPFirstScanLoop) { |
5611 | // Emit red = buffer[i]; at the entrance to the scan phase. |
5612 | const auto *IVExpr = cast<OMPLoopDirective>(Val: ParentDir) |
5613 | .getIterationVariable() |
5614 | ->IgnoreParenImpCasts(); |
5615 | LValue IdxLVal = EmitLValue(E: IVExpr); |
5616 | llvm::Value *IdxVal = EmitLoadOfScalar(lvalue: IdxLVal, Loc: IVExpr->getExprLoc()); |
5617 | IdxVal = Builder.CreateIntCast(V: IdxVal, DestTy: SizeTy, /*isSigned=*/false); |
5618 | llvm::BasicBlock *ExclusiveExitBB = nullptr; |
5619 | if (!IsInclusive) { |
5620 | llvm::BasicBlock *ContBB = createBasicBlock(name: "omp.exclusive.dec" ); |
5621 | ExclusiveExitBB = createBasicBlock(name: "omp.exclusive.copy.exit" ); |
5622 | llvm::Value *Cmp = Builder.CreateIsNull(Arg: IdxVal); |
5623 | Builder.CreateCondBr(Cond: Cmp, True: ExclusiveExitBB, False: ContBB); |
5624 | EmitBlock(BB: ContBB); |
5625 | // Use idx - 1 iteration for exclusive scan. |
5626 | IdxVal = Builder.CreateNUWSub(LHS: IdxVal, RHS: llvm::ConstantInt::get(Ty: SizeTy, V: 1)); |
5627 | } |
5628 | for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) { |
5629 | const Expr *PrivateExpr = Privates[I]; |
5630 | const Expr *OrigExpr = Shareds[I]; |
5631 | const Expr *CopyArrayElem = CopyArrayElems[I]; |
5632 | OpaqueValueMapping IdxMapping( |
5633 | *this, |
5634 | cast<OpaqueValueExpr>( |
5635 | Val: cast<ArraySubscriptExpr>(Val: CopyArrayElem)->getIdx()), |
5636 | RValue::get(V: IdxVal)); |
5637 | LValue SrcLVal = EmitLValue(E: CopyArrayElem); |
5638 | LValue DestLVal = EmitLValue(E: OrigExpr); |
5639 | EmitOMPCopy( |
5640 | OriginalType: PrivateExpr->getType(), DestAddr: DestLVal.getAddress(), SrcAddr: SrcLVal.getAddress(), |
5641 | DestVD: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: LHSs[I])->getDecl()), |
5642 | SrcVD: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: RHSs[I])->getDecl()), Copy: CopyOps[I]); |
5643 | } |
5644 | if (!IsInclusive) { |
5645 | EmitBlock(BB: ExclusiveExitBB); |
5646 | } |
5647 | } |
5648 | EmitBranch(Block: (OMPFirstScanLoop == IsInclusive) ? OMPBeforeScanBlock |
5649 | : OMPAfterScanBlock); |
5650 | EmitBlock(BB: OMPAfterScanBlock); |
5651 | } |
5652 | |
5653 | void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S, |
5654 | const CodeGenLoopTy &CodeGenLoop, |
5655 | Expr *IncExpr) { |
5656 | // Emit the loop iteration variable. |
5657 | const auto *IVExpr = cast<DeclRefExpr>(Val: S.getIterationVariable()); |
5658 | const auto *IVDecl = cast<VarDecl>(Val: IVExpr->getDecl()); |
5659 | EmitVarDecl(D: *IVDecl); |
5660 | |
// Emit the iteration count variable.
// If it is not a variable, Sema decided to calculate the iteration count on
// each iteration (e.g., it is foldable into a constant).
5664 | if (const auto *LIExpr = dyn_cast<DeclRefExpr>(Val: S.getLastIteration())) { |
5665 | EmitVarDecl(D: *cast<VarDecl>(Val: LIExpr->getDecl())); |
// Emit calculation of the iteration count.
5667 | EmitIgnoredExpr(E: S.getCalcLastIteration()); |
5668 | } |
5669 | |
5670 | CGOpenMPRuntime &RT = CGM.getOpenMPRuntime(); |
5671 | |
5672 | bool HasLastprivateClause = false; |
5673 | // Check pre-condition. |
5674 | { |
5675 | OMPLoopScope PreInitScope(*this, S); |
5676 | // Skip the entire loop if we don't meet the precondition. |
5677 | // If the condition constant folds and can be elided, avoid emitting the |
5678 | // whole loop. |
5679 | bool CondConstant; |
5680 | llvm::BasicBlock *ContBlock = nullptr; |
5681 | if (ConstantFoldsToSimpleInteger(Cond: S.getPreCond(), Result&: CondConstant)) { |
5682 | if (!CondConstant) |
5683 | return; |
5684 | } else { |
5685 | llvm::BasicBlock *ThenBlock = createBasicBlock(name: "omp.precond.then" ); |
5686 | ContBlock = createBasicBlock(name: "omp.precond.end" ); |
5687 | emitPreCond(CGF&: *this, S, Cond: S.getPreCond(), TrueBlock: ThenBlock, FalseBlock: ContBlock, |
5688 | TrueCount: getProfileCount(S: &S)); |
5689 | EmitBlock(BB: ThenBlock); |
5690 | incrementProfileCounter(S: &S); |
5691 | } |
5692 | |
5693 | emitAlignedClause(CGF&: *this, D: S); |
5694 | // Emit 'then' code. |
5695 | { |
5696 | // Emit helper vars inits. |
5697 | |
5698 | LValue LB = EmitOMPHelperVar( |
5699 | CGF&: *this, Helper: cast<DeclRefExpr>( |
5700 | Val: (isOpenMPLoopBoundSharingDirective(Kind: S.getDirectiveKind()) |
5701 | ? S.getCombinedLowerBoundVariable() |
5702 | : S.getLowerBoundVariable()))); |
5703 | LValue UB = EmitOMPHelperVar( |
5704 | CGF&: *this, Helper: cast<DeclRefExpr>( |
5705 | Val: (isOpenMPLoopBoundSharingDirective(Kind: S.getDirectiveKind()) |
5706 | ? S.getCombinedUpperBoundVariable() |
5707 | : S.getUpperBoundVariable()))); |
5708 | LValue ST = |
5709 | EmitOMPHelperVar(CGF&: *this, Helper: cast<DeclRefExpr>(Val: S.getStrideVariable())); |
5710 | LValue IL = |
5711 | EmitOMPHelperVar(CGF&: *this, Helper: cast<DeclRefExpr>(Val: S.getIsLastIterVariable())); |
5712 | |
5713 | OMPPrivateScope LoopScope(*this); |
5714 | if (EmitOMPFirstprivateClause(D: S, PrivateScope&: LoopScope)) { |
5715 | // Emit implicit barrier to synchronize threads and avoid data races |
5716 | // on initialization of firstprivate variables and post-update of |
5717 | // lastprivate variables. |
5718 | CGM.getOpenMPRuntime().emitBarrierCall( |
5719 | CGF&: *this, Loc: S.getBeginLoc(), Kind: OMPD_unknown, /*EmitChecks=*/false, |
5720 | /*ForceSimpleCall=*/true); |
5721 | } |
5722 | EmitOMPPrivateClause(D: S, PrivateScope&: LoopScope); |
5723 | if (isOpenMPSimdDirective(DKind: S.getDirectiveKind()) && |
5724 | !isOpenMPParallelDirective(DKind: S.getDirectiveKind()) && |
5725 | !isOpenMPTeamsDirective(DKind: S.getDirectiveKind())) |
5726 | EmitOMPReductionClauseInit(D: S, PrivateScope&: LoopScope); |
5727 | HasLastprivateClause = EmitOMPLastprivateClauseInit(D: S, PrivateScope&: LoopScope); |
5728 | EmitOMPPrivateLoopCounters(S, LoopScope); |
5729 | (void)LoopScope.Privatize(); |
5730 | if (isOpenMPTargetExecutionDirective(DKind: S.getDirectiveKind())) |
5731 | CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF&: *this, D: S); |
5732 | |
5733 | // Detect the distribute schedule kind and chunk. |
5734 | llvm::Value *Chunk = nullptr; |
5735 | OpenMPDistScheduleClauseKind ScheduleKind = OMPC_DIST_SCHEDULE_unknown; |
5736 | if (const auto *C = S.getSingleClause<OMPDistScheduleClause>()) { |
5737 | ScheduleKind = C->getDistScheduleKind(); |
5738 | if (const Expr *Ch = C->getChunkSize()) { |
5739 | Chunk = EmitScalarExpr(E: Ch); |
5740 | Chunk = EmitScalarConversion(Src: Chunk, SrcTy: Ch->getType(), |
5741 | DstTy: S.getIterationVariable()->getType(), |
5742 | Loc: S.getBeginLoc()); |
5743 | } |
5744 | } else { |
5745 | // Default behaviour for dist_schedule clause. |
5746 | CGM.getOpenMPRuntime().getDefaultDistScheduleAndChunk( |
5747 | CGF&: *this, S, ScheduleKind, Chunk); |
5748 | } |
5749 | const unsigned IVSize = getContext().getTypeSize(T: IVExpr->getType()); |
5750 | const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); |
5751 | |
5752 | // OpenMP [2.10.8, distribute Construct, Description] |
5753 | // If dist_schedule is specified, kind must be static. If specified, |
5754 | // iterations are divided into chunks of size chunk_size, chunks are |
5755 | // assigned to the teams of the league in a round-robin fashion in the |
5756 | // order of the team number. When no chunk_size is specified, the |
5757 | // iteration space is divided into chunks that are approximately equal |
5758 | // in size, and at most one chunk is distributed to each team of the |
5759 | // league. The size of the chunks is unspecified in this case. |
5760 | bool StaticChunked = |
5761 | RT.isStaticChunked(ScheduleKind, /* Chunked */ Chunk != nullptr) && |
5762 | isOpenMPLoopBoundSharingDirective(Kind: S.getDirectiveKind()); |
5763 | if (RT.isStaticNonchunked(ScheduleKind, |
5764 | /* Chunked */ Chunk != nullptr) || |
5765 | StaticChunked) { |
5766 | CGOpenMPRuntime::StaticRTInput StaticInit( |
5767 | IVSize, IVSigned, /* Ordered = */ false, IL.getAddress(), |
5768 | LB.getAddress(), UB.getAddress(), ST.getAddress(), |
5769 | StaticChunked ? Chunk : nullptr); |
5770 | RT.emitDistributeStaticInit(CGF&: *this, Loc: S.getBeginLoc(), SchedKind: ScheduleKind, |
5771 | Values: StaticInit); |
5772 | JumpDest LoopExit = |
5773 | getJumpDestInCurrentScope(Target: createBasicBlock(name: "omp.loop.exit" )); |
5774 | // UB = min(UB, GlobalUB); |
5775 | EmitIgnoredExpr(E: isOpenMPLoopBoundSharingDirective(Kind: S.getDirectiveKind()) |
5776 | ? S.getCombinedEnsureUpperBound() |
5777 | : S.getEnsureUpperBound()); |
5778 | // IV = LB; |
5779 | EmitIgnoredExpr(E: isOpenMPLoopBoundSharingDirective(Kind: S.getDirectiveKind()) |
5780 | ? S.getCombinedInit() |
5781 | : S.getInit()); |
5782 | |
5783 | const Expr *Cond = |
5784 | isOpenMPLoopBoundSharingDirective(Kind: S.getDirectiveKind()) |
5785 | ? S.getCombinedCond() |
5786 | : S.getCond(); |
5787 | |
5788 | if (StaticChunked) |
5789 | Cond = S.getCombinedDistCond(); |
5790 | |
5791 | // For static unchunked schedules generate: |
5792 | // |
5793 | // 1. For distribute alone, codegen |
5794 | // while (idx <= UB) { |
5795 | // BODY; |
5796 | // ++idx; |
5797 | // } |
5798 | // |
5799 | // 2. When combined with 'for' (e.g. as in 'distribute parallel for') |
5800 | // while (idx <= UB) { |
5801 | // <CodeGen rest of pragma>(LB, UB); |
5802 | // idx += ST; |
5803 | // } |
5804 | // |
// For static chunked schedules, generate:
5806 | // |
5807 | // while (IV <= GlobalUB) { |
5808 | // <CodeGen rest of pragma>(LB, UB); |
5809 | // LB += ST; |
5810 | // UB += ST; |
5811 | // UB = min(UB, GlobalUB); |
5812 | // IV = LB; |
5813 | // } |
5814 | // |
5815 | emitCommonSimdLoop( |
5816 | CGF&: *this, S, |
5817 | SimdInitGen: [&S](CodeGenFunction &CGF, PrePostActionTy &) { |
5818 | if (isOpenMPSimdDirective(DKind: S.getDirectiveKind())) |
5819 | CGF.EmitOMPSimdInit(D: S); |
5820 | }, |
5821 | BodyCodeGen: [&S, &LoopScope, Cond, IncExpr, LoopExit, &CodeGenLoop, |
5822 | StaticChunked](CodeGenFunction &CGF, PrePostActionTy &) { |
5823 | CGF.EmitOMPInnerLoop( |
5824 | S, RequiresCleanup: LoopScope.requiresCleanups(), LoopCond: Cond, IncExpr, |
5825 | BodyGen: [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) { |
5826 | CodeGenLoop(CGF, S, LoopExit); |
5827 | }, |
5828 | PostIncGen: [&S, StaticChunked](CodeGenFunction &CGF) { |
5829 | if (StaticChunked) { |
5830 | CGF.EmitIgnoredExpr(E: S.getCombinedNextLowerBound()); |
5831 | CGF.EmitIgnoredExpr(E: S.getCombinedNextUpperBound()); |
5832 | CGF.EmitIgnoredExpr(E: S.getCombinedEnsureUpperBound()); |
5833 | CGF.EmitIgnoredExpr(E: S.getCombinedInit()); |
5834 | } |
5835 | }); |
5836 | }); |
5837 | EmitBlock(BB: LoopExit.getBlock()); |
5838 | // Tell the runtime we are done. |
5839 | RT.emitForStaticFinish(CGF&: *this, Loc: S.getEndLoc(), DKind: OMPD_distribute); |
5840 | } else { |
// Emit the outer loop, which requests its work chunk [LB..UB] from the
// runtime and runs the inner loop to process it.
5843 | const OMPLoopArguments LoopArguments = { |
5844 | LB.getAddress(), UB.getAddress(), ST.getAddress(), IL.getAddress(), |
5845 | Chunk}; |
5846 | EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, LoopArgs: LoopArguments, |
5847 | CodeGenLoopContent: CodeGenLoop); |
5848 | } |
5849 | if (isOpenMPSimdDirective(DKind: S.getDirectiveKind())) { |
5850 | EmitOMPSimdFinal(D: S, CondGen: [IL, &S](CodeGenFunction &CGF) { |
5851 | return CGF.Builder.CreateIsNotNull( |
5852 | Arg: CGF.EmitLoadOfScalar(lvalue: IL, Loc: S.getBeginLoc())); |
5853 | }); |
5854 | } |
5855 | if (isOpenMPSimdDirective(DKind: S.getDirectiveKind()) && |
5856 | !isOpenMPParallelDirective(DKind: S.getDirectiveKind()) && |
5857 | !isOpenMPTeamsDirective(DKind: S.getDirectiveKind())) { |
5858 | EmitOMPReductionClauseFinal(D: S, ReductionKind: OMPD_simd); |
5859 | // Emit post-update of the reduction variables if IsLastIter != 0. |
5860 | emitPostUpdateForReductionClause( |
5861 | CGF&: *this, D: S, CondGen: [IL, &S](CodeGenFunction &CGF) { |
5862 | return CGF.Builder.CreateIsNotNull( |
5863 | Arg: CGF.EmitLoadOfScalar(lvalue: IL, Loc: S.getBeginLoc())); |
5864 | }); |
5865 | } |
5866 | // Emit final copy of the lastprivate variables if IsLastIter != 0. |
5867 | if (HasLastprivateClause) { |
5868 | EmitOMPLastprivateClauseFinal( |
5869 | D: S, /*NoFinals=*/false, |
5870 | IsLastIterCond: Builder.CreateIsNotNull(Arg: EmitLoadOfScalar(lvalue: IL, Loc: S.getBeginLoc()))); |
5871 | } |
5872 | } |
5873 | |
5874 | // We're now done with the loop, so jump to the continuation block. |
5875 | if (ContBlock) { |
5876 | EmitBranch(Block: ContBlock); |
5877 | EmitBlock(BB: ContBlock, IsFinished: true); |
5878 | } |
5879 | } |
5880 | } |
5881 | |
5882 | void CodeGenFunction::EmitOMPDistributeDirective( |
5883 | const OMPDistributeDirective &S) { |
5884 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { |
5885 | CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitOMPLoopBodyWithStopPoint, IncExpr: S.getInc()); |
5886 | }; |
5887 | OMPLexicalScope Scope(*this, S, OMPD_unknown); |
5888 | CGM.getOpenMPRuntime().emitInlinedDirective(CGF&: *this, InnermostKind: OMPD_distribute, CodeGen); |
5889 | } |
5890 | |
5891 | static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM, |
5892 | const CapturedStmt *S, |
5893 | SourceLocation Loc) { |
5894 | CodeGenFunction CGF(CGM, /*suppressNewContext=*/true); |
5895 | CodeGenFunction::CGCapturedStmtInfo CapStmtInfo; |
5896 | CGF.CapturedStmtInfo = &CapStmtInfo; |
5897 | llvm::Function *Fn = CGF.GenerateOpenMPCapturedStmtFunction(S: *S, Loc); |
5898 | Fn->setDoesNotRecurse(); |
5899 | return Fn; |
5900 | } |
5901 | |
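/// Emits the doacross dependency bookkeeping for an 'ordered' construct with
/// a 'depend' or 'doacross' clause: each loop counter expression is converted
/// to i64 and passed to OMPBuilder.createOrderedDepend, which emits the
/// corresponding source/sink runtime call.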
5902 | template <typename T> |
5903 | static void emitRestoreIP(CodeGenFunction &CGF, const T *C, |
5904 | llvm::OpenMPIRBuilder::InsertPointTy AllocaIP, |
5905 | llvm::OpenMPIRBuilder &OMPBuilder) { |
5906 | |
5907 | unsigned NumLoops = C->getNumLoops(); |
5908 | QualType Int64Ty = CGF.CGM.getContext().getIntTypeForBitwidth( |
5909 | /*DestWidth=*/64, /*Signed=*/1); |
5910 | llvm::SmallVector<llvm::Value *> StoreValues; |
5911 | for (unsigned I = 0; I < NumLoops; I++) { |
5912 | const Expr *CounterVal = C->getLoopData(I); |
5913 | assert(CounterVal); |
5914 | llvm::Value *StoreValue = CGF.EmitScalarConversion( |
5915 | Src: CGF.EmitScalarExpr(E: CounterVal), SrcTy: CounterVal->getType(), DstTy: Int64Ty, |
5916 | Loc: CounterVal->getExprLoc()); |
5917 | StoreValues.emplace_back(Args&: StoreValue); |
5918 | } |
5919 | OMPDoacrossKind<T> ODK; |
5920 | bool IsDependSource = ODK.isSource(C); |
5921 | CGF.Builder.restoreIP( |
5922 | IP: OMPBuilder.createOrderedDepend(Loc: CGF.Builder, AllocaIP, NumLoops, |
5923 | StoreValues, Name: ".cnt.addr" , IsDependSource)); |
5924 | } |
5925 | |
5926 | void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) { |
5927 | if (CGM.getLangOpts().OpenMPIRBuilder) { |
5928 | llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); |
5929 | using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; |
5930 | |
5931 | if (S.hasClausesOfKind<OMPDependClause>() || |
5932 | S.hasClausesOfKind<OMPDoacrossClause>()) { |
      // The ordered directive with a depend or doacross clause.
      assert(!S.hasAssociatedStmt() &&
             "An 'ordered' construct with a 'depend' or 'doacross' clause "
             "must not have an associated statement.");
5936 | InsertPointTy AllocaIP(AllocaInsertPt->getParent(), |
5937 | AllocaInsertPt->getIterator()); |
5938 | for (const auto *DC : S.getClausesOfKind<OMPDependClause>()) |
5939 | emitRestoreIP(CGF&: *this, C: DC, AllocaIP, OMPBuilder); |
5940 | for (const auto *DC : S.getClausesOfKind<OMPDoacrossClause>()) |
5941 | emitRestoreIP(CGF&: *this, C: DC, AllocaIP, OMPBuilder); |
5942 | } else { |
      // The ordered directive with a threads or simd clause, or with no
      // clause at all; without a clause, it behaves as if the threads clause
      // were specified.
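      // For example (illustrative): '#pragma omp ordered simd' takes the
      // outlined-function path below, while a bare '#pragma omp ordered'
      // (equivalent to 'ordered threads') has its body emitted as an inlined
      // region under createOrderedThreadsSimd.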
5945 | const auto *C = S.getSingleClause<OMPSIMDClause>(); |
5946 | |
5947 | auto FiniCB = [this](InsertPointTy IP) { |
5948 | OMPBuilderCBHelpers::FinalizeOMPRegion(CGF&: *this, IP); |
5949 | }; |
5950 | |
5951 | auto BodyGenCB = [&S, C, this](InsertPointTy AllocaIP, |
5952 | InsertPointTy CodeGenIP) { |
5953 | Builder.restoreIP(IP: CodeGenIP); |
5954 | |
5955 | const CapturedStmt *CS = S.getInnermostCapturedStmt(); |
5956 | if (C) { |
5957 | llvm::BasicBlock *FiniBB = splitBBWithSuffix( |
5958 | Builder, /*CreateBranch=*/false, Suffix: ".ordered.after" ); |
5959 | llvm::SmallVector<llvm::Value *, 16> CapturedVars; |
5960 | GenerateOpenMPCapturedVars(S: *CS, CapturedVars); |
5961 | llvm::Function *OutlinedFn = |
5962 | emitOutlinedOrderedFunction(CGM, S: CS, Loc: S.getBeginLoc()); |
5963 | assert(S.getBeginLoc().isValid() && |
5964 | "Outlined function call location must be valid." ); |
5965 | ApplyDebugLocation::CreateDefaultArtificial(CGF&: *this, TemporaryLocation: S.getBeginLoc()); |
5966 | OMPBuilderCBHelpers::EmitCaptureStmt(CGF&: *this, CodeGenIP, FiniBB&: *FiniBB, |
5967 | Fn: OutlinedFn, Args: CapturedVars); |
5968 | } else { |
5969 | OMPBuilderCBHelpers::EmitOMPInlinedRegionBody( |
5970 | CGF&: *this, RegionBodyStmt: CS->getCapturedStmt(), AllocaIP, CodeGenIP, RegionName: "ordered" ); |
5971 | } |
5972 | }; |
5973 | |
5974 | OMPLexicalScope Scope(*this, S, OMPD_unknown); |
5975 | Builder.restoreIP( |
5976 | IP: OMPBuilder.createOrderedThreadsSimd(Loc: Builder, BodyGenCB, FiniCB, IsThreads: !C)); |
5977 | } |
5978 | return; |
5979 | } |
5980 | |
5981 | if (S.hasClausesOfKind<OMPDependClause>()) { |
    assert(!S.hasAssociatedStmt() &&
           "An 'ordered' construct with a 'depend' clause must not have an "
           "associated statement.");
5984 | for (const auto *DC : S.getClausesOfKind<OMPDependClause>()) |
5985 | CGM.getOpenMPRuntime().emitDoacrossOrdered(CGF&: *this, C: DC); |
5986 | return; |
5987 | } |
5988 | if (S.hasClausesOfKind<OMPDoacrossClause>()) { |
    assert(!S.hasAssociatedStmt() &&
           "An 'ordered' construct with a 'doacross' clause must not have an "
           "associated statement.");
5991 | for (const auto *DC : S.getClausesOfKind<OMPDoacrossClause>()) |
5992 | CGM.getOpenMPRuntime().emitDoacrossOrdered(CGF&: *this, C: DC); |
5993 | return; |
5994 | } |
5995 | const auto *C = S.getSingleClause<OMPSIMDClause>(); |
5996 | auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF, |
5997 | PrePostActionTy &Action) { |
5998 | const CapturedStmt *CS = S.getInnermostCapturedStmt(); |
5999 | if (C) { |
6000 | llvm::SmallVector<llvm::Value *, 16> CapturedVars; |
6001 | CGF.GenerateOpenMPCapturedVars(S: *CS, CapturedVars); |
6002 | llvm::Function *OutlinedFn = |
6003 | emitOutlinedOrderedFunction(CGM, S: CS, Loc: S.getBeginLoc()); |
6004 | CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc: S.getBeginLoc(), |
6005 | OutlinedFn, Args: CapturedVars); |
6006 | } else { |
6007 | Action.Enter(CGF); |
6008 | CGF.EmitStmt(S: CS->getCapturedStmt()); |
6009 | } |
6010 | }; |
6011 | OMPLexicalScope Scope(*this, S, OMPD_unknown); |
6012 | CGM.getOpenMPRuntime().emitOrderedRegion(CGF&: *this, OrderedOpGen: CodeGen, Loc: S.getBeginLoc(), IsThreads: !C); |
6013 | } |
6014 | |
6015 | static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val, |
6016 | QualType SrcType, QualType DestType, |
6017 | SourceLocation Loc) { |
6018 | assert(CGF.hasScalarEvaluationKind(DestType) && |
6019 | "DestType must have scalar evaluation kind." ); |
6020 | assert(!Val.isAggregate() && "Must be a scalar or complex." ); |
6021 | return Val.isScalar() ? CGF.EmitScalarConversion(Src: Val.getScalarVal(), SrcTy: SrcType, |
6022 | DstTy: DestType, Loc) |
6023 | : CGF.EmitComplexToScalarConversion( |
6024 | Src: Val.getComplexVal(), SrcTy: SrcType, DstTy: DestType, Loc); |
6025 | } |
6026 | |
6027 | static CodeGenFunction::ComplexPairTy |
6028 | convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType, |
6029 | QualType DestType, SourceLocation Loc) { |
6030 | assert(CGF.getEvaluationKind(DestType) == TEK_Complex && |
6031 | "DestType must have complex evaluation kind." ); |
6032 | CodeGenFunction::ComplexPairTy ComplexVal; |
6033 | if (Val.isScalar()) { |
6034 | // Convert the input element to the element type of the complex. |
6035 | QualType DestElementType = |
6036 | DestType->castAs<ComplexType>()->getElementType(); |
6037 | llvm::Value *ScalarVal = CGF.EmitScalarConversion( |
6038 | Src: Val.getScalarVal(), SrcTy: SrcType, DstTy: DestElementType, Loc); |
6039 | ComplexVal = CodeGenFunction::ComplexPairTy( |
6040 | ScalarVal, llvm::Constant::getNullValue(Ty: ScalarVal->getType())); |
6041 | } else { |
6042 | assert(Val.isComplex() && "Must be a scalar or complex." ); |
6043 | QualType SrcElementType = SrcType->castAs<ComplexType>()->getElementType(); |
6044 | QualType DestElementType = |
6045 | DestType->castAs<ComplexType>()->getElementType(); |
6046 | ComplexVal.first = CGF.EmitScalarConversion( |
6047 | Src: Val.getComplexVal().first, SrcTy: SrcElementType, DstTy: DestElementType, Loc); |
6048 | ComplexVal.second = CGF.EmitScalarConversion( |
6049 | Src: Val.getComplexVal().second, SrcTy: SrcElementType, DstTy: DestElementType, Loc); |
6050 | } |
6051 | return ComplexVal; |
6052 | } |
6053 | |
6054 | static void emitSimpleAtomicStore(CodeGenFunction &CGF, llvm::AtomicOrdering AO, |
6055 | LValue LVal, RValue RVal) { |
6056 | if (LVal.isGlobalReg()) |
6057 | CGF.EmitStoreThroughGlobalRegLValue(Src: RVal, Dst: LVal); |
6058 | else |
6059 | CGF.EmitAtomicStore(rvalue: RVal, lvalue: LVal, AO, IsVolatile: LVal.isVolatile(), /*isInit=*/false); |
6060 | } |
6061 | |
6062 | static RValue emitSimpleAtomicLoad(CodeGenFunction &CGF, |
6063 | llvm::AtomicOrdering AO, LValue LVal, |
6064 | SourceLocation Loc) { |
6065 | if (LVal.isGlobalReg()) |
6066 | return CGF.EmitLoadOfLValue(V: LVal, Loc); |
6067 | return CGF.EmitAtomicLoad( |
6068 | lvalue: LVal, loc: Loc, AO: llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(SuccessOrdering: AO), |
6069 | IsVolatile: LVal.isVolatile()); |
6070 | } |
6071 | |
6072 | void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal, |
6073 | QualType RValTy, SourceLocation Loc) { |
6074 | switch (getEvaluationKind(T: LVal.getType())) { |
6075 | case TEK_Scalar: |
6076 | EmitStoreThroughLValue(Src: RValue::get(V: convertToScalarValue( |
6077 | CGF&: *this, Val: RVal, SrcType: RValTy, DestType: LVal.getType(), Loc)), |
6078 | Dst: LVal); |
6079 | break; |
6080 | case TEK_Complex: |
6081 | EmitStoreOfComplex( |
6082 | V: convertToComplexValue(CGF&: *this, Val: RVal, SrcType: RValTy, DestType: LVal.getType(), Loc), dest: LVal, |
6083 | /*isInit=*/false); |
6084 | break; |
6085 | case TEK_Aggregate: |
6086 | llvm_unreachable("Must be a scalar or complex." ); |
6087 | } |
6088 | } |
6089 | |
6090 | static void emitOMPAtomicReadExpr(CodeGenFunction &CGF, llvm::AtomicOrdering AO, |
6091 | const Expr *X, const Expr *V, |
6092 | SourceLocation Loc) { |
6093 | // v = x; |
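  // For example (illustrative):
  //   #pragma omp atomic read acquire
  //   v = x;
  // emits an atomic load of 'x' followed by a plain store of the result into
  // 'v', plus the acquire flush handled below.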
6094 | assert(V->isLValue() && "V of 'omp atomic read' is not lvalue" ); |
6095 | assert(X->isLValue() && "X of 'omp atomic read' is not lvalue" ); |
6096 | LValue XLValue = CGF.EmitLValue(E: X); |
6097 | LValue VLValue = CGF.EmitLValue(E: V); |
6098 | RValue Res = emitSimpleAtomicLoad(CGF, AO, LVal: XLValue, Loc); |
6099 | // OpenMP, 2.17.7, atomic Construct |
6100 | // If the read or capture clause is specified and the acquire, acq_rel, or |
6101 | // seq_cst clause is specified then the strong flush on exit from the atomic |
6102 | // operation is also an acquire flush. |
6103 | switch (AO) { |
6104 | case llvm::AtomicOrdering::Acquire: |
6105 | case llvm::AtomicOrdering::AcquireRelease: |
6106 | case llvm::AtomicOrdering::SequentiallyConsistent: |
6107 | CGF.CGM.getOpenMPRuntime().emitFlush(CGF, Vars: std::nullopt, Loc, |
6108 | AO: llvm::AtomicOrdering::Acquire); |
6109 | break; |
6110 | case llvm::AtomicOrdering::Monotonic: |
6111 | case llvm::AtomicOrdering::Release: |
6112 | break; |
6113 | case llvm::AtomicOrdering::NotAtomic: |
6114 | case llvm::AtomicOrdering::Unordered: |
6115 | llvm_unreachable("Unexpected ordering." ); |
6116 | } |
6117 | CGF.emitOMPSimpleStore(LVal: VLValue, RVal: Res, RValTy: X->getType().getNonReferenceType(), Loc); |
6118 | CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, LHS: V); |
6119 | } |
6120 | |
6121 | static void emitOMPAtomicWriteExpr(CodeGenFunction &CGF, |
6122 | llvm::AtomicOrdering AO, const Expr *X, |
6123 | const Expr *E, SourceLocation Loc) { |
6124 | // x = expr; |
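  // For example (illustrative):
  //   #pragma omp atomic write release
  //   x = expr;
  // emits an atomic store into 'x' with release ordering, plus the release
  // flush handled below.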
6125 | assert(X->isLValue() && "X of 'omp atomic write' is not lvalue" ); |
6126 | emitSimpleAtomicStore(CGF, AO, LVal: CGF.EmitLValue(E: X), RVal: CGF.EmitAnyExpr(E)); |
6127 | CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, LHS: X); |
6128 | // OpenMP, 2.17.7, atomic Construct |
6129 | // If the write, update, or capture clause is specified and the release, |
6130 | // acq_rel, or seq_cst clause is specified then the strong flush on entry to |
6131 | // the atomic operation is also a release flush. |
6132 | switch (AO) { |
6133 | case llvm::AtomicOrdering::Release: |
6134 | case llvm::AtomicOrdering::AcquireRelease: |
6135 | case llvm::AtomicOrdering::SequentiallyConsistent: |
6136 | CGF.CGM.getOpenMPRuntime().emitFlush(CGF, Vars: std::nullopt, Loc, |
6137 | AO: llvm::AtomicOrdering::Release); |
6138 | break; |
6139 | case llvm::AtomicOrdering::Acquire: |
6140 | case llvm::AtomicOrdering::Monotonic: |
6141 | break; |
6142 | case llvm::AtomicOrdering::NotAtomic: |
6143 | case llvm::AtomicOrdering::Unordered: |
6144 | llvm_unreachable("Unexpected ordering." ); |
6145 | } |
6146 | } |
6147 | |
6148 | static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X, |
6149 | RValue Update, |
6150 | BinaryOperatorKind BO, |
6151 | llvm::AtomicOrdering AO, |
6152 | bool IsXLHSInRHSPart) { |
6153 | ASTContext &Context = CGF.getContext(); |
  // Allow atomicrmw only if 'x' and 'update' are integer values (or
  // floating-point values for add/sub), the lvalue for the 'x' expression is
  // simple, and atomics are supported for the given type on the target
  // platform.
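  // For example (illustrative): under '#pragma omp atomic update', an update
  // like 'x += v;' on a plain int typically lowers to 'atomicrmw add' here,
  // while 'x = x * v;' has no RMW form and falls back to the compare-and-swap
  // path in EmitOMPAtomicSimpleUpdateExpr.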
6157 | if (BO == BO_Comma || !Update.isScalar() || !X.isSimple() || |
6158 | (!isa<llvm::ConstantInt>(Val: Update.getScalarVal()) && |
6159 | (Update.getScalarVal()->getType() != X.getAddress().getElementType())) || |
6160 | !Context.getTargetInfo().hasBuiltinAtomic( |
6161 | AtomicSizeInBits: Context.getTypeSize(T: X.getType()), AlignmentInBits: Context.toBits(CharSize: X.getAlignment()))) |
6162 | return std::make_pair(x: false, y: RValue::get(V: nullptr)); |
6163 | |
6164 | auto &&CheckAtomicSupport = [&CGF](llvm::Type *T, BinaryOperatorKind BO) { |
6165 | if (T->isIntegerTy()) |
6166 | return true; |
6167 | |
6168 | if (T->isFloatingPointTy() && (BO == BO_Add || BO == BO_Sub)) |
6169 | return llvm::isPowerOf2_64(Value: CGF.CGM.getDataLayout().getTypeStoreSize(Ty: T)); |
6170 | |
6171 | return false; |
6172 | }; |
6173 | |
6174 | if (!CheckAtomicSupport(Update.getScalarVal()->getType(), BO) || |
6175 | !CheckAtomicSupport(X.getAddress().getElementType(), BO)) |
6176 | return std::make_pair(x: false, y: RValue::get(V: nullptr)); |
6177 | |
6178 | bool IsInteger = X.getAddress().getElementType()->isIntegerTy(); |
6179 | llvm::AtomicRMWInst::BinOp RMWOp; |
6180 | switch (BO) { |
6181 | case BO_Add: |
6182 | RMWOp = IsInteger ? llvm::AtomicRMWInst::Add : llvm::AtomicRMWInst::FAdd; |
6183 | break; |
6184 | case BO_Sub: |
6185 | if (!IsXLHSInRHSPart) |
6186 | return std::make_pair(x: false, y: RValue::get(V: nullptr)); |
6187 | RMWOp = IsInteger ? llvm::AtomicRMWInst::Sub : llvm::AtomicRMWInst::FSub; |
6188 | break; |
6189 | case BO_And: |
6190 | RMWOp = llvm::AtomicRMWInst::And; |
6191 | break; |
6192 | case BO_Or: |
6193 | RMWOp = llvm::AtomicRMWInst::Or; |
6194 | break; |
6195 | case BO_Xor: |
6196 | RMWOp = llvm::AtomicRMWInst::Xor; |
6197 | break; |
6198 | case BO_LT: |
6199 | if (IsInteger) |
6200 | RMWOp = X.getType()->hasSignedIntegerRepresentation() |
6201 | ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min |
6202 | : llvm::AtomicRMWInst::Max) |
6203 | : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin |
6204 | : llvm::AtomicRMWInst::UMax); |
6205 | else |
6206 | RMWOp = IsXLHSInRHSPart ? llvm::AtomicRMWInst::FMin |
6207 | : llvm::AtomicRMWInst::FMax; |
6208 | break; |
6209 | case BO_GT: |
6210 | if (IsInteger) |
6211 | RMWOp = X.getType()->hasSignedIntegerRepresentation() |
6212 | ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max |
6213 | : llvm::AtomicRMWInst::Min) |
6214 | : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax |
6215 | : llvm::AtomicRMWInst::UMin); |
6216 | else |
6217 | RMWOp = IsXLHSInRHSPart ? llvm::AtomicRMWInst::FMax |
6218 | : llvm::AtomicRMWInst::FMin; |
6219 | break; |
6220 | case BO_Assign: |
6221 | RMWOp = llvm::AtomicRMWInst::Xchg; |
6222 | break; |
6223 | case BO_Mul: |
6224 | case BO_Div: |
6225 | case BO_Rem: |
6226 | case BO_Shl: |
6227 | case BO_Shr: |
6228 | case BO_LAnd: |
6229 | case BO_LOr: |
6230 | return std::make_pair(x: false, y: RValue::get(V: nullptr)); |
6231 | case BO_PtrMemD: |
6232 | case BO_PtrMemI: |
6233 | case BO_LE: |
6234 | case BO_GE: |
6235 | case BO_EQ: |
6236 | case BO_NE: |
6237 | case BO_Cmp: |
6238 | case BO_AddAssign: |
6239 | case BO_SubAssign: |
6240 | case BO_AndAssign: |
6241 | case BO_OrAssign: |
6242 | case BO_XorAssign: |
6243 | case BO_MulAssign: |
6244 | case BO_DivAssign: |
6245 | case BO_RemAssign: |
6246 | case BO_ShlAssign: |
6247 | case BO_ShrAssign: |
6248 | case BO_Comma: |
6249 | llvm_unreachable("Unsupported atomic update operation" ); |
6250 | } |
6251 | llvm::Value *UpdateVal = Update.getScalarVal(); |
6252 | if (auto *IC = dyn_cast<llvm::ConstantInt>(Val: UpdateVal)) { |
6253 | if (IsInteger) |
6254 | UpdateVal = CGF.Builder.CreateIntCast( |
6255 | V: IC, DestTy: X.getAddress().getElementType(), |
6256 | isSigned: X.getType()->hasSignedIntegerRepresentation()); |
6257 | else |
6258 | UpdateVal = CGF.Builder.CreateCast(Op: llvm::Instruction::CastOps::UIToFP, V: IC, |
6259 | DestTy: X.getAddress().getElementType()); |
6260 | } |
6261 | llvm::Value *Res = |
6262 | CGF.Builder.CreateAtomicRMW(Op: RMWOp, Addr: X.getAddress(), Val: UpdateVal, Ordering: AO); |
6263 | return std::make_pair(x: true, y: RValue::get(V: Res)); |
6264 | } |
6265 | |
6266 | std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr( |
6267 | LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart, |
6268 | llvm::AtomicOrdering AO, SourceLocation Loc, |
6269 | const llvm::function_ref<RValue(RValue)> CommonGen) { |
6270 | // Update expressions are allowed to have the following forms: |
  // x binop= expr; -> xrval binop expr;
  // x++, ++x -> xrval + 1;
  // x--, --x -> xrval - 1;
  // x = x binop expr; -> xrval binop expr;
  // x = expr Op x; -> expr binop xrval;
6276 | auto Res = emitOMPAtomicRMW(CGF&: *this, X, Update: E, BO, AO, IsXLHSInRHSPart); |
6277 | if (!Res.first) { |
6278 | if (X.isGlobalReg()) { |
6279 | // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop |
6280 | // 'xrval'. |
6281 | EmitStoreThroughLValue(Src: CommonGen(EmitLoadOfLValue(V: X, Loc)), Dst: X); |
6282 | } else { |
6283 | // Perform compare-and-swap procedure. |
6284 | EmitAtomicUpdate(LVal: X, AO, UpdateOp: CommonGen, IsVolatile: X.getType().isVolatileQualified()); |
6285 | } |
6286 | } |
6287 | return Res; |
6288 | } |
6289 | |
6290 | static void emitOMPAtomicUpdateExpr(CodeGenFunction &CGF, |
6291 | llvm::AtomicOrdering AO, const Expr *X, |
6292 | const Expr *E, const Expr *UE, |
6293 | bool IsXLHSInRHSPart, SourceLocation Loc) { |
6294 | assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) && |
6295 | "Update expr in 'atomic update' must be a binary operator." ); |
6296 | const auto *BOUE = cast<BinaryOperator>(Val: UE->IgnoreImpCasts()); |
6297 | // Update expressions are allowed to have the following forms: |
  // x binop= expr; -> xrval binop expr;
  // x++, ++x -> xrval + 1;
  // x--, --x -> xrval - 1;
  // x = x binop expr; -> xrval binop expr;
  // x = expr Op x; -> expr binop xrval;
6303 | assert(X->isLValue() && "X of 'omp atomic update' is not lvalue" ); |
6304 | LValue XLValue = CGF.EmitLValue(E: X); |
6305 | RValue ExprRValue = CGF.EmitAnyExpr(E); |
6306 | const auto *LHS = cast<OpaqueValueExpr>(Val: BOUE->getLHS()->IgnoreImpCasts()); |
6307 | const auto *RHS = cast<OpaqueValueExpr>(Val: BOUE->getRHS()->IgnoreImpCasts()); |
6308 | const OpaqueValueExpr *XRValExpr = IsXLHSInRHSPart ? LHS : RHS; |
6309 | const OpaqueValueExpr *ERValExpr = IsXLHSInRHSPart ? RHS : LHS; |
6310 | auto &&Gen = [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) { |
6311 | CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue); |
6312 | CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue); |
6313 | return CGF.EmitAnyExpr(E: UE); |
6314 | }; |
6315 | (void)CGF.EmitOMPAtomicSimpleUpdateExpr( |
6316 | X: XLValue, E: ExprRValue, BO: BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, CommonGen: Gen); |
6317 | CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, LHS: X); |
6318 | // OpenMP, 2.17.7, atomic Construct |
6319 | // If the write, update, or capture clause is specified and the release, |
6320 | // acq_rel, or seq_cst clause is specified then the strong flush on entry to |
6321 | // the atomic operation is also a release flush. |
6322 | switch (AO) { |
6323 | case llvm::AtomicOrdering::Release: |
6324 | case llvm::AtomicOrdering::AcquireRelease: |
6325 | case llvm::AtomicOrdering::SequentiallyConsistent: |
6326 | CGF.CGM.getOpenMPRuntime().emitFlush(CGF, Vars: std::nullopt, Loc, |
6327 | AO: llvm::AtomicOrdering::Release); |
6328 | break; |
6329 | case llvm::AtomicOrdering::Acquire: |
6330 | case llvm::AtomicOrdering::Monotonic: |
6331 | break; |
6332 | case llvm::AtomicOrdering::NotAtomic: |
6333 | case llvm::AtomicOrdering::Unordered: |
6334 | llvm_unreachable("Unexpected ordering." ); |
6335 | } |
6336 | } |
6337 | |
6338 | static RValue convertToType(CodeGenFunction &CGF, RValue Value, |
6339 | QualType SourceType, QualType ResType, |
6340 | SourceLocation Loc) { |
6341 | switch (CGF.getEvaluationKind(T: ResType)) { |
6342 | case TEK_Scalar: |
6343 | return RValue::get( |
6344 | V: convertToScalarValue(CGF, Val: Value, SrcType: SourceType, DestType: ResType, Loc)); |
6345 | case TEK_Complex: { |
6346 | auto Res = convertToComplexValue(CGF, Val: Value, SrcType: SourceType, DestType: ResType, Loc); |
6347 | return RValue::getComplex(V1: Res.first, V2: Res.second); |
6348 | } |
6349 | case TEK_Aggregate: |
6350 | break; |
6351 | } |
6352 | llvm_unreachable("Must be a scalar or complex." ); |
6353 | } |
6354 | |
6355 | static void emitOMPAtomicCaptureExpr(CodeGenFunction &CGF, |
6356 | llvm::AtomicOrdering AO, |
6357 | bool IsPostfixUpdate, const Expr *V, |
6358 | const Expr *X, const Expr *E, |
6359 | const Expr *UE, bool IsXLHSInRHSPart, |
6360 | SourceLocation Loc) { |
6361 | assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue" ); |
6362 | assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue" ); |
6363 | RValue NewVVal; |
6364 | LValue VLValue = CGF.EmitLValue(E: V); |
6365 | LValue XLValue = CGF.EmitLValue(E: X); |
6366 | RValue ExprRValue = CGF.EmitAnyExpr(E); |
6367 | QualType NewVValType; |
6368 | if (UE) { |
6369 | // 'x' is updated with some additional value. |
6370 | assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) && |
6371 | "Update expr in 'atomic capture' must be a binary operator." ); |
6372 | const auto *BOUE = cast<BinaryOperator>(Val: UE->IgnoreImpCasts()); |
6373 | // Update expressions are allowed to have the following forms: |
    // x binop= expr; -> xrval binop expr;
    // x++, ++x -> xrval + 1;
    // x--, --x -> xrval - 1;
    // x = x binop expr; -> xrval binop expr;
    // x = expr Op x; -> expr binop xrval;
6379 | const auto *LHS = cast<OpaqueValueExpr>(Val: BOUE->getLHS()->IgnoreImpCasts()); |
6380 | const auto *RHS = cast<OpaqueValueExpr>(Val: BOUE->getRHS()->IgnoreImpCasts()); |
6381 | const OpaqueValueExpr *XRValExpr = IsXLHSInRHSPart ? LHS : RHS; |
6382 | NewVValType = XRValExpr->getType(); |
6383 | const OpaqueValueExpr *ERValExpr = IsXLHSInRHSPart ? RHS : LHS; |
6384 | auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr, |
6385 | IsPostfixUpdate](RValue XRValue) { |
6386 | CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue); |
6387 | CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue); |
6388 | RValue Res = CGF.EmitAnyExpr(E: UE); |
6389 | NewVVal = IsPostfixUpdate ? XRValue : Res; |
6390 | return Res; |
6391 | }; |
6392 | auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr( |
6393 | X: XLValue, E: ExprRValue, BO: BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, CommonGen: Gen); |
6394 | CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, LHS: X); |
6395 | if (Res.first) { |
6396 | // 'atomicrmw' instruction was generated. |
6397 | if (IsPostfixUpdate) { |
6398 | // Use old value from 'atomicrmw'. |
6399 | NewVVal = Res.second; |
6400 | } else { |
        // 'atomicrmw' does not return the new value, so recompute it using
        // the old value of 'x'.
6403 | CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue); |
6404 | CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, Res.second); |
6405 | NewVVal = CGF.EmitAnyExpr(E: UE); |
6406 | } |
6407 | } |
6408 | } else { |
6409 | // 'x' is simply rewritten with some 'expr'. |
6410 | NewVValType = X->getType().getNonReferenceType(); |
6411 | ExprRValue = convertToType(CGF, Value: ExprRValue, SourceType: E->getType(), |
6412 | ResType: X->getType().getNonReferenceType(), Loc); |
6413 | auto &&Gen = [&NewVVal, ExprRValue](RValue XRValue) { |
6414 | NewVVal = XRValue; |
6415 | return ExprRValue; |
6416 | }; |
    // Try to perform atomicrmw xchg; otherwise fall back to a
    // compare-and-swap based exchange.
6418 | auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr( |
6419 | X: XLValue, E: ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO, |
6420 | Loc, CommonGen: Gen); |
6421 | CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, LHS: X); |
6422 | if (Res.first) { |
6423 | // 'atomicrmw' instruction was generated. |
6424 | NewVVal = IsPostfixUpdate ? Res.second : ExprRValue; |
6425 | } |
6426 | } |
6427 | // Emit post-update store to 'v' of old/new 'x' value. |
6428 | CGF.emitOMPSimpleStore(LVal: VLValue, RVal: NewVVal, RValTy: NewVValType, Loc); |
6429 | CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, LHS: V); |
6430 | // OpenMP 5.1 removes the required flush for capture clause. |
6431 | if (CGF.CGM.getLangOpts().OpenMP < 51) { |
6432 | // OpenMP, 2.17.7, atomic Construct |
6433 | // If the write, update, or capture clause is specified and the release, |
6434 | // acq_rel, or seq_cst clause is specified then the strong flush on entry to |
6435 | // the atomic operation is also a release flush. |
6436 | // If the read or capture clause is specified and the acquire, acq_rel, or |
6437 | // seq_cst clause is specified then the strong flush on exit from the atomic |
6438 | // operation is also an acquire flush. |
6439 | switch (AO) { |
6440 | case llvm::AtomicOrdering::Release: |
6441 | CGF.CGM.getOpenMPRuntime().emitFlush(CGF, Vars: std::nullopt, Loc, |
6442 | AO: llvm::AtomicOrdering::Release); |
6443 | break; |
6444 | case llvm::AtomicOrdering::Acquire: |
6445 | CGF.CGM.getOpenMPRuntime().emitFlush(CGF, Vars: std::nullopt, Loc, |
6446 | AO: llvm::AtomicOrdering::Acquire); |
6447 | break; |
6448 | case llvm::AtomicOrdering::AcquireRelease: |
6449 | case llvm::AtomicOrdering::SequentiallyConsistent: |
6450 | CGF.CGM.getOpenMPRuntime().emitFlush( |
6451 | CGF, Vars: std::nullopt, Loc, AO: llvm::AtomicOrdering::AcquireRelease); |
6452 | break; |
6453 | case llvm::AtomicOrdering::Monotonic: |
6454 | break; |
6455 | case llvm::AtomicOrdering::NotAtomic: |
6456 | case llvm::AtomicOrdering::Unordered: |
6457 | llvm_unreachable("Unexpected ordering." ); |
6458 | } |
6459 | } |
6460 | } |
6461 | |
6462 | static void emitOMPAtomicCompareExpr( |
6463 | CodeGenFunction &CGF, llvm::AtomicOrdering AO, llvm::AtomicOrdering FailAO, |
6464 | const Expr *X, const Expr *V, const Expr *R, const Expr *E, const Expr *D, |
6465 | const Expr *CE, bool IsXBinopExpr, bool IsPostfixUpdate, bool IsFailOnly, |
6466 | SourceLocation Loc) { |
6467 | llvm::OpenMPIRBuilder &OMPBuilder = |
6468 | CGF.CGM.getOpenMPRuntime().getOMPBuilder(); |
6469 | |
6470 | OMPAtomicCompareOp Op; |
6471 | assert(isa<BinaryOperator>(CE) && "CE is not a BinaryOperator" ); |
6472 | switch (cast<BinaryOperator>(Val: CE)->getOpcode()) { |
6473 | case BO_EQ: |
6474 | Op = OMPAtomicCompareOp::EQ; |
6475 | break; |
6476 | case BO_LT: |
6477 | Op = OMPAtomicCompareOp::MIN; |
6478 | break; |
6479 | case BO_GT: |
6480 | Op = OMPAtomicCompareOp::MAX; |
6481 | break; |
6482 | default: |
6483 | llvm_unreachable("unsupported atomic compare binary operator" ); |
6484 | } |
6485 | |
6486 | LValue XLVal = CGF.EmitLValue(E: X); |
6487 | Address XAddr = XLVal.getAddress(); |
6488 | |
6489 | auto EmitRValueWithCastIfNeeded = [&CGF, Loc](const Expr *X, const Expr *E) { |
6490 | if (X->getType() == E->getType()) |
6491 | return CGF.EmitScalarExpr(E); |
6492 | const Expr *NewE = E->IgnoreImplicitAsWritten(); |
6493 | llvm::Value *V = CGF.EmitScalarExpr(E: NewE); |
6494 | if (NewE->getType() == X->getType()) |
6495 | return V; |
6496 | return CGF.EmitScalarConversion(Src: V, SrcTy: NewE->getType(), DstTy: X->getType(), Loc); |
6497 | }; |
6498 | |
6499 | llvm::Value *EVal = EmitRValueWithCastIfNeeded(X, E); |
6500 | llvm::Value *DVal = D ? EmitRValueWithCastIfNeeded(X, D) : nullptr; |
6501 | if (auto *CI = dyn_cast<llvm::ConstantInt>(Val: EVal)) |
6502 | EVal = CGF.Builder.CreateIntCast( |
6503 | V: CI, DestTy: XLVal.getAddress().getElementType(), |
6504 | isSigned: E->getType()->hasSignedIntegerRepresentation()); |
6505 | if (DVal) |
6506 | if (auto *CI = dyn_cast<llvm::ConstantInt>(Val: DVal)) |
6507 | DVal = CGF.Builder.CreateIntCast( |
6508 | V: CI, DestTy: XLVal.getAddress().getElementType(), |
6509 | isSigned: D->getType()->hasSignedIntegerRepresentation()); |
6510 | |
6511 | llvm::OpenMPIRBuilder::AtomicOpValue XOpVal{ |
6512 | .Var: XAddr.emitRawPointer(CGF), .ElemTy: XAddr.getElementType(), |
6513 | .IsSigned: X->getType()->hasSignedIntegerRepresentation(), |
6514 | .IsVolatile: X->getType().isVolatileQualified()}; |
6515 | llvm::OpenMPIRBuilder::AtomicOpValue VOpVal, ROpVal; |
6516 | if (V) { |
6517 | LValue LV = CGF.EmitLValue(E: V); |
6518 | Address Addr = LV.getAddress(); |
6519 | VOpVal = {.Var: Addr.emitRawPointer(CGF), .ElemTy: Addr.getElementType(), |
6520 | .IsSigned: V->getType()->hasSignedIntegerRepresentation(), |
6521 | .IsVolatile: V->getType().isVolatileQualified()}; |
6522 | } |
6523 | if (R) { |
6524 | LValue LV = CGF.EmitLValue(E: R); |
6525 | Address Addr = LV.getAddress(); |
6526 | ROpVal = {.Var: Addr.emitRawPointer(CGF), .ElemTy: Addr.getElementType(), |
6527 | .IsSigned: R->getType()->hasSignedIntegerRepresentation(), |
6528 | .IsVolatile: R->getType().isVolatileQualified()}; |
6529 | } |
6530 | |
6531 | if (FailAO == llvm::AtomicOrdering::NotAtomic) { |
    // No 'fail' clause was specified on the '#pragma omp atomic compare'
    // construct, so no explicit failure ordering is passed.
6534 | CGF.Builder.restoreIP(IP: OMPBuilder.createAtomicCompare( |
6535 | Loc: CGF.Builder, X&: XOpVal, V&: VOpVal, R&: ROpVal, E: EVal, D: DVal, AO, Op, IsXBinopExpr, |
6536 | IsPostfixUpdate, IsFailOnly)); |
  } else {
    CGF.Builder.restoreIP(IP: OMPBuilder.createAtomicCompare(
        Loc: CGF.Builder, X&: XOpVal, V&: VOpVal, R&: ROpVal, E: EVal, D: DVal, AO, Op, IsXBinopExpr,
        IsPostfixUpdate, IsFailOnly, Failure: FailAO));
  }
6541 | } |
6542 | |
6543 | static void emitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind, |
6544 | llvm::AtomicOrdering AO, |
6545 | llvm::AtomicOrdering FailAO, bool IsPostfixUpdate, |
6546 | const Expr *X, const Expr *V, const Expr *R, |
6547 | const Expr *E, const Expr *UE, const Expr *D, |
6548 | const Expr *CE, bool IsXLHSInRHSPart, |
6549 | bool IsFailOnly, SourceLocation Loc) { |
6550 | switch (Kind) { |
6551 | case OMPC_read: |
6552 | emitOMPAtomicReadExpr(CGF, AO, X, V, Loc); |
6553 | break; |
6554 | case OMPC_write: |
6555 | emitOMPAtomicWriteExpr(CGF, AO, X, E, Loc); |
6556 | break; |
6557 | case OMPC_unknown: |
6558 | case OMPC_update: |
6559 | emitOMPAtomicUpdateExpr(CGF, AO, X, E, UE, IsXLHSInRHSPart, Loc); |
6560 | break; |
6561 | case OMPC_capture: |
6562 | emitOMPAtomicCaptureExpr(CGF, AO, IsPostfixUpdate, V, X, E, UE, |
6563 | IsXLHSInRHSPart, Loc); |
6564 | break; |
6565 | case OMPC_compare: { |
6566 | emitOMPAtomicCompareExpr(CGF, AO, FailAO, X, V, R, E, D, CE, |
6567 | IsXBinopExpr: IsXLHSInRHSPart, IsPostfixUpdate, IsFailOnly, Loc); |
6568 | break; |
6569 | } |
6570 | default: |
6571 | llvm_unreachable("Clause is not allowed in 'omp atomic'." ); |
6572 | } |
6573 | } |
6574 | |
6575 | void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) { |
6576 | llvm::AtomicOrdering AO = CGM.getOpenMPRuntime().getDefaultMemoryOrdering(); |
6577 | // Fail Memory Clause Ordering. |
6578 | llvm::AtomicOrdering FailAO = llvm::AtomicOrdering::NotAtomic; |
6579 | bool MemOrderingSpecified = false; |
6580 | if (S.getSingleClause<OMPSeqCstClause>()) { |
6581 | AO = llvm::AtomicOrdering::SequentiallyConsistent; |
6582 | MemOrderingSpecified = true; |
6583 | } else if (S.getSingleClause<OMPAcqRelClause>()) { |
6584 | AO = llvm::AtomicOrdering::AcquireRelease; |
6585 | MemOrderingSpecified = true; |
6586 | } else if (S.getSingleClause<OMPAcquireClause>()) { |
6587 | AO = llvm::AtomicOrdering::Acquire; |
6588 | MemOrderingSpecified = true; |
6589 | } else if (S.getSingleClause<OMPReleaseClause>()) { |
6590 | AO = llvm::AtomicOrdering::Release; |
6591 | MemOrderingSpecified = true; |
6592 | } else if (S.getSingleClause<OMPRelaxedClause>()) { |
6593 | AO = llvm::AtomicOrdering::Monotonic; |
6594 | MemOrderingSpecified = true; |
6595 | } |
6596 | llvm::SmallSet<OpenMPClauseKind, 2> KindsEncountered; |
6597 | OpenMPClauseKind Kind = OMPC_unknown; |
6598 | for (const OMPClause *C : S.clauses()) { |
    // Find the first clause (skip seq_cst|acq_rel|acquire|release|relaxed
    // clause, if it is first).
6601 | OpenMPClauseKind K = C->getClauseKind(); |
    // TBD: codegen for the 'weak' clause is not implemented yet; bail out.
6603 | if (K == OMPC_weak) |
6604 | return; |
6605 | if (K == OMPC_seq_cst || K == OMPC_acq_rel || K == OMPC_acquire || |
6606 | K == OMPC_release || K == OMPC_relaxed || K == OMPC_hint) |
6607 | continue; |
6608 | Kind = K; |
6609 | KindsEncountered.insert(V: K); |
6610 | } |
6611 | // We just need to correct Kind here. No need to set a bool saying it is |
6612 | // actually compare capture because we can tell from whether V and R are |
6613 | // nullptr. |
6614 | if (KindsEncountered.contains(V: OMPC_compare) && |
6615 | KindsEncountered.contains(V: OMPC_capture)) |
6616 | Kind = OMPC_compare; |
6617 | if (!MemOrderingSpecified) { |
6618 | llvm::AtomicOrdering DefaultOrder = |
6619 | CGM.getOpenMPRuntime().getDefaultMemoryOrdering(); |
6620 | if (DefaultOrder == llvm::AtomicOrdering::Monotonic || |
6621 | DefaultOrder == llvm::AtomicOrdering::SequentiallyConsistent || |
6622 | (DefaultOrder == llvm::AtomicOrdering::AcquireRelease && |
6623 | Kind == OMPC_capture)) { |
6624 | AO = DefaultOrder; |
6625 | } else if (DefaultOrder == llvm::AtomicOrdering::AcquireRelease) { |
6626 | if (Kind == OMPC_unknown || Kind == OMPC_update || Kind == OMPC_write) { |
6627 | AO = llvm::AtomicOrdering::Release; |
6628 | } else if (Kind == OMPC_read) { |
6630 | AO = llvm::AtomicOrdering::Acquire; |
6631 | } |
6632 | } |
6633 | } |
6634 | |
6635 | if (KindsEncountered.contains(V: OMPC_compare) && |
6636 | KindsEncountered.contains(V: OMPC_fail)) { |
6637 | Kind = OMPC_compare; |
6638 | const auto *FailClause = S.getSingleClause<OMPFailClause>(); |
6639 | if (FailClause) { |
6640 | OpenMPClauseKind FailParameter = FailClause->getFailParameter(); |
6641 | if (FailParameter == llvm::omp::OMPC_relaxed) |
6642 | FailAO = llvm::AtomicOrdering::Monotonic; |
6643 | else if (FailParameter == llvm::omp::OMPC_acquire) |
6644 | FailAO = llvm::AtomicOrdering::Acquire; |
6645 | else if (FailParameter == llvm::omp::OMPC_seq_cst) |
6646 | FailAO = llvm::AtomicOrdering::SequentiallyConsistent; |
6647 | } |
6648 | } |
6649 | |
6650 | LexicalScope Scope(*this, S.getSourceRange()); |
6651 | EmitStopPoint(S: S.getAssociatedStmt()); |
6652 | emitOMPAtomicExpr(CGF&: *this, Kind, AO, FailAO, IsPostfixUpdate: S.isPostfixUpdate(), X: S.getX(), |
6653 | V: S.getV(), R: S.getR(), E: S.getExpr(), UE: S.getUpdateExpr(), |
6654 | D: S.getD(), CE: S.getCondExpr(), IsXLHSInRHSPart: S.isXLHSInRHSPart(), |
6655 | IsFailOnly: S.isFailOnly(), Loc: S.getBeginLoc()); |
6656 | } |
6657 | |
6658 | static void emitCommonOMPTargetDirective(CodeGenFunction &CGF, |
6659 | const OMPExecutableDirective &S, |
6660 | const RegionCodeGenTy &CodeGen) { |
6661 | assert(isOpenMPTargetExecutionDirective(S.getDirectiveKind())); |
6662 | CodeGenModule &CGM = CGF.CGM; |
6663 | |
6664 | // On device emit this construct as inlined code. |
6665 | if (CGM.getLangOpts().OpenMPIsTargetDevice) { |
6666 | OMPLexicalScope Scope(CGF, S, OMPD_target); |
6667 | CGM.getOpenMPRuntime().emitInlinedDirective( |
6668 | CGF, InnermostKind: OMPD_target, CodeGen: [&S](CodeGenFunction &CGF, PrePostActionTy &) { |
6669 | CGF.EmitStmt(S: S.getInnermostCapturedStmt()->getCapturedStmt()); |
6670 | }); |
6671 | return; |
6672 | } |
6673 | |
6674 | auto LPCRegion = CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF, S); |
6675 | llvm::Function *Fn = nullptr; |
6676 | llvm::Constant *FnID = nullptr; |
6677 | |
6678 | const Expr *IfCond = nullptr; |
  // Check for the (at most one) 'if' clause associated with the target
  // region.
6680 | for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { |
6681 | if (C->getNameModifier() == OMPD_unknown || |
6682 | C->getNameModifier() == OMPD_target) { |
6683 | IfCond = C->getCondition(); |
6684 | break; |
6685 | } |
6686 | } |
6687 | |
6688 | // Check if we have any device clause associated with the directive. |
6689 | llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device( |
6690 | nullptr, OMPC_DEVICE_unknown); |
6691 | if (auto *C = S.getSingleClause<OMPDeviceClause>()) |
6692 | Device.setPointerAndInt(PtrVal: C->getDevice(), IntVal: C->getModifier()); |
6693 | |
  // Check if we have an 'if' clause whose condition always evaluates to
  // false, or if we do not have any targets specified. If so, the target
  // region is not an offload entry point.
6697 | bool IsOffloadEntry = true; |
6698 | if (IfCond) { |
6699 | bool Val; |
6700 | if (CGF.ConstantFoldsToSimpleInteger(Cond: IfCond, Result&: Val) && !Val) |
6701 | IsOffloadEntry = false; |
6702 | } |
6703 | if (CGM.getLangOpts().OMPTargetTriples.empty()) |
6704 | IsOffloadEntry = false; |
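  // For example (illustrative): '#pragma omp target if(0)' constant-folds the
  // condition to false, so the region is not registered as an offload entry
  // point.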
6705 | |
6706 | if (CGM.getLangOpts().OpenMPOffloadMandatory && !IsOffloadEntry) { |
6707 | unsigned DiagID = CGM.getDiags().getCustomDiagID( |
6708 | L: DiagnosticsEngine::Error, |
6709 | FormatString: "No offloading entry generated while offloading is mandatory." ); |
6710 | CGM.getDiags().Report(DiagID); |
6711 | } |
6712 | |
6713 | assert(CGF.CurFuncDecl && "No parent declaration for target region!" ); |
6714 | StringRef ParentName; |
  // In case we have ctors/dtors, we use the complete-object variant to
  // produce the mangled name of the device-outlined kernel.
6717 | if (const auto *D = dyn_cast<CXXConstructorDecl>(Val: CGF.CurFuncDecl)) |
6718 | ParentName = CGM.getMangledName(GD: GlobalDecl(D, Ctor_Complete)); |
6719 | else if (const auto *D = dyn_cast<CXXDestructorDecl>(Val: CGF.CurFuncDecl)) |
6720 | ParentName = CGM.getMangledName(GD: GlobalDecl(D, Dtor_Complete)); |
6721 | else |
6722 | ParentName = |
6723 | CGM.getMangledName(GD: GlobalDecl(cast<FunctionDecl>(Val: CGF.CurFuncDecl))); |
6724 | |
6725 | // Emit target region as a standalone region. |
6726 | CGM.getOpenMPRuntime().emitTargetOutlinedFunction(D: S, ParentName, OutlinedFn&: Fn, OutlinedFnID&: FnID, |
6727 | IsOffloadEntry, CodeGen); |
6728 | OMPLexicalScope Scope(CGF, S, OMPD_task); |
6729 | auto &&SizeEmitter = |
6730 | [IsOffloadEntry](CodeGenFunction &CGF, |
6731 | const OMPLoopDirective &D) -> llvm::Value * { |
6732 | if (IsOffloadEntry) { |
6733 | OMPLoopScope(CGF, D); |
      // Emit the calculation of the iteration count.
6735 | llvm::Value *NumIterations = CGF.EmitScalarExpr(E: D.getNumIterations()); |
6736 | NumIterations = CGF.Builder.CreateIntCast(V: NumIterations, DestTy: CGF.Int64Ty, |
6737 | /*isSigned=*/false); |
6738 | return NumIterations; |
6739 | } |
6740 | return nullptr; |
6741 | }; |
6742 | CGM.getOpenMPRuntime().emitTargetCall(CGF, D: S, OutlinedFn: Fn, OutlinedFnID: FnID, IfCond, Device, |
6743 | SizeEmitter); |
6744 | } |
6745 | |
6746 | static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S, |
6747 | PrePostActionTy &Action) { |
6748 | Action.Enter(CGF); |
6749 | CodeGenFunction::OMPPrivateScope PrivateScope(CGF); |
6750 | (void)CGF.EmitOMPFirstprivateClause(D: S, PrivateScope); |
6751 | CGF.EmitOMPPrivateClause(D: S, PrivateScope); |
6752 | (void)PrivateScope.Privatize(); |
6753 | if (isOpenMPTargetExecutionDirective(DKind: S.getDirectiveKind())) |
6754 | CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, D: S); |
6755 | |
6756 | CGF.EmitStmt(S: S.getCapturedStmt(RegionKind: OMPD_target)->getCapturedStmt()); |
6757 | CGF.EnsureInsertPoint(); |
6758 | } |
6759 | |
6760 | void CodeGenFunction::EmitOMPTargetDeviceFunction(CodeGenModule &CGM, |
6761 | StringRef ParentName, |
6762 | const OMPTargetDirective &S) { |
6763 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
6764 | emitTargetRegion(CGF, S, Action); |
6765 | }; |
6766 | llvm::Function *Fn; |
6767 | llvm::Constant *Addr; |
6768 | // Emit target region as a standalone region. |
6769 | CGM.getOpenMPRuntime().emitTargetOutlinedFunction( |
6770 | D: S, ParentName, OutlinedFn&: Fn, OutlinedFnID&: Addr, /*IsOffloadEntry=*/true, CodeGen); |
6771 | assert(Fn && Addr && "Target device function emission failed." ); |
6772 | } |
6773 | |
6774 | void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) { |
6775 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
6776 | emitTargetRegion(CGF, S, Action); |
6777 | }; |
6778 | emitCommonOMPTargetDirective(CGF&: *this, S, CodeGen); |
6779 | } |
6780 | |
6781 | static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF, |
6782 | const OMPExecutableDirective &S, |
6783 | OpenMPDirectiveKind InnermostKind, |
6784 | const RegionCodeGenTy &CodeGen) { |
6785 | const CapturedStmt *CS = S.getCapturedStmt(RegionKind: OMPD_teams); |
6786 | llvm::Function *OutlinedFn = |
6787 | CGF.CGM.getOpenMPRuntime().emitTeamsOutlinedFunction( |
6788 | CGF, D: S, ThreadIDVar: *CS->getCapturedDecl()->param_begin(), InnermostKind, |
6789 | CodeGen); |
6790 | |
6791 | const auto *NT = S.getSingleClause<OMPNumTeamsClause>(); |
6792 | const auto *TL = S.getSingleClause<OMPThreadLimitClause>(); |
6793 | if (NT || TL) { |
6794 | const Expr *NumTeams = NT ? NT->getNumTeams() : nullptr; |
6795 | const Expr *ThreadLimit = TL ? TL->getThreadLimit() : nullptr; |
6796 | |
6797 | CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeams, ThreadLimit, |
6798 | Loc: S.getBeginLoc()); |
6799 | } |
6800 | |
6801 | OMPTeamsScope Scope(CGF, S); |
6802 | llvm::SmallVector<llvm::Value *, 16> CapturedVars; |
6803 | CGF.GenerateOpenMPCapturedVars(S: *CS, CapturedVars); |
6804 | CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, D: S, Loc: S.getBeginLoc(), OutlinedFn, |
6805 | CapturedVars); |
6806 | } |
6807 | |
6808 | void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) { |
6809 | // Emit teams region as a standalone region. |
6810 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
6811 | Action.Enter(CGF); |
6812 | OMPPrivateScope PrivateScope(CGF); |
6813 | (void)CGF.EmitOMPFirstprivateClause(D: S, PrivateScope); |
6814 | CGF.EmitOMPPrivateClause(D: S, PrivateScope); |
6815 | CGF.EmitOMPReductionClauseInit(D: S, PrivateScope); |
6816 | (void)PrivateScope.Privatize(); |
6817 | CGF.EmitStmt(S: S.getCapturedStmt(RegionKind: OMPD_teams)->getCapturedStmt()); |
6818 | CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_teams); |
6819 | }; |
6820 | emitCommonOMPTeamsDirective(CGF&: *this, S, InnermostKind: OMPD_distribute, CodeGen); |
6821 | emitPostUpdateForReductionClause(CGF&: *this, D: S, |
6822 | CondGen: [](CodeGenFunction &) { return nullptr; }); |
6823 | } |
6824 | |
6825 | static void emitTargetTeamsRegion(CodeGenFunction &CGF, PrePostActionTy &Action, |
6826 | const OMPTargetTeamsDirective &S) { |
6827 | auto *CS = S.getCapturedStmt(RegionKind: OMPD_teams); |
6828 | Action.Enter(CGF); |
6829 | // Emit teams region as a standalone region. |
6830 | auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) { |
6831 | Action.Enter(CGF); |
6832 | CodeGenFunction::OMPPrivateScope PrivateScope(CGF); |
6833 | (void)CGF.EmitOMPFirstprivateClause(D: S, PrivateScope); |
6834 | CGF.EmitOMPPrivateClause(D: S, PrivateScope); |
6835 | CGF.EmitOMPReductionClauseInit(D: S, PrivateScope); |
6836 | (void)PrivateScope.Privatize(); |
6837 | if (isOpenMPTargetExecutionDirective(DKind: S.getDirectiveKind())) |
6838 | CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, D: S); |
6839 | CGF.EmitStmt(S: CS->getCapturedStmt()); |
6840 | CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_teams); |
6841 | }; |
6842 | emitCommonOMPTeamsDirective(CGF, S, InnermostKind: OMPD_teams, CodeGen); |
6843 | emitPostUpdateForReductionClause(CGF, D: S, |
6844 | CondGen: [](CodeGenFunction &) { return nullptr; }); |
6845 | } |
6846 | |
6847 | void CodeGenFunction::EmitOMPTargetTeamsDeviceFunction( |
6848 | CodeGenModule &CGM, StringRef ParentName, |
6849 | const OMPTargetTeamsDirective &S) { |
6850 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
6851 | emitTargetTeamsRegion(CGF, Action, S); |
6852 | }; |
6853 | llvm::Function *Fn; |
6854 | llvm::Constant *Addr; |
6855 | // Emit target region as a standalone region. |
6856 | CGM.getOpenMPRuntime().emitTargetOutlinedFunction( |
6857 | D: S, ParentName, OutlinedFn&: Fn, OutlinedFnID&: Addr, /*IsOffloadEntry=*/true, CodeGen); |
6858 | assert(Fn && Addr && "Target device function emission failed." ); |
6859 | } |
6860 | |
6861 | void CodeGenFunction::EmitOMPTargetTeamsDirective( |
6862 | const OMPTargetTeamsDirective &S) { |
6863 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
6864 | emitTargetTeamsRegion(CGF, Action, S); |
6865 | }; |
6866 | emitCommonOMPTargetDirective(CGF&: *this, S, CodeGen); |
6867 | } |
6868 | |
6869 | static void |
6870 | emitTargetTeamsDistributeRegion(CodeGenFunction &CGF, PrePostActionTy &Action, |
6871 | const OMPTargetTeamsDistributeDirective &S) { |
6872 | Action.Enter(CGF); |
6873 | auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { |
6874 | CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitOMPLoopBodyWithStopPoint, IncExpr: S.getInc()); |
6875 | }; |
6876 | |
6877 | // Emit teams region as a standalone region. |
6878 | auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF, |
6879 | PrePostActionTy &Action) { |
6880 | Action.Enter(CGF); |
6881 | CodeGenFunction::OMPPrivateScope PrivateScope(CGF); |
6882 | CGF.EmitOMPReductionClauseInit(D: S, PrivateScope); |
6883 | (void)PrivateScope.Privatize(); |
6884 | CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, InnermostKind: OMPD_distribute, |
6885 | CodeGen: CodeGenDistribute); |
6886 | CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_teams); |
6887 | }; |
6888 | emitCommonOMPTeamsDirective(CGF, S, InnermostKind: OMPD_distribute, CodeGen); |
6889 | emitPostUpdateForReductionClause(CGF, D: S, |
6890 | CondGen: [](CodeGenFunction &) { return nullptr; }); |
6891 | } |
6892 | |
6893 | void CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction( |
6894 | CodeGenModule &CGM, StringRef ParentName, |
6895 | const OMPTargetTeamsDistributeDirective &S) { |
6896 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
6897 | emitTargetTeamsDistributeRegion(CGF, Action, S); |
6898 | }; |
6899 | llvm::Function *Fn; |
6900 | llvm::Constant *Addr; |
6901 | // Emit target region as a standalone region. |
6902 | CGM.getOpenMPRuntime().emitTargetOutlinedFunction( |
6903 | D: S, ParentName, OutlinedFn&: Fn, OutlinedFnID&: Addr, /*IsOffloadEntry=*/true, CodeGen); |
6904 | assert(Fn && Addr && "Target device function emission failed." ); |
6905 | } |
6906 | |
6907 | void CodeGenFunction::EmitOMPTargetTeamsDistributeDirective( |
6908 | const OMPTargetTeamsDistributeDirective &S) { |
6909 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
6910 | emitTargetTeamsDistributeRegion(CGF, Action, S); |
6911 | }; |
6912 | emitCommonOMPTargetDirective(CGF&: *this, S, CodeGen); |
6913 | } |
6914 | |
6915 | static void emitTargetTeamsDistributeSimdRegion( |
6916 | CodeGenFunction &CGF, PrePostActionTy &Action, |
6917 | const OMPTargetTeamsDistributeSimdDirective &S) { |
6918 | Action.Enter(CGF); |
6919 | auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { |
6920 | CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitOMPLoopBodyWithStopPoint, IncExpr: S.getInc()); |
6921 | }; |
6922 | |
6923 | // Emit teams region as a standalone region. |
6924 | auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF, |
6925 | PrePostActionTy &Action) { |
6926 | Action.Enter(CGF); |
6927 | CodeGenFunction::OMPPrivateScope PrivateScope(CGF); |
6928 | CGF.EmitOMPReductionClauseInit(D: S, PrivateScope); |
6929 | (void)PrivateScope.Privatize(); |
6930 | CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, InnermostKind: OMPD_distribute, |
6931 | CodeGen: CodeGenDistribute); |
6932 | CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_teams); |
6933 | }; |
6934 | emitCommonOMPTeamsDirective(CGF, S, InnermostKind: OMPD_distribute_simd, CodeGen); |
6935 | emitPostUpdateForReductionClause(CGF, D: S, |
6936 | CondGen: [](CodeGenFunction &) { return nullptr; }); |
6937 | } |
6938 | |
6939 | void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction( |
6940 | CodeGenModule &CGM, StringRef ParentName, |
6941 | const OMPTargetTeamsDistributeSimdDirective &S) { |
6942 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
6943 | emitTargetTeamsDistributeSimdRegion(CGF, Action, S); |
6944 | }; |
6945 | llvm::Function *Fn; |
6946 | llvm::Constant *Addr; |
6947 | // Emit target region as a standalone region. |
6948 | CGM.getOpenMPRuntime().emitTargetOutlinedFunction( |
6949 | D: S, ParentName, OutlinedFn&: Fn, OutlinedFnID&: Addr, /*IsOffloadEntry=*/true, CodeGen); |
6950 | assert(Fn && Addr && "Target device function emission failed." ); |
6951 | } |
6952 | |
6953 | void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDirective( |
6954 | const OMPTargetTeamsDistributeSimdDirective &S) { |
6955 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
6956 | emitTargetTeamsDistributeSimdRegion(CGF, Action, S); |
6957 | }; |
6958 | emitCommonOMPTargetDirective(CGF&: *this, S, CodeGen); |
6959 | } |
6960 | |
6961 | void CodeGenFunction::EmitOMPTeamsDistributeDirective( |
6962 | const OMPTeamsDistributeDirective &S) { |
6963 | |
6964 | auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { |
6965 | CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitOMPLoopBodyWithStopPoint, IncExpr: S.getInc()); |
6966 | }; |
6967 | |
6968 | // Emit teams region as a standalone region. |
6969 | auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF, |
6970 | PrePostActionTy &Action) { |
6971 | Action.Enter(CGF); |
6972 | OMPPrivateScope PrivateScope(CGF); |
6973 | CGF.EmitOMPReductionClauseInit(D: S, PrivateScope); |
6974 | (void)PrivateScope.Privatize(); |
6975 | CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, InnermostKind: OMPD_distribute, |
6976 | CodeGen: CodeGenDistribute); |
6977 | CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_teams); |
6978 | }; |
6979 | emitCommonOMPTeamsDirective(CGF&: *this, S, InnermostKind: OMPD_distribute, CodeGen); |
6980 | emitPostUpdateForReductionClause(CGF&: *this, D: S, |
6981 | CondGen: [](CodeGenFunction &) { return nullptr; }); |
6982 | } |
6983 | |
6984 | void CodeGenFunction::EmitOMPTeamsDistributeSimdDirective( |
6985 | const OMPTeamsDistributeSimdDirective &S) { |
6986 | auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { |
6987 | CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitOMPLoopBodyWithStopPoint, IncExpr: S.getInc()); |
6988 | }; |
6989 | |
6990 | // Emit teams region as a standalone region. |
6991 | auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF, |
6992 | PrePostActionTy &Action) { |
6993 | Action.Enter(CGF); |
6994 | OMPPrivateScope PrivateScope(CGF); |
6995 | CGF.EmitOMPReductionClauseInit(D: S, PrivateScope); |
6996 | (void)PrivateScope.Privatize(); |
6997 | CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, InnermostKind: OMPD_simd, |
6998 | CodeGen: CodeGenDistribute); |
6999 | CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_teams); |
7000 | }; |
7001 | emitCommonOMPTeamsDirective(CGF&: *this, S, InnermostKind: OMPD_distribute_simd, CodeGen); |
7002 | emitPostUpdateForReductionClause(CGF&: *this, D: S, |
7003 | CondGen: [](CodeGenFunction &) { return nullptr; }); |
7004 | } |
7005 | |
7006 | void CodeGenFunction::EmitOMPTeamsDistributeParallelForDirective( |
7007 | const OMPTeamsDistributeParallelForDirective &S) { |
7008 | auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { |
7009 | CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitInnerParallelForWhenCombined, |
7010 | IncExpr: S.getDistInc()); |
7011 | }; |
7012 | |
7013 | // Emit teams region as a standalone region. |
7014 | auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF, |
7015 | PrePostActionTy &Action) { |
7016 | Action.Enter(CGF); |
7017 | OMPPrivateScope PrivateScope(CGF); |
7018 | CGF.EmitOMPReductionClauseInit(D: S, PrivateScope); |
7019 | (void)PrivateScope.Privatize(); |
7020 | CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, InnermostKind: OMPD_distribute, |
7021 | CodeGen: CodeGenDistribute); |
7022 | CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_teams); |
7023 | }; |
7024 | emitCommonOMPTeamsDirective(CGF&: *this, S, InnermostKind: OMPD_distribute_parallel_for, CodeGen); |
7025 | emitPostUpdateForReductionClause(CGF&: *this, D: S, |
7026 | CondGen: [](CodeGenFunction &) { return nullptr; }); |
7027 | } |
7028 | |
7029 | void CodeGenFunction::EmitOMPTeamsDistributeParallelForSimdDirective( |
7030 | const OMPTeamsDistributeParallelForSimdDirective &S) { |
7031 | auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { |
7032 | CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitInnerParallelForWhenCombined, |
7033 | IncExpr: S.getDistInc()); |
7034 | }; |
7035 | |
7036 | // Emit teams region as a standalone region. |
7037 | auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF, |
7038 | PrePostActionTy &Action) { |
7039 | Action.Enter(CGF); |
7040 | OMPPrivateScope PrivateScope(CGF); |
7041 | CGF.EmitOMPReductionClauseInit(D: S, PrivateScope); |
7042 | (void)PrivateScope.Privatize(); |
7043 | CGF.CGM.getOpenMPRuntime().emitInlinedDirective( |
7044 | CGF, InnermostKind: OMPD_distribute, CodeGen: CodeGenDistribute, /*HasCancel=*/false); |
7045 | CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_teams); |
7046 | }; |
7047 | emitCommonOMPTeamsDirective(CGF&: *this, S, InnermostKind: OMPD_distribute_parallel_for_simd, |
7048 | CodeGen); |
7049 | emitPostUpdateForReductionClause(CGF&: *this, D: S, |
7050 | CondGen: [](CodeGenFunction &) { return nullptr; }); |
7051 | } |
7052 | |
7053 | void CodeGenFunction::EmitOMPInteropDirective(const OMPInteropDirective &S) { |
7054 | llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); |
7055 | llvm::Value *Device = nullptr; |
7056 | llvm::Value *NumDependences = nullptr; |
7057 | llvm::Value *DependenceList = nullptr; |
7058 | |
7059 | if (const auto *C = S.getSingleClause<OMPDeviceClause>()) |
7060 | Device = EmitScalarExpr(E: C->getDevice()); |
7061 | |
7062 | // Build list and emit dependences |
7063 | OMPTaskDataTy Data; |
7064 | buildDependences(S, Data); |
7065 | if (!Data.Dependences.empty()) { |
7066 | Address DependenciesArray = Address::invalid(); |
7067 | std::tie(args&: NumDependences, args&: DependenciesArray) = |
7068 | CGM.getOpenMPRuntime().emitDependClause(CGF&: *this, Dependencies: Data.Dependences, |
7069 | Loc: S.getBeginLoc()); |
7070 | DependenceList = DependenciesArray.emitRawPointer(CGF&: *this); |
7071 | } |
7072 | Data.HasNowaitClause = S.hasClausesOfKind<OMPNowaitClause>(); |
7073 | |
  assert(!(Data.HasNowaitClause && !(S.getSingleClause<OMPInitClause>() ||
                                     S.getSingleClause<OMPDestroyClause>() ||
                                     S.getSingleClause<OMPUseClause>())) &&
         "A 'nowait' clause on an 'interop' directive must be accompanied by "
         "an 'init', 'destroy', or 'use' clause.");
7078 | |
7079 | auto ItOMPInitClause = S.getClausesOfKind<OMPInitClause>(); |
7080 | if (!ItOMPInitClause.empty()) { |
    // Process each of the (possibly multiple) 'init' clauses.
7082 | for (const OMPInitClause *C : ItOMPInitClause) { |
7083 | llvm::Value *InteropvarPtr = |
7084 | EmitLValue(E: C->getInteropVar()).getPointer(CGF&: *this); |
7085 | llvm::omp::OMPInteropType InteropType = |
7086 | llvm::omp::OMPInteropType::Unknown; |
7087 | if (C->getIsTarget()) { |
7088 | InteropType = llvm::omp::OMPInteropType::Target; |
7089 | } else { |
7090 | assert(C->getIsTargetSync() && |
7091 | "Expected interop-type target/targetsync" ); |
7092 | InteropType = llvm::omp::OMPInteropType::TargetSync; |
7093 | } |
7094 | OMPBuilder.createOMPInteropInit(Loc: Builder, InteropVar: InteropvarPtr, InteropType, |
7095 | Device, NumDependences, DependenceAddress: DependenceList, |
7096 | HaveNowaitClause: Data.HasNowaitClause); |
7097 | } |
7098 | } |
7099 | auto ItOMPDestroyClause = S.getClausesOfKind<OMPDestroyClause>(); |
7100 | if (!ItOMPDestroyClause.empty()) { |
    // Process each of the (possibly multiple) 'destroy' clauses.
7102 | for (const OMPDestroyClause *C : ItOMPDestroyClause) { |
7103 | llvm::Value *InteropvarPtr = |
7104 | EmitLValue(E: C->getInteropVar()).getPointer(CGF&: *this); |
7105 | OMPBuilder.createOMPInteropDestroy(Loc: Builder, InteropVar: InteropvarPtr, Device, |
7106 | NumDependences, DependenceAddress: DependenceList, |
7107 | HaveNowaitClause: Data.HasNowaitClause); |
7108 | } |
7109 | } |
7110 | auto ItOMPUseClause = S.getClausesOfKind<OMPUseClause>(); |
7111 | if (!ItOMPUseClause.empty()) { |
    // Process each of the (possibly multiple) 'use' clauses.
7113 | for (const OMPUseClause *C : ItOMPUseClause) { |
7114 | llvm::Value *InteropvarPtr = |
7115 | EmitLValue(E: C->getInteropVar()).getPointer(CGF&: *this); |
7116 | OMPBuilder.createOMPInteropUse(Loc: Builder, InteropVar: InteropvarPtr, Device, |
7117 | NumDependences, DependenceAddress: DependenceList, |
7118 | HaveNowaitClause: Data.HasNowaitClause); |
7119 | } |
7120 | } |
7121 | } |
7122 | |
7123 | static void emitTargetTeamsDistributeParallelForRegion( |
7124 | CodeGenFunction &CGF, const OMPTargetTeamsDistributeParallelForDirective &S, |
7125 | PrePostActionTy &Action) { |
7126 | Action.Enter(CGF); |
7127 | auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { |
7128 | CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitInnerParallelForWhenCombined, |
7129 | IncExpr: S.getDistInc()); |
7130 | }; |
7131 | |
7132 | // Emit teams region as a standalone region. |
7133 | auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF, |
7134 | PrePostActionTy &Action) { |
7135 | Action.Enter(CGF); |
7136 | CodeGenFunction::OMPPrivateScope PrivateScope(CGF); |
7137 | CGF.EmitOMPReductionClauseInit(D: S, PrivateScope); |
7138 | (void)PrivateScope.Privatize(); |
7139 | CGF.CGM.getOpenMPRuntime().emitInlinedDirective( |
7140 | CGF, InnermostKind: OMPD_distribute, CodeGen: CodeGenDistribute, /*HasCancel=*/false); |
7141 | CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_teams); |
7142 | }; |
7143 | |
7144 | emitCommonOMPTeamsDirective(CGF, S, InnermostKind: OMPD_distribute_parallel_for, |
7145 | CodeGen: CodeGenTeams); |
7146 | emitPostUpdateForReductionClause(CGF, D: S, |
7147 | CondGen: [](CodeGenFunction &) { return nullptr; }); |
7148 | } |
7149 | |
7150 | void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction( |
7151 | CodeGenModule &CGM, StringRef ParentName, |
7152 | const OMPTargetTeamsDistributeParallelForDirective &S) { |
7153 | // Emit SPMD target teams distribute parallel for region as a standalone |
7154 | // region. |
7155 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
7156 | emitTargetTeamsDistributeParallelForRegion(CGF, S, Action); |
7157 | }; |
7158 | llvm::Function *Fn; |
7159 | llvm::Constant *Addr; |
7160 | // Emit target region as a standalone region. |
7161 | CGM.getOpenMPRuntime().emitTargetOutlinedFunction( |
7162 | D: S, ParentName, OutlinedFn&: Fn, OutlinedFnID&: Addr, /*IsOffloadEntry=*/true, CodeGen); |
7163 | assert(Fn && Addr && "Target device function emission failed." ); |
7164 | } |
7165 | |
7166 | void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDirective( |
7167 | const OMPTargetTeamsDistributeParallelForDirective &S) { |
7168 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
7169 | emitTargetTeamsDistributeParallelForRegion(CGF, S, Action); |
7170 | }; |
7171 | emitCommonOMPTargetDirective(CGF&: *this, S, CodeGen); |
7172 | } |
7173 | |
7174 | static void emitTargetTeamsDistributeParallelForSimdRegion( |
7175 | CodeGenFunction &CGF, |
7176 | const OMPTargetTeamsDistributeParallelForSimdDirective &S, |
7177 | PrePostActionTy &Action) { |
7178 | Action.Enter(CGF); |
7179 | auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { |
7180 | CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitInnerParallelForWhenCombined, |
7181 | IncExpr: S.getDistInc()); |
7182 | }; |
7183 | |
7184 | // Emit teams region as a standalone region. |
7185 | auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF, |
7186 | PrePostActionTy &Action) { |
7187 | Action.Enter(CGF); |
7188 | CodeGenFunction::OMPPrivateScope PrivateScope(CGF); |
7189 | CGF.EmitOMPReductionClauseInit(D: S, PrivateScope); |
7190 | (void)PrivateScope.Privatize(); |
7191 | CGF.CGM.getOpenMPRuntime().emitInlinedDirective( |
7192 | CGF, InnermostKind: OMPD_distribute, CodeGen: CodeGenDistribute, /*HasCancel=*/false); |
7193 | CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_teams); |
7194 | }; |
7195 | |
7196 | emitCommonOMPTeamsDirective(CGF, S, InnermostKind: OMPD_distribute_parallel_for_simd, |
7197 | CodeGen: CodeGenTeams); |
7198 | emitPostUpdateForReductionClause(CGF, D: S, |
7199 | CondGen: [](CodeGenFunction &) { return nullptr; }); |
7200 | } |
7201 | |
7202 | void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction( |
7203 | CodeGenModule &CGM, StringRef ParentName, |
7204 | const OMPTargetTeamsDistributeParallelForSimdDirective &S) { |
7205 | // Emit SPMD target teams distribute parallel for simd region as a standalone |
7206 | // region. |
7207 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
7208 | emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action); |
7209 | }; |
7210 | llvm::Function *Fn; |
7211 | llvm::Constant *Addr; |
7212 | // Emit target region as a standalone region. |
7213 | CGM.getOpenMPRuntime().emitTargetOutlinedFunction( |
7214 | D: S, ParentName, OutlinedFn&: Fn, OutlinedFnID&: Addr, /*IsOffloadEntry=*/true, CodeGen); |
7215 | assert(Fn && Addr && "Target device function emission failed." ); |
7216 | } |
7217 | |
7218 | void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDirective( |
7219 | const OMPTargetTeamsDistributeParallelForSimdDirective &S) { |
7220 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
7221 | emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action); |
7222 | }; |
7223 | emitCommonOMPTargetDirective(CGF&: *this, S, CodeGen); |
7224 | } |
7225 | |
7226 | void CodeGenFunction::EmitOMPCancellationPointDirective( |
7227 | const OMPCancellationPointDirective &S) { |
7228 | CGM.getOpenMPRuntime().emitCancellationPointCall(CGF&: *this, Loc: S.getBeginLoc(), |
7229 | CancelRegion: S.getCancelRegion()); |
7230 | } |
7231 | |
7232 | void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) { |
7233 | const Expr *IfCond = nullptr; |
7234 | for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { |
7235 | if (C->getNameModifier() == OMPD_unknown || |
7236 | C->getNameModifier() == OMPD_cancel) { |
7237 | IfCond = C->getCondition(); |
7238 | break; |
7239 | } |
7240 | } |
7241 | if (CGM.getLangOpts().OpenMPIRBuilder) { |
7242 | llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); |
7243 | // TODO: This check is necessary as we only generate `omp parallel` through |
7244 | // the OpenMPIRBuilder for now. |
7245 | if (S.getCancelRegion() == OMPD_parallel || |
7246 | S.getCancelRegion() == OMPD_sections || |
7247 | S.getCancelRegion() == OMPD_section) { |
7248 | llvm::Value *IfCondition = nullptr; |
7249 | if (IfCond) |
7250 | IfCondition = EmitScalarExpr(E: IfCond, |
7251 | /*IgnoreResultAssign=*/true); |
7252 | return Builder.restoreIP( |
7253 | IP: OMPBuilder.createCancel(Loc: Builder, IfCondition, CanceledDirective: S.getCancelRegion())); |
7254 | } |
7255 | } |
7256 | |
7257 | CGM.getOpenMPRuntime().emitCancelCall(CGF&: *this, Loc: S.getBeginLoc(), IfCond, |
7258 | CancelRegion: S.getCancelRegion()); |
7259 | } |
7260 | |
7261 | CodeGenFunction::JumpDest |
7262 | CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) { |
7263 | if (Kind == OMPD_parallel || Kind == OMPD_task || |
7264 | Kind == OMPD_target_parallel || Kind == OMPD_taskloop || |
7265 | Kind == OMPD_master_taskloop || Kind == OMPD_parallel_master_taskloop) |
7266 | return ReturnBlock; |
7267 | assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections || |
7268 | Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for || |
7269 | Kind == OMPD_distribute_parallel_for || |
7270 | Kind == OMPD_target_parallel_for || |
7271 | Kind == OMPD_teams_distribute_parallel_for || |
7272 | Kind == OMPD_target_teams_distribute_parallel_for); |
7273 | return OMPCancelStack.getExitBlock(); |
7274 | } |
7275 | |
7276 | void CodeGenFunction::EmitOMPUseDevicePtrClause( |
7277 | const OMPUseDevicePtrClause &C, OMPPrivateScope &PrivateScope, |
7278 | const llvm::DenseMap<const ValueDecl *, llvm::Value *> |
7279 | CaptureDeviceAddrMap) { |
7280 | llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed; |
7281 | for (const Expr *OrigVarIt : C.varlists()) { |
7282 | const auto *OrigVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: OrigVarIt)->getDecl()); |
7283 | if (!Processed.insert(V: OrigVD).second) |
7284 | continue; |
7285 | |
    // To identify the right initializer we need to match the declaration
    // used by the mapping logic. In some cases we may get an
    // OMPCapturedExprDecl that refers to the original declaration.
7289 | const ValueDecl *MatchingVD = OrigVD; |
7290 | if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(Val: MatchingVD)) { |
      // OMPCapturedExprDecls are used to privatize fields of the current
      // structure.
      const auto *ME = cast<MemberExpr>(Val: OED->getInit());
      assert(isa<CXXThisExpr>(ME->getBase()->IgnoreImpCasts()) &&
             "Base should be the current struct!");
7296 | MatchingVD = ME->getMemberDecl(); |
7297 | } |
7298 | |
7299 | // If we don't have information about the current list item, move on to |
7300 | // the next one. |
7301 | auto InitAddrIt = CaptureDeviceAddrMap.find(Val: MatchingVD); |
7302 | if (InitAddrIt == CaptureDeviceAddrMap.end()) |
7303 | continue; |
7304 | |
7305 | llvm::Type *Ty = ConvertTypeForMem(T: OrigVD->getType().getNonReferenceType()); |
7306 | |
7307 | // Return the address of the private variable. |
7308 | bool IsRegistered = PrivateScope.addPrivate( |
7309 | LocalVD: OrigVD, |
7310 | Addr: Address(InitAddrIt->second, Ty, |
7311 | getContext().getTypeAlignInChars(T: getContext().VoidPtrTy))); |
7312 | assert(IsRegistered && "firstprivate var already registered as private" ); |
7313 | // Silence the warning about unused variable. |
7314 | (void)IsRegistered; |
7315 | } |
7316 | } |
7317 | |
7318 | static const VarDecl *getBaseDecl(const Expr *Ref) { |
7319 | const Expr *Base = Ref->IgnoreParenImpCasts(); |
7320 | while (const auto *OASE = dyn_cast<ArraySectionExpr>(Val: Base)) |
7321 | Base = OASE->getBase()->IgnoreParenImpCasts(); |
7322 | while (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Val: Base)) |
7323 | Base = ASE->getBase()->IgnoreParenImpCasts(); |
7324 | return cast<VarDecl>(Val: cast<DeclRefExpr>(Val: Base)->getDecl()); |
7325 | } |
7326 | |
7327 | void CodeGenFunction::EmitOMPUseDeviceAddrClause( |
7328 | const OMPUseDeviceAddrClause &C, OMPPrivateScope &PrivateScope, |
7329 | const llvm::DenseMap<const ValueDecl *, llvm::Value *> |
7330 | CaptureDeviceAddrMap) { |
7331 | llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed; |
7332 | for (const Expr *Ref : C.varlists()) { |
7333 | const VarDecl *OrigVD = getBaseDecl(Ref); |
7334 | if (!Processed.insert(V: OrigVD).second) |
7335 | continue; |
    // To identify the right initializer we need to match the declaration
    // used by the mapping logic. In some cases we may get an
    // OMPCapturedExprDecl that refers to the original declaration.
7339 | const ValueDecl *MatchingVD = OrigVD; |
7340 | if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(Val: MatchingVD)) { |
      // OMPCapturedExprDecls are used to privatize fields of the current
      // structure.
      const auto *ME = cast<MemberExpr>(Val: OED->getInit());
      assert(isa<CXXThisExpr>(ME->getBase()) &&
             "Base should be the current struct!");
7346 | MatchingVD = ME->getMemberDecl(); |
7347 | } |
7348 | |
7349 | // If we don't have information about the current list item, move on to |
7350 | // the next one. |
7351 | auto InitAddrIt = CaptureDeviceAddrMap.find(Val: MatchingVD); |
7352 | if (InitAddrIt == CaptureDeviceAddrMap.end()) |
7353 | continue; |
7354 | |
7355 | llvm::Type *Ty = ConvertTypeForMem(T: OrigVD->getType().getNonReferenceType()); |
7356 | |
7357 | Address PrivAddr = |
7358 | Address(InitAddrIt->second, Ty, |
7359 | getContext().getTypeAlignInChars(T: getContext().VoidPtrTy)); |
    // For declrefs and variable length arrays we need to load the pointer
    // for correct mapping, since the pointer to the data was passed to the
    // runtime.
7362 | if (isa<DeclRefExpr>(Val: Ref->IgnoreParenImpCasts()) || |
7363 | MatchingVD->getType()->isArrayType()) { |
7364 | QualType PtrTy = getContext().getPointerType( |
7365 | T: OrigVD->getType().getNonReferenceType()); |
7366 | PrivAddr = |
7367 | EmitLoadOfPointer(Ptr: PrivAddr.withElementType(ElemTy: ConvertTypeForMem(T: PtrTy)), |
7368 | PtrTy: PtrTy->castAs<PointerType>()); |
7369 | } |
7370 | |
7371 | (void)PrivateScope.addPrivate(LocalVD: OrigVD, Addr: PrivAddr); |
7372 | } |
7373 | } |
7374 | |
// Generate the instructions for the '#pragma omp target data' directive.
7376 | void CodeGenFunction::EmitOMPTargetDataDirective( |
7377 | const OMPTargetDataDirective &S) { |
7378 | CGOpenMPRuntime::TargetDataInfo Info(/*RequiresDevicePointerInfo=*/true, |
7379 | /*SeparateBeginEndCalls=*/true); |
7380 | |
7381 | // Create a pre/post action to signal the privatization of the device pointer. |
7382 | // This action can be replaced by the OpenMP runtime code generation to |
7383 | // deactivate privatization. |
7384 | bool PrivatizeDevicePointers = false; |
7385 | class DevicePointerPrivActionTy : public PrePostActionTy { |
7386 | bool &PrivatizeDevicePointers; |
7387 | |
7388 | public: |
7389 | explicit DevicePointerPrivActionTy(bool &PrivatizeDevicePointers) |
7390 | : PrivatizeDevicePointers(PrivatizeDevicePointers) {} |
7391 | void Enter(CodeGenFunction &CGF) override { |
7392 | PrivatizeDevicePointers = true; |
7393 | } |
7394 | }; |
7395 | DevicePointerPrivActionTy PrivAction(PrivatizeDevicePointers); |
7396 | |
7397 | auto &&CodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) { |
7398 | auto &&InnermostCodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { |
7399 | CGF.EmitStmt(S: S.getInnermostCapturedStmt()->getCapturedStmt()); |
7400 | }; |
7401 | |
7402 | // Codegen that selects whether to generate the privatization code or not. |
7403 | auto &&PrivCodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) { |
7404 | RegionCodeGenTy RCG(InnermostCodeGen); |
7405 | PrivatizeDevicePointers = false; |
7406 | |
7407 | // Call the pre-action to change the status of PrivatizeDevicePointers if |
7408 | // needed. |
7409 | Action.Enter(CGF); |
7410 | |
7411 | if (PrivatizeDevicePointers) { |
7412 | OMPPrivateScope PrivateScope(CGF); |
7413 | // Emit all instances of the use_device_ptr clause. |
7414 | for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>()) |
7415 | CGF.EmitOMPUseDevicePtrClause(C: *C, PrivateScope, |
7416 | CaptureDeviceAddrMap: Info.CaptureDeviceAddrMap); |
7417 | for (const auto *C : S.getClausesOfKind<OMPUseDeviceAddrClause>()) |
7418 | CGF.EmitOMPUseDeviceAddrClause(C: *C, PrivateScope, |
7419 | CaptureDeviceAddrMap: Info.CaptureDeviceAddrMap); |
7420 | (void)PrivateScope.Privatize(); |
7421 | RCG(CGF); |
7422 | } else { |
7423 | // If we don't have target devices, don't bother emitting the data |
7424 | // mapping code. |
7425 | std::optional<OpenMPDirectiveKind> CaptureRegion; |
7426 | if (CGM.getLangOpts().OMPTargetTriples.empty()) { |
7427 | // Emit helper decls of the use_device_ptr/use_device_addr clauses. |
7428 | for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>()) |
7429 | for (const Expr *E : C->varlists()) { |
7430 | const Decl *D = cast<DeclRefExpr>(Val: E)->getDecl(); |
7431 | if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(Val: D)) |
7432 | CGF.EmitVarDecl(D: *OED); |
7433 | } |
7434 | for (const auto *C : S.getClausesOfKind<OMPUseDeviceAddrClause>()) |
7435 | for (const Expr *E : C->varlists()) { |
7436 | const Decl *D = getBaseDecl(Ref: E); |
7437 | if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(Val: D)) |
7438 | CGF.EmitVarDecl(D: *OED); |
7439 | } |
7440 | } else { |
7441 | CaptureRegion = OMPD_unknown; |
7442 | } |
7443 | |
7444 | OMPLexicalScope Scope(CGF, S, CaptureRegion); |
7445 | RCG(CGF); |
7446 | } |
7447 | }; |
7448 | |
7449 | // Forward the provided action to the privatization codegen. |
7450 | RegionCodeGenTy PrivRCG(PrivCodeGen); |
7451 | PrivRCG.setAction(Action); |
7452 | |
    // Although the body of the region is emitted as an inlined directive,
    // we don't use an inline scope: changes to the references inside the
    // region are expected to be visible outside, so we do not privatize
    // them.
7456 | OMPLexicalScope Scope(CGF, S); |
7457 | CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, InnermostKind: OMPD_target_data, |
7458 | CodeGen: PrivRCG); |
7459 | }; |
7460 | |
7461 | RegionCodeGenTy RCG(CodeGen); |
7462 | |
7463 | // If we don't have target devices, don't bother emitting the data mapping |
7464 | // code. |
7465 | if (CGM.getLangOpts().OMPTargetTriples.empty()) { |
7466 | RCG(*this); |
7467 | return; |
7468 | } |
7469 | |
7470 | // Check if we have any if clause associated with the directive. |
7471 | const Expr *IfCond = nullptr; |
7472 | if (const auto *C = S.getSingleClause<OMPIfClause>()) |
7473 | IfCond = C->getCondition(); |
7474 | |
7475 | // Check if we have any device clause associated with the directive. |
7476 | const Expr *Device = nullptr; |
7477 | if (const auto *C = S.getSingleClause<OMPDeviceClause>()) |
7478 | Device = C->getDevice(); |
7479 | |
7480 | // Set the action to signal privatization of device pointers. |
7481 | RCG.setAction(PrivAction); |
7482 | |
7483 | // Emit region code. |
7484 | CGM.getOpenMPRuntime().emitTargetDataCalls(CGF&: *this, D: S, IfCond, Device, CodeGen: RCG, |
7485 | Info); |
7486 | } |
7487 | |
7488 | void CodeGenFunction::EmitOMPTargetEnterDataDirective( |
7489 | const OMPTargetEnterDataDirective &S) { |
7490 | // If we don't have target devices, don't bother emitting the data mapping |
7491 | // code. |
7492 | if (CGM.getLangOpts().OMPTargetTriples.empty()) |
7493 | return; |
7494 | |
7495 | // Check if we have any if clause associated with the directive. |
7496 | const Expr *IfCond = nullptr; |
7497 | if (const auto *C = S.getSingleClause<OMPIfClause>()) |
7498 | IfCond = C->getCondition(); |
7499 | |
7500 | // Check if we have any device clause associated with the directive. |
7501 | const Expr *Device = nullptr; |
7502 | if (const auto *C = S.getSingleClause<OMPDeviceClause>()) |
7503 | Device = C->getDevice(); |
7504 | |
7505 | OMPLexicalScope Scope(*this, S, OMPD_task); |
7506 | CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(CGF&: *this, D: S, IfCond, Device); |
7507 | } |
7508 | |
7509 | void CodeGenFunction::EmitOMPTargetExitDataDirective( |
7510 | const OMPTargetExitDataDirective &S) { |
7511 | // If we don't have target devices, don't bother emitting the data mapping |
7512 | // code. |
7513 | if (CGM.getLangOpts().OMPTargetTriples.empty()) |
7514 | return; |
7515 | |
7516 | // Check if we have any if clause associated with the directive. |
7517 | const Expr *IfCond = nullptr; |
7518 | if (const auto *C = S.getSingleClause<OMPIfClause>()) |
7519 | IfCond = C->getCondition(); |
7520 | |
7521 | // Check if we have any device clause associated with the directive. |
7522 | const Expr *Device = nullptr; |
7523 | if (const auto *C = S.getSingleClause<OMPDeviceClause>()) |
7524 | Device = C->getDevice(); |
7525 | |
7526 | OMPLexicalScope Scope(*this, S, OMPD_task); |
7527 | CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(CGF&: *this, D: S, IfCond, Device); |
7528 | } |
7529 | |
7530 | static void emitTargetParallelRegion(CodeGenFunction &CGF, |
7531 | const OMPTargetParallelDirective &S, |
7532 | PrePostActionTy &Action) { |
7533 | // Get the captured statement associated with the 'parallel' region. |
7534 | const CapturedStmt *CS = S.getCapturedStmt(RegionKind: OMPD_parallel); |
7535 | Action.Enter(CGF); |
7536 | auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) { |
7537 | Action.Enter(CGF); |
7538 | CodeGenFunction::OMPPrivateScope PrivateScope(CGF); |
7539 | (void)CGF.EmitOMPFirstprivateClause(D: S, PrivateScope); |
7540 | CGF.EmitOMPPrivateClause(D: S, PrivateScope); |
7541 | CGF.EmitOMPReductionClauseInit(D: S, PrivateScope); |
7542 | (void)PrivateScope.Privatize(); |
7543 | if (isOpenMPTargetExecutionDirective(DKind: S.getDirectiveKind())) |
7544 | CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, D: S); |
7545 | // TODO: Add support for clauses. |
7546 | CGF.EmitStmt(S: CS->getCapturedStmt()); |
7547 | CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_parallel); |
7548 | }; |
7549 | emitCommonOMPParallelDirective(CGF, S, InnermostKind: OMPD_parallel, CodeGen, |
7550 | CodeGenBoundParameters: emitEmptyBoundParameters); |
7551 | emitPostUpdateForReductionClause(CGF, D: S, |
7552 | CondGen: [](CodeGenFunction &) { return nullptr; }); |
7553 | } |
7554 | |
7555 | void CodeGenFunction::EmitOMPTargetParallelDeviceFunction( |
7556 | CodeGenModule &CGM, StringRef ParentName, |
7557 | const OMPTargetParallelDirective &S) { |
7558 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
7559 | emitTargetParallelRegion(CGF, S, Action); |
7560 | }; |
7561 | llvm::Function *Fn; |
7562 | llvm::Constant *Addr; |
7563 | // Emit target region as a standalone region. |
7564 | CGM.getOpenMPRuntime().emitTargetOutlinedFunction( |
7565 | D: S, ParentName, OutlinedFn&: Fn, OutlinedFnID&: Addr, /*IsOffloadEntry=*/true, CodeGen); |
7566 | assert(Fn && Addr && "Target device function emission failed." ); |
7567 | } |
7568 | |
7569 | void CodeGenFunction::EmitOMPTargetParallelDirective( |
7570 | const OMPTargetParallelDirective &S) { |
7571 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
7572 | emitTargetParallelRegion(CGF, S, Action); |
7573 | }; |
7574 | emitCommonOMPTargetDirective(CGF&: *this, S, CodeGen); |
7575 | } |
7576 | |
7577 | static void emitTargetParallelForRegion(CodeGenFunction &CGF, |
7578 | const OMPTargetParallelForDirective &S, |
7579 | PrePostActionTy &Action) { |
7580 | Action.Enter(CGF); |
  // Emit the directive as a combined directive consisting of two implicit
  // directives: 'parallel' and 'for'.
7583 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
7584 | Action.Enter(CGF); |
7585 | CodeGenFunction::OMPCancelStackRAII CancelRegion( |
7586 | CGF, OMPD_target_parallel_for, S.hasCancel()); |
7587 | CGF.EmitOMPWorksharingLoop(S, EUB: S.getEnsureUpperBound(), CodeGenLoopBounds: emitForLoopBounds, |
7588 | CGDispatchBounds: emitDispatchForLoopBounds); |
7589 | }; |
7590 | emitCommonOMPParallelDirective(CGF, S, InnermostKind: OMPD_for, CodeGen, |
7591 | CodeGenBoundParameters: emitEmptyBoundParameters); |
7592 | } |
7593 | |
7594 | void CodeGenFunction::EmitOMPTargetParallelForDeviceFunction( |
7595 | CodeGenModule &CGM, StringRef ParentName, |
7596 | const OMPTargetParallelForDirective &S) { |
7597 | // Emit SPMD target parallel for region as a standalone region. |
7598 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
7599 | emitTargetParallelForRegion(CGF, S, Action); |
7600 | }; |
7601 | llvm::Function *Fn; |
7602 | llvm::Constant *Addr; |
7603 | // Emit target region as a standalone region. |
7604 | CGM.getOpenMPRuntime().emitTargetOutlinedFunction( |
7605 | D: S, ParentName, OutlinedFn&: Fn, OutlinedFnID&: Addr, /*IsOffloadEntry=*/true, CodeGen); |
7606 | assert(Fn && Addr && "Target device function emission failed." ); |
7607 | } |
7608 | |
7609 | void CodeGenFunction::EmitOMPTargetParallelForDirective( |
7610 | const OMPTargetParallelForDirective &S) { |
7611 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
7612 | emitTargetParallelForRegion(CGF, S, Action); |
7613 | }; |
7614 | emitCommonOMPTargetDirective(CGF&: *this, S, CodeGen); |
7615 | } |
7616 | |
7617 | static void |
7618 | emitTargetParallelForSimdRegion(CodeGenFunction &CGF, |
7619 | const OMPTargetParallelForSimdDirective &S, |
7620 | PrePostActionTy &Action) { |
7621 | Action.Enter(CGF); |
  // Emit the directive as a combined directive consisting of two implicit
  // directives: 'parallel' and 'for'.
7624 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
7625 | Action.Enter(CGF); |
7626 | CGF.EmitOMPWorksharingLoop(S, EUB: S.getEnsureUpperBound(), CodeGenLoopBounds: emitForLoopBounds, |
7627 | CGDispatchBounds: emitDispatchForLoopBounds); |
7628 | }; |
7629 | emitCommonOMPParallelDirective(CGF, S, InnermostKind: OMPD_simd, CodeGen, |
7630 | CodeGenBoundParameters: emitEmptyBoundParameters); |
7631 | } |
7632 | |
7633 | void CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction( |
7634 | CodeGenModule &CGM, StringRef ParentName, |
7635 | const OMPTargetParallelForSimdDirective &S) { |
  // Emit SPMD target parallel for simd region as a standalone region.
7637 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
7638 | emitTargetParallelForSimdRegion(CGF, S, Action); |
7639 | }; |
7640 | llvm::Function *Fn; |
7641 | llvm::Constant *Addr; |
7642 | // Emit target region as a standalone region. |
7643 | CGM.getOpenMPRuntime().emitTargetOutlinedFunction( |
7644 | D: S, ParentName, OutlinedFn&: Fn, OutlinedFnID&: Addr, /*IsOffloadEntry=*/true, CodeGen); |
7645 | assert(Fn && Addr && "Target device function emission failed." ); |
7646 | } |
7647 | |
7648 | void CodeGenFunction::EmitOMPTargetParallelForSimdDirective( |
7649 | const OMPTargetParallelForSimdDirective &S) { |
7650 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
7651 | emitTargetParallelForSimdRegion(CGF, S, Action); |
7652 | }; |
7653 | emitCommonOMPTargetDirective(CGF&: *this, S, CodeGen); |
7654 | } |
7655 | |
/// Map an OpenMP loop helper variable to the address of the corresponding
/// captured parameter.
7657 | static void mapParam(CodeGenFunction &CGF, const DeclRefExpr *Helper, |
7658 | const ImplicitParamDecl *PVD, |
7659 | CodeGenFunction::OMPPrivateScope &Privates) { |
7660 | const auto *VDecl = cast<VarDecl>(Val: Helper->getDecl()); |
7661 | Privates.addPrivate(LocalVD: VDecl, Addr: CGF.GetAddrOfLocalVar(VD: PVD)); |
7662 | } |
7663 | |
7664 | void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) { |
7665 | assert(isOpenMPTaskLoopDirective(S.getDirectiveKind())); |
7666 | // Emit outlined function for task construct. |
7667 | const CapturedStmt *CS = S.getCapturedStmt(RegionKind: OMPD_taskloop); |
7668 | Address CapturedStruct = Address::invalid(); |
7669 | { |
7670 | OMPLexicalScope Scope(*this, S, OMPD_taskloop, /*EmitPreInitStmt=*/false); |
7671 | CapturedStruct = GenerateCapturedStmtArgument(S: *CS); |
7672 | } |
7673 | QualType SharedsTy = getContext().getRecordType(Decl: CS->getCapturedRecordDecl()); |
7674 | const Expr *IfCond = nullptr; |
7675 | for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { |
7676 | if (C->getNameModifier() == OMPD_unknown || |
7677 | C->getNameModifier() == OMPD_taskloop) { |
7678 | IfCond = C->getCondition(); |
7679 | break; |
7680 | } |
7681 | } |
7682 | |
7683 | OMPTaskDataTy Data; |
7684 | // Check if taskloop must be emitted without taskgroup. |
7685 | Data.Nogroup = S.getSingleClause<OMPNogroupClause>(); |
7686 | // TODO: Check if we should emit tied or untied task. |
7687 | Data.Tied = true; |
7688 | // Set scheduling for taskloop |
7689 | if (const auto *Clause = S.getSingleClause<OMPGrainsizeClause>()) { |
7690 | // grainsize clause |
7691 | Data.Schedule.setInt(/*IntVal=*/false); |
7692 | Data.Schedule.setPointer(EmitScalarExpr(E: Clause->getGrainsize())); |
7693 | } else if (const auto *Clause = S.getSingleClause<OMPNumTasksClause>()) { |
7694 | // num_tasks clause |
7695 | Data.Schedule.setInt(/*IntVal=*/true); |
7696 | Data.Schedule.setPointer(EmitScalarExpr(E: Clause->getNumTasks())); |
7697 | } |
7698 | |
7699 | auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) { |
7700 | // if (PreCond) { |
7701 | // for (IV in 0..LastIteration) BODY; |
7702 | // <Final counter/linear vars updates>; |
7703 | // } |
7704 | // |
7705 | |
7706 | // Emit: if (PreCond) - begin. |
7707 | // If the condition constant folds and can be elided, avoid emitting the |
7708 | // whole loop. |
7709 | bool CondConstant; |
7710 | llvm::BasicBlock *ContBlock = nullptr; |
7711 | OMPLoopScope PreInitScope(CGF, S); |
7712 | if (CGF.ConstantFoldsToSimpleInteger(Cond: S.getPreCond(), Result&: CondConstant)) { |
7713 | if (!CondConstant) |
7714 | return; |
7715 | } else { |
7716 | llvm::BasicBlock *ThenBlock = CGF.createBasicBlock(name: "taskloop.if.then" ); |
7717 | ContBlock = CGF.createBasicBlock(name: "taskloop.if.end" ); |
7718 | emitPreCond(CGF, S, Cond: S.getPreCond(), TrueBlock: ThenBlock, FalseBlock: ContBlock, |
7719 | TrueCount: CGF.getProfileCount(S: &S)); |
7720 | CGF.EmitBlock(BB: ThenBlock); |
7721 | CGF.incrementProfileCounter(S: &S); |
7722 | } |
7723 | |
7724 | (void)CGF.EmitOMPLinearClauseInit(D: S); |
7725 | |
7726 | OMPPrivateScope LoopScope(CGF); |
7727 | // Emit helper vars inits. |
7728 | enum { LowerBound = 5, UpperBound, Stride, LastIter }; |
7729 | auto *I = CS->getCapturedDecl()->param_begin(); |
7730 | auto *LBP = std::next(x: I, n: LowerBound); |
7731 | auto *UBP = std::next(x: I, n: UpperBound); |
7732 | auto *STP = std::next(x: I, n: Stride); |
7733 | auto *LIP = std::next(x: I, n: LastIter); |
7734 | mapParam(CGF, Helper: cast<DeclRefExpr>(Val: S.getLowerBoundVariable()), PVD: *LBP, |
7735 | Privates&: LoopScope); |
7736 | mapParam(CGF, Helper: cast<DeclRefExpr>(Val: S.getUpperBoundVariable()), PVD: *UBP, |
7737 | Privates&: LoopScope); |
7738 | mapParam(CGF, Helper: cast<DeclRefExpr>(Val: S.getStrideVariable()), PVD: *STP, Privates&: LoopScope); |
7739 | mapParam(CGF, Helper: cast<DeclRefExpr>(Val: S.getIsLastIterVariable()), PVD: *LIP, |
7740 | Privates&: LoopScope); |
7741 | CGF.EmitOMPPrivateLoopCounters(S, LoopScope); |
7742 | CGF.EmitOMPLinearClause(D: S, PrivateScope&: LoopScope); |
7743 | bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(D: S, PrivateScope&: LoopScope); |
7744 | (void)LoopScope.Privatize(); |
7745 | // Emit the loop iteration variable. |
7746 | const Expr *IVExpr = S.getIterationVariable(); |
7747 | const auto *IVDecl = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: IVExpr)->getDecl()); |
7748 | CGF.EmitVarDecl(D: *IVDecl); |
7749 | CGF.EmitIgnoredExpr(E: S.getInit()); |
7750 | |
    // Emit the iteration count variable.
    // If it is not a variable, Sema decided to calculate the iteration count
    // on each use (e.g., it is foldable into a constant).
7754 | if (const auto *LIExpr = dyn_cast<DeclRefExpr>(Val: S.getLastIteration())) { |
7755 | CGF.EmitVarDecl(D: *cast<VarDecl>(Val: LIExpr->getDecl())); |
7756 | // Emit calculation of the iterations count. |
7757 | CGF.EmitIgnoredExpr(E: S.getCalcLastIteration()); |
7758 | } |
7759 | |
7760 | { |
7761 | OMPLexicalScope Scope(CGF, S, OMPD_taskloop, /*EmitPreInitStmt=*/false); |
7762 | emitCommonSimdLoop( |
7763 | CGF, S, |
7764 | SimdInitGen: [&S](CodeGenFunction &CGF, PrePostActionTy &) { |
7765 | if (isOpenMPSimdDirective(DKind: S.getDirectiveKind())) |
7766 | CGF.EmitOMPSimdInit(D: S); |
7767 | }, |
7768 | BodyCodeGen: [&S, &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) { |
7769 | CGF.EmitOMPInnerLoop( |
7770 | S, RequiresCleanup: LoopScope.requiresCleanups(), LoopCond: S.getCond(), IncExpr: S.getInc(), |
7771 | BodyGen: [&S](CodeGenFunction &CGF) { |
7772 | emitOMPLoopBodyWithStopPoint(CGF, S, |
7773 | LoopExit: CodeGenFunction::JumpDest()); |
7774 | }, |
7775 | PostIncGen: [](CodeGenFunction &) {}); |
7776 | }); |
7777 | } |
7778 | // Emit: if (PreCond) - end. |
7779 | if (ContBlock) { |
7780 | CGF.EmitBranch(Block: ContBlock); |
7781 | CGF.EmitBlock(BB: ContBlock, IsFinished: true); |
7782 | } |
7783 | // Emit final copy of the lastprivate variables if IsLastIter != 0. |
7784 | if (HasLastprivateClause) { |
7785 | CGF.EmitOMPLastprivateClauseFinal( |
7786 | D: S, NoFinals: isOpenMPSimdDirective(DKind: S.getDirectiveKind()), |
7787 | IsLastIterCond: CGF.Builder.CreateIsNotNull(Arg: CGF.EmitLoadOfScalar( |
7788 | Addr: CGF.GetAddrOfLocalVar(VD: *LIP), /*Volatile=*/false, |
7789 | Ty: (*LIP)->getType(), Loc: S.getBeginLoc()))); |
7790 | } |
7791 | LoopScope.restoreMap(); |
7792 | CGF.EmitOMPLinearClauseFinal(D: S, CondGen: [LIP, &S](CodeGenFunction &CGF) { |
7793 | return CGF.Builder.CreateIsNotNull( |
7794 | Arg: CGF.EmitLoadOfScalar(Addr: CGF.GetAddrOfLocalVar(VD: *LIP), /*Volatile=*/false, |
7795 | Ty: (*LIP)->getType(), Loc: S.getBeginLoc())); |
7796 | }); |
7797 | }; |
7798 | auto &&TaskGen = [&S, SharedsTy, CapturedStruct, |
7799 | IfCond](CodeGenFunction &CGF, llvm::Function *OutlinedFn, |
7800 | const OMPTaskDataTy &Data) { |
7801 | auto &&CodeGen = [&S, OutlinedFn, SharedsTy, CapturedStruct, IfCond, |
7802 | &Data](CodeGenFunction &CGF, PrePostActionTy &) { |
7803 | OMPLoopScope PreInitScope(CGF, S); |
7804 | CGF.CGM.getOpenMPRuntime().emitTaskLoopCall(CGF, Loc: S.getBeginLoc(), D: S, |
7805 | TaskFunction: OutlinedFn, SharedsTy, |
7806 | Shareds: CapturedStruct, IfCond, Data); |
7807 | }; |
7808 | CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, InnermostKind: OMPD_taskloop, |
7809 | CodeGen); |
7810 | }; |
7811 | if (Data.Nogroup) { |
7812 | EmitOMPTaskBasedDirective(S, CapturedRegion: OMPD_taskloop, BodyGen, TaskGen, Data); |
7813 | } else { |
7814 | CGM.getOpenMPRuntime().emitTaskgroupRegion( |
7815 | CGF&: *this, |
7816 | TaskgroupOpGen: [&S, &BodyGen, &TaskGen, &Data](CodeGenFunction &CGF, |
7817 | PrePostActionTy &Action) { |
7818 | Action.Enter(CGF); |
7819 | CGF.EmitOMPTaskBasedDirective(S, CapturedRegion: OMPD_taskloop, BodyGen, TaskGen, |
7820 | Data); |
7821 | }, |
7822 | Loc: S.getBeginLoc()); |
7823 | } |
7824 | } |
7825 | |
7826 | void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) { |
7827 | auto LPCRegion = |
7828 | CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S); |
7829 | EmitOMPTaskLoopBasedDirective(S); |
7830 | } |
7831 | |
7832 | void CodeGenFunction::EmitOMPTaskLoopSimdDirective( |
7833 | const OMPTaskLoopSimdDirective &S) { |
7834 | auto LPCRegion = |
7835 | CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S); |
7836 | OMPLexicalScope Scope(*this, S); |
7837 | EmitOMPTaskLoopBasedDirective(S); |
7838 | } |
7839 | |
7840 | void CodeGenFunction::EmitOMPMasterTaskLoopDirective( |
7841 | const OMPMasterTaskLoopDirective &S) { |
7842 | auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
7843 | Action.Enter(CGF); |
7844 | EmitOMPTaskLoopBasedDirective(S); |
7845 | }; |
7846 | auto LPCRegion = |
7847 | CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S); |
7848 | OMPLexicalScope Scope(*this, S, std::nullopt, /*EmitPreInitStmt=*/false); |
7849 | CGM.getOpenMPRuntime().emitMasterRegion(CGF&: *this, MasterOpGen: CodeGen, Loc: S.getBeginLoc()); |
7850 | } |
7851 | |
7852 | void CodeGenFunction::EmitOMPMasterTaskLoopSimdDirective( |
7853 | const OMPMasterTaskLoopSimdDirective &S) { |
7854 | auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
7855 | Action.Enter(CGF); |
7856 | EmitOMPTaskLoopBasedDirective(S); |
7857 | }; |
7858 | auto LPCRegion = |
7859 | CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S); |
7860 | OMPLexicalScope Scope(*this, S); |
7861 | CGM.getOpenMPRuntime().emitMasterRegion(CGF&: *this, MasterOpGen: CodeGen, Loc: S.getBeginLoc()); |
7862 | } |
7863 | |
7864 | void CodeGenFunction::EmitOMPParallelMasterTaskLoopDirective( |
7865 | const OMPParallelMasterTaskLoopDirective &S) { |
7866 | auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
7867 | auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF, |
7868 | PrePostActionTy &Action) { |
7869 | Action.Enter(CGF); |
7870 | CGF.EmitOMPTaskLoopBasedDirective(S); |
7871 | }; |
7872 | OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false); |
7873 | CGM.getOpenMPRuntime().emitMasterRegion(CGF, MasterOpGen: TaskLoopCodeGen, |
7874 | Loc: S.getBeginLoc()); |
7875 | }; |
7876 | auto LPCRegion = |
7877 | CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S); |
7878 | emitCommonOMPParallelDirective(CGF&: *this, S, InnermostKind: OMPD_master_taskloop, CodeGen, |
7879 | CodeGenBoundParameters: emitEmptyBoundParameters); |
7880 | } |
7881 | |
7882 | void CodeGenFunction::EmitOMPParallelMasterTaskLoopSimdDirective( |
7883 | const OMPParallelMasterTaskLoopSimdDirective &S) { |
7884 | auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
7885 | auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF, |
7886 | PrePostActionTy &Action) { |
7887 | Action.Enter(CGF); |
7888 | CGF.EmitOMPTaskLoopBasedDirective(S); |
7889 | }; |
7890 | OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false); |
7891 | CGM.getOpenMPRuntime().emitMasterRegion(CGF, MasterOpGen: TaskLoopCodeGen, |
7892 | Loc: S.getBeginLoc()); |
7893 | }; |
7894 | auto LPCRegion = |
7895 | CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S); |
7896 | emitCommonOMPParallelDirective(CGF&: *this, S, InnermostKind: OMPD_master_taskloop_simd, CodeGen, |
7897 | CodeGenBoundParameters: emitEmptyBoundParameters); |
7898 | } |
7899 | |
// Generate the instructions for the '#pragma omp target update' directive.
7901 | void CodeGenFunction::EmitOMPTargetUpdateDirective( |
7902 | const OMPTargetUpdateDirective &S) { |
7903 | // If we don't have target devices, don't bother emitting the data mapping |
7904 | // code. |
7905 | if (CGM.getLangOpts().OMPTargetTriples.empty()) |
7906 | return; |
7907 | |
7908 | // Check if we have any if clause associated with the directive. |
7909 | const Expr *IfCond = nullptr; |
7910 | if (const auto *C = S.getSingleClause<OMPIfClause>()) |
7911 | IfCond = C->getCondition(); |
7912 | |
7913 | // Check if we have any device clause associated with the directive. |
7914 | const Expr *Device = nullptr; |
7915 | if (const auto *C = S.getSingleClause<OMPDeviceClause>()) |
7916 | Device = C->getDevice(); |
7917 | |
7918 | OMPLexicalScope Scope(*this, S, OMPD_task); |
7919 | CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(CGF&: *this, D: S, IfCond, Device); |
7920 | } |
7921 | |
7922 | void CodeGenFunction::EmitOMPGenericLoopDirective( |
7923 | const OMPGenericLoopDirective &S) { |
7924 | // Unimplemented, just inline the underlying statement for now. |
7925 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
    // If the loop counter is not declared by the for-statement's init,
    // privatize it before emitting the underlying statement.
    const Stmt *CS =
        cast<CapturedStmt>(Val: S.getAssociatedStmt())->getCapturedStmt();
    const auto *ForS = dyn_cast<ForStmt>(Val: CS);
    if (ForS && !isa_and_nonnull<DeclStmt>(Val: ForS->getInit())) {
7931 | OMPPrivateScope LoopScope(CGF); |
7932 | CGF.EmitOMPPrivateLoopCounters(S, LoopScope); |
7933 | (void)LoopScope.Privatize(); |
7934 | CGF.EmitStmt(S: CS); |
7935 | LoopScope.restoreMap(); |
7936 | } else { |
7937 | CGF.EmitStmt(S: CS); |
7938 | } |
7939 | }; |
7940 | OMPLexicalScope Scope(*this, S, OMPD_unknown); |
7941 | CGM.getOpenMPRuntime().emitInlinedDirective(CGF&: *this, InnermostKind: OMPD_loop, CodeGen); |
7942 | } |
7943 | |
7944 | void CodeGenFunction::EmitOMPParallelGenericLoopDirective( |
7945 | const OMPLoopDirective &S) { |
7946 | // Emit combined directive as if its constituent constructs are 'parallel' |
7947 | // and 'for'. |
7948 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
7949 | Action.Enter(CGF); |
7950 | emitOMPCopyinClause(CGF, S); |
7951 | (void)emitWorksharingDirective(CGF, S, /*HasCancel=*/false); |
7952 | }; |
7953 | { |
7954 | auto LPCRegion = |
7955 | CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S); |
7956 | emitCommonOMPParallelDirective(CGF&: *this, S, InnermostKind: OMPD_for, CodeGen, |
7957 | CodeGenBoundParameters: emitEmptyBoundParameters); |
7958 | } |
7959 | // Check for outer lastprivate conditional update. |
7960 | checkForLastprivateConditionalUpdate(CGF&: *this, S); |
7961 | } |
7962 | |
7963 | void CodeGenFunction::EmitOMPTeamsGenericLoopDirective( |
7964 | const OMPTeamsGenericLoopDirective &S) { |
7965 | // To be consistent with current behavior of 'target teams loop', emit |
7966 | // 'teams loop' as if its constituent constructs are 'teams' and 'distribute'. |
7967 | auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { |
7968 | CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitOMPLoopBodyWithStopPoint, IncExpr: S.getInc()); |
7969 | }; |
7970 | |
7971 | // Emit teams region as a standalone region. |
7972 | auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF, |
7973 | PrePostActionTy &Action) { |
7974 | Action.Enter(CGF); |
7975 | OMPPrivateScope PrivateScope(CGF); |
7976 | CGF.EmitOMPReductionClauseInit(D: S, PrivateScope); |
7977 | (void)PrivateScope.Privatize(); |
7978 | CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, InnermostKind: OMPD_distribute, |
7979 | CodeGen: CodeGenDistribute); |
7980 | CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_teams); |
7981 | }; |
7982 | emitCommonOMPTeamsDirective(CGF&: *this, S, InnermostKind: OMPD_distribute, CodeGen); |
7983 | emitPostUpdateForReductionClause(CGF&: *this, D: S, |
7984 | CondGen: [](CodeGenFunction &) { return nullptr; }); |
7985 | } |
7986 | |
7987 | #ifndef NDEBUG |
7988 | static void emitTargetTeamsLoopCodegenStatus(CodeGenFunction &CGF, |
7989 | std::string StatusMsg, |
7990 | const OMPExecutableDirective &D) { |
7991 | bool IsDevice = CGF.CGM.getLangOpts().OpenMPIsTargetDevice; |
  if (IsDevice)
    StatusMsg += ": DEVICE";
  else
    StatusMsg += ": HOST";
7996 | SourceLocation L = D.getBeginLoc(); |
7997 | auto &SM = CGF.getContext().getSourceManager(); |
7998 | PresumedLoc PLoc = SM.getPresumedLoc(L); |
  // Guard against an invalid location: streaming a null 'const char *' into
  // raw_ostream is not safe.
  const char *FileName = PLoc.isValid() ? PLoc.getFilename() : "<unknown>";
  unsigned LineNo =
      PLoc.isValid() ? PLoc.getLine() : SM.getExpansionLineNumber(L);
  llvm::dbgs() << StatusMsg << ": " << FileName << ": " << LineNo << "\n";
8003 | } |
8004 | #endif |
8005 | |
8006 | static void emitTargetTeamsGenericLoopRegionAsParallel( |
8007 | CodeGenFunction &CGF, PrePostActionTy &Action, |
8008 | const OMPTargetTeamsGenericLoopDirective &S) { |
8009 | Action.Enter(CGF); |
  // Emit 'teams loop' as if its constituent constructs are 'distribute',
  // 'parallel', and 'for'.
8012 | auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { |
8013 | CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitInnerParallelForWhenCombined, |
8014 | IncExpr: S.getDistInc()); |
8015 | }; |
8016 | |
8017 | // Emit teams region as a standalone region. |
8018 | auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF, |
8019 | PrePostActionTy &Action) { |
8020 | Action.Enter(CGF); |
8021 | CodeGenFunction::OMPPrivateScope PrivateScope(CGF); |
8022 | CGF.EmitOMPReductionClauseInit(D: S, PrivateScope); |
8023 | (void)PrivateScope.Privatize(); |
8024 | CGF.CGM.getOpenMPRuntime().emitInlinedDirective( |
8025 | CGF, InnermostKind: OMPD_distribute, CodeGen: CodeGenDistribute, /*HasCancel=*/false); |
8026 | CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_teams); |
8027 | }; |
8028 | DEBUG_WITH_TYPE(TTL_CODEGEN_TYPE, |
8029 | emitTargetTeamsLoopCodegenStatus( |
8030 | CGF, TTL_CODEGEN_TYPE " as parallel for" , S)); |
8031 | emitCommonOMPTeamsDirective(CGF, S, InnermostKind: OMPD_distribute_parallel_for, |
8032 | CodeGen: CodeGenTeams); |
8033 | emitPostUpdateForReductionClause(CGF, D: S, |
8034 | CondGen: [](CodeGenFunction &) { return nullptr; }); |
8035 | } |
8036 | |
8037 | static void emitTargetTeamsGenericLoopRegionAsDistribute( |
8038 | CodeGenFunction &CGF, PrePostActionTy &Action, |
8039 | const OMPTargetTeamsGenericLoopDirective &S) { |
8040 | Action.Enter(CGF); |
8041 | // Emit 'teams loop' as if its constituent construct is 'distribute'. |
8042 | auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { |
8043 | CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitOMPLoopBodyWithStopPoint, IncExpr: S.getInc()); |
8044 | }; |
8045 | |
8046 | // Emit teams region as a standalone region. |
8047 | auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF, |
8048 | PrePostActionTy &Action) { |
8049 | Action.Enter(CGF); |
8050 | CodeGenFunction::OMPPrivateScope PrivateScope(CGF); |
8051 | CGF.EmitOMPReductionClauseInit(D: S, PrivateScope); |
8052 | (void)PrivateScope.Privatize(); |
8053 | CGF.CGM.getOpenMPRuntime().emitInlinedDirective( |
8054 | CGF, InnermostKind: OMPD_distribute, CodeGen: CodeGenDistribute, /*HasCancel=*/false); |
8055 | CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_teams); |
8056 | }; |
8057 | DEBUG_WITH_TYPE(TTL_CODEGEN_TYPE, |
8058 | emitTargetTeamsLoopCodegenStatus( |
8059 | CGF, TTL_CODEGEN_TYPE " as distribute" , S)); |
8060 | emitCommonOMPTeamsDirective(CGF, S, InnermostKind: OMPD_distribute, CodeGen); |
8061 | emitPostUpdateForReductionClause(CGF, D: S, |
8062 | CondGen: [](CodeGenFunction &) { return nullptr; }); |
8063 | } |
8064 | |
8065 | void CodeGenFunction::EmitOMPTargetTeamsGenericLoopDirective( |
8066 | const OMPTargetTeamsGenericLoopDirective &S) { |
8067 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
8068 | if (S.canBeParallelFor()) |
8069 | emitTargetTeamsGenericLoopRegionAsParallel(CGF, Action, S); |
8070 | else |
8071 | emitTargetTeamsGenericLoopRegionAsDistribute(CGF, Action, S); |
8072 | }; |
8073 | emitCommonOMPTargetDirective(CGF&: *this, S, CodeGen); |
8074 | } |
8075 | |
8076 | void CodeGenFunction::EmitOMPTargetTeamsGenericLoopDeviceFunction( |
8077 | CodeGenModule &CGM, StringRef ParentName, |
8078 | const OMPTargetTeamsGenericLoopDirective &S) { |
  // Emit SPMD target teams loop region as a standalone region.
8080 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
8081 | if (S.canBeParallelFor()) |
8082 | emitTargetTeamsGenericLoopRegionAsParallel(CGF, Action, S); |
8083 | else |
8084 | emitTargetTeamsGenericLoopRegionAsDistribute(CGF, Action, S); |
8085 | }; |
8086 | llvm::Function *Fn; |
8087 | llvm::Constant *Addr; |
8088 | // Emit target region as a standalone region. |
8089 | CGM.getOpenMPRuntime().emitTargetOutlinedFunction( |
8090 | D: S, ParentName, OutlinedFn&: Fn, OutlinedFnID&: Addr, /*IsOffloadEntry=*/true, CodeGen); |
8091 | assert(Fn && Addr && |
8092 | "Target device function emission failed for 'target teams loop'." ); |
8093 | } |
8094 | |
8095 | static void emitTargetParallelGenericLoopRegion( |
8096 | CodeGenFunction &CGF, const OMPTargetParallelGenericLoopDirective &S, |
8097 | PrePostActionTy &Action) { |
8098 | Action.Enter(CGF); |
8099 | // Emit as 'parallel for'. |
8100 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
8101 | Action.Enter(CGF); |
8102 | CodeGenFunction::OMPCancelStackRAII CancelRegion( |
8103 | CGF, OMPD_target_parallel_loop, /*hasCancel=*/false); |
8104 | CGF.EmitOMPWorksharingLoop(S, EUB: S.getEnsureUpperBound(), CodeGenLoopBounds: emitForLoopBounds, |
8105 | CGDispatchBounds: emitDispatchForLoopBounds); |
8106 | }; |
8107 | emitCommonOMPParallelDirective(CGF, S, InnermostKind: OMPD_for, CodeGen, |
8108 | CodeGenBoundParameters: emitEmptyBoundParameters); |
8109 | } |
8110 | |
8111 | void CodeGenFunction::EmitOMPTargetParallelGenericLoopDeviceFunction( |
8112 | CodeGenModule &CGM, StringRef ParentName, |
8113 | const OMPTargetParallelGenericLoopDirective &S) { |
8114 | // Emit target parallel loop region as a standalone region. |
8115 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
8116 | emitTargetParallelGenericLoopRegion(CGF, S, Action); |
8117 | }; |
8118 | llvm::Function *Fn; |
8119 | llvm::Constant *Addr; |
8120 | // Emit target region as a standalone region. |
8121 | CGM.getOpenMPRuntime().emitTargetOutlinedFunction( |
8122 | D: S, ParentName, OutlinedFn&: Fn, OutlinedFnID&: Addr, /*IsOffloadEntry=*/true, CodeGen); |
8123 | assert(Fn && Addr && "Target device function emission failed." ); |
8124 | } |
8125 | |
8126 | /// Emit combined directive 'target parallel loop' as if its constituent |
8127 | /// constructs are 'target', 'parallel', and 'for'. |
8128 | void CodeGenFunction::EmitOMPTargetParallelGenericLoopDirective( |
8129 | const OMPTargetParallelGenericLoopDirective &S) { |
8130 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
8131 | emitTargetParallelGenericLoopRegion(CGF, S, Action); |
8132 | }; |
8133 | emitCommonOMPTargetDirective(CGF&: *this, S, CodeGen); |
8134 | } |
8135 | |
8136 | void CodeGenFunction::EmitSimpleOMPExecutableDirective( |
8137 | const OMPExecutableDirective &D) { |
8138 | if (const auto *SD = dyn_cast<OMPScanDirective>(Val: &D)) { |
8139 | EmitOMPScanDirective(S: *SD); |
8140 | return; |
8141 | } |
8142 | if (!D.hasAssociatedStmt() || !D.getAssociatedStmt()) |
8143 | return; |
8144 | auto &&CodeGen = [&D](CodeGenFunction &CGF, PrePostActionTy &Action) { |
8145 | OMPPrivateScope GlobalsScope(CGF); |
8146 | if (isOpenMPTaskingDirective(Kind: D.getDirectiveKind())) { |
      // Capture global firstprivates to avoid a crash.
      for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
        for (const Expr *Ref : C->varlists()) {
          const auto *DRE = dyn_cast<DeclRefExpr>(Val: Ref->IgnoreParenImpCasts());
          if (!DRE)
            continue;
8153 | const auto *VD = dyn_cast<VarDecl>(Val: DRE->getDecl()); |
8154 | if (!VD || VD->hasLocalStorage()) |
8155 | continue; |
8156 | if (!CGF.LocalDeclMap.count(Val: VD)) { |
8157 | LValue GlobLVal = CGF.EmitLValue(E: Ref); |
8158 | GlobalsScope.addPrivate(LocalVD: VD, Addr: GlobLVal.getAddress()); |
8159 | } |
8160 | } |
8161 | } |
8162 | } |
8163 | if (isOpenMPSimdDirective(DKind: D.getDirectiveKind())) { |
8164 | (void)GlobalsScope.Privatize(); |
8165 | ParentLoopDirectiveForScanRegion ScanRegion(CGF, D); |
8166 | emitOMPSimdRegion(CGF, S: cast<OMPLoopDirective>(Val: D), Action); |
8167 | } else { |
8168 | if (const auto *LD = dyn_cast<OMPLoopDirective>(Val: &D)) { |
8169 | for (const Expr *E : LD->counters()) { |
8170 | const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl()); |
8171 | if (!VD->hasLocalStorage() && !CGF.LocalDeclMap.count(Val: VD)) { |
8172 | LValue GlobLVal = CGF.EmitLValue(E); |
8173 | GlobalsScope.addPrivate(LocalVD: VD, Addr: GlobLVal.getAddress()); |
8174 | } |
8175 | if (isa<OMPCapturedExprDecl>(Val: VD)) { |
8176 | // Emit only those that were not explicitly referenced in clauses. |
8177 | if (!CGF.LocalDeclMap.count(Val: VD)) |
8178 | CGF.EmitVarDecl(D: *VD); |
8179 | } |
8180 | } |
8181 | for (const auto *C : D.getClausesOfKind<OMPOrderedClause>()) { |
8182 | if (!C->getNumForLoops()) |
8183 | continue; |
8184 | for (unsigned I = LD->getLoopsNumber(), |
8185 | E = C->getLoopNumIterations().size(); |
8186 | I < E; ++I) { |
8187 | if (const auto *VD = dyn_cast<OMPCapturedExprDecl>( |
8188 | Val: cast<DeclRefExpr>(Val: C->getLoopCounter(NumLoop: I))->getDecl())) { |
8189 | // Emit only those that were not explicitly referenced in clauses. |
8190 | if (!CGF.LocalDeclMap.count(Val: VD)) |
8191 | CGF.EmitVarDecl(D: *VD); |
8192 | } |
8193 | } |
8194 | } |
8195 | } |
8196 | (void)GlobalsScope.Privatize(); |
8197 | CGF.EmitStmt(S: D.getInnermostCapturedStmt()->getCapturedStmt()); |
8198 | } |
8199 | }; |
8200 | if (D.getDirectiveKind() == OMPD_atomic || |
8201 | D.getDirectiveKind() == OMPD_critical || |
8202 | D.getDirectiveKind() == OMPD_section || |
8203 | D.getDirectiveKind() == OMPD_master || |
8204 | D.getDirectiveKind() == OMPD_masked || |
8205 | D.getDirectiveKind() == OMPD_unroll) { |
8206 | EmitStmt(S: D.getAssociatedStmt()); |
8207 | } else { |
8208 | auto LPCRegion = |
8209 | CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S: D); |
8210 | OMPSimdLexicalScope Scope(*this, D); |
8211 | CGM.getOpenMPRuntime().emitInlinedDirective( |
8212 | CGF&: *this, |
8213 | InnermostKind: isOpenMPSimdDirective(DKind: D.getDirectiveKind()) ? OMPD_simd |
8214 | : D.getDirectiveKind(), |
8215 | CodeGen); |
8216 | } |
8217 | // Check for outer lastprivate conditional update. |
8218 | checkForLastprivateConditionalUpdate(CGF&: *this, S: D); |
8219 | } |
8220 | |