//===- PlaceSafepoints.cpp - Place GC Safepoints --------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Place garbage collection safepoints at appropriate locations in the IR. This
// does not make relocation semantics or variable liveness explicit. That's
// done by RewriteStatepointsForGC.
//
// Terminology:
// - A call is said to be "parseable" if there is a stack map generated for the
// return PC of the call. A runtime can determine where values listed in the
// deopt arguments and (after RewriteStatepointsForGC) gc arguments are located
// on the stack when the code is suspended inside such a call. Every parse
// point is represented by a call wrapped in a gc.statepoint intrinsic.
// - A "poll" is an explicit check in the generated code to determine if the
// runtime needs the generated code to cooperate by calling a helper routine
// and thus suspending its execution at a known state. The call to the helper
// routine will be parseable. The (gc & runtime specific) logic of a poll is
// assumed to be provided in a function of the name "gc.safepoint_poll".
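//
// For illustration only, a minimal gc.safepoint_poll body might look like the
// following; the runtime-specific fast-path check is elided and @do_safepoint
// is a hypothetical runtime helper:
//
//   define void @gc.safepoint_poll() {
//     call void @do_safepoint()
//     ret void
//   }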
//
// We aim to insert polls such that running code can quickly be brought to a
// well defined state for inspection by the collector. In the current
// implementation, this is done via the insertion of poll sites at method entry
// and the backedge of most loops. We try to avoid inserting more polls than
// are necessary to ensure a finite period between poll sites. This is not
// because the poll itself is expensive in the generated code; it's not. Polls
// do tend to impact the optimizer itself in negative ways; we'd like to avoid
// perturbing the optimization of the method as much as we can.
//
// We also need to make most call sites parseable. The callee might execute a
// poll (or otherwise be inspected by the GC). If so, the entire stack
// (including the suspended frame of the current method) must be parseable.
//
// This pass will insert:
// - Call parse points ("call safepoints") for any call which may need to
// reach a safepoint during the execution of the callee function.
// - Backedge safepoint polls and entry safepoint polls to ensure that
// executing code reaches a safepoint poll in a finite amount of time.
//
// We do not currently support return statepoints, but adding them would not
// be hard. They are not required for correctness - entry safepoints are an
// alternative - but some GCs may prefer them. Patches welcome.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/PlaceSafepoints.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"

#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Statepoint.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"

using namespace llvm;

#define DEBUG_TYPE "place-safepoints"

STATISTIC(NumEntrySafepoints, "Number of entry safepoints inserted");
STATISTIC(NumBackedgeSafepoints, "Number of backedge safepoints inserted");

STATISTIC(CallInLoop,
          "Number of loops without safepoints due to calls in loop");
STATISTIC(FiniteExecution,
          "Number of loops without safepoints due to finite execution");

// Ignore opportunities to avoid placing safepoints on backedges, useful for
// validation.
static cl::opt<bool> AllBackedges("spp-all-backedges", cl::Hidden,
                                  cl::init(false));

/// How narrow does the trip count of a loop have to be for the loop to be
/// considered "counted"? Counted loops do not get safepoints at backedges.
static cl::opt<int> CountedLoopTripWidth("spp-counted-loop-trip-width",
                                         cl::Hidden, cl::init(32));

// If true, split the backedge of a loop when placing the safepoint, otherwise
// split the latch block itself. Both options are worth supporting for
// experimentation, but in practice, it looks like splitting the backedge
// optimizes better.
static cl::opt<bool> SplitBackedge("spp-split-backedge", cl::Hidden,
                                   cl::init(false));
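//
// Illustratively, for a latch block L whose backedge targets header H,
// splitting the backedge yields L -> poll.bb -> H with the poll in the new
// block, while splitting the latch places the poll in L itself, immediately
// before its terminator.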

namespace {
/// An analysis pass whose purpose is to identify each of the backedges in
/// the function which require a safepoint poll to be inserted.
class PlaceBackedgeSafepointsLegacyPass : public FunctionPass {
public:
  static char ID;

  /// The output of the pass - a list of the backedges (each described by
  /// pointing at its branch instruction) which need a poll inserted.
  std::vector<Instruction *> PollLocations;

  /// True unless we're running spp-no-call, in which case we need to disable
  /// the call-dependent placement opts.
  bool CallSafepointsEnabled;

  PlaceBackedgeSafepointsLegacyPass(bool CallSafepoints = false)
      : FunctionPass(ID), CallSafepointsEnabled(CallSafepoints) {
    initializePlaceBackedgeSafepointsLegacyPassPass(
        *PassRegistry::getPassRegistry());
  }

  bool runOnLoop(Loop *);

  void runOnLoopAndSubLoops(Loop *L) {
    // Visit all the subloops
    for (Loop *I : *L)
      runOnLoopAndSubLoops(I);
    runOnLoop(L);
  }

  bool runOnFunction(Function &F) override {
    SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
    DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
    LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
    TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
    for (Loop *I : *LI) {
      runOnLoopAndSubLoops(I);
    }
    return false;
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.addRequired<ScalarEvolutionWrapperPass>();
    AU.addRequired<LoopInfoWrapperPass>();
    AU.addRequired<TargetLibraryInfoWrapperPass>();
    // We no longer modify the IR at all in this pass. Thus all
    // analyses are preserved.
    AU.setPreservesAll();
  }

private:
  ScalarEvolution *SE = nullptr;
  DominatorTree *DT = nullptr;
  LoopInfo *LI = nullptr;
  TargetLibraryInfo *TLI = nullptr;
};
} // namespace

static cl::opt<bool> NoEntry("spp-no-entry", cl::Hidden, cl::init(false));
static cl::opt<bool> NoCall("spp-no-call", cl::Hidden, cl::init(false));
static cl::opt<bool> NoBackedge("spp-no-backedge", cl::Hidden,
                                cl::init(false));

char PlaceBackedgeSafepointsLegacyPass::ID = 0;

INITIALIZE_PASS_BEGIN(PlaceBackedgeSafepointsLegacyPass,
                      "place-backedge-safepoints-impl",
                      "Place Backedge Safepoints", false, false)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_END(PlaceBackedgeSafepointsLegacyPass,
                    "place-backedge-safepoints-impl",
                    "Place Backedge Safepoints", false, false)

static bool containsUnconditionalCallSafepoint(Loop *L, BasicBlock *Header,
                                               BasicBlock *Pred,
                                               DominatorTree &DT,
                                               const TargetLibraryInfo &TLI);

static bool mustBeFiniteCountedLoop(Loop *L, ScalarEvolution *SE,
                                    BasicBlock *Pred);

static Instruction *findLocationForEntrySafepoint(Function &F,
                                                  DominatorTree &DT);

static bool isGCSafepointPoll(Function &F);
static bool shouldRewriteFunction(Function &F);
static bool enableEntrySafepoints(Function &F);
static bool enableBackedgeSafepoints(Function &F);
static bool enableCallSafepoints(Function &F);

static void
InsertSafepointPoll(BasicBlock::iterator InsertBefore,
                    std::vector<CallBase *> &ParsePointsNeeded /*rval*/,
                    const TargetLibraryInfo &TLI);
bool PlaceBackedgeSafepointsLegacyPass::runOnLoop(Loop *L) {
  // Loop through all loop latches (branches controlling backedges). We need
  // to place a safepoint on every backedge (potentially).
  // Note: In common usage, there will be only one edge due to LoopSimplify
  // having run sometime earlier in the pipeline, but this code must be correct
  // w.r.t. loops with multiple backedges.
  BasicBlock *Header = L->getHeader();
  SmallVector<BasicBlock *, 16> LoopLatches;
  L->getLoopLatches(LoopLatches);
  for (BasicBlock *Pred : LoopLatches) {
    assert(L->contains(Pred));

    // Make a policy decision about whether this loop needs a safepoint or
    // not. Note that this is about unburdening the optimizer in loops, not
    // avoiding the runtime cost of the actual safepoint.
    if (!AllBackedges) {
      if (mustBeFiniteCountedLoop(L, SE, Pred)) {
        LLVM_DEBUG(dbgs() << "skipping safepoint placement in finite loop\n");
        FiniteExecution++;
        continue;
      }
      if (CallSafepointsEnabled &&
          containsUnconditionalCallSafepoint(L, Header, Pred, *DT, *TLI)) {
        // Note: This is only semantically legal since we won't do any further
        // IPO or inlining before the actual call insertion. If we had, we
        // might later lose this call safepoint.
        LLVM_DEBUG(
            dbgs()
            << "skipping safepoint placement due to unconditional call\n");
        CallInLoop++;
        continue;
      }
    }

    // TODO: We can create an inner loop which runs a finite number of
    // iterations with an outer loop which contains a safepoint. This would
    // not help runtime performance that much, but it might help our ability to
    // optimize the inner loop.

    // Safepoint insertion would involve creating a new basic block (as the
    // target of the current backedge) which does the safepoint (of all live
    // variables) and branches to the true header.
    Instruction *Term = Pred->getTerminator();

    LLVM_DEBUG(dbgs() << "[LSP] terminator instruction: " << *Term);

    PollLocations.push_back(Term);
  }

  return false;
}

bool PlaceSafepointsPass::runImpl(Function &F, const TargetLibraryInfo &TLI) {
  if (F.isDeclaration() || F.empty()) {
    // This is a declaration, nothing to do. Must exit early to avoid a crash
    // in dom tree calculation.
    return false;
  }

  if (isGCSafepointPoll(F)) {
    // Given we're inlining this inside of safepoint poll insertion, this
    // doesn't make any sense. Note that any calls contained in the poll body
    // do get made parseable after we inline the poll.
    return false;
  }

  if (!shouldRewriteFunction(F))
    return false;

  bool Modified = false;

  // In various bits below, we rely on the fact that uses are reachable from
  // defs. When there are basic blocks unreachable from the entry, dominance
  // and reachability queries return non-sensical results. Thus, we preprocess
  // the function to ensure these properties hold.
  Modified |= removeUnreachableBlocks(F);

  // STEP 1 - Insert the safepoint polling locations. We do not need to
  // actually insert parse points yet. That will be done for all polls and
  // calls in a single pass.

  DominatorTree DT;
  DT.recalculate(F);

  SmallVector<Instruction *, 16> PollsNeeded;
  std::vector<CallBase *> ParsePointNeeded;

  if (enableBackedgeSafepoints(F)) {
    // Construct a pass manager to run the LoopPass backedge logic. We
    // need the pass manager to handle scheduling all the loop passes
    // appropriately. Doing this by hand is painful and just not worth messing
    // with for the moment.
    legacy::FunctionPassManager FPM(F.getParent());
    bool CanAssumeCallSafepoints = enableCallSafepoints(F);

    FPM.add(new TargetLibraryInfoWrapperPass(TLI));
    auto *PBS = new PlaceBackedgeSafepointsLegacyPass(CanAssumeCallSafepoints);
    FPM.add(PBS);
    FPM.run(F);

    // We preserve dominance information when inserting the poll, otherwise
    // we'd have to recalculate this on every insert.
    DT.recalculate(F);

    auto &PollLocations = PBS->PollLocations;

    auto OrderByBBName = [](Instruction *a, Instruction *b) {
      return a->getParent()->getName() < b->getParent()->getName();
    };
    // We need the order of the list to be stable so that naming ends up stable
    // when we split edges. This makes test cases much easier to write.
    llvm::sort(PollLocations, OrderByBBName);

    // We can sometimes end up with duplicate poll locations. This happens if
    // a single loop is visited more than once. The fact this happens seems
    // wrong, but it does happen for the split-backedge.ll test case.
    PollLocations.erase(llvm::unique(PollLocations), PollLocations.end());

    // Insert a poll at each point the analysis pass identified.
    // The poll location must be the terminator of a loop latch block.
    for (Instruction *Term : PollLocations) {
      // We are inserting a poll, the function is modified.
      Modified = true;

      if (SplitBackedge) {
        // Split the backedge of the loop and insert the poll within that new
        // basic block. This creates a loop with two latches per original
        // latch (which is non-ideal), but this appears to be easier to
        // optimize in practice than inserting the poll immediately before the
        // latch test.

        // Since this is a latch, at least one of the successors must dominate
        // it. It's possible that we have a) duplicate edges to the same header
        // and b) edges to distinct loop headers. We need to insert polls on
        // each.
        SetVector<BasicBlock *> Headers;
        for (unsigned i = 0; i < Term->getNumSuccessors(); i++) {
          BasicBlock *Succ = Term->getSuccessor(i);
          if (DT.dominates(Succ, Term->getParent())) {
            Headers.insert(Succ);
          }
        }
        assert(!Headers.empty() && "poll location is not a loop latch?");

        // The split loop structure here is so that we only need to recalculate
        // the dominator tree once. Alternatively, we could just keep it up to
        // date and use a more natural merged loop.
        SetVector<BasicBlock *> SplitBackedges;
        for (BasicBlock *Header : Headers) {
          BasicBlock *NewBB = SplitEdge(Term->getParent(), Header, &DT);
          PollsNeeded.push_back(NewBB->getTerminator());
          NumBackedgeSafepoints++;
        }
      } else {
        // Split the latch block itself, right before the terminator.
        PollsNeeded.push_back(Term);
        NumBackedgeSafepoints++;
      }
    }
  }

  if (enableEntrySafepoints(F)) {
    if (Instruction *Location = findLocationForEntrySafepoint(F, DT)) {
      PollsNeeded.push_back(Location);
      Modified = true;
      NumEntrySafepoints++;
    }
    // TODO: else we should assert that there was, in fact, a policy choice to
    // not insert an entry safepoint poll.
  }

  // Now that we've identified all the needed safepoint poll locations, insert
  // the safepoint polls themselves.
  for (Instruction *PollLocation : PollsNeeded) {
    std::vector<CallBase *> RuntimeCalls;
    InsertSafepointPoll(PollLocation->getIterator(), RuntimeCalls, TLI);
    llvm::append_range(ParsePointNeeded, RuntimeCalls);
  }

  return Modified;
}

PreservedAnalyses PlaceSafepointsPass::run(Function &F,
                                           FunctionAnalysisManager &AM) {
  auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);

  if (!runImpl(F, TLI))
    return PreservedAnalyses::all();

  // TODO: can we preserve more?
  return PreservedAnalyses::none();
}

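/// Returns true if Call may need to be rewritten into a statepoint, i.e. it
/// is not known to be a GC leaf, is not inline asm, and is not already one of
/// the gc.statepoint/gc.relocate/gc.result intrinsics.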
static bool needsStatepoint(CallBase *Call, const TargetLibraryInfo &TLI) {
  if (callsGCLeafFunction(Call, TLI))
    return false;
  if (auto *CI = dyn_cast<CallInst>(Call)) {
    if (CI->isInlineAsm())
      return false;
  }

  return !(isa<GCStatepointInst>(Call) || isa<GCRelocateInst>(Call) ||
           isa<GCResultInst>(Call));
}

/// Returns true if this loop is known to contain a call safepoint which
/// must unconditionally execute on any iteration of the loop which returns
/// to the loop header via an edge from Pred. Returns a conservatively correct
/// answer; i.e. false is always valid.
static bool containsUnconditionalCallSafepoint(Loop *L, BasicBlock *Header,
                                               BasicBlock *Pred,
                                               DominatorTree &DT,
                                               const TargetLibraryInfo &TLI) {
  // In general, we're looking for any cut of the graph which ensures
  // there's a call safepoint along every edge between Header and Pred.
  // For the moment, we look only for the 'cuts' that consist of a single call
  // instruction in a block which is dominated by the Header and dominates the
  // loop latch (Pred) block. Somewhat surprisingly, walking the entire chain
  // of such dominating blocks finds substantially more occurrences than just
  // checking the Pred and Header blocks themselves. This may be due to the
  // density of loop exit conditions caused by range and null checks.
  // TODO: structure this as an analysis pass, cache the result for subloops,
  // avoid dom tree recalculations
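  //
  // For example, in a CFG of the form Header -> A -> Latch (with Header
  // dominating A and A dominating the latch), a call in A executes on every
  // iteration that takes the backedge, so the walk below from Pred up the
  // idom chain to Header finds it even though neither Pred nor Header
  // contains the call.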
  assert(DT.dominates(Header, Pred) && "loop latch not dominated by header?");

  BasicBlock *Current = Pred;
  while (true) {
    for (Instruction &I : *Current) {
      if (auto *Call = dyn_cast<CallBase>(&I))
        // Note: Technically, needing a safepoint isn't quite the right
        // condition here. We should instead be checking if the target method
        // has an unconditional poll. In practice, this is only a theoretical
        // concern since we don't have any methods with conditional-only
        // safepoint polls.
        if (needsStatepoint(Call, TLI))
          return true;
    }

    if (Current == Header)
      break;
    Current = DT.getNode(Current)->getIDom()->getBlock();
  }

  return false;
}

/// Returns true if this loop is known to terminate in a finite number of
/// iterations. Note that this function may return false for a loop which
/// does actually terminate in a finite constant number of iterations due to
/// conservatism in the analysis.
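///
/// For example, a loop whose induction variable is an i32 counting from zero
/// up to a loop-invariant bound has a constant max backedge-taken count that
/// fits in 32 bits, so with the default spp-counted-loop-trip-width of 32 it
/// is treated as counted and receives no backedge poll.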
static bool mustBeFiniteCountedLoop(Loop *L, ScalarEvolution *SE,
                                    BasicBlock *Pred) {
  // A conservative bound on the loop as a whole.
  const SCEV *MaxTrips = SE->getConstantMaxBackedgeTakenCount(L);
  if (!isa<SCEVCouldNotCompute>(MaxTrips) &&
      SE->getUnsignedRange(MaxTrips).getUnsignedMax().isIntN(
          CountedLoopTripWidth))
    return true;

  // If this is a conditional branch to the header with the alternate path
  // being outside the loop, we can ask questions about the execution frequency
  // of the exit block.
  if (L->isLoopExiting(Pred)) {
    // This returns an exact expression only. TODO: We really only need an
    // upper bound here, but SE doesn't expose that.
    const SCEV *MaxExec = SE->getExitCount(L, Pred);
    if (!isa<SCEVCouldNotCompute>(MaxExec) &&
        SE->getUnsignedRange(MaxExec).getUnsignedMax().isIntN(
            CountedLoopTripWidth))
      return true;
  }

  return /* not finite */ false;
}

static void scanOneBB(Instruction *Start, Instruction *End,
                      std::vector<CallInst *> &Calls,
                      DenseSet<BasicBlock *> &Seen,
                      std::vector<BasicBlock *> &Worklist) {
  for (BasicBlock::iterator BBI(Start), BBE0 = Start->getParent()->end(),
                            BBE1 = BasicBlock::iterator(End);
       BBI != BBE0 && BBI != BBE1; BBI++) {
    if (CallInst *CI = dyn_cast<CallInst>(&*BBI))
      Calls.push_back(CI);

    // FIXME: This code does not handle invokes
    assert(!isa<InvokeInst>(&*BBI) &&
           "support for invokes in poll code needed");

    // Only add the successor blocks if we reach the terminator instruction
    // without encountering end first
    if (BBI->isTerminator()) {
      BasicBlock *BB = BBI->getParent();
      for (BasicBlock *Succ : successors(BB)) {
        if (Seen.insert(Succ).second) {
          Worklist.push_back(Succ);
        }
      }
    }
  }
}

static void scanInlinedCode(Instruction *Start, Instruction *End,
                            std::vector<CallInst *> &Calls,
                            DenseSet<BasicBlock *> &Seen) {
  Calls.clear();
  std::vector<BasicBlock *> Worklist;
  Seen.insert(Start->getParent());
  scanOneBB(Start, End, Calls, Seen, Worklist);
  while (!Worklist.empty()) {
    BasicBlock *BB = Worklist.back();
    Worklist.pop_back();
    scanOneBB(&*BB->begin(), End, Calls, Seen, Worklist);
  }
}

/// Returns true if an entry safepoint is not required before this callsite in
/// the caller function.
static bool doesNotRequireEntrySafepointBefore(CallBase *Call) {
  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Call)) {
    switch (II->getIntrinsicID()) {
    case Intrinsic::experimental_gc_statepoint:
    case Intrinsic::experimental_patchpoint_void:
    case Intrinsic::experimental_patchpoint:
      // These can wrap an actual call which may grow the stack by an unbounded
      // amount or run forever.
      return false;
    default:
      // Most LLVM intrinsics are things which do not expand to actual calls,
      // or at least if they do, are leaf functions that cause only finite
      // stack growth. In particular, the optimizer likes to form things like
      // memsets out of stores in the original IR. Another important example is
      // llvm.localescape which must occur in the entry block. Inserting a
      // safepoint before it is not legal since it could push the localescape
      // out of the entry block.
      return true;
    }
  }
  return false;
}

static Instruction *findLocationForEntrySafepoint(Function &F,
                                                  DominatorTree &DT) {

  // Conceptually, this poll needs to be on method entry, but in
  // practice, we place it as late in the entry block as possible. We
  // can place it as late as we want as long as it dominates all calls
  // that can grow the stack. This, combined with backedge polls,
  // gives us all the progress guarantees we need.
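  //
  // For example, given an entry block of the form (with @foo standing in for
  // an arbitrary user function):
  //   %a = alloca i32
  //   store i32 0, ptr %a
  //   call void @foo()
  // the cursor below stops at the call to @foo, and the entry poll is later
  // inserted immediately before that call.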

  // HasNextInstruction and NextInstruction are used to iterate
  // through a "straight line" execution sequence.

  auto HasNextInstruction = [](Instruction *I) {
    if (!I->isTerminator())
      return true;

    BasicBlock *nextBB = I->getParent()->getUniqueSuccessor();
    return nextBB && (nextBB->getUniquePredecessor() != nullptr);
  };

  auto NextInstruction = [&](Instruction *I) {
    assert(HasNextInstruction(I) &&
           "first check if there is a next instruction!");

    if (I->isTerminator())
      return &I->getParent()->getUniqueSuccessor()->front();
    return &*++I->getIterator();
  };

  Instruction *Cursor = nullptr;
  for (Cursor = &F.getEntryBlock().front(); HasNextInstruction(Cursor);
       Cursor = NextInstruction(Cursor)) {

    // We need to ensure a safepoint poll occurs before any 'real' call. The
    // easiest way to ensure finite execution between safepoints in the face of
    // recursive and mutually recursive functions is to enforce that each take
    // a safepoint. Additionally, we need to ensure a poll before any call
    // which can grow the stack by an unbounded amount. This isn't required
    // for GC semantics per se, but is a common requirement for languages
    // which detect stack overflow via guard pages and then throw exceptions.
    if (auto *Call = dyn_cast<CallBase>(Cursor)) {
      if (doesNotRequireEntrySafepointBefore(Call))
        continue;
      break;
    }
  }

  assert((HasNextInstruction(Cursor) || Cursor->isTerminator()) &&
         "either we stopped because of a call, or because of terminator");

  return Cursor;
}

const char GCSafepointPollName[] = "gc.safepoint_poll";

static bool isGCSafepointPoll(Function &F) {
  return F.getName() == GCSafepointPollName;
}

/// Returns true if this function should be rewritten to include safepoint
/// polls and parseable call sites. The main point of this function is to be
/// an extension point for custom logic.
static bool shouldRewriteFunction(Function &F) {
  // TODO: This should check the GCStrategy
  if (F.hasGC()) {
    const auto &FunctionGCName = F.getGC();
    const StringRef StatepointExampleName("statepoint-example");
    const StringRef CoreCLRName("coreclr");
    return (StatepointExampleName == FunctionGCName) ||
           (CoreCLRName == FunctionGCName);
  } else
    return false;
}

// TODO: These should become properties of the GCStrategy, possibly with
// command line overrides.
static bool enableEntrySafepoints(Function &F) { return !NoEntry; }
static bool enableBackedgeSafepoints(Function &F) { return !NoBackedge; }
static bool enableCallSafepoints(Function &F) { return !NoCall; }

// Insert a safepoint poll immediately before the given instruction. Does
// not handle the parsability of state at the runtime call; that's the
// caller's job.
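//
// For illustration, assuming the minimal gc.safepoint_poll body sketched in
// the file header, inserting a poll before an instruction INST conceptually
// rewrites
//
//   ...
//   INST
//
// into
//
//   ...
//   call void @do_safepoint()   ; inlined poll body; reported back to the
//   INST                        ; caller through ParsePointsNeeded
//
// where @do_safepoint is the hypothetical runtime helper named above.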
static void
InsertSafepointPoll(BasicBlock::iterator InsertBefore,
                    std::vector<CallBase *> &ParsePointsNeeded /*rval*/,
                    const TargetLibraryInfo &TLI) {
  BasicBlock *OrigBB = InsertBefore->getParent();
  Module *M = InsertBefore->getModule();
  assert(M && "must be part of a module");

  // Inline the safepoint poll implementation - this will get all the branch,
  // control flow, etc. Most importantly, it will introduce the actual slow
  // path call - where we need to insert a safepoint (parsepoint).

  auto *F = M->getFunction(GCSafepointPollName);
  assert(F && "gc.safepoint_poll function is missing");
  assert(F->getValueType() ==
             FunctionType::get(Type::getVoidTy(M->getContext()), false) &&
         "gc.safepoint_poll declared with wrong type");
  assert(!F->empty() && "gc.safepoint_poll must be a non-empty function");
  CallInst *PollCall = CallInst::Create(F, "", InsertBefore);

  // Record some information about the call site we're replacing.
  BasicBlock::iterator Before(PollCall), After(PollCall);
  bool IsBegin = false;
  if (Before == OrigBB->begin())
    IsBegin = true;
  else
    Before--;

  After++;
  assert(After != OrigBB->end() && "must have successor");

  // Do the actual inlining.
  InlineFunctionInfo IFI;
  bool InlineStatus = InlineFunction(*PollCall, IFI).isSuccess();
  assert(InlineStatus && "inline must succeed");
  (void)InlineStatus; // suppress warning in release-asserts

  // Check post-conditions.
  assert(IFI.StaticAllocas.empty() && "can't have allocs");

  std::vector<CallInst *> Calls; // new calls
  DenseSet<BasicBlock *> BBs;    // new BBs + insertee

  // Include only the newly inserted instructions. Note: Begin may not be
  // valid if we inserted at the beginning of the basic block.
  BasicBlock::iterator Start = IsBegin ? OrigBB->begin() : std::next(Before);

  // If your poll function includes an unreachable at the end, that's not
  // valid. Bugpoint likes to create this, so check for it.
  assert(isPotentiallyReachable(&*Start, &*After) &&
         "malformed poll function");

  scanInlinedCode(&*Start, &*After, Calls, BBs);
  assert(!Calls.empty() && "slow path not found for safepoint poll");

  // Record the fact we need a parsable state at the runtime call contained in
  // the poll function. This is required so that the runtime knows how to
  // parse the last frame when we actually take the safepoint (i.e. execute
  // the slow path).
  assert(ParsePointsNeeded.empty());
  for (auto *CI : Calls) {
    // No safepoint needed or wanted
    if (!needsStatepoint(CI, TLI))
      continue;

    // These are likely runtime calls. Should we assert that via calling
    // convention or something?
    ParsePointsNeeded.push_back(CI);
  }
  assert(ParsePointsNeeded.size() <= Calls.size());
}
694 | |