1 | //===- TailRecursionElimination.cpp - Eliminate Tail Calls ----------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file transforms calls of the current function (self recursion) followed |
10 | // by a return instruction with a branch to the entry of the function, creating |
11 | // a loop. This pass also implements the following extensions to the basic |
12 | // algorithm: |
13 | // |
14 | // 1. Trivial instructions between the call and return do not prevent the |
15 | // transformation from taking place, though currently the analysis cannot |
16 | // support moving any really useful instructions (only dead ones). |
17 | // 2. This pass transforms functions that are prevented from being tail |
18 | // recursive by an associative and commutative expression to use an |
19 | // accumulator variable, thus compiling the typical naive factorial or |
20 | // 'fib' implementation into efficient code. |
21 | // 3. TRE is performed if the function returns void, if the return |
22 | // returns the result returned by the call, or if the function returns a |
23 | // run-time constant on all exits from the function. It is possible, though |
24 | // unlikely, that the return returns something else (like constant 0), and |
25 | // can still be TRE'd. It can be TRE'd if ALL OTHER return instructions in |
26 | // the function return the exact same value. |
27 | // 4. If it can prove that callees do not access their caller stack frame, |
28 | // they are marked as eligible for tail call elimination (by the code |
29 | // generator). |
30 | // |
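// For example, extension #2 rewrites the obvious recursive factorial
//
//   int fac(int n) { return n <= 1 ? 1 : n * fac(n - 1); }
//
// into a loop that keeps the running product in an accumulator seeded with the
// multiplicative identity, so the recursion no longer grows the stack.
//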
31 | // There are several improvements that could be made: |
32 | // |
33 | // 1. If the function has any alloca instructions, these instructions will be |
34 | // moved out of the entry block of the function, causing them to be |
35 | // evaluated each time through the tail recursion. Safely keeping allocas |
//    in the entry block requires analysis to prove that the tail-called
37 | // function does not read or write the stack object. |
38 | // 2. Tail recursion is only performed if the call immediately precedes the |
39 | // return instruction. It's possible that there could be a jump between |
40 | // the call and the return. |
41 | // 3. There can be intervening operations between the call and the return that |
42 | // prevent the TRE from occurring. For example, there could be GEP's and |
43 | // stores to memory that will not be read or written by the call. This |
44 | // requires some substantial analysis (such as with DSA) to prove safe to |
45 | // move ahead of the call, but doing so could allow many more TREs to be |
46 | // performed, for example in TreeAdd/TreeAlloc from the treeadd benchmark. |
47 | // 4. The algorithm we use to detect if callees access their caller stack |
48 | // frames is very primitive. |
49 | // |
50 | //===----------------------------------------------------------------------===// |
51 | |
52 | #include "llvm/Transforms/Scalar/TailRecursionElimination.h" |
53 | #include "llvm/ADT/STLExtras.h" |
54 | #include "llvm/ADT/SmallPtrSet.h" |
55 | #include "llvm/ADT/Statistic.h" |
56 | #include "llvm/Analysis/BlockFrequencyInfo.h" |
57 | #include "llvm/Analysis/DomTreeUpdater.h" |
58 | #include "llvm/Analysis/GlobalsModRef.h" |
59 | #include "llvm/Analysis/InstructionSimplify.h" |
60 | #include "llvm/Analysis/Loads.h" |
61 | #include "llvm/Analysis/OptimizationRemarkEmitter.h" |
62 | #include "llvm/Analysis/PostDominators.h" |
63 | #include "llvm/Analysis/TargetTransformInfo.h" |
64 | #include "llvm/Analysis/ValueTracking.h" |
65 | #include "llvm/IR/CFG.h" |
66 | #include "llvm/IR/Constants.h" |
67 | #include "llvm/IR/DataLayout.h" |
68 | #include "llvm/IR/DerivedTypes.h" |
69 | #include "llvm/IR/DiagnosticInfo.h" |
70 | #include "llvm/IR/Dominators.h" |
71 | #include "llvm/IR/Function.h" |
72 | #include "llvm/IR/IRBuilder.h" |
73 | #include "llvm/IR/InstIterator.h" |
74 | #include "llvm/IR/Instructions.h" |
75 | #include "llvm/IR/IntrinsicInst.h" |
76 | #include "llvm/IR/Module.h" |
77 | #include "llvm/InitializePasses.h" |
78 | #include "llvm/Pass.h" |
79 | #include "llvm/Support/CommandLine.h" |
80 | #include "llvm/Support/Debug.h" |
81 | #include "llvm/Support/raw_ostream.h" |
82 | #include "llvm/Transforms/Scalar.h" |
83 | #include "llvm/Transforms/Utils/BasicBlockUtils.h" |
84 | #include <cmath> |
85 | using namespace llvm; |
86 | |
87 | #define DEBUG_TYPE "tailcallelim" |
88 | |
STATISTIC(NumEliminated, "Number of tail calls removed");
STATISTIC(NumRetDuped, "Number of returns duplicated");
STATISTIC(NumAccumAdded, "Number of accumulators introduced");
92 | |
static cl::opt<bool> ForceDisableBFI(
    "tre-disable-entrycount-recompute", cl::init(false), cl::Hidden,
    cl::desc("Force disabling recomputing of function entry count, on "
             "successful tail recursion elimination."));
97 | |
98 | /// Scan the specified function for alloca instructions. |
99 | /// If it contains any dynamic allocas, returns false. |
100 | static bool canTRE(Function &F) { |
101 | // TODO: We don't do TRE if dynamic allocas are used. |
102 | // Dynamic allocas allocate stack space which should be |
103 | // deallocated before new iteration started. That is |
104 | // currently not implemented. |
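  // A dynamic alloca is one whose size is not a compile-time constant, or one
  // that does not live in the entry block, e.g. the IR produced for a C99 VLA
  // such as
  //   void f(unsigned n) { char buf[n]; ... }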
  return llvm::all_of(instructions(F), [](Instruction &I) {
    auto *AI = dyn_cast<AllocaInst>(&I);
    return !AI || AI->isStaticAlloca();
  });
109 | } |
110 | |
111 | namespace { |
112 | struct AllocaDerivedValueTracker { |
113 | // Start at a root value and walk its use-def chain to mark calls that use the |
114 | // value or a derived value in AllocaUsers, and places where it may escape in |
115 | // EscapePoints. |
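  // For example, storing the pointer itself somewhere ("*p = &local") or
  // passing it to a call that is neither nocapture nor read-only makes that
  // instruction an escape point, while simply loading through the pointer
  // does not.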
116 | void walk(Value *Root) { |
117 | SmallVector<Use *, 32> Worklist; |
118 | SmallPtrSet<Use *, 32> Visited; |
119 | |
120 | auto AddUsesToWorklist = [&](Value *V) { |
121 | for (auto &U : V->uses()) { |
122 | if (!Visited.insert(Ptr: &U).second) |
123 | continue; |
124 | Worklist.push_back(Elt: &U); |
125 | } |
126 | }; |
127 | |
128 | AddUsesToWorklist(Root); |
129 | |
130 | while (!Worklist.empty()) { |
131 | Use *U = Worklist.pop_back_val(); |
132 | Instruction *I = cast<Instruction>(Val: U->getUser()); |
133 | |
134 | switch (I->getOpcode()) { |
135 | case Instruction::Call: |
136 | case Instruction::Invoke: { |
137 | auto &CB = cast<CallBase>(Val&: *I); |
138 | // If the alloca-derived argument is passed byval it is not an escape |
139 | // point, or a use of an alloca. Calling with byval copies the contents |
140 | // of the alloca into argument registers or stack slots, which exist |
141 | // beyond the lifetime of the current frame. |
142 | if (CB.isArgOperand(U) && CB.isByValArgument(ArgNo: CB.getArgOperandNo(U))) |
143 | continue; |
144 | bool IsNocapture = |
145 | CB.isDataOperand(U) && CB.doesNotCapture(OpNo: CB.getDataOperandNo(U)); |
146 | callUsesLocalStack(CB, IsNocapture); |
147 | if (IsNocapture) { |
148 | // If the alloca-derived argument is passed in as nocapture, then it |
149 | // can't propagate to the call's return. That would be capturing. |
150 | continue; |
151 | } |
152 | break; |
153 | } |
154 | case Instruction::Load: { |
155 | // The result of a load is not alloca-derived (unless an alloca has |
156 | // otherwise escaped, but this is a local analysis). |
157 | continue; |
158 | } |
159 | case Instruction::Store: { |
160 | if (U->getOperandNo() == 0) |
161 | EscapePoints.insert(Ptr: I); |
162 | continue; // Stores have no users to analyze. |
163 | } |
164 | case Instruction::BitCast: |
165 | case Instruction::GetElementPtr: |
166 | case Instruction::PHI: |
167 | case Instruction::Select: |
168 | case Instruction::AddrSpaceCast: |
169 | break; |
170 | default: |
171 | EscapePoints.insert(Ptr: I); |
172 | break; |
173 | } |
174 | |
175 | AddUsesToWorklist(I); |
176 | } |
177 | } |
178 | |
179 | void callUsesLocalStack(CallBase &CB, bool IsNocapture) { |
180 | // Add it to the list of alloca users. |
181 | AllocaUsers.insert(Ptr: &CB); |
182 | |
183 | // If it's nocapture then it can't capture this alloca. |
184 | if (IsNocapture) |
185 | return; |
186 | |
187 | // If it can write to memory, it can leak the alloca value. |
188 | if (!CB.onlyReadsMemory()) |
189 | EscapePoints.insert(Ptr: &CB); |
190 | } |
191 | |
192 | SmallPtrSet<Instruction *, 32> AllocaUsers; |
193 | SmallPtrSet<Instruction *, 32> EscapePoints; |
194 | }; |
195 | } |
196 | |
static bool markTails(Function &F, OptimizationRemarkEmitter *ORE) {
198 | if (F.callsFunctionThatReturnsTwice()) |
199 | return false; |
200 | |
201 | // The local stack holds all alloca instructions and all byval arguments. |
202 | AllocaDerivedValueTracker Tracker; |
203 | for (Argument &Arg : F.args()) { |
204 | if (Arg.hasByValAttr()) |
205 | Tracker.walk(Root: &Arg); |
206 | } |
207 | for (auto &BB : F) { |
208 | for (auto &I : BB) |
209 | if (AllocaInst *AI = dyn_cast<AllocaInst>(Val: &I)) |
210 | Tracker.walk(Root: AI); |
211 | } |
212 | |
213 | bool Modified = false; |
214 | |
215 | // Track whether a block is reachable after an alloca has escaped. Blocks that |
216 | // contain the escaping instruction will be marked as being visited without an |
217 | // escaped alloca, since that is how the block began. |
218 | enum VisitType { |
219 | UNVISITED, |
220 | UNESCAPED, |
221 | ESCAPED |
222 | }; |
223 | DenseMap<BasicBlock *, VisitType> Visited; |
224 | |
225 | // We propagate the fact that an alloca has escaped from block to successor. |
226 | // Visit the blocks that are propagating the escapedness first. To do this, we |
227 | // maintain two worklists. |
228 | SmallVector<BasicBlock *, 32> WorklistUnescaped, WorklistEscaped; |
229 | |
230 | // We may enter a block and visit it thinking that no alloca has escaped yet, |
231 | // then see an escape point and go back around a loop edge and come back to |
232 | // the same block twice. Because of this, we defer setting tail on calls when |
233 | // we first encounter them in a block. Every entry in this list does not |
234 | // statically use an alloca via use-def chain analysis, but may find an alloca |
235 | // through other means if the block turns out to be reachable after an escape |
236 | // point. |
237 | SmallVector<CallInst *, 32> DeferredTails; |
238 | |
239 | BasicBlock *BB = &F.getEntryBlock(); |
240 | VisitType Escaped = UNESCAPED; |
241 | do { |
242 | for (auto &I : *BB) { |
243 | if (Tracker.EscapePoints.count(Ptr: &I)) |
244 | Escaped = ESCAPED; |
245 | |
246 | CallInst *CI = dyn_cast<CallInst>(Val: &I); |
247 | // A PseudoProbeInst has the IntrInaccessibleMemOnly tag hence it is |
248 | // considered accessing memory and will be marked as a tail call if we |
249 | // don't bail out here. |
250 | if (!CI || CI->isTailCall() || isa<PseudoProbeInst>(Val: &I)) |
251 | continue; |
252 | |
253 | // Bail out for intrinsic stackrestore call because it can modify |
254 | // unescaped allocas. |
255 | if (auto *II = dyn_cast<IntrinsicInst>(Val: CI)) |
256 | if (II->getIntrinsicID() == Intrinsic::stackrestore) |
257 | continue; |
258 | |
259 | // Special-case operand bundles "clang.arc.attachedcall", "ptrauth", and |
260 | // "kcfi". |
261 | bool IsNoTail = CI->isNoTailCall() || |
262 | CI->hasOperandBundlesOtherThan( |
263 | IDs: {LLVMContext::OB_clang_arc_attachedcall, |
264 | LLVMContext::OB_ptrauth, LLVMContext::OB_kcfi}); |
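      // Calls carrying any other kind of operand bundle are treated like
      // explicit notail calls and are never marked as tail calls here.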
265 | |
266 | if (!IsNoTail && CI->doesNotAccessMemory()) { |
267 | // A call to a readnone function whose arguments are all things computed |
268 | // outside this function can be marked tail. Even if you stored the |
269 | // alloca address into a global, a readnone function can't load the |
270 | // global anyhow. |
271 | // |
272 | // Note that this runs whether we know an alloca has escaped or not. If |
273 | // it has, then we can't trust Tracker.AllocaUsers to be accurate. |
274 | bool SafeToTail = true; |
275 | for (auto &Arg : CI->args()) { |
276 | if (isa<Constant>(Val: Arg.getUser())) |
277 | continue; |
278 | if (Argument *A = dyn_cast<Argument>(Val: Arg.getUser())) |
279 | if (!A->hasByValAttr()) |
280 | continue; |
281 | SafeToTail = false; |
282 | break; |
283 | } |
284 | if (SafeToTail) { |
285 | using namespace ore; |
286 | ORE->emit(RemarkBuilder: [&]() { |
287 | return OptimizationRemark(DEBUG_TYPE, "tailcall-readnone" , CI) |
288 | << "marked as tail call candidate (readnone)" ; |
289 | }); |
290 | CI->setTailCall(); |
291 | Modified = true; |
292 | continue; |
293 | } |
294 | } |
295 | |
296 | if (!IsNoTail && Escaped == UNESCAPED && !Tracker.AllocaUsers.count(Ptr: CI)) |
297 | DeferredTails.push_back(Elt: CI); |
298 | } |
299 | |
300 | for (auto *SuccBB : successors(BB)) { |
301 | auto &State = Visited[SuccBB]; |
302 | if (State < Escaped) { |
303 | State = Escaped; |
304 | if (State == ESCAPED) |
305 | WorklistEscaped.push_back(Elt: SuccBB); |
306 | else |
307 | WorklistUnescaped.push_back(Elt: SuccBB); |
308 | } |
309 | } |
310 | |
311 | if (!WorklistEscaped.empty()) { |
312 | BB = WorklistEscaped.pop_back_val(); |
313 | Escaped = ESCAPED; |
314 | } else { |
315 | BB = nullptr; |
316 | while (!WorklistUnescaped.empty()) { |
317 | auto *NextBB = WorklistUnescaped.pop_back_val(); |
318 | if (Visited[NextBB] == UNESCAPED) { |
319 | BB = NextBB; |
320 | Escaped = UNESCAPED; |
321 | break; |
322 | } |
323 | } |
324 | } |
325 | } while (BB); |
326 | |
327 | for (CallInst *CI : DeferredTails) { |
328 | if (Visited[CI->getParent()] != ESCAPED) { |
329 | // If the escape point was part way through the block, calls after the |
330 | // escape point wouldn't have been put into DeferredTails. |
331 | LLVM_DEBUG(dbgs() << "Marked as tail call candidate: " << *CI << "\n" ); |
332 | CI->setTailCall(); |
333 | Modified = true; |
334 | } |
335 | } |
336 | |
337 | return Modified; |
338 | } |
339 | |
340 | /// Return true if it is safe to move the specified |
341 | /// instruction from after the call to before the call, assuming that all |
342 | /// instructions between the call and this instruction are movable. |
343 | /// |
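/// For example, a side-effect-free getelementptr between the call and the
/// return that does not use the call's result is considered movable, leaving
/// the call immediately followed by the return.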
344 | static bool canMoveAboveCall(Instruction *I, CallInst *CI, AliasAnalysis *AA) { |
345 | if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: I)) |
346 | if (II->getIntrinsicID() == Intrinsic::lifetime_end && |
347 | llvm::findAllocaForValue(V: II->getArgOperand(i: 1))) |
348 | return true; |
349 | |
350 | // FIXME: We can move load/store/call/free instructions above the call if the |
351 | // call does not mod/ref the memory location being processed. |
352 | if (I->mayHaveSideEffects()) // This also handles volatile loads. |
353 | return false; |
354 | |
355 | if (LoadInst *L = dyn_cast<LoadInst>(Val: I)) { |
356 | // Loads may always be moved above calls without side effects. |
357 | if (CI->mayHaveSideEffects()) { |
358 | // Non-volatile loads may be moved above a call with side effects if it |
359 | // does not write to memory and the load provably won't trap. |
360 | // Writes to memory only matter if they may alias the pointer |
361 | // being loaded from. |
362 | const DataLayout &DL = L->getDataLayout(); |
363 | if (isModSet(MRI: AA->getModRefInfo(I: CI, OptLoc: MemoryLocation::get(LI: L))) || |
364 | !isSafeToLoadUnconditionally(V: L->getPointerOperand(), Ty: L->getType(), |
365 | Alignment: L->getAlign(), DL, ScanFrom: L)) |
366 | return false; |
367 | } |
368 | } |
369 | |
370 | // Otherwise, if this is a side-effect free instruction, check to make sure |
371 | // that it does not use the return value of the call. If it doesn't use the |
372 | // return value of the call, it must only use things that are defined before |
373 | // the call, or movable instructions between the call and the instruction |
374 | // itself. |
375 | return !is_contained(Range: I->operands(), Element: CI); |
376 | } |
377 | |
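// Returns true if I can act as the accumulator in accumulator-recursion
// elimination: an associative and commutative operation (for intrinsics, one
// with a known identity value) that uses the result of the recursive call in
// exactly one operand and whose only user is the return instruction, e.g. the
// 'mul' in "return n * fac(n - 1)".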
378 | static bool canTransformAccumulatorRecursion(Instruction *I, CallInst *CI) { |
379 | if (!I->isAssociative() || !I->isCommutative()) |
380 | return false; |
381 | |
382 | assert(I->getNumOperands() >= 2 && |
383 | "Associative/commutative operations should have at least 2 args!" ); |
384 | |
385 | if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: I)) { |
386 | // Accumulators must have an identity. |
387 | if (!ConstantExpr::getIntrinsicIdentity(II->getIntrinsicID(), Ty: I->getType())) |
388 | return false; |
389 | } |
390 | |
391 | // Exactly one operand should be the result of the call instruction. |
392 | if ((I->getOperand(i: 0) == CI && I->getOperand(i: 1) == CI) || |
393 | (I->getOperand(i: 0) != CI && I->getOperand(i: 1) != CI)) |
394 | return false; |
395 | |
396 | // The only user of this instruction we allow is a single return instruction. |
397 | if (!I->hasOneUse() || !isa<ReturnInst>(Val: I->user_back())) |
398 | return false; |
399 | |
400 | return true; |
401 | } |
402 | |
403 | namespace { |
404 | class TailRecursionEliminator { |
405 | Function &F; |
406 | const TargetTransformInfo *TTI; |
407 | AliasAnalysis *AA; |
408 | OptimizationRemarkEmitter *ORE; |
409 | DomTreeUpdater &DTU; |
410 | BlockFrequencyInfo *const BFI; |
411 | const uint64_t OrigEntryBBFreq; |
412 | const uint64_t OrigEntryCount; |
413 | |
  // The below are shared state we want to have available when eliminating any
  // calls in the function. These values should be populated by
  // createTailRecurseLoopHeader the first time we find a call we can eliminate.
  BasicBlock *HeaderBB = nullptr;
418 | SmallVector<PHINode *, 8> ArgumentPHIs; |
419 | |
420 | // PHI node to store our return value. |
421 | PHINode *RetPN = nullptr; |
422 | |
423 | // i1 PHI node to track if we have a valid return value stored in RetPN. |
424 | PHINode *RetKnownPN = nullptr; |
425 | |
  // Vector of select instructions we inserted. These selects use RetKnownPN
  // to either propagate RetPN or select a new return value.
428 | SmallVector<SelectInst *, 8> RetSelects; |
429 | |
  // The below are shared state needed when performing accumulator recursion.
  // These values should be populated by insertAccumulator the first time we
  // find an elimination that requires an accumulator.
433 | |
434 | // PHI node to store our current accumulated value. |
435 | PHINode *AccPN = nullptr; |
436 | |
437 | // The instruction doing the accumulating. |
438 | Instruction *AccumulatorRecursionInstr = nullptr; |
439 | |
  TailRecursionEliminator(Function &F, const TargetTransformInfo *TTI,
                          AliasAnalysis *AA, OptimizationRemarkEmitter *ORE,
                          DomTreeUpdater &DTU, BlockFrequencyInfo *BFI)
      : F(F), TTI(TTI), AA(AA), ORE(ORE), DTU(DTU), BFI(BFI),
        OrigEntryBBFreq(
            BFI ? BFI->getBlockFreq(&F.getEntryBlock()).getFrequency() : 0U),
        OrigEntryCount(F.getEntryCount() ? F.getEntryCount()->getCount() : 0) {
447 | if (BFI) { |
448 | // The assert is meant as API documentation for the caller. |
449 | assert((OrigEntryCount != 0 && OrigEntryBBFreq != 0) && |
450 | "If a BFI was provided, the function should have both an entry " |
451 | "count that is non-zero and an entry basic block with a non-zero " |
452 | "frequency." ); |
453 | } |
454 | } |
455 | |
456 | CallInst *findTRECandidate(BasicBlock *BB); |
457 | |
458 | void createTailRecurseLoopHeader(CallInst *CI); |
459 | |
460 | void insertAccumulator(Instruction *AccRecInstr); |
461 | |
462 | bool eliminateCall(CallInst *CI); |
463 | |
464 | void cleanupAndFinalize(); |
465 | |
466 | bool processBlock(BasicBlock &BB); |
467 | |
468 | void copyByValueOperandIntoLocalTemp(CallInst *CI, int OpndIdx); |
469 | |
470 | void copyLocalTempOfByValueOperandIntoArguments(CallInst *CI, int OpndIdx); |
471 | |
472 | public: |
473 | static bool eliminate(Function &F, const TargetTransformInfo *TTI, |
474 | AliasAnalysis *AA, OptimizationRemarkEmitter *ORE, |
475 | DomTreeUpdater &DTU, BlockFrequencyInfo *BFI); |
476 | }; |
477 | } // namespace |
478 | |
479 | CallInst *TailRecursionEliminator::findTRECandidate(BasicBlock *BB) { |
480 | Instruction *TI = BB->getTerminator(); |
481 | |
482 | if (&BB->front() == TI) // Make sure there is something before the terminator. |
483 | return nullptr; |
484 | |
485 | // Scan backwards from the return, checking to see if there is a tail call in |
486 | // this block. If so, set CI to it. |
487 | CallInst *CI = nullptr; |
488 | BasicBlock::iterator BBI(TI); |
489 | while (true) { |
490 | CI = dyn_cast<CallInst>(Val&: BBI); |
491 | if (CI && CI->getCalledFunction() == &F) |
492 | break; |
493 | |
494 | if (BBI == BB->begin()) |
495 | return nullptr; // Didn't find a potential tail call. |
496 | --BBI; |
497 | } |
498 | |
499 | assert((!CI->isTailCall() || !CI->isNoTailCall()) && |
500 | "Incompatible call site attributes(Tail,NoTail)" ); |
501 | if (!CI->isTailCall()) |
502 | return nullptr; |
503 | |
504 | // As a special case, detect code like this: |
505 | // double fabs(double f) { return __builtin_fabs(f); } // a 'fabs' call |
506 | // and disable this xform in this case, because the code generator will |
507 | // lower the call to fabs into inline code. |
508 | if (BB == &F.getEntryBlock() && &BB->front() == CI && |
509 | &*std::next(x: BB->begin()) == TI && CI->getCalledFunction() && |
510 | !TTI->isLoweredToCall(F: CI->getCalledFunction())) { |
511 | // A single-block function with just a call and a return. Check that |
512 | // the arguments match. |
513 | auto I = CI->arg_begin(), E = CI->arg_end(); |
514 | Function::arg_iterator FI = F.arg_begin(), FE = F.arg_end(); |
515 | for (; I != E && FI != FE; ++I, ++FI) |
516 | if (*I != &*FI) break; |
517 | if (I == E && FI == FE) |
518 | return nullptr; |
519 | } |
520 | |
521 | return CI; |
522 | } |
523 | |
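// Split off a fresh entry block so that the old entry block becomes the loop
// header ("tailrecurse") that eliminated calls branch back to. Fixed-size
// allocas are moved into the new entry block, and one PHI node per function
// argument (plus the return-value tracking PHIs for non-void functions) is
// created in the header.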
void TailRecursionEliminator::createTailRecurseLoopHeader(CallInst *CI) {
  HeaderBB = &F.getEntryBlock();
  BasicBlock *NewEntry = BasicBlock::Create(F.getContext(), "", &F, HeaderBB);
  NewEntry->takeName(HeaderBB);
  HeaderBB->setName("tailrecurse");
  auto *BI = BranchInst::Create(HeaderBB, NewEntry);
530 | BI->setDebugLoc(DebugLoc::getCompilerGenerated()); |
531 | // If the new branch preserves the debug location of CI, it could result in |
532 | // misleading stepping, if CI is located in a conditional branch. |
533 | // So, here we don't give any debug location to the new branch. |
534 | |
535 | // Move all fixed sized allocas from HeaderBB to NewEntry. |
536 | for (BasicBlock::iterator OEBI = HeaderBB->begin(), E = HeaderBB->end(), |
537 | NEBI = NewEntry->begin(); |
538 | OEBI != E;) |
539 | if (AllocaInst *AI = dyn_cast<AllocaInst>(Val: OEBI++)) |
540 | if (isa<ConstantInt>(Val: AI->getArraySize())) |
541 | AI->moveBefore(InsertPos: NEBI); |
542 | |
543 | // Now that we have created a new block, which jumps to the entry |
544 | // block, insert a PHI node for each argument of the function. |
545 | // For now, we initialize each PHI to only have the real arguments |
546 | // which are passed in. |
547 | BasicBlock::iterator InsertPos = HeaderBB->begin(); |
548 | for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I) { |
549 | PHINode *PN = PHINode::Create(Ty: I->getType(), NumReservedValues: 2, NameStr: I->getName() + ".tr" ); |
550 | PN->insertBefore(InsertPos); |
551 | I->replaceAllUsesWith(V: PN); // Everyone use the PHI node now! |
552 | PN->addIncoming(V: &*I, BB: NewEntry); |
553 | ArgumentPHIs.push_back(Elt: PN); |
554 | } |
555 | |
  // If the function doesn't return void, create the RetPN and RetKnownPN PHI
557 | // nodes to track our return value. We initialize RetPN with poison and |
558 | // RetKnownPN with false since we can't know our return value at function |
559 | // entry. |
560 | Type *RetType = F.getReturnType(); |
561 | if (!RetType->isVoidTy()) { |
562 | Type *BoolType = Type::getInt1Ty(C&: F.getContext()); |
563 | RetPN = PHINode::Create(Ty: RetType, NumReservedValues: 2, NameStr: "ret.tr" ); |
564 | RetPN->insertBefore(InsertPos); |
565 | RetKnownPN = PHINode::Create(Ty: BoolType, NumReservedValues: 2, NameStr: "ret.known.tr" ); |
566 | RetKnownPN->insertBefore(InsertPos); |
567 | |
568 | RetPN->addIncoming(V: PoisonValue::get(T: RetType), BB: NewEntry); |
569 | RetKnownPN->addIncoming(V: ConstantInt::getFalse(Ty: BoolType), BB: NewEntry); |
570 | } |
571 | |
572 | // The entry block was changed from HeaderBB to NewEntry. |
573 | // The forward DominatorTree needs to be recalculated when the EntryBB is |
574 | // changed. In this corner-case we recalculate the entire tree. |
575 | DTU.recalculate(F&: *NewEntry->getParent()); |
576 | } |
577 | |
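// Create the accumulator PHI node in the loop header. The incoming value from
// the original entry block is the identity of the accumulating operation; the
// incoming value along any back edge created by a previously eliminated call
// is the PHI itself, since those iterations did not accumulate anything extra.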
578 | void TailRecursionEliminator::insertAccumulator(Instruction *AccRecInstr) { |
579 | assert(!AccPN && "Trying to insert multiple accumulators" ); |
580 | |
581 | AccumulatorRecursionInstr = AccRecInstr; |
582 | |
583 | // Start by inserting a new PHI node for the accumulator. |
584 | pred_iterator PB = pred_begin(BB: HeaderBB), PE = pred_end(BB: HeaderBB); |
585 | AccPN = PHINode::Create(Ty: F.getReturnType(), NumReservedValues: std::distance(first: PB, last: PE) + 1, |
586 | NameStr: "accumulator.tr" ); |
587 | AccPN->insertBefore(InsertPos: HeaderBB->begin()); |
588 | |
589 | // Loop over all of the predecessors of the tail recursion block. For the |
590 | // real entry into the function we seed the PHI with the identity constant for |
591 | // the accumulation operation. For any other existing branches to this block |
592 | // (due to other tail recursions eliminated) the accumulator is not modified. |
593 | // Because we haven't added the branch in the current block to HeaderBB yet, |
594 | // it will not show up as a predecessor. |
595 | for (pred_iterator PI = PB; PI != PE; ++PI) { |
596 | BasicBlock *P = *PI; |
597 | if (P == &F.getEntryBlock()) { |
598 | Constant *Identity = |
599 | ConstantExpr::getIdentity(I: AccRecInstr, Ty: AccRecInstr->getType()); |
600 | AccPN->addIncoming(V: Identity, BB: P); |
601 | } else { |
602 | AccPN->addIncoming(V: AccPN, BB: P); |
603 | } |
604 | } |
605 | |
606 | ++NumAccumAdded; |
607 | } |
608 | |
// Creates a copy of the contents of the ByVal operand of the specified
// call instruction into a newly created temporary variable.
void TailRecursionEliminator::copyByValueOperandIntoLocalTemp(CallInst *CI,
                                                              int OpndIdx) {
  Type *AggTy = CI->getParamByValType(OpndIdx);
  assert(AggTy);
  const DataLayout &DL = F.getDataLayout();

  // Get alignment of byVal operand.
  Align Alignment(CI->getParamAlign(OpndIdx).valueOrOne());

  // Create an alloca for the temporary copy of the byval operand.
  // Put the alloca into the entry block.
  Value *NewAlloca = new AllocaInst(
      AggTy, DL.getAllocaAddrSpace(), nullptr, Alignment,
      CI->getArgOperand(OpndIdx)->getName(), F.getEntryBlock().begin());

  IRBuilder<> Builder(CI);
  Value *Size = Builder.getInt64(DL.getTypeAllocSize(AggTy));

  // Copy data from the byval operand into the temporary variable.
  Builder.CreateMemCpy(NewAlloca, /*DstAlign*/ Alignment,
                       CI->getArgOperand(OpndIdx),
                       /*SrcAlign*/ Alignment, Size);
  CI->setArgOperand(OpndIdx, NewAlloca);
}
635 | |
// Copies the temporary variable (holding the value of the ByVal argument)
// back into the corresponding function argument location.
void TailRecursionEliminator::copyLocalTempOfByValueOperandIntoArguments(
    CallInst *CI, int OpndIdx) {
  Type *AggTy = CI->getParamByValType(OpndIdx);
  assert(AggTy);
  const DataLayout &DL = F.getDataLayout();

  // Get alignment of byVal operand.
  Align Alignment(CI->getParamAlign(OpndIdx).valueOrOne());

  IRBuilder<> Builder(CI);
  Value *Size = Builder.getInt64(DL.getTypeAllocSize(AggTy));

  // Copy data from the temporary variable into the corresponding
  // function argument location.
  Builder.CreateMemCpy(F.getArg(OpndIdx), /*DstAlign*/ Alignment,
                       CI->getArgOperand(OpndIdx),
                       /*SrcAlign*/ Alignment, Size);
}
656 | |
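// Attempt to eliminate the recursive call CI, which must be followed only by
// instructions that can be moved above it (or by a single accumulator
// operation) and then a return. On success the call and return are replaced
// with argument/accumulator PHI updates and a branch back to the loop header,
// e.g. "%r = tail call i32 @f(i32 %m); ret i32 %r" becomes
// "br label %tailrecurse".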
657 | bool TailRecursionEliminator::eliminateCall(CallInst *CI) { |
658 | ReturnInst *Ret = cast<ReturnInst>(Val: CI->getParent()->getTerminator()); |
659 | |
660 | // Ok, we found a potential tail call. We can currently only transform the |
661 | // tail call if all of the instructions between the call and the return are |
662 | // movable to above the call itself, leaving the call next to the return. |
663 | // Check that this is the case now. |
664 | Instruction *AccRecInstr = nullptr; |
665 | BasicBlock::iterator BBI(CI); |
666 | for (++BBI; &*BBI != Ret; ++BBI) { |
667 | if (canMoveAboveCall(I: &*BBI, CI, AA)) |
668 | continue; |
669 | |
670 | // If we can't move the instruction above the call, it might be because it |
671 | // is an associative and commutative operation that could be transformed |
672 | // using accumulator recursion elimination. Check to see if this is the |
673 | // case, and if so, remember which instruction accumulates for later. |
674 | if (AccPN || !canTransformAccumulatorRecursion(I: &*BBI, CI)) |
675 | return false; // We cannot eliminate the tail recursion! |
676 | |
677 | // Yes, this is accumulator recursion. Remember which instruction |
678 | // accumulates. |
679 | AccRecInstr = &*BBI; |
680 | } |
681 | |
682 | BasicBlock *BB = Ret->getParent(); |
683 | |
684 | using namespace ore; |
685 | ORE->emit(RemarkBuilder: [&]() { |
686 | return OptimizationRemark(DEBUG_TYPE, "tailcall-recursion" , CI) |
687 | << "transforming tail recursion into loop" ; |
688 | }); |
689 | |
690 | // OK! We can transform this tail call. If this is the first one found, |
691 | // create the new entry block, allowing us to branch back to the old entry. |
692 | if (!HeaderBB) |
693 | createTailRecurseLoopHeader(CI); |
694 | |
  // Copy values of ByVal operands into local temporary variables.
696 | for (unsigned I = 0, E = CI->arg_size(); I != E; ++I) { |
697 | if (CI->isByValArgument(ArgNo: I)) |
698 | copyByValueOperandIntoLocalTemp(CI, OpndIdx: I); |
699 | } |
700 | |
701 | // Ok, now that we know we have a pseudo-entry block WITH all of the |
702 | // required PHI nodes, add entries into the PHI node for the actual |
703 | // parameters passed into the tail-recursive call. |
704 | for (unsigned I = 0, E = CI->arg_size(); I != E; ++I) { |
705 | if (CI->isByValArgument(ArgNo: I)) { |
706 | copyLocalTempOfByValueOperandIntoArguments(CI, OpndIdx: I); |
707 | // When eliminating a tail call, we modify the values of the arguments. |
708 | // Therefore, if the byval parameter has a readonly attribute, we have to |
709 | // remove it. It is safe because, from the perspective of a caller, the |
710 | // byval parameter is always treated as "readonly," even if the readonly |
711 | // attribute is removed. |
712 | F.removeParamAttr(ArgNo: I, Kind: Attribute::ReadOnly); |
713 | ArgumentPHIs[I]->addIncoming(V: F.getArg(i: I), BB); |
714 | } else |
715 | ArgumentPHIs[I]->addIncoming(V: CI->getArgOperand(i: I), BB); |
716 | } |
717 | |
718 | if (AccRecInstr) { |
719 | insertAccumulator(AccRecInstr); |
720 | |
721 | // Rewrite the accumulator recursion instruction so that it does not use |
722 | // the result of the call anymore, instead, use the PHI node we just |
723 | // inserted. |
724 | AccRecInstr->setOperand(i: AccRecInstr->getOperand(i: 0) != CI, Val: AccPN); |
725 | } |
726 | |
727 | // Update our return value tracking |
728 | if (RetPN) { |
729 | if (Ret->getReturnValue() == CI || AccRecInstr) { |
730 | // Defer selecting a return value |
731 | RetPN->addIncoming(V: RetPN, BB); |
732 | RetKnownPN->addIncoming(V: RetKnownPN, BB); |
733 | } else { |
734 | // We found a return value we want to use, insert a select instruction to |
735 | // select it if we don't already know what our return value will be and |
736 | // store the result in our return value PHI node. |
737 | SelectInst *SI = |
738 | SelectInst::Create(C: RetKnownPN, S1: RetPN, S2: Ret->getReturnValue(), |
739 | NameStr: "current.ret.tr" , InsertBefore: Ret->getIterator()); |
740 | SI->setDebugLoc(Ret->getDebugLoc()); |
741 | RetSelects.push_back(Elt: SI); |
742 | |
743 | RetPN->addIncoming(V: SI, BB); |
744 | RetKnownPN->addIncoming(V: ConstantInt::getTrue(Ty: RetKnownPN->getType()), BB); |
745 | } |
746 | |
747 | if (AccPN) |
748 | AccPN->addIncoming(V: AccRecInstr ? AccRecInstr : AccPN, BB); |
749 | } |
750 | |
751 | // Now that all of the PHI nodes are in place, remove the call and |
752 | // ret instructions, replacing them with an unconditional branch. |
753 | BranchInst *NewBI = BranchInst::Create(IfTrue: HeaderBB, InsertBefore: Ret->getIterator()); |
754 | NewBI->setDebugLoc(CI->getDebugLoc()); |
755 | |
756 | Ret->eraseFromParent(); // Remove return. |
757 | CI->eraseFromParent(); // Remove call. |
758 | DTU.applyUpdates(Updates: {{DominatorTree::Insert, BB, HeaderBB}}); |
759 | ++NumEliminated; |
760 | if (OrigEntryBBFreq) { |
761 | assert(F.getEntryCount().has_value()); |
762 | // This pass is not expected to remove BBs, only add an entry BB. For that |
763 | // reason, and because the BB here isn't the new entry BB, the BFI lookup is |
764 | // expected to succeed. |
765 | assert(&F.getEntryBlock() != BB); |
766 | auto RelativeBBFreq = |
767 | static_cast<double>(BFI->getBlockFreq(BB).getFrequency()) / |
768 | static_cast<double>(OrigEntryBBFreq); |
769 | auto ToSubtract = |
770 | static_cast<uint64_t>(std::round(x: RelativeBBFreq * OrigEntryCount)); |
771 | auto OldEntryCount = F.getEntryCount()->getCount(); |
772 | if (OldEntryCount <= ToSubtract) { |
773 | LLVM_DEBUG( |
774 | errs() << "[TRE] The entrycount attributable to the recursive call, " |
775 | << ToSubtract |
776 | << ", should be strictly lower than the function entry count, " |
777 | << OldEntryCount << "\n" ); |
778 | } else { |
779 | F.setEntryCount(Count: OldEntryCount - ToSubtract, Type: F.getEntryCount()->getType()); |
780 | } |
781 | } |
782 | return true; |
783 | } |
784 | |
785 | void TailRecursionEliminator::cleanupAndFinalize() { |
786 | // If we eliminated any tail recursions, it's possible that we inserted some |
787 | // silly PHI nodes which just merge an initial value (the incoming operand) |
788 | // with themselves. Check to see if we did and clean up our mess if so. This |
789 | // occurs when a function passes an argument straight through to its tail |
790 | // call. |
791 | for (PHINode *PN : ArgumentPHIs) { |
792 | // If the PHI Node is a dynamic constant, replace it with the value it is. |
793 | if (Value *PNV = simplifyInstruction(I: PN, Q: F.getDataLayout())) { |
794 | PN->replaceAllUsesWith(V: PNV); |
795 | PN->eraseFromParent(); |
796 | } |
797 | } |
798 | |
799 | if (RetPN) { |
800 | if (RetSelects.empty()) { |
801 | // If we didn't insert any select instructions, then we know we didn't |
802 | // store a return value and we can remove the PHI nodes we inserted. |
803 | RetPN->dropAllReferences(); |
804 | RetPN->eraseFromParent(); |
805 | |
806 | RetKnownPN->dropAllReferences(); |
807 | RetKnownPN->eraseFromParent(); |
808 | |
809 | if (AccPN) { |
810 | // We need to insert a copy of our accumulator instruction before any |
811 | // return in the function, and return its result instead. |
812 | Instruction *AccRecInstr = AccumulatorRecursionInstr; |
813 | for (BasicBlock &BB : F) { |
814 | ReturnInst *RI = dyn_cast<ReturnInst>(Val: BB.getTerminator()); |
815 | if (!RI) |
816 | continue; |
817 | |
818 | Instruction *AccRecInstrNew = AccRecInstr->clone(); |
819 | AccRecInstrNew->setName("accumulator.ret.tr" ); |
820 | AccRecInstrNew->setOperand(i: AccRecInstr->getOperand(i: 0) == AccPN, |
821 | Val: RI->getOperand(i_nocapture: 0)); |
822 | AccRecInstrNew->insertBefore(InsertPos: RI->getIterator()); |
823 | AccRecInstrNew->dropLocation(); |
824 | RI->setOperand(i_nocapture: 0, Val_nocapture: AccRecInstrNew); |
825 | } |
826 | } |
827 | } else { |
828 | // We need to insert a select instruction before any return left in the |
829 | // function to select our stored return value if we have one. |
830 | for (BasicBlock &BB : F) { |
831 | ReturnInst *RI = dyn_cast<ReturnInst>(Val: BB.getTerminator()); |
832 | if (!RI) |
833 | continue; |
834 | |
835 | SelectInst *SI = |
836 | SelectInst::Create(C: RetKnownPN, S1: RetPN, S2: RI->getOperand(i_nocapture: 0), |
837 | NameStr: "current.ret.tr" , InsertBefore: RI->getIterator()); |
838 | SI->setDebugLoc(DebugLoc::getCompilerGenerated()); |
839 | RetSelects.push_back(Elt: SI); |
840 | RI->setOperand(i_nocapture: 0, Val_nocapture: SI); |
841 | } |
842 | |
843 | if (AccPN) { |
844 | // We need to insert a copy of our accumulator instruction before any |
845 | // of the selects we inserted, and select its result instead. |
846 | Instruction *AccRecInstr = AccumulatorRecursionInstr; |
847 | for (SelectInst *SI : RetSelects) { |
848 | Instruction *AccRecInstrNew = AccRecInstr->clone(); |
849 | AccRecInstrNew->setName("accumulator.ret.tr" ); |
850 | AccRecInstrNew->setOperand(i: AccRecInstr->getOperand(i: 0) == AccPN, |
851 | Val: SI->getFalseValue()); |
852 | AccRecInstrNew->insertBefore(InsertPos: SI->getIterator()); |
853 | AccRecInstrNew->dropLocation(); |
854 | SI->setFalseValue(AccRecInstrNew); |
855 | } |
856 | } |
857 | } |
858 | } |
859 | } |
860 | |
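// Look for a tail-recursion candidate in BB. Two shapes are handled: BB itself
// ends in a return, or BB unconditionally branches to a block that does
// nothing but return, in which case the return is first duplicated into BB
// (FoldReturnIntoUncondBranch) so that the call and the return end up in the
// same block before eliminateCall runs.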
861 | bool TailRecursionEliminator::processBlock(BasicBlock &BB) { |
862 | Instruction *TI = BB.getTerminator(); |
863 | |
864 | if (BranchInst *BI = dyn_cast<BranchInst>(Val: TI)) { |
865 | if (BI->isConditional()) |
866 | return false; |
867 | |
868 | BasicBlock *Succ = BI->getSuccessor(i: 0); |
869 | ReturnInst *Ret = dyn_cast<ReturnInst>(Val: Succ->getFirstNonPHIOrDbg(SkipPseudoOp: true)); |
870 | |
871 | if (!Ret) |
872 | return false; |
873 | |
874 | CallInst *CI = findTRECandidate(BB: &BB); |
875 | |
876 | if (!CI) |
877 | return false; |
878 | |
879 | LLVM_DEBUG(dbgs() << "FOLDING: " << *Succ |
880 | << "INTO UNCOND BRANCH PRED: " << BB); |
881 | FoldReturnIntoUncondBranch(RI: Ret, BB: Succ, Pred: &BB, DTU: &DTU); |
882 | ++NumRetDuped; |
883 | |
884 | // If all predecessors of Succ have been eliminated by |
885 | // FoldReturnIntoUncondBranch, delete it. It is important to empty it, |
886 | // because the ret instruction in there is still using a value which |
887 | // eliminateCall will attempt to remove. This block can only contain |
888 | // instructions that can't have uses, therefore it is safe to remove. |
889 | if (pred_empty(BB: Succ)) |
890 | DTU.deleteBB(DelBB: Succ); |
891 | |
892 | eliminateCall(CI); |
893 | return true; |
894 | } else if (isa<ReturnInst>(Val: TI)) { |
895 | CallInst *CI = findTRECandidate(BB: &BB); |
896 | |
897 | if (CI) |
898 | return eliminateCall(CI); |
899 | } |
900 | |
901 | return false; |
902 | } |
903 | |
bool TailRecursionEliminator::eliminate(Function &F,
                                        const TargetTransformInfo *TTI,
                                        AliasAnalysis *AA,
                                        OptimizationRemarkEmitter *ORE,
                                        DomTreeUpdater &DTU,
                                        BlockFrequencyInfo *BFI) {
  if (F.getFnAttribute("disable-tail-calls").getValueAsBool())
    return false;
912 | |
913 | bool MadeChange = false; |
914 | MadeChange |= markTails(F, ORE); |
915 | |
916 | // If this function is a varargs function, we won't be able to PHI the args |
917 | // right, so don't even try to convert it... |
918 | if (F.getFunctionType()->isVarArg()) |
919 | return MadeChange; |
920 | |
921 | if (!canTRE(F)) |
922 | return MadeChange; |
923 | |
924 | // Change any tail recursive calls to loops. |
925 | TailRecursionEliminator TRE(F, TTI, AA, ORE, DTU, BFI); |
926 | |
927 | for (BasicBlock &BB : F) |
928 | MadeChange |= TRE.processBlock(BB); |
929 | |
930 | TRE.cleanupAndFinalize(); |
931 | |
932 | return MadeChange; |
933 | } |
934 | |
935 | namespace { |
936 | struct TailCallElim : public FunctionPass { |
937 | static char ID; // Pass identification, replacement for typeid |
938 | TailCallElim() : FunctionPass(ID) { |
939 | initializeTailCallElimPass(*PassRegistry::getPassRegistry()); |
940 | } |
941 | |
942 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
943 | AU.addRequired<TargetTransformInfoWrapperPass>(); |
944 | AU.addRequired<AAResultsWrapperPass>(); |
945 | AU.addRequired<OptimizationRemarkEmitterWrapperPass>(); |
946 | AU.addPreserved<GlobalsAAWrapperPass>(); |
947 | AU.addPreserved<DominatorTreeWrapperPass>(); |
948 | AU.addPreserved<PostDominatorTreeWrapperPass>(); |
949 | } |
950 | |
951 | bool runOnFunction(Function &F) override { |
952 | if (skipFunction(F)) |
953 | return false; |
954 | |
955 | auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>(); |
956 | auto *DT = DTWP ? &DTWP->getDomTree() : nullptr; |
957 | auto *PDTWP = getAnalysisIfAvailable<PostDominatorTreeWrapperPass>(); |
958 | auto *PDT = PDTWP ? &PDTWP->getPostDomTree() : nullptr; |
    // There is no noticeable performance difference here between Lazy and Eager
960 | // UpdateStrategy based on some test results. It is feasible to switch the |
961 | // UpdateStrategy to Lazy if we find it profitable later. |
962 | DomTreeUpdater DTU(DT, PDT, DomTreeUpdater::UpdateStrategy::Eager); |
963 | |
    return TailRecursionEliminator::eliminate(
        F, &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F),
        &getAnalysis<AAResultsWrapperPass>().getAAResults(),
        &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE(), DTU,
        /*BFI=*/nullptr);
969 | } |
970 | }; |
971 | } |
972 | |
973 | char TailCallElim::ID = 0; |
INITIALIZE_PASS_BEGIN(TailCallElim, "tailcallelim", "Tail Call Elimination",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
INITIALIZE_PASS_END(TailCallElim, "tailcallelim", "Tail Call Elimination",
                    false, false)
980 | |
981 | // Public interface to the TailCallElimination pass |
982 | FunctionPass *llvm::createTailCallEliminationPass() { |
983 | return new TailCallElim(); |
984 | } |
985 | |
986 | PreservedAnalyses TailCallElimPass::run(Function &F, |
987 | FunctionAnalysisManager &AM) { |
988 | |
  TargetTransformInfo &TTI = AM.getResult<TargetIRAnalysis>(F);
  AliasAnalysis &AA = AM.getResult<AAManager>(F);
  // This must come first. It needs the two analyses, meaning that if it came
  // after the lines asking for the cached results, and those were nullptr
  // (which, in the case of the PDT, is likely), updates to the trees would be
  // missed.
  auto *BFI = (!ForceDisableBFI && UpdateFunctionEntryCount &&
               F.getEntryCount().has_value() && F.getEntryCount()->getCount())
                  ? &AM.getResult<BlockFrequencyAnalysis>(F)
                  : nullptr;
  auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
  auto *DT = AM.getCachedResult<DominatorTreeAnalysis>(F);
  auto *PDT = AM.getCachedResult<PostDominatorTreeAnalysis>(F);
  // There is no noticeable performance difference here between Lazy and Eager
  // UpdateStrategy based on some test results. It is feasible to switch the
  // UpdateStrategy to Lazy if we find it profitable later.
  DomTreeUpdater DTU(DT, PDT, DomTreeUpdater::UpdateStrategy::Eager);
  bool Changed =
      TailRecursionEliminator::eliminate(F, &TTI, &AA, &ORE, DTU, BFI);
1007 | |
1008 | if (!Changed) |
1009 | return PreservedAnalyses::all(); |
1010 | PreservedAnalyses PA; |
1011 | PA.preserve<DominatorTreeAnalysis>(); |
1012 | PA.preserve<PostDominatorTreeAnalysis>(); |
1013 | return PA; |
1014 | } |
1015 | |