//===- TailRecursionElimination.cpp - Eliminate Tail Calls ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file transforms calls of the current function (self recursion) followed
// by a return instruction with a branch to the entry of the function, creating
// a loop. This pass also implements the following extensions to the basic
// algorithm:
//
// 1. Trivial instructions between the call and return do not prevent the
//    transformation from taking place, though currently the analysis cannot
//    support moving any really useful instructions (only dead ones).
// 2. This pass transforms functions that are prevented from being tail
//    recursive by an associative and commutative expression to use an
//    accumulator variable, thus compiling the typical naive factorial or
//    'fib' implementation into efficient code.
// 3. TRE is performed if the function returns void, if the return
//    returns the result returned by the call, or if the function returns a
//    run-time constant on all exits from the function. It is possible, though
//    unlikely, that the return returns something else (like constant 0), and
//    can still be TRE'd. It can be TRE'd if ALL OTHER return instructions in
//    the function return the exact same value.
// 4. If it can prove that callees do not access their caller stack frame,
//    they are marked as eligible for tail call elimination (by the code
//    generator).
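//
// For example (an illustrative sketch, not code from this pass), extension 2
// lets a naive factorial:
//
//   int fac(int n) { return n <= 1 ? 1 : n * fac(n - 1); }
//
// compile as if it had been written with an explicit accumulator loop:
//
//   int fac(int n) {
//     int acc = 1;
//     while (n > 1) acc *= n--;
//     return acc;
//   }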
//
// There are several improvements that could be made:
//
// 1. If the function has any alloca instructions, these instructions will be
//    moved out of the entry block of the function, causing them to be
//    evaluated each time through the tail recursion. Safely keeping allocas
//    in the entry block requires analysis that proves the tail-called
//    function does not read or write the stack object.
// 2. Tail recursion is only performed if the call immediately precedes the
//    return instruction. It's possible that there could be a jump between
//    the call and the return.
// 3. There can be intervening operations between the call and the return that
//    prevent the TRE from occurring. For example, there could be GEP's and
//    stores to memory that will not be read or written by the call. This
//    requires some substantial analysis (such as with DSA) to prove safe to
//    move ahead of the call, but doing so could allow many more TREs to be
//    performed, for example in TreeAdd/TreeAlloc from the treeadd benchmark.
// 4. The algorithm we use to detect if callees access their caller stack
//    frames is very primitive.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/TailRecursionElimination.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include <cmath>
using namespace llvm;

#define DEBUG_TYPE "tailcallelim"

STATISTIC(NumEliminated, "Number of tail calls removed");
STATISTIC(NumRetDuped, "Number of returns duplicated");
STATISTIC(NumAccumAdded, "Number of accumulators introduced");

static cl::opt<bool> ForceDisableBFI(
    "tre-disable-entrycount-recompute", cl::init(false), cl::Hidden,
    cl::desc("Force disabling recomputing of function entry count, on "
             "successful tail recursion elimination."));

/// Scan the specified function for alloca instructions.
/// If it contains any dynamic allocas, returns false.
static bool canTRE(Function &F) {
  // TODO: We don't do TRE if dynamic allocas are used.
  // Dynamic allocas allocate stack space which should be
  // deallocated before a new iteration starts. That is
  // currently not implemented.
  return llvm::all_of(instructions(F), [](Instruction &I) {
    auto *AI = dyn_cast<AllocaInst>(&I);
    return !AI || AI->isStaticAlloca();
  });
}

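// Illustrative example (not from this file): a function containing
//   %buf = alloca i8, i64 %n   ; dynamically sized -> canTRE returns false
// is rejected, while a fixed-size alloca in the entry block is fine.
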
namespace {
struct AllocaDerivedValueTracker {
  // Start at a root value and walk its use-def chain to mark calls that use the
  // value or a derived value in AllocaUsers, and places where it may escape in
  // EscapePoints.
  void walk(Value *Root) {
    SmallVector<Use *, 32> Worklist;
    SmallPtrSet<Use *, 32> Visited;

    auto AddUsesToWorklist = [&](Value *V) {
      for (auto &U : V->uses()) {
        if (!Visited.insert(&U).second)
          continue;
        Worklist.push_back(&U);
      }
    };

    AddUsesToWorklist(Root);

    while (!Worklist.empty()) {
      Use *U = Worklist.pop_back_val();
      Instruction *I = cast<Instruction>(U->getUser());

      switch (I->getOpcode()) {
      case Instruction::Call:
      case Instruction::Invoke: {
        auto &CB = cast<CallBase>(*I);
        // If the alloca-derived argument is passed byval it is not an escape
        // point, or a use of an alloca. Calling with byval copies the contents
        // of the alloca into argument registers or stack slots, which exist
        // beyond the lifetime of the current frame.
        if (CB.isArgOperand(U) && CB.isByValArgument(CB.getArgOperandNo(U)))
          continue;
        bool IsNocapture =
            CB.isDataOperand(U) && CB.doesNotCapture(CB.getDataOperandNo(U));
        callUsesLocalStack(CB, IsNocapture);
        if (IsNocapture) {
          // If the alloca-derived argument is passed in as nocapture, then it
          // can't propagate to the call's return. That would be capturing.
          continue;
        }
        break;
      }
      case Instruction::Load: {
        // The result of a load is not alloca-derived (unless an alloca has
        // otherwise escaped, but this is a local analysis).
        continue;
      }
      case Instruction::Store: {
        if (U->getOperandNo() == 0)
          EscapePoints.insert(I);
        continue; // Stores have no users to analyze.
      }
      case Instruction::BitCast:
      case Instruction::GetElementPtr:
      case Instruction::PHI:
      case Instruction::Select:
      case Instruction::AddrSpaceCast:
        break;
      default:
        EscapePoints.insert(I);
        break;
      }

      AddUsesToWorklist(I);
    }
  }

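  // How the walk classifies uses (illustrative sketch, not from this file):
  //   %a = alloca i32
  //   %p = getelementptr i32, ptr %a, i64 0  ; derived value, keep walking
  //   store ptr %p, ptr @g                   ; stored pointer -> EscapePoints
  //   call void @f(ptr nocapture %p)         ; added to AllocaUsers, no escape
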
  void callUsesLocalStack(CallBase &CB, bool IsNocapture) {
    // Add it to the list of alloca users.
    AllocaUsers.insert(&CB);

    // If it's nocapture then it can't capture this alloca.
    if (IsNocapture)
      return;

    // If it can write to memory, it can leak the alloca value.
    if (!CB.onlyReadsMemory())
      EscapePoints.insert(&CB);
  }

  SmallPtrSet<Instruction *, 32> AllocaUsers;
  SmallPtrSet<Instruction *, 32> EscapePoints;
};
}

static bool markTails(Function &F, OptimizationRemarkEmitter *ORE) {
  if (F.callsFunctionThatReturnsTwice())
    return false;

  // The local stack holds all alloca instructions and all byval arguments.
  AllocaDerivedValueTracker Tracker;
  for (Argument &Arg : F.args()) {
    if (Arg.hasByValAttr())
      Tracker.walk(&Arg);
  }
  for (auto &BB : F) {
    for (auto &I : BB)
      if (AllocaInst *AI = dyn_cast<AllocaInst>(&I))
        Tracker.walk(AI);
  }

  bool Modified = false;

  // Track whether a block is reachable after an alloca has escaped. Blocks that
  // contain the escaping instruction will be marked as being visited without an
  // escaped alloca, since that is how the block began.
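  // Note that the enumerator order matters: the "State < Escaped" comparison
  // below relies on UNVISITED < UNESCAPED < ESCAPED.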
  enum VisitType {
    UNVISITED,
    UNESCAPED,
    ESCAPED
  };
  DenseMap<BasicBlock *, VisitType> Visited;

  // We propagate the fact that an alloca has escaped from block to successor.
  // Visit the blocks that are propagating the escapedness first. To do this, we
  // maintain two worklists.
  SmallVector<BasicBlock *, 32> WorklistUnescaped, WorklistEscaped;

  // We may enter a block and visit it thinking that no alloca has escaped yet,
  // then see an escape point and go back around a loop edge and come back to
  // the same block twice. Because of this, we defer setting tail on calls when
  // we first encounter them in a block. Every entry in this list does not
  // statically use an alloca via use-def chain analysis, but may find an alloca
  // through other means if the block turns out to be reachable after an escape
  // point.
  SmallVector<CallInst *, 32> DeferredTails;

  BasicBlock *BB = &F.getEntryBlock();
  VisitType Escaped = UNESCAPED;
  do {
    for (auto &I : *BB) {
      if (Tracker.EscapePoints.count(&I))
        Escaped = ESCAPED;

      CallInst *CI = dyn_cast<CallInst>(&I);
      // A PseudoProbeInst has the IntrInaccessibleMemOnly tag hence it is
      // considered accessing memory and will be marked as a tail call if we
      // don't bail out here.
      if (!CI || CI->isTailCall() || isa<PseudoProbeInst>(&I))
        continue;

      // Bail out for intrinsic stackrestore call because it can modify
      // unescaped allocas.
      if (auto *II = dyn_cast<IntrinsicInst>(CI))
        if (II->getIntrinsicID() == Intrinsic::stackrestore)
          continue;

      // Special-case operand bundles "clang.arc.attachedcall", "ptrauth", and
      // "kcfi".
      bool IsNoTail = CI->isNoTailCall() ||
                      CI->hasOperandBundlesOtherThan(
                          {LLVMContext::OB_clang_arc_attachedcall,
                           LLVMContext::OB_ptrauth, LLVMContext::OB_kcfi});

      if (!IsNoTail && CI->doesNotAccessMemory()) {
        // A call to a readnone function whose arguments are all things computed
        // outside this function can be marked tail. Even if you stored the
        // alloca address into a global, a readnone function can't load the
        // global anyhow.
        //
        // Note that this runs whether we know an alloca has escaped or not. If
        // it has, then we can't trust Tracker.AllocaUsers to be accurate.
        bool SafeToTail = true;
        for (auto &Arg : CI->args()) {
          if (isa<Constant>(Arg.getUser()))
            continue;
          if (Argument *A = dyn_cast<Argument>(Arg.getUser()))
            if (!A->hasByValAttr())
              continue;
          SafeToTail = false;
          break;
        }
        if (SafeToTail) {
          using namespace ore;
          ORE->emit([&]() {
            return OptimizationRemark(DEBUG_TYPE, "tailcall-readnone", CI)
                   << "marked as tail call candidate (readnone)";
          });
          CI->setTailCall();
          Modified = true;
          continue;
        }
      }

      if (!IsNoTail && Escaped == UNESCAPED && !Tracker.AllocaUsers.count(CI))
        DeferredTails.push_back(CI);
    }

    for (auto *SuccBB : successors(BB)) {
      auto &State = Visited[SuccBB];
      if (State < Escaped) {
        State = Escaped;
        if (State == ESCAPED)
          WorklistEscaped.push_back(SuccBB);
        else
          WorklistUnescaped.push_back(SuccBB);
      }
    }

    if (!WorklistEscaped.empty()) {
      BB = WorklistEscaped.pop_back_val();
      Escaped = ESCAPED;
    } else {
      BB = nullptr;
      while (!WorklistUnescaped.empty()) {
        auto *NextBB = WorklistUnescaped.pop_back_val();
        if (Visited[NextBB] == UNESCAPED) {
          BB = NextBB;
          Escaped = UNESCAPED;
          break;
        }
      }
    }
  } while (BB);

  for (CallInst *CI : DeferredTails) {
    if (Visited[CI->getParent()] != ESCAPED) {
      // If the escape point was part way through the block, calls after the
      // escape point wouldn't have been put into DeferredTails.
      LLVM_DEBUG(dbgs() << "Marked as tail call candidate: " << *CI << "\n");
      CI->setTailCall();
      Modified = true;
    }
  }

  return Modified;
}

/// Return true if it is safe to move the specified
/// instruction from after the call to before the call, assuming that all
/// instructions between the call and this instruction are movable.
///
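/// For example (illustrative, names hypothetical), in a block such as
///   %v = tail call i32 @callee()
///   %g = load i32, ptr @global
///   ret i32 %v
/// the load may be hoisted above the call when alias analysis shows the call
/// does not write @global and the load cannot trap.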
static bool canMoveAboveCall(Instruction *I, CallInst *CI, AliasAnalysis *AA) {
  if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
    if (II->getIntrinsicID() == Intrinsic::lifetime_end &&
        llvm::findAllocaForValue(II->getArgOperand(1)))
      return true;

  // FIXME: We can move load/store/call/free instructions above the call if the
  // call does not mod/ref the memory location being processed.
  if (I->mayHaveSideEffects()) // This also handles volatile loads.
    return false;

  if (LoadInst *L = dyn_cast<LoadInst>(I)) {
    // Loads may always be moved above calls without side effects.
    if (CI->mayHaveSideEffects()) {
      // Non-volatile loads may be moved above a call with side effects if it
      // does not write to memory and the load provably won't trap.
      // Writes to memory only matter if they may alias the pointer
      // being loaded from.
      const DataLayout &DL = L->getDataLayout();
      if (isModSet(AA->getModRefInfo(CI, MemoryLocation::get(L))) ||
          !isSafeToLoadUnconditionally(L->getPointerOperand(), L->getType(),
                                       L->getAlign(), DL, L))
        return false;
    }
  }

  // Otherwise, if this is a side-effect free instruction, check to make sure
  // that it does not use the return value of the call. If it doesn't use the
  // return value of the call, it must only use things that are defined before
  // the call, or movable instructions between the call and the instruction
  // itself.
  return !is_contained(I->operands(), CI);
}

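// An illustrative pattern this accepts (names are hypothetical):
//   %call = tail call i32 @fac(i32 %sub)
//   %m = mul i32 %call, %n
//   ret i32 %m
// mul is associative and commutative, has an identity (1), exactly one
// operand is the recursive call, and the only user is the return.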
static bool canTransformAccumulatorRecursion(Instruction *I, CallInst *CI) {
  if (!I->isAssociative() || !I->isCommutative())
    return false;

  assert(I->getNumOperands() >= 2 &&
         "Associative/commutative operations should have at least 2 args!");

  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
    // Accumulators must have an identity.
    if (!ConstantExpr::getIntrinsicIdentity(II->getIntrinsicID(), I->getType()))
      return false;
  }

  // Exactly one operand should be the result of the call instruction.
  if ((I->getOperand(0) == CI && I->getOperand(1) == CI) ||
      (I->getOperand(0) != CI && I->getOperand(1) != CI))
    return false;

  // The only user of this instruction we allow is a single return instruction.
  if (!I->hasOneUse() || !isa<ReturnInst>(I->user_back()))
    return false;

  return true;
}

namespace {
class TailRecursionEliminator {
  Function &F;
  const TargetTransformInfo *TTI;
  AliasAnalysis *AA;
  OptimizationRemarkEmitter *ORE;
  DomTreeUpdater &DTU;
  BlockFrequencyInfo *const BFI;
  const uint64_t OrigEntryBBFreq;
  const uint64_t OrigEntryCount;

  // The below are shared state we want to have available when eliminating any
  // calls in the function. Their values should be populated by
  // createTailRecurseLoopHeader the first time we find a call we can eliminate.
  BasicBlock *HeaderBB = nullptr;
  SmallVector<PHINode *, 8> ArgumentPHIs;

  // PHI node to store our return value.
  PHINode *RetPN = nullptr;

  // i1 PHI node to track if we have a valid return value stored in RetPN.
  PHINode *RetKnownPN = nullptr;

  // Vector of select instructions we inserted. These selects use RetKnownPN
  // to either propagate RetPN or select a new return value.
  SmallVector<SelectInst *, 8> RetSelects;

  // The below are shared state needed when performing accumulator recursion.
  // Their values should be populated by insertAccumulator the first time we
  // find an elimination that requires an accumulator.

  // PHI node to store our current accumulated value.
  PHINode *AccPN = nullptr;

  // The instruction doing the accumulating.
  Instruction *AccumulatorRecursionInstr = nullptr;

  TailRecursionEliminator(Function &F, const TargetTransformInfo *TTI,
                          AliasAnalysis *AA, OptimizationRemarkEmitter *ORE,
                          DomTreeUpdater &DTU, BlockFrequencyInfo *BFI)
      : F(F), TTI(TTI), AA(AA), ORE(ORE), DTU(DTU), BFI(BFI),
        OrigEntryBBFreq(
            BFI ? BFI->getBlockFreq(&F.getEntryBlock()).getFrequency() : 0U),
        OrigEntryCount(F.getEntryCount() ? F.getEntryCount()->getCount() : 0) {
    if (BFI) {
      // The assert is meant as API documentation for the caller.
      assert((OrigEntryCount != 0 && OrigEntryBBFreq != 0) &&
             "If a BFI was provided, the function should have both an entry "
             "count that is non-zero and an entry basic block with a non-zero "
             "frequency.");
    }
  }

  CallInst *findTRECandidate(BasicBlock *BB);

  void createTailRecurseLoopHeader(CallInst *CI);

  void insertAccumulator(Instruction *AccRecInstr);

  bool eliminateCall(CallInst *CI);

  void cleanupAndFinalize();

  bool processBlock(BasicBlock &BB);

  void copyByValueOperandIntoLocalTemp(CallInst *CI, int OpndIdx);

  void copyLocalTempOfByValueOperandIntoArguments(CallInst *CI, int OpndIdx);

public:
  static bool eliminate(Function &F, const TargetTransformInfo *TTI,
                        AliasAnalysis *AA, OptimizationRemarkEmitter *ORE,
                        DomTreeUpdater &DTU, BlockFrequencyInfo *BFI);
};
} // namespace

CallInst *TailRecursionEliminator::findTRECandidate(BasicBlock *BB) {
  Instruction *TI = BB->getTerminator();

  if (&BB->front() == TI) // Make sure there is something before the terminator.
    return nullptr;

  // Scan backwards from the return, checking to see if there is a tail call in
  // this block. If so, set CI to it.
  CallInst *CI = nullptr;
  BasicBlock::iterator BBI(TI);
  while (true) {
    CI = dyn_cast<CallInst>(BBI);
    if (CI && CI->getCalledFunction() == &F)
      break;

    if (BBI == BB->begin())
      return nullptr; // Didn't find a potential tail call.
    --BBI;
  }

  assert((!CI->isTailCall() || !CI->isNoTailCall()) &&
         "Incompatible call site attributes(Tail,NoTail)");
  if (!CI->isTailCall())
    return nullptr;

  // As a special case, detect code like this:
  //   double fabs(double f) { return __builtin_fabs(f); } // a 'fabs' call
  // and disable this xform in this case, because the code generator will
  // lower the call to fabs into inline code.
  if (BB == &F.getEntryBlock() && &BB->front() == CI &&
      &*std::next(BB->begin()) == TI && CI->getCalledFunction() &&
      !TTI->isLoweredToCall(CI->getCalledFunction())) {
    // A single-block function with just a call and a return. Check that
    // the arguments match.
    auto I = CI->arg_begin(), E = CI->arg_end();
    Function::arg_iterator FI = F.arg_begin(), FE = F.arg_end();
    for (; I != E && FI != FE; ++I, ++FI)
      if (*I != &*FI) break;
    if (I == E && FI == FE)
      return nullptr;
  }

  return CI;
}

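// After the first elimination the function has this shape (an illustrative
// sketch; the value names are hypothetical):
//   entry:                ; NewEntry, now holds the static allocas
//     br label %tailrecurse
//   tailrecurse:          ; old entry block, now the loop header
//     %n.tr = phi i32 [ %n, %entry ], [ %n.next, %recurse ]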
void TailRecursionEliminator::createTailRecurseLoopHeader(CallInst *CI) {
  HeaderBB = &F.getEntryBlock();
  BasicBlock *NewEntry = BasicBlock::Create(F.getContext(), "", &F, HeaderBB);
  NewEntry->takeName(HeaderBB);
  HeaderBB->setName("tailrecurse");
  auto *BI = BranchInst::Create(HeaderBB, NewEntry);
  // If the new branch preserved the debug location of CI, it could result in
  // misleading stepping, if CI is located in a conditional branch.
  // So, here we give the new branch a compiler-generated location instead of
  // CI's.
  BI->setDebugLoc(DebugLoc::getCompilerGenerated());

  // Move all fixed sized allocas from HeaderBB to NewEntry.
  for (BasicBlock::iterator OEBI = HeaderBB->begin(), E = HeaderBB->end(),
                            NEBI = NewEntry->begin();
       OEBI != E;)
    if (AllocaInst *AI = dyn_cast<AllocaInst>(OEBI++))
      if (isa<ConstantInt>(AI->getArraySize()))
        AI->moveBefore(NEBI);

  // Now that we have created a new block, which jumps to the entry
  // block, insert a PHI node for each argument of the function.
  // For now, we initialize each PHI to only have the real arguments
  // which are passed in.
  BasicBlock::iterator InsertPos = HeaderBB->begin();
  for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I) {
    PHINode *PN = PHINode::Create(I->getType(), 2, I->getName() + ".tr");
    PN->insertBefore(InsertPos);
    I->replaceAllUsesWith(PN); // Everyone use the PHI node now!
    PN->addIncoming(&*I, NewEntry);
    ArgumentPHIs.push_back(PN);
  }

  // If the function doesn't return void, create the RetPN and RetKnownPN PHI
  // nodes to track our return value. We initialize RetPN with poison and
  // RetKnownPN with false since we can't know our return value at function
  // entry.
  Type *RetType = F.getReturnType();
  if (!RetType->isVoidTy()) {
    Type *BoolType = Type::getInt1Ty(F.getContext());
    RetPN = PHINode::Create(RetType, 2, "ret.tr");
    RetPN->insertBefore(InsertPos);
    RetKnownPN = PHINode::Create(BoolType, 2, "ret.known.tr");
    RetKnownPN->insertBefore(InsertPos);

    RetPN->addIncoming(PoisonValue::get(RetType), NewEntry);
    RetKnownPN->addIncoming(ConstantInt::getFalse(BoolType), NewEntry);
  }

  // The entry block was changed from HeaderBB to NewEntry.
  // The forward DominatorTree needs to be recalculated when the EntryBB is
  // changed. In this corner-case we recalculate the entire tree.
  DTU.recalculate(*NewEntry->getParent());
}

void TailRecursionEliminator::insertAccumulator(Instruction *AccRecInstr) {
  assert(!AccPN && "Trying to insert multiple accumulators");

  AccumulatorRecursionInstr = AccRecInstr;

  // Start by inserting a new PHI node for the accumulator.
  pred_iterator PB = pred_begin(HeaderBB), PE = pred_end(HeaderBB);
  AccPN = PHINode::Create(F.getReturnType(), std::distance(PB, PE) + 1,
                          "accumulator.tr");
  AccPN->insertBefore(HeaderBB->begin());

  // Loop over all of the predecessors of the tail recursion block. For the
  // real entry into the function we seed the PHI with the identity constant for
  // the accumulation operation. For any other existing branches to this block
  // (due to other tail recursions eliminated) the accumulator is not modified.
  // Because we haven't added the branch in the current block to HeaderBB yet,
  // it will not show up as a predecessor.
  for (pred_iterator PI = PB; PI != PE; ++PI) {
    BasicBlock *P = *PI;
    if (P == &F.getEntryBlock()) {
      Constant *Identity =
          ConstantExpr::getIdentity(AccRecInstr, AccRecInstr->getType());
      AccPN->addIncoming(Identity, P);
    } else {
      AccPN->addIncoming(AccPN, P);
    }
  }

  ++NumAccumAdded;
}

// Creates a copy of the contents of the ByVal operand of the specified
// call instruction into a newly created temporary variable.
void TailRecursionEliminator::copyByValueOperandIntoLocalTemp(CallInst *CI,
                                                              int OpndIdx) {
  Type *AggTy = CI->getParamByValType(OpndIdx);
  assert(AggTy);
  const DataLayout &DL = F.getDataLayout();

  // Get alignment of byVal operand.
  Align Alignment(CI->getParamAlign(OpndIdx).valueOrOne());

  // Create an alloca for the temporary byval operand.
  // Put the alloca into the entry block.
  Value *NewAlloca = new AllocaInst(
      AggTy, DL.getAllocaAddrSpace(), nullptr, Alignment,
      CI->getArgOperand(OpndIdx)->getName(), F.getEntryBlock().begin());

  IRBuilder<> Builder(CI);
  Value *Size = Builder.getInt64(DL.getTypeAllocSize(AggTy));

  // Copy data from the byval operand into the temporary variable.
  Builder.CreateMemCpy(NewAlloca, /*DstAlign*/ Alignment,
                       CI->getArgOperand(OpndIdx),
                       /*SrcAlign*/ Alignment, Size);
  CI->setArgOperand(OpndIdx, NewAlloca);
}

// Creates a copy from the temporary variable (keeping the value of the ByVal
// argument) into the corresponding function argument location.
void TailRecursionEliminator::copyLocalTempOfByValueOperandIntoArguments(
    CallInst *CI, int OpndIdx) {
  Type *AggTy = CI->getParamByValType(OpndIdx);
  assert(AggTy);
  const DataLayout &DL = F.getDataLayout();

  // Get alignment of byVal operand.
  Align Alignment(CI->getParamAlign(OpndIdx).valueOrOne());

  IRBuilder<> Builder(CI);
  Value *Size = Builder.getInt64(DL.getTypeAllocSize(AggTy));

  // Copy data from the temporary variable into the corresponding
  // function argument location.
  Builder.CreateMemCpy(F.getArg(OpndIdx), /*DstAlign*/ Alignment,
                       CI->getArgOperand(OpndIdx),
                       /*SrcAlign*/ Alignment, Size);
}

bool TailRecursionEliminator::eliminateCall(CallInst *CI) {
  ReturnInst *Ret = cast<ReturnInst>(CI->getParent()->getTerminator());

  // Ok, we found a potential tail call. We can currently only transform the
  // tail call if all of the instructions between the call and the return are
  // movable to above the call itself, leaving the call next to the return.
  // Check that this is the case now.
  Instruction *AccRecInstr = nullptr;
  BasicBlock::iterator BBI(CI);
  for (++BBI; &*BBI != Ret; ++BBI) {
    if (canMoveAboveCall(&*BBI, CI, AA))
      continue;

    // If we can't move the instruction above the call, it might be because it
    // is an associative and commutative operation that could be transformed
    // using accumulator recursion elimination. Check to see if this is the
    // case, and if so, remember which instruction accumulates for later.
    if (AccPN || !canTransformAccumulatorRecursion(&*BBI, CI))
      return false; // We cannot eliminate the tail recursion!

    // Yes, this is accumulator recursion. Remember which instruction
    // accumulates.
    AccRecInstr = &*BBI;
  }

  BasicBlock *BB = Ret->getParent();

  using namespace ore;
  ORE->emit([&]() {
    return OptimizationRemark(DEBUG_TYPE, "tailcall-recursion", CI)
           << "transforming tail recursion into loop";
  });

  // OK! We can transform this tail call. If this is the first one found,
  // create the new entry block, allowing us to branch back to the old entry.
  if (!HeaderBB)
    createTailRecurseLoopHeader(CI);

  // Copy values of ByVal operands into local temporary variables.
  for (unsigned I = 0, E = CI->arg_size(); I != E; ++I) {
    if (CI->isByValArgument(I))
      copyByValueOperandIntoLocalTemp(CI, I);
  }

  // Ok, now that we know we have a pseudo-entry block WITH all of the
  // required PHI nodes, add entries into the PHI node for the actual
  // parameters passed into the tail-recursive call.
  for (unsigned I = 0, E = CI->arg_size(); I != E; ++I) {
    if (CI->isByValArgument(I)) {
      copyLocalTempOfByValueOperandIntoArguments(CI, I);
      // When eliminating a tail call, we modify the values of the arguments.
      // Therefore, if the byval parameter has a readonly attribute, we have to
      // remove it. It is safe because, from the perspective of a caller, the
      // byval parameter is always treated as "readonly," even if the readonly
      // attribute is removed.
      F.removeParamAttr(I, Attribute::ReadOnly);
      ArgumentPHIs[I]->addIncoming(F.getArg(I), BB);
    } else
      ArgumentPHIs[I]->addIncoming(CI->getArgOperand(I), BB);
  }

  if (AccRecInstr) {
    insertAccumulator(AccRecInstr);

    // Rewrite the accumulator recursion instruction so that it does not use
    // the result of the call anymore, instead, use the PHI node we just
    // inserted.
    AccRecInstr->setOperand(AccRecInstr->getOperand(0) != CI, AccPN);
  }

  // Update our return value tracking.
  if (RetPN) {
    if (Ret->getReturnValue() == CI || AccRecInstr) {
      // Defer selecting a return value.
      RetPN->addIncoming(RetPN, BB);
      RetKnownPN->addIncoming(RetKnownPN, BB);
    } else {
      // We found a return value we want to use, insert a select instruction to
      // select it if we don't already know what our return value will be and
      // store the result in our return value PHI node.
      SelectInst *SI =
          SelectInst::Create(RetKnownPN, RetPN, Ret->getReturnValue(),
                             "current.ret.tr", Ret->getIterator());
      SI->setDebugLoc(Ret->getDebugLoc());
      RetSelects.push_back(SI);

      RetPN->addIncoming(SI, BB);
      RetKnownPN->addIncoming(ConstantInt::getTrue(RetKnownPN->getType()), BB);
    }

    if (AccPN)
      AccPN->addIncoming(AccRecInstr ? AccRecInstr : AccPN, BB);
  }

  // Now that all of the PHI nodes are in place, remove the call and
  // ret instructions, replacing them with an unconditional branch.
  BranchInst *NewBI = BranchInst::Create(HeaderBB, Ret->getIterator());
  NewBI->setDebugLoc(CI->getDebugLoc());

  Ret->eraseFromParent(); // Remove return.
  CI->eraseFromParent();  // Remove call.
  DTU.applyUpdates({{DominatorTree::Insert, BB, HeaderBB}});
  ++NumEliminated;
  if (OrigEntryBBFreq) {
    assert(F.getEntryCount().has_value());
    // This pass is not expected to remove BBs, only add an entry BB. For that
    // reason, and because the BB here isn't the new entry BB, the BFI lookup is
    // expected to succeed.
    assert(&F.getEntryBlock() != BB);
    auto RelativeBBFreq =
        static_cast<double>(BFI->getBlockFreq(BB).getFrequency()) /
        static_cast<double>(OrigEntryBBFreq);
    auto ToSubtract =
        static_cast<uint64_t>(std::round(RelativeBBFreq * OrigEntryCount));
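    // E.g. (illustrative): with an original entry count of 100 and this block
    // running at half the entry block's frequency, ToSubtract comes to 50.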
    auto OldEntryCount = F.getEntryCount()->getCount();
    if (OldEntryCount <= ToSubtract) {
      LLVM_DEBUG(
          errs() << "[TRE] The entrycount attributable to the recursive call, "
                 << ToSubtract
                 << ", should be strictly lower than the function entry count, "
                 << OldEntryCount << "\n");
    } else {
      F.setEntryCount(OldEntryCount - ToSubtract, F.getEntryCount()->getType());
    }
  }
  return true;
}

void TailRecursionEliminator::cleanupAndFinalize() {
  // If we eliminated any tail recursions, it's possible that we inserted some
  // silly PHI nodes which just merge an initial value (the incoming operand)
  // with themselves. Check to see if we did and clean up our mess if so. This
  // occurs when a function passes an argument straight through to its tail
  // call.
  for (PHINode *PN : ArgumentPHIs) {
    // If the PHI Node is a dynamic constant, replace it with the value it is.
    if (Value *PNV = simplifyInstruction(PN, F.getDataLayout())) {
      PN->replaceAllUsesWith(PNV);
      PN->eraseFromParent();
    }
  }

  if (RetPN) {
    if (RetSelects.empty()) {
      // If we didn't insert any select instructions, then we know we didn't
      // store a return value and we can remove the PHI nodes we inserted.
      RetPN->dropAllReferences();
      RetPN->eraseFromParent();

      RetKnownPN->dropAllReferences();
      RetKnownPN->eraseFromParent();

      if (AccPN) {
        // We need to insert a copy of our accumulator instruction before any
        // return in the function, and return its result instead.
        Instruction *AccRecInstr = AccumulatorRecursionInstr;
        for (BasicBlock &BB : F) {
          ReturnInst *RI = dyn_cast<ReturnInst>(BB.getTerminator());
          if (!RI)
            continue;

          Instruction *AccRecInstrNew = AccRecInstr->clone();
          AccRecInstrNew->setName("accumulator.ret.tr");
          AccRecInstrNew->setOperand(AccRecInstr->getOperand(0) == AccPN,
                                     RI->getOperand(0));
          AccRecInstrNew->insertBefore(RI->getIterator());
          AccRecInstrNew->dropLocation();
          RI->setOperand(0, AccRecInstrNew);
        }
      }
    } else {
      // We need to insert a select instruction before any return left in the
      // function to select our stored return value if we have one.
      for (BasicBlock &BB : F) {
        ReturnInst *RI = dyn_cast<ReturnInst>(BB.getTerminator());
        if (!RI)
          continue;

        SelectInst *SI =
            SelectInst::Create(RetKnownPN, RetPN, RI->getOperand(0),
                               "current.ret.tr", RI->getIterator());
        SI->setDebugLoc(DebugLoc::getCompilerGenerated());
        RetSelects.push_back(SI);
        RI->setOperand(0, SI);
      }

      if (AccPN) {
        // We need to insert a copy of our accumulator instruction before any
        // of the selects we inserted, and select its result instead.
        Instruction *AccRecInstr = AccumulatorRecursionInstr;
        for (SelectInst *SI : RetSelects) {
          Instruction *AccRecInstrNew = AccRecInstr->clone();
          AccRecInstrNew->setName("accumulator.ret.tr");
          AccRecInstrNew->setOperand(AccRecInstr->getOperand(0) == AccPN,
                                     SI->getFalseValue());
          AccRecInstrNew->insertBefore(SI->getIterator());
          AccRecInstrNew->dropLocation();
          SI->setFalseValue(AccRecInstrNew);
        }
      }
    }
  }
}

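// An illustrative CFG shape handled by the unconditional-branch case below
// (not from this file):
//   bb:
//     %r = tail call i32 @f(i32 %n)
//     br label %exit
//   exit:
//     ret i32 %r
// The return is first folded (duplicated) into the predecessor, after which
// the block matches the plain "call; ret" pattern.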
bool TailRecursionEliminator::processBlock(BasicBlock &BB) {
  Instruction *TI = BB.getTerminator();

  if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
    if (BI->isConditional())
      return false;

    BasicBlock *Succ = BI->getSuccessor(0);
    ReturnInst *Ret = dyn_cast<ReturnInst>(Succ->getFirstNonPHIOrDbg(true));

    if (!Ret)
      return false;

    CallInst *CI = findTRECandidate(&BB);

    if (!CI)
      return false;

    LLVM_DEBUG(dbgs() << "FOLDING: " << *Succ
                      << "INTO UNCOND BRANCH PRED: " << BB);
    FoldReturnIntoUncondBranch(Ret, Succ, &BB, &DTU);
    ++NumRetDuped;

    // If all predecessors of Succ have been eliminated by
    // FoldReturnIntoUncondBranch, delete it. It is important to empty it,
    // because the ret instruction in there is still using a value which
    // eliminateCall will attempt to remove. This block can only contain
    // instructions that can't have uses, therefore it is safe to remove.
    if (pred_empty(Succ))
      DTU.deleteBB(Succ);

    eliminateCall(CI);
    return true;
  } else if (isa<ReturnInst>(TI)) {
    CallInst *CI = findTRECandidate(&BB);

    if (CI)
      return eliminateCall(CI);
  }

  return false;
}

bool TailRecursionEliminator::eliminate(Function &F,
                                        const TargetTransformInfo *TTI,
                                        AliasAnalysis *AA,
                                        OptimizationRemarkEmitter *ORE,
                                        DomTreeUpdater &DTU,
                                        BlockFrequencyInfo *BFI) {
  if (F.getFnAttribute("disable-tail-calls").getValueAsBool())
    return false;

  bool MadeChange = false;
  MadeChange |= markTails(F, ORE);

  // If this function is a varargs function, we won't be able to PHI the args
  // right, so don't even try to convert it...
  if (F.getFunctionType()->isVarArg())
    return MadeChange;

  if (!canTRE(F))
    return MadeChange;

  // Change any tail recursive calls to loops.
  TailRecursionEliminator TRE(F, TTI, AA, ORE, DTU, BFI);

  for (BasicBlock &BB : F)
    MadeChange |= TRE.processBlock(BB);

  TRE.cleanupAndFinalize();

  return MadeChange;
}

namespace {
struct TailCallElim : public FunctionPass {
  static char ID; // Pass identification, replacement for typeid
  TailCallElim() : FunctionPass(ID) {
    initializeTailCallElimPass(*PassRegistry::getPassRegistry());
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<TargetTransformInfoWrapperPass>();
    AU.addRequired<AAResultsWrapperPass>();
    AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
    AU.addPreserved<GlobalsAAWrapperPass>();
    AU.addPreserved<DominatorTreeWrapperPass>();
    AU.addPreserved<PostDominatorTreeWrapperPass>();
  }

  bool runOnFunction(Function &F) override {
    if (skipFunction(F))
      return false;

    auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
    auto *DT = DTWP ? &DTWP->getDomTree() : nullptr;
    auto *PDTWP = getAnalysisIfAvailable<PostDominatorTreeWrapperPass>();
    auto *PDT = PDTWP ? &PDTWP->getPostDomTree() : nullptr;
    // There is no noticeable performance difference here between Lazy and
    // Eager UpdateStrategy based on some test results. It is feasible to
    // switch the UpdateStrategy to Lazy if we find it profitable later.
    DomTreeUpdater DTU(DT, PDT, DomTreeUpdater::UpdateStrategy::Eager);

    return TailRecursionEliminator::eliminate(
        F, &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F),
        &getAnalysis<AAResultsWrapperPass>().getAAResults(),
        &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE(), DTU,
        /*BFI=*/nullptr);
  }
};
} // namespace

char TailCallElim::ID = 0;
INITIALIZE_PASS_BEGIN(TailCallElim, "tailcallelim", "Tail Call Elimination",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
INITIALIZE_PASS_END(TailCallElim, "tailcallelim", "Tail Call Elimination",
                    false, false)

// Public interface to the TailCallElimination pass
FunctionPass *llvm::createTailCallEliminationPass() {
  return new TailCallElim();
}

PreservedAnalyses TailCallElimPass::run(Function &F,
                                        FunctionAnalysisManager &AM) {

  TargetTransformInfo &TTI = AM.getResult<TargetIRAnalysis>(F);
  AliasAnalysis &AA = AM.getResult<AAManager>(F);
  // This must come first. It needs the 2 analyses, meaning, if it came after
  // the lines asking for the cached result, should they be nullptr (which, in
  // the case of the PDT, is likely), updates to the trees would be missed.
  auto *BFI = (!ForceDisableBFI && UpdateFunctionEntryCount &&
               F.getEntryCount().has_value() && F.getEntryCount()->getCount())
                  ? &AM.getResult<BlockFrequencyAnalysis>(F)
                  : nullptr;
  auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
  auto *DT = AM.getCachedResult<DominatorTreeAnalysis>(F);
  auto *PDT = AM.getCachedResult<PostDominatorTreeAnalysis>(F);
  // There is no noticeable performance difference here between Lazy and Eager
  // UpdateStrategy based on some test results. It is feasible to switch the
  // UpdateStrategy to Lazy if we find it profitable later.
  DomTreeUpdater DTU(DT, PDT, DomTreeUpdater::UpdateStrategy::Eager);
  bool Changed =
      TailRecursionEliminator::eliminate(F, &TTI, &AA, &ORE, DTU, BFI);

  if (!Changed)
    return PreservedAnalyses::all();
  PreservedAnalyses PA;
  PA.preserve<DominatorTreeAnalysis>();
  PA.preserve<PostDominatorTreeAnalysis>();
  return PA;
}
| 1015 | |