1//===- UnifyLoopExits.cpp - Redirect exiting edges to one block -*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// For each natural loop with multiple exit blocks, this pass creates a new
10// block N such that all exiting blocks now branch to N, and then control flow
11// is redistributed to all the original exit blocks.
12//
13// Limitation: This assumes that all terminators in the CFG are direct branches
14// (the "br" instruction). The presence of any other control flow
15// such as indirectbr, switch or callbr will cause an assert.
16//
17//===----------------------------------------------------------------------===//
18
19#include "llvm/Transforms/Utils/UnifyLoopExits.h"
20#include "llvm/ADT/MapVector.h"
21#include "llvm/Analysis/DomTreeUpdater.h"
22#include "llvm/Analysis/LoopInfo.h"
23#include "llvm/IR/Constants.h"
24#include "llvm/IR/Dominators.h"
25#include "llvm/InitializePasses.h"
26#include "llvm/Support/CommandLine.h"
27#include "llvm/Transforms/Utils.h"
28#include "llvm/Transforms/Utils/BasicBlockUtils.h"
29#include "llvm/Transforms/Utils/ControlFlowUtils.h"
30
31#define DEBUG_TYPE "unify-loop-exits"
32
33using namespace llvm;
34
35static cl::opt<unsigned> MaxBooleansInControlFlowHub(
36 "max-booleans-in-control-flow-hub", cl::init(Val: 32), cl::Hidden,
37 cl::desc("Set the maximum number of outgoing blocks for using a boolean "
38 "value to record the exiting block in the ControlFlowHub."));
39
40namespace {
41struct UnifyLoopExitsLegacyPass : public FunctionPass {
42 static char ID;
43 UnifyLoopExitsLegacyPass() : FunctionPass(ID) {
44 initializeUnifyLoopExitsLegacyPassPass(*PassRegistry::getPassRegistry());
45 }
46
47 void getAnalysisUsage(AnalysisUsage &AU) const override {
48 AU.addRequired<LoopInfoWrapperPass>();
49 AU.addRequired<DominatorTreeWrapperPass>();
50 AU.addPreserved<LoopInfoWrapperPass>();
51 AU.addPreserved<DominatorTreeWrapperPass>();
52 }
53
54 bool runOnFunction(Function &F) override;
55};
56} // namespace
57
58char UnifyLoopExitsLegacyPass::ID = 0;
59
60FunctionPass *llvm::createUnifyLoopExitsPass() {
61 return new UnifyLoopExitsLegacyPass();
62}
63
64INITIALIZE_PASS_BEGIN(UnifyLoopExitsLegacyPass, "unify-loop-exits",
65 "Fixup each natural loop to have a single exit block",
66 false /* Only looks at CFG */, false /* Analysis Pass */)
67INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
68INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
69INITIALIZE_PASS_END(UnifyLoopExitsLegacyPass, "unify-loop-exits",
70 "Fixup each natural loop to have a single exit block",
71 false /* Only looks at CFG */, false /* Analysis Pass */)
72
73// The current transform introduces new control flow paths which may break the
74// SSA requirement that every def must dominate all its uses. For example,
75// consider a value D defined inside the loop that is used by some instruction
76// U outside the loop. It follows that D dominates U, since the original
77// program has valid SSA form. After merging the exits, all paths from D to U
78// now flow through the unified exit block. In addition, there may be other
79// paths that do not pass through D, but now reach the unified exit
80// block. Thus, D no longer dominates U.
81//
82// Restore the dominance by creating a phi for each such D at the new unified
83// loop exit. But when doing this, ignore any uses U that are in the new unified
84// loop exit, since those were introduced specially when the block was created.
85//
86// The use of SSAUpdater seems like overkill for this operation. The location
87// for creating the new PHI is well-known, and also the set of incoming blocks
88// to the new PHI.
89static void restoreSSA(const DominatorTree &DT, const Loop *L,
90 SmallVectorImpl<BasicBlock *> &Incoming,
91 BasicBlock *LoopExitBlock) {
92 using InstVector = SmallVector<Instruction *, 8>;
93 using IIMap = MapVector<Instruction *, InstVector>;
94 IIMap ExternalUsers;
95 for (auto *BB : L->blocks()) {
96 for (auto &I : *BB) {
97 for (auto &U : I.uses()) {
98 auto UserInst = cast<Instruction>(Val: U.getUser());
99 auto UserBlock = UserInst->getParent();
100 if (UserBlock == LoopExitBlock)
101 continue;
102 if (L->contains(BB: UserBlock))
103 continue;
104 LLVM_DEBUG(dbgs() << "added ext use for " << I.getName() << "("
105 << BB->getName() << ")"
106 << ": " << UserInst->getName() << "("
107 << UserBlock->getName() << ")"
108 << "\n");
109 ExternalUsers[&I].push_back(Elt: UserInst);
110 }
111 }
112 }
113
114 for (const auto &II : ExternalUsers) {
115 // For each Def used outside the loop, create NewPhi in
116 // LoopExitBlock. NewPhi receives Def only along exiting blocks that
117 // dominate it, while the remaining values are undefined since those paths
118 // didn't exist in the original CFG.
119 auto Def = II.first;
120 LLVM_DEBUG(dbgs() << "externally used: " << Def->getName() << "\n");
121 auto NewPhi =
122 PHINode::Create(Ty: Def->getType(), NumReservedValues: Incoming.size(),
123 NameStr: Def->getName() + ".moved", InsertBefore: LoopExitBlock->begin());
124 for (auto *In : Incoming) {
125 LLVM_DEBUG(dbgs() << "predecessor " << In->getName() << ": ");
126 if (Def->getParent() == In || DT.dominates(Def, BB: In)) {
127 LLVM_DEBUG(dbgs() << "dominated\n");
128 NewPhi->addIncoming(V: Def, BB: In);
129 } else {
130 LLVM_DEBUG(dbgs() << "not dominated\n");
131 NewPhi->addIncoming(V: PoisonValue::get(T: Def->getType()), BB: In);
132 }
133 }
134
135 LLVM_DEBUG(dbgs() << "external users:");
136 for (auto *U : II.second) {
137 LLVM_DEBUG(dbgs() << " " << U->getName());
138 U->replaceUsesOfWith(From: Def, To: NewPhi);
139 }
140 LLVM_DEBUG(dbgs() << "\n");
141 }
142}
143
144static bool unifyLoopExits(DominatorTree &DT, LoopInfo &LI, Loop *L) {
145 // To unify the loop exits, we need a list of the exiting blocks as
146 // well as exit blocks. The functions for locating these lists both
147 // traverse the entire loop body. It is more efficient to first
148 // locate the exiting blocks and then examine their successors to
149 // locate the exit blocks.
150 SmallVector<BasicBlock *, 8> ExitingBlocks;
151 L->getExitingBlocks(ExitingBlocks);
152
153 // Redirect exiting edges through a control flow hub.
154 ControlFlowHub CHub;
155 for (auto *BB : ExitingBlocks) {
156 auto *Branch = cast<BranchInst>(Val: BB->getTerminator());
157 BasicBlock *Succ0 = Branch->getSuccessor(i: 0);
158 Succ0 = L->contains(BB: Succ0) ? nullptr : Succ0;
159
160 BasicBlock *Succ1 =
161 Branch->isUnconditional() ? nullptr : Branch->getSuccessor(i: 1);
162 Succ1 = L->contains(BB: Succ1) ? nullptr : Succ1;
163 CHub.addBranch(BB, Succ0, Succ1);
164
165 LLVM_DEBUG(dbgs() << "Added exiting branch: " << BB->getName() << " -> {"
166 << (Succ0 ? Succ0->getName() : "<none>") << ", "
167 << (Succ1 ? Succ1->getName() : "<none>") << "}\n");
168 }
169
170 SmallVector<BasicBlock *, 8> GuardBlocks;
171 DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
172 BasicBlock *LoopExitBlock;
173 bool ChangedCFG;
174 std::tie(args&: LoopExitBlock, args&: ChangedCFG) = CHub.finalize(
175 DTU: &DTU, GuardBlocks, Prefix: "loop.exit", MaxControlFlowBooleans: MaxBooleansInControlFlowHub.getValue());
176 if (!ChangedCFG)
177 return false;
178
179 restoreSSA(DT, L, Incoming&: ExitingBlocks, LoopExitBlock);
180
181#if defined(EXPENSIVE_CHECKS)
182 assert(DT.verify(DominatorTree::VerificationLevel::Full));
183#else
184 assert(DT.verify(DominatorTree::VerificationLevel::Fast));
185#endif // EXPENSIVE_CHECKS
186 L->verifyLoop();
187
188 // The guard blocks were created outside the loop, so they need to become
189 // members of the parent loop.
190 if (auto ParentLoop = L->getParentLoop()) {
191 for (auto *G : GuardBlocks) {
192 ParentLoop->addBasicBlockToLoop(NewBB: G, LI);
193 }
194 ParentLoop->verifyLoop();
195 }
196
197#if defined(EXPENSIVE_CHECKS)
198 LI.verify(DT);
199#endif // EXPENSIVE_CHECKS
200
201 return true;
202}
203
204static bool runImpl(LoopInfo &LI, DominatorTree &DT) {
205
206 bool Changed = false;
207 auto Loops = LI.getLoopsInPreorder();
208 for (auto *L : Loops) {
209 LLVM_DEBUG(dbgs() << "Processing loop:\n"; L->print(dbgs()));
210 Changed |= unifyLoopExits(DT, LI, L);
211 }
212 return Changed;
213}
214
215bool UnifyLoopExitsLegacyPass::runOnFunction(Function &F) {
216 LLVM_DEBUG(dbgs() << "===== Unifying loop exits in function " << F.getName()
217 << "\n");
218 auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
219 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
220
221 assert(hasOnlySimpleTerminator(F) && "Unsupported block terminator.");
222
223 return runImpl(LI, DT);
224}
225
226namespace llvm {
227
228PreservedAnalyses UnifyLoopExitsPass::run(Function &F,
229 FunctionAnalysisManager &AM) {
230 LLVM_DEBUG(dbgs() << "===== Unifying loop exits in function " << F.getName()
231 << "\n");
232 auto &LI = AM.getResult<LoopAnalysis>(IR&: F);
233 auto &DT = AM.getResult<DominatorTreeAnalysis>(IR&: F);
234
235 if (!runImpl(LI, DT))
236 return PreservedAnalyses::all();
237 PreservedAnalyses PA;
238 PA.preserve<LoopAnalysis>();
239 PA.preserve<DominatorTreeAnalysis>();
240 return PA;
241}
242} // namespace llvm
243