1//===- UnifyLoopExits.cpp - Redirect exiting edges to one block -*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// For each natural loop with multiple exit blocks, this pass creates a new
10// block N such that all exiting blocks now branch to N, and then control flow
11// is redistributed to all the original exit blocks.
12//
13// Limitation: This assumes that all terminators in the CFG are direct branches
14// (the "br" instruction). The presence of any other control flow
15// such as indirectbr or switch will cause an assert.
16// The callbr terminator is supported by creating intermediate
17// target blocks that unconditionally branch to the original target
18// blocks. These intermediate target blocks can then be redirected
19// through the ControlFlowHub as usual.
20//
21//===----------------------------------------------------------------------===//
22
23#include "llvm/Transforms/Utils/UnifyLoopExits.h"
24#include "llvm/ADT/MapVector.h"
25#include "llvm/Analysis/DomTreeUpdater.h"
26#include "llvm/Analysis/LoopInfo.h"
27#include "llvm/IR/Constants.h"
28#include "llvm/IR/Dominators.h"
29#include "llvm/InitializePasses.h"
30#include "llvm/Support/CommandLine.h"
31#include "llvm/Transforms/Utils.h"
32#include "llvm/Transforms/Utils/BasicBlockUtils.h"
33#include "llvm/Transforms/Utils/ControlFlowUtils.h"
34
35#define DEBUG_TYPE "unify-loop-exits"
36
37using namespace llvm;
38
39static cl::opt<unsigned> MaxBooleansInControlFlowHub(
40 "max-booleans-in-control-flow-hub", cl::init(Val: 32), cl::Hidden,
41 cl::desc("Set the maximum number of outgoing blocks for using a boolean "
42 "value to record the exiting block in the ControlFlowHub."));
43
44namespace {
45struct UnifyLoopExitsLegacyPass : public FunctionPass {
46 static char ID;
47 UnifyLoopExitsLegacyPass() : FunctionPass(ID) {
48 initializeUnifyLoopExitsLegacyPassPass(*PassRegistry::getPassRegistry());
49 }
50
51 void getAnalysisUsage(AnalysisUsage &AU) const override {
52 AU.addRequired<LoopInfoWrapperPass>();
53 AU.addRequired<DominatorTreeWrapperPass>();
54 AU.addPreserved<LoopInfoWrapperPass>();
55 AU.addPreserved<DominatorTreeWrapperPass>();
56 }
57
58 bool runOnFunction(Function &F) override;
59};
60} // namespace
61
62char UnifyLoopExitsLegacyPass::ID = 0;
63
64FunctionPass *llvm::createUnifyLoopExitsPass() {
65 return new UnifyLoopExitsLegacyPass();
66}
67
68INITIALIZE_PASS_BEGIN(UnifyLoopExitsLegacyPass, "unify-loop-exits",
69 "Fixup each natural loop to have a single exit block",
70 false /* Only looks at CFG */, false /* Analysis Pass */)
71INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
72INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
73INITIALIZE_PASS_END(UnifyLoopExitsLegacyPass, "unify-loop-exits",
74 "Fixup each natural loop to have a single exit block",
75 false /* Only looks at CFG */, false /* Analysis Pass */)
76
77// The current transform introduces new control flow paths which may break the
78// SSA requirement that every def must dominate all its uses. For example,
79// consider a value D defined inside the loop that is used by some instruction
80// U outside the loop. It follows that D dominates U, since the original
81// program has valid SSA form. After merging the exits, all paths from D to U
82// now flow through the unified exit block. In addition, there may be other
83// paths that do not pass through D, but now reach the unified exit
84// block. Thus, D no longer dominates U.
85//
86// Restore the dominance by creating a phi for each such D at the new unified
87// loop exit. But when doing this, ignore any uses U that are in the new unified
88// loop exit, since those were introduced specially when the block was created.
89//
90// The use of SSAUpdater seems like overkill for this operation. The location
91// for creating the new PHI is well-known, and also the set of incoming blocks
92// to the new PHI.
93static void restoreSSA(const DominatorTree &DT, const Loop *L,
94 SmallVectorImpl<BasicBlock *> &Incoming,
95 BasicBlock *LoopExitBlock) {
96 using InstVector = SmallVector<Instruction *, 8>;
97 using IIMap = MapVector<Instruction *, InstVector>;
98 IIMap ExternalUsers;
99 for (auto *BB : L->blocks()) {
100 for (auto &I : *BB) {
101 for (auto &U : I.uses()) {
102 auto UserInst = cast<Instruction>(Val: U.getUser());
103 auto UserBlock = UserInst->getParent();
104 if (UserBlock == LoopExitBlock)
105 continue;
106 if (L->contains(BB: UserBlock))
107 continue;
108 LLVM_DEBUG(dbgs() << "added ext use for " << I.getName() << "("
109 << BB->getName() << ")"
110 << ": " << UserInst->getName() << "("
111 << UserBlock->getName() << ")"
112 << "\n");
113 ExternalUsers[&I].push_back(Elt: UserInst);
114 }
115 }
116 }
117
118 for (const auto &II : ExternalUsers) {
119 // For each Def used outside the loop, create NewPhi in
120 // LoopExitBlock. NewPhi receives Def only along exiting blocks that
121 // dominate it, while the remaining values are undefined since those paths
122 // didn't exist in the original CFG.
123 auto Def = II.first;
124 LLVM_DEBUG(dbgs() << "externally used: " << Def->getName() << "\n");
125 auto NewPhi =
126 PHINode::Create(Ty: Def->getType(), NumReservedValues: Incoming.size(),
127 NameStr: Def->getName() + ".moved", InsertBefore: LoopExitBlock->begin());
128 for (auto *In : Incoming) {
129 LLVM_DEBUG(dbgs() << "predecessor " << In->getName() << ": ");
130 if (Def->getParent() == In || DT.dominates(Def, BB: In)) {
131 LLVM_DEBUG(dbgs() << "dominated\n");
132 NewPhi->addIncoming(V: Def, BB: In);
133 } else {
134 LLVM_DEBUG(dbgs() << "not dominated\n");
135 NewPhi->addIncoming(V: PoisonValue::get(T: Def->getType()), BB: In);
136 }
137 }
138
139 LLVM_DEBUG(dbgs() << "external users:");
140 for (auto *U : II.second) {
141 LLVM_DEBUG(dbgs() << " " << U->getName());
142 U->replaceUsesOfWith(From: Def, To: NewPhi);
143 }
144 LLVM_DEBUG(dbgs() << "\n");
145 }
146}
147
148static bool unifyLoopExits(DominatorTree &DT, LoopInfo &LI, Loop *L) {
149 // To unify the loop exits, we need a list of the exiting blocks as
150 // well as exit blocks. The functions for locating these lists both
151 // traverse the entire loop body. It is more efficient to first
152 // locate the exiting blocks and then examine their successors to
153 // locate the exit blocks.
154 SmallVector<BasicBlock *, 8> ExitingBlocks;
155 L->getExitingBlocks(ExitingBlocks);
156
157 // No exit blocks, so nothing to do. Just return.
158 if (ExitingBlocks.empty())
159 return false;
160
161 DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
162 SmallVector<BasicBlock *, 8> CallBrTargetBlocksToFix;
163
164 // Redirect exiting edges through a control flow hub.
165 ControlFlowHub CHub;
166 bool Changed = false;
167
168 for (unsigned I = 0; I < ExitingBlocks.size(); ++I) {
169 BasicBlock *BB = ExitingBlocks[I];
170 if (BranchInst *Branch = dyn_cast<BranchInst>(Val: BB->getTerminator())) {
171 BasicBlock *Succ0 = Branch->getSuccessor(i: 0);
172 Succ0 = L->contains(BB: Succ0) ? nullptr : Succ0;
173
174 BasicBlock *Succ1 =
175 Branch->isUnconditional() ? nullptr : Branch->getSuccessor(i: 1);
176 Succ1 = L->contains(BB: Succ1) ? nullptr : Succ1;
177 CHub.addBranch(BB, Succ0, Succ1);
178
179 LLVM_DEBUG(dbgs() << "Added extiting branch: " << printBasicBlock(BB)
180 << " -> " << printBasicBlock(Succ0)
181 << (Succ0 && Succ1 ? " " : "") << printBasicBlock(Succ1)
182 << '\n');
183 } else if (CallBrInst *CallBr = dyn_cast<CallBrInst>(Val: BB->getTerminator())) {
184 for (unsigned J = 0; J < CallBr->getNumSuccessors(); ++J) {
185 BasicBlock *Succ = CallBr->getSuccessor(i: J);
186 if (L->contains(BB: Succ))
187 continue;
188 bool UpdatedLI = false;
189 BasicBlock *NewSucc =
190 SplitCallBrEdge(CallBrBlock: BB, Succ, SuccIdx: J, DTU: &DTU, CI: nullptr, LI: &LI, UpdatedLI: &UpdatedLI);
191 // SplitCallBrEdge modifies the CFG because it creates an intermediate
192 // block. So we need to set the changed flag no matter what the
193 // ControlFlowHub is going to do later.
194 Changed = true;
195 // Even if CallBr and Succ do not have a common parent loop, we need to
196 // add the new target block to the parent loop of the current loop.
197 if (!UpdatedLI)
198 CallBrTargetBlocksToFix.push_back(Elt: NewSucc);
199 // ExitingBlocks is later used to restore SSA, so we need to make sure
200 // that the blocks used for phi nodes in the guard blocks match the
201 // predecessors of the guard blocks, which, in the case of callbr, are
202 // the new intermediate target blocks instead of the callbr blocks
203 // themselves.
204 ExitingBlocks[I] = NewSucc;
205 CHub.addBranch(BB: NewSucc, Succ0: Succ);
206 LLVM_DEBUG(dbgs() << "Added exiting branch: "
207 << printBasicBlock(NewSucc) << " -> "
208 << printBasicBlock(Succ) << '\n');
209 }
210 } else {
211 llvm_unreachable("unsupported block terminator");
212 }
213 }
214
215 SmallVector<BasicBlock *, 8> GuardBlocks;
216 BasicBlock *LoopExitBlock;
217 bool ChangedCFG;
218 std::tie(args&: LoopExitBlock, args&: ChangedCFG) = CHub.finalize(
219 DTU: &DTU, GuardBlocks, Prefix: "loop.exit", MaxControlFlowBooleans: MaxBooleansInControlFlowHub.getValue());
220 ChangedCFG |= Changed;
221 if (!ChangedCFG)
222 return false;
223
224 restoreSSA(DT, L, Incoming&: ExitingBlocks, LoopExitBlock);
225
226#if defined(EXPENSIVE_CHECKS)
227 assert(DT.verify(DominatorTree::VerificationLevel::Full));
228#else
229 assert(DT.verify(DominatorTree::VerificationLevel::Fast));
230#endif // EXPENSIVE_CHECKS
231 L->verifyLoop();
232
233 // The guard blocks were created outside the loop, so they need to become
234 // members of the parent loop.
235 // Same goes for the callbr target blocks. Although we try to add them to the
236 // smallest common parent loop of the callbr block and the corresponding
237 // original target block, there might not have been such a loop, in which case
238 // the newly created callbr target blocks are not part of any loop. For nested
239 // loops, this might result in them leading to a loop with multiple entry
240 // points.
241 if (auto *ParentLoop = L->getParentLoop()) {
242 for (auto *G : GuardBlocks) {
243 ParentLoop->addBasicBlockToLoop(NewBB: G, LI);
244 }
245 for (auto *C : CallBrTargetBlocksToFix) {
246 ParentLoop->addBasicBlockToLoop(NewBB: C, LI);
247 }
248 ParentLoop->verifyLoop();
249 }
250
251#if defined(EXPENSIVE_CHECKS)
252 LI.verify(DT);
253#endif // EXPENSIVE_CHECKS
254
255 return true;
256}
257
258static bool runImpl(LoopInfo &LI, DominatorTree &DT) {
259
260 bool Changed = false;
261 auto Loops = LI.getLoopsInPreorder();
262 for (auto *L : Loops) {
263 LLVM_DEBUG(dbgs() << "Processing loop:\n"; L->print(dbgs()));
264 Changed |= unifyLoopExits(DT, LI, L);
265 }
266 return Changed;
267}
268
269bool UnifyLoopExitsLegacyPass::runOnFunction(Function &F) {
270 LLVM_DEBUG(dbgs() << "===== Unifying loop exits in function " << F.getName()
271 << "\n");
272 auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
273 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
274
275 return runImpl(LI, DT);
276}
277
278namespace llvm {
279
280PreservedAnalyses UnifyLoopExitsPass::run(Function &F,
281 FunctionAnalysisManager &AM) {
282 LLVM_DEBUG(dbgs() << "===== Unifying loop exits in function " << F.getName()
283 << "\n");
284 auto &LI = AM.getResult<LoopAnalysis>(IR&: F);
285 auto &DT = AM.getResult<DominatorTreeAnalysis>(IR&: F);
286
287 if (!runImpl(LI, DT))
288 return PreservedAnalyses::all();
289 PreservedAnalyses PA;
290 PA.preserve<LoopAnalysis>();
291 PA.preserve<DominatorTreeAnalysis>();
292 return PA;
293}
294} // namespace llvm
295