1//===- FixIrreducible.cpp - Convert irreducible control-flow into loops ---===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// INPUT CFG: The blocks H and B form an irreducible cycle with two headers.
10//
11// Entry
12// / \
13// v v
14// H ----> B
15// ^ /|
16// `----' |
17// v
18// Exit
19//
20// OUTPUT CFG: Converted to a natural loop with a new header N.
21//
22// Entry
23// |
24// v
25// N <---.
26// / \ \
27// / \ |
28// v v /
29// H --> B --'
30// |
31// v
32// Exit
33//
34// To convert an irreducible cycle C to a natural loop L:
35//
36// 1. Add a new node N to C.
37// 2. Redirect all external incoming edges through N.
38// 3. Redirect all edges incident on header H through N.
39//
40// This is sufficient to ensure that:
41//
42// a. Every closed path in C also exists in L, with the modification that any
43// path passing through H now passes through N before reaching H.
44// b. Every external path incident on any entry of C is now incident on N and
45// then redirected to the entry.
46//
47// Thus, L is a strongly connected component dominated by N, and hence L is a
48// natural loop with header N.
49//
50// When an irreducible cycle C with header H is transformed into a loop, the
51// following invariants hold:
52//
53// 1. No new subcycles are "discovered" in the set (C-H). The only internal
54// edges that are redirected by the transform are incident on H. Any subcycle
55// S in (C-H), already existed prior to this transform, and is already in the
56// list of children for this cycle C.
57//
58// 2. Subcycles of C are not modified by the transform. For some subcycle S of
59// C, edges incident on the entries of S are either internal to C, or they
60// are now redirected through N, which is outside of S. So the list of
61// entries to S does not change. Since the transform only adds a block
62// outside S, and redirects edges that are not internal to S, the list of
63// blocks in S does not change.
64//
65// 3. Similarly, any natural loop L included in C is not affected, with one
66// exception: L is "destroyed" by the transform iff its header is H. The
67// backedges of such a loop are now redirected to N instead, and hence the
68// body of this loop gets merged into the new loop with header N.
69//
70// The actual transformation is handled by the ControlFlowHub, which redirects
71// specified control flow edges through a set of guard blocks. This also moves
72// every PHINode in an outgoing block to the hub. Since the hub dominates all
73// the outgoing blocks, each such PHINode continues to dominate its uses. Since
74// every header in an SCC has at least two predecessors, every value used in the
75// header (or later) but defined in a predecessor (or earlier) is represented by
76// a PHINode in a header. Hence the above handling of PHINodes is sufficient and
77// no further processing is required to restore SSA.
78//
79// Limitation: The pass cannot handle switch statements and indirect
80// branches. Both must be lowered to plain branches first.
81//
82// CallBr support: CallBr is handled as a more general branch instruction which
83// can have multiple successors. The pass redirects the edges to intermediate
84// target blocks that unconditionally branch to the original callbr target
85// blocks. This allows the control flow hub to know to which of the original
86// target blocks to jump to.
87// Example input CFG:
88// Entry (callbr)
89// / \
90// v v
91// H ----> B
92// ^ /|
93// `----' |
94// v
95// Exit
96//
97// becomes:
98// Entry (callbr)
99// / \
100// v v
101// target.H target.B
102// | |
103// v v
104// H ----> B
105// ^ /|
106// `----' |
107// v
108// Exit
109//
// OUTPUT CFG: Converted to a natural loop with a new header N. Note that the
// intermediate target blocks, not the callbr block itself, branch to N.
112//
113// Entry (callbr)
114// / \
115// v v
116// target.H target.B
117// \ /
118// \ /
119// v v
120// N <---.
121// / \ \
122// / \ |
123// v v /
124// H --> B --'
125// |
126// v
127// Exit
128//
129//===----------------------------------------------------------------------===//
130
131#include "llvm/Transforms/Utils/FixIrreducible.h"
132#include "llvm/Analysis/CycleAnalysis.h"
133#include "llvm/Analysis/DomTreeUpdater.h"
134#include "llvm/Analysis/LoopInfo.h"
135#include "llvm/InitializePasses.h"
136#include "llvm/Pass.h"
137#include "llvm/Support/ErrorHandling.h"
138#include "llvm/Transforms/Utils.h"
139#include "llvm/Transforms/Utils/BasicBlockUtils.h"
140#include "llvm/Transforms/Utils/ControlFlowUtils.h"
141
142#define DEBUG_TYPE "fix-irreducible"
143
144using namespace llvm;
145
146namespace {
147struct FixIrreducible : public FunctionPass {
148 static char ID;
149 FixIrreducible() : FunctionPass(ID) {
150 initializeFixIrreduciblePass(*PassRegistry::getPassRegistry());
151 }
152
153 void getAnalysisUsage(AnalysisUsage &AU) const override {
154 AU.addRequired<DominatorTreeWrapperPass>();
155 AU.addRequired<CycleInfoWrapperPass>();
156 AU.addPreserved<DominatorTreeWrapperPass>();
157 AU.addPreserved<CycleInfoWrapperPass>();
158 AU.addPreserved<LoopInfoWrapperPass>();
159 }
160
161 bool runOnFunction(Function &F) override;
162};
163} // namespace
164
165char FixIrreducible::ID = 0;
166
167FunctionPass *llvm::createFixIrreduciblePass() { return new FixIrreducible(); }
168
169INITIALIZE_PASS_BEGIN(FixIrreducible, "fix-irreducible",
170 "Convert irreducible control-flow into natural loops",
171 false /* Only looks at CFG */, false /* Analysis Pass */)
172INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
173INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
174INITIALIZE_PASS_END(FixIrreducible, "fix-irreducible",
175 "Convert irreducible control-flow into natural loops",
176 false /* Only looks at CFG */, false /* Analysis Pass */)
177
178// When a new loop is created, existing children of the parent loop may now be
179// fully inside the new loop. Reconnect these as children of the new loop.
180static void reconnectChildLoops(LoopInfo &LI, Loop *ParentLoop, Loop *NewLoop,
181 BasicBlock *OldHeader) {
182 auto &CandidateLoops = ParentLoop ? ParentLoop->getSubLoopsVector()
183 : LI.getTopLevelLoopsVector();
184 // Any candidate is a child iff its header is owned by the new loop. Move all
185 // the children to a new vector.
186 auto FirstChild = llvm::partition(Range&: CandidateLoops, P: [&](Loop *L) {
187 return NewLoop == L || !NewLoop->contains(BB: L->getHeader());
188 });
189 SmallVector<Loop *, 8> ChildLoops(FirstChild, CandidateLoops.end());
190 CandidateLoops.erase(first: FirstChild, last: CandidateLoops.end());
191
192 for (Loop *Child : ChildLoops) {
193 LLVM_DEBUG(dbgs() << "child loop: " << Child->getHeader()->getName()
194 << "\n");
195 // A child loop whose header was the old cycle header gets destroyed since
196 // its backedges are removed.
197 if (Child->getHeader() == OldHeader) {
198 for (auto *BB : Child->blocks()) {
199 if (LI.getLoopFor(BB) != Child)
200 continue;
201 LI.changeLoopFor(BB, L: NewLoop);
202 LLVM_DEBUG(dbgs() << "moved block from child: " << BB->getName()
203 << "\n");
204 }
205 std::vector<Loop *> GrandChildLoops;
206 std::swap(x&: GrandChildLoops, y&: Child->getSubLoopsVector());
207 for (auto *GrandChildLoop : GrandChildLoops) {
208 GrandChildLoop->setParentLoop(nullptr);
209 NewLoop->addChildLoop(NewChild: GrandChildLoop);
210 }
211 LI.destroy(L: Child);
212 LLVM_DEBUG(dbgs() << "subsumed child loop (common header)\n");
213 continue;
214 }
215
216 Child->setParentLoop(nullptr);
217 NewLoop->addChildLoop(NewChild: Child);
218 LLVM_DEBUG(dbgs() << "added child loop to new loop\n");
219 }
220}
221
222static void updateLoopInfo(LoopInfo &LI, Cycle &C,
223 ArrayRef<BasicBlock *> GuardBlocks) {
224 // The parent loop is a natural loop L mapped to the cycle header H as long as
225 // H is not also the header of L. In the latter case, L is destroyed and we
226 // seek its parent instead.
227 BasicBlock *CycleHeader = C.getHeader();
228 Loop *ParentLoop = LI.getLoopFor(BB: CycleHeader);
229 if (ParentLoop && ParentLoop->getHeader() == CycleHeader)
230 ParentLoop = ParentLoop->getParentLoop();
231
232 // Create a new loop from the now-transformed cycle
233 auto *NewLoop = LI.AllocateLoop();
234 if (ParentLoop) {
235 ParentLoop->addChildLoop(NewChild: NewLoop);
236 } else {
237 LI.addTopLevelLoop(New: NewLoop);
238 }
239
240 // Add the guard blocks to the new loop. The first guard block is
241 // the head of all the backedges, and it is the first to be inserted
242 // in the loop. This ensures that it is recognized as the
243 // header. Since the new loop is already in LoopInfo, the new blocks
244 // are also propagated up the chain of parent loops.
245 for (auto *G : GuardBlocks) {
246 LLVM_DEBUG(dbgs() << "added guard block to loop: " << G->getName() << "\n");
247 NewLoop->addBasicBlockToLoop(NewBB: G, LI);
248 }
249
250 for (auto *BB : C.blocks()) {
251 NewLoop->addBlockEntry(BB);
252 if (LI.getLoopFor(BB) == ParentLoop) {
253 LLVM_DEBUG(dbgs() << "moved block from parent: " << BB->getName()
254 << "\n");
255 LI.changeLoopFor(BB, L: NewLoop);
256 } else {
257 LLVM_DEBUG(dbgs() << "added block from child: " << BB->getName() << "\n");
258 }
259 }
260 LLVM_DEBUG(dbgs() << "header for new loop: "
261 << NewLoop->getHeader()->getName() << "\n");
262
263 reconnectChildLoops(LI, ParentLoop, NewLoop, OldHeader: C.getHeader());
264
265 LLVM_DEBUG(dbgs() << "Verify new loop.\n"; NewLoop->print(dbgs()));
266 NewLoop->verifyLoop();
267 if (ParentLoop) {
268 LLVM_DEBUG(dbgs() << "Verify parent loop.\n"; ParentLoop->print(dbgs()));
269 ParentLoop->verifyLoop();
270 }
271}
272
273// Given a set of blocks and headers in an irreducible SCC, convert it into a
274// natural loop. Also insert this new loop at its appropriate place in the
275// hierarchy of loops.
276static bool fixIrreducible(Cycle &C, CycleInfo &CI, DominatorTree &DT,
277 LoopInfo *LI) {
278 if (C.isReducible())
279 return false;
280 LLVM_DEBUG(dbgs() << "Processing cycle:\n" << CI.print(&C) << "\n";);
281
282 DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
283 ControlFlowHub CHub;
284 SetVector<BasicBlock *> Predecessors;
285
286 // Redirect internal edges incident on the header.
287 BasicBlock *Header = C.getHeader();
288 for (BasicBlock *P : predecessors(BB: Header)) {
289 if (C.contains(Block: P))
290 Predecessors.insert(X: P);
291 }
292
293 for (BasicBlock *P : Predecessors) {
294 if (isa<UncondBrInst>(Val: P->getTerminator())) {
295 assert(P->getTerminator()->getSuccessor(0) == Header);
296 CHub.addBranch(BB: P, Succ0: Header);
297
298 LLVM_DEBUG(dbgs() << "Added internal branch: " << printBasicBlock(P)
299 << " -> " << printBasicBlock(Header) << '\n');
300 } else if (CondBrInst *Branch = dyn_cast<CondBrInst>(Val: P->getTerminator())) {
301 // Exactly one of the two successors is the header.
302 BasicBlock *Succ0 = Branch->getSuccessor(i: 0) == Header ? Header : nullptr;
303 BasicBlock *Succ1 = Succ0 ? nullptr : Header;
304 assert(Succ0 || Branch->getSuccessor(1) == Header);
305 assert(Succ0 || Succ1);
306 CHub.addBranch(BB: P, Succ0, Succ1);
307
308 LLVM_DEBUG(dbgs() << "Added internal branch: " << printBasicBlock(P)
309 << " -> " << printBasicBlock(Succ0)
310 << (Succ0 && Succ1 ? " " : "") << printBasicBlock(Succ1)
311 << '\n');
312 } else if (CallBrInst *CallBr = dyn_cast<CallBrInst>(Val: P->getTerminator())) {
313 for (unsigned I = 0; I < CallBr->getNumSuccessors(); ++I) {
314 BasicBlock *Succ = CallBr->getSuccessor(i: I);
315 if (Succ != Header)
316 continue;
317 BasicBlock *NewSucc = SplitCallBrEdge(CallBrBlock: P, Succ, SuccIdx: I, DTU: &DTU, CI: &CI, LI);
318 CHub.addBranch(BB: NewSucc, Succ0: Succ);
319 LLVM_DEBUG(dbgs() << "Added internal branch: "
320 << printBasicBlock(NewSucc) << " -> "
321 << printBasicBlock(Succ) << '\n');
322 }
323 } else {
324 reportFatalUsageError(reason: "unsupported block terminator: fix-irreducible "
325 "only supports br and callbr instructions");
326 }
327 }
328
329 // Redirect external incoming edges. This includes the edges on the header.
330 Predecessors.clear();
331 for (BasicBlock *E : C.entries()) {
332 for (BasicBlock *P : predecessors(BB: E)) {
333 if (!C.contains(Block: P))
334 Predecessors.insert(X: P);
335 }
336 }
337
338 for (BasicBlock *P : Predecessors) {
339 if (UncondBrInst *Branch = dyn_cast<UncondBrInst>(Val: P->getTerminator())) {
340 BasicBlock *Succ0 = Branch->getSuccessor();
341 Succ0 = C.contains(Block: Succ0) ? Succ0 : nullptr;
342 CHub.addBranch(BB: P, Succ0);
343
344 LLVM_DEBUG(dbgs() << "Added external branch: " << printBasicBlock(P)
345 << " -> " << printBasicBlock(Succ0) << '\n');
346 } else if (CondBrInst *Branch = dyn_cast<CondBrInst>(Val: P->getTerminator())) {
347 BasicBlock *Succ0 = Branch->getSuccessor(i: 0);
348 Succ0 = C.contains(Block: Succ0) ? Succ0 : nullptr;
349 BasicBlock *Succ1 = Branch->getSuccessor(i: 1);
350 Succ1 = C.contains(Block: Succ1) ? Succ1 : nullptr;
351 CHub.addBranch(BB: P, Succ0, Succ1);
352
353 LLVM_DEBUG(dbgs() << "Added external branch: " << printBasicBlock(P)
354 << " -> " << printBasicBlock(Succ0)
355 << (Succ0 && Succ1 ? " " : "") << printBasicBlock(Succ1)
356 << '\n');
357 } else if (CallBrInst *CallBr = dyn_cast<CallBrInst>(Val: P->getTerminator())) {
358 for (unsigned I = 0; I < CallBr->getNumSuccessors(); ++I) {
359 BasicBlock *Succ = CallBr->getSuccessor(i: I);
360 if (!C.contains(Block: Succ))
361 continue;
362 BasicBlock *NewSucc = SplitCallBrEdge(CallBrBlock: P, Succ, SuccIdx: I, DTU: &DTU, CI: &CI, LI);
363 CHub.addBranch(BB: NewSucc, Succ0: Succ);
364 LLVM_DEBUG(dbgs() << "Added external branch: "
365 << printBasicBlock(NewSucc) << " -> "
366 << printBasicBlock(Succ) << '\n');
367 }
368 } else {
369 reportFatalUsageError(reason: "unsupported block terminator: fix-irreducible "
370 "only supports br and callbr instructions");
371 }
372 }
373
374 // Redirect all the backedges through a "hub" consisting of a series
375 // of guard blocks that manage the flow of control from the
376 // predecessors to the headers.
377 SmallVector<BasicBlock *> GuardBlocks;
378
379 // Minor optimization: The cycle entries are discovered in an order that is
380 // the opposite of the order in which these blocks appear as branch targets.
381 // This results in a lot of condition inversions in the control flow out of
382 // the new ControlFlowHub, which can be mitigated if the orders match. So we
383 // reverse the entries when adding them to the hub.
384 SetVector<BasicBlock *> Entries;
385 Entries.insert(Start: C.entry_rbegin(), End: C.entry_rend());
386
387 CHub.finalize(DTU: &DTU, GuardBlocks, Prefix: "irr");
388#if defined(EXPENSIVE_CHECKS)
389 assert(DT.verify(DominatorTree::VerificationLevel::Full));
390#else
391 assert(DT.verify(DominatorTree::VerificationLevel::Fast));
392#endif
393
394 // If we are updating LoopInfo, do that now before modifying the cycle. This
395 // ensures that the first guard block is the header of a new natural loop.
396 if (LI)
397 updateLoopInfo(LI&: *LI, C, GuardBlocks);
398
399 for (auto *G : GuardBlocks) {
400 LLVM_DEBUG(dbgs() << "added guard block to cycle: " << G->getName()
401 << "\n");
402 CI.addBlockToCycle(Block: G, Cycle: &C);
403 }
404 C.setSingleEntry(GuardBlocks[0]);
405
406 C.verifyCycle();
407 if (Cycle *Parent = C.getParentCycle())
408 Parent->verifyCycle();
409
410 LLVM_DEBUG(dbgs() << "Finished one cycle:\n"; CI.print(dbgs()););
411 return true;
412}
413
414static bool FixIrreducibleImpl(Function &F, CycleInfo &CI, DominatorTree &DT,
415 LoopInfo *LI) {
416 LLVM_DEBUG(dbgs() << "===== Fix irreducible control-flow in function: "
417 << F.getName() << "\n");
418
419 bool Changed = false;
420 for (Cycle *TopCycle : CI.toplevel_cycles()) {
421 for (Cycle *C : depth_first(G: TopCycle)) {
422 Changed |= fixIrreducible(C&: *C, CI, DT, LI);
423 }
424 }
425
426 if (!Changed)
427 return false;
428
429#if defined(EXPENSIVE_CHECKS)
430 CI.verify();
431 if (LI) {
432 LI->verify(DT);
433 }
434#endif // EXPENSIVE_CHECKS
435
436 return true;
437}
438
439bool FixIrreducible::runOnFunction(Function &F) {
440 auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>();
441 LoopInfo *LI = LIWP ? &LIWP->getLoopInfo() : nullptr;
442 auto &CI = getAnalysis<CycleInfoWrapperPass>().getResult();
443 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
444 return FixIrreducibleImpl(F, CI, DT, LI);
445}
446
447PreservedAnalyses FixIrreduciblePass::run(Function &F,
448 FunctionAnalysisManager &AM) {
449 auto *LI = AM.getCachedResult<LoopAnalysis>(IR&: F);
450 auto &CI = AM.getResult<CycleAnalysis>(IR&: F);
451 auto &DT = AM.getResult<DominatorTreeAnalysis>(IR&: F);
452
453 if (!FixIrreducibleImpl(F, CI, DT, LI))
454 return PreservedAnalyses::all();
455
456 PreservedAnalyses PA;
457 PA.preserve<LoopAnalysis>();
458 PA.preserve<CycleAnalysis>();
459 PA.preserve<DominatorTreeAnalysis>();
460 return PA;
461}
462