1//===- StructurizeCFG.cpp -------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "llvm/Transforms/Scalar/StructurizeCFG.h"
10#include "llvm/ADT/DenseMap.h"
11#include "llvm/ADT/MapVector.h"
12#include "llvm/ADT/SCCIterator.h"
13#include "llvm/ADT/STLExtras.h"
14#include "llvm/ADT/SmallPtrSet.h"
15#include "llvm/ADT/SmallSet.h"
16#include "llvm/ADT/SmallVector.h"
17#include "llvm/Analysis/InstructionSimplify.h"
18#include "llvm/Analysis/RegionInfo.h"
19#include "llvm/Analysis/RegionIterator.h"
20#include "llvm/Analysis/RegionPass.h"
21#include "llvm/Analysis/UniformityAnalysis.h"
22#include "llvm/IR/BasicBlock.h"
23#include "llvm/IR/CFG.h"
24#include "llvm/IR/Constants.h"
25#include "llvm/IR/Dominators.h"
26#include "llvm/IR/Function.h"
27#include "llvm/IR/InstrTypes.h"
28#include "llvm/IR/Instruction.h"
29#include "llvm/IR/Instructions.h"
30#include "llvm/IR/Metadata.h"
31#include "llvm/IR/PassManager.h"
32#include "llvm/IR/PatternMatch.h"
33#include "llvm/IR/Type.h"
34#include "llvm/IR/Use.h"
35#include "llvm/IR/Value.h"
36#include "llvm/IR/ValueHandle.h"
37#include "llvm/InitializePasses.h"
38#include "llvm/Pass.h"
39#include "llvm/Support/Casting.h"
40#include "llvm/Support/CommandLine.h"
41#include "llvm/Support/Debug.h"
42#include "llvm/Support/raw_ostream.h"
43#include "llvm/Transforms/Scalar.h"
44#include "llvm/Transforms/Utils.h"
45#include "llvm/Transforms/Utils/BasicBlockUtils.h"
46#include "llvm/Transforms/Utils/Local.h"
47#include "llvm/Transforms/Utils/SSAUpdater.h"
48#include <algorithm>
49#include <cassert>
50#include <utility>
51
52using namespace llvm;
53using namespace llvm::PatternMatch;
54
55#define DEBUG_TYPE "structurizecfg"
56
57// The name for newly created blocks.
58const char FlowBlockName[] = "Flow";
59
60namespace {
61
62static cl::opt<bool> ForceSkipUniformRegions(
63 "structurizecfg-skip-uniform-regions",
64 cl::Hidden,
65 cl::desc("Force whether the StructurizeCFG pass skips uniform regions"),
66 cl::init(Val: false));
67
68static cl::opt<bool>
69 RelaxedUniformRegions("structurizecfg-relaxed-uniform-regions", cl::Hidden,
70 cl::desc("Allow relaxed uniform region checks"),
71 cl::init(Val: true));
72
73// Definition of the complex types used in this pass.
74
75using BBValuePair = std::pair<BasicBlock *, Value *>;
76
77using RNVector = SmallVector<RegionNode *, 8>;
78using BBVector = SmallVector<BasicBlock *, 8>;
79using BranchVector = SmallVector<BranchInst *, 8>;
80using BBValueVector = SmallVector<BBValuePair, 2>;
81
82using BBSet = SmallPtrSet<BasicBlock *, 8>;
83
84using PhiMap = MapVector<PHINode *, BBValueVector>;
85using BB2BBVecMap = MapVector<BasicBlock *, BBVector>;
86
87using BBPhiMap = DenseMap<BasicBlock *, PhiMap>;
88using BBPredicates = DenseMap<BasicBlock *, Value *>;
89using PredMap = DenseMap<BasicBlock *, BBPredicates>;
90using BB2BBMap = DenseMap<BasicBlock *, BasicBlock *>;
91
92using BranchDebugLocMap = DenseMap<BasicBlock *, DebugLoc>;
93
94// A traits type that is intended to be used in graph algorithms. The graph
95// traits starts at an entry node, and traverses the RegionNodes that are in
96// the Nodes set.
97struct SubGraphTraits {
98 using NodeRef = std::pair<RegionNode *, SmallDenseSet<RegionNode *> *>;
99 using BaseSuccIterator = GraphTraits<RegionNode *>::ChildIteratorType;
100
101 // This wraps a set of Nodes into the iterator, so we know which edges to
102 // filter out.
103 class WrappedSuccIterator
104 : public iterator_adaptor_base<
105 WrappedSuccIterator, BaseSuccIterator,
106 typename std::iterator_traits<BaseSuccIterator>::iterator_category,
107 NodeRef, std::ptrdiff_t, NodeRef *, NodeRef> {
108 SmallDenseSet<RegionNode *> *Nodes;
109
110 public:
111 WrappedSuccIterator(BaseSuccIterator It, SmallDenseSet<RegionNode *> *Nodes)
112 : iterator_adaptor_base(It), Nodes(Nodes) {}
113
114 NodeRef operator*() const { return {*I, Nodes}; }
115 };
116
117 static bool filterAll(const NodeRef &N) { return true; }
118 static bool filterSet(const NodeRef &N) { return N.second->count(V: N.first); }
119
120 using ChildIteratorType =
121 filter_iterator<WrappedSuccIterator, bool (*)(const NodeRef &)>;
122
123 static NodeRef getEntryNode(Region *R) {
124 return {GraphTraits<Region *>::getEntryNode(R), nullptr};
125 }
126
127 static NodeRef getEntryNode(NodeRef N) { return N; }
128
129 static iterator_range<ChildIteratorType> children(const NodeRef &N) {
130 auto *filter = N.second ? &filterSet : &filterAll;
131 return make_filter_range(
132 Range: make_range<WrappedSuccIterator>(
133 x: {GraphTraits<RegionNode *>::child_begin(N: N.first), N.second},
134 y: {GraphTraits<RegionNode *>::child_end(N: N.first), N.second}),
135 Pred: filter);
136 }
137
138 static ChildIteratorType child_begin(const NodeRef &N) {
139 return children(N).begin();
140 }
141
142 static ChildIteratorType child_end(const NodeRef &N) {
143 return children(N).end();
144 }
145};
146
147/// Finds the nearest common dominator of a set of BasicBlocks.
148///
149/// For every BB you add to the set, you can specify whether we "remember" the
150/// block. When you get the common dominator, you can also ask whether it's one
151/// of the blocks we remembered.
152class NearestCommonDominator {
153 DominatorTree *DT;
154 BasicBlock *Result = nullptr;
155 bool ResultIsRemembered = false;
156
157 /// Add BB to the resulting dominator.
158 void addBlock(BasicBlock *BB, bool Remember) {
159 if (!Result) {
160 Result = BB;
161 ResultIsRemembered = Remember;
162 return;
163 }
164
165 BasicBlock *NewResult = DT->findNearestCommonDominator(A: Result, B: BB);
166 if (NewResult != Result)
167 ResultIsRemembered = false;
168 if (NewResult == BB)
169 ResultIsRemembered |= Remember;
170 Result = NewResult;
171 }
172
173public:
174 explicit NearestCommonDominator(DominatorTree *DomTree) : DT(DomTree) {}
175
176 void addBlock(BasicBlock *BB) {
177 addBlock(BB, /* Remember = */ false);
178 }
179
180 void addAndRememberBlock(BasicBlock *BB) {
181 addBlock(BB, /* Remember = */ true);
182 }
183
184 /// Get the nearest common dominator of all the BBs added via addBlock() and
185 /// addAndRememberBlock().
186 BasicBlock *result() { return Result; }
187
188 /// Is the BB returned by getResult() one of the blocks we added to the set
189 /// with addAndRememberBlock()?
190 bool resultIsRememberedBlock() { return ResultIsRemembered; }
191};
192
193/// Transforms the control flow graph on one single entry/exit region
194/// at a time.
195///
196/// After the transform all "If"/"Then"/"Else" style control flow looks like
197/// this:
198///
199/// \verbatim
200/// 1
201/// ||
202/// | |
203/// 2 |
204/// | /
205/// |/
206/// 3
207/// || Where:
208/// | | 1 = "If" block, calculates the condition
209/// 4 | 2 = "Then" subregion, runs if the condition is true
210/// | / 3 = "Flow" blocks, newly inserted flow blocks, rejoins the flow
211/// |/ 4 = "Else" optional subregion, runs if the condition is false
212/// 5 5 = "End" block, also rejoins the control flow
213/// \endverbatim
214///
215/// Control flow is expressed as a branch where the true exit goes into the
216/// "Then"/"Else" region, while the false exit skips the region
217/// The condition for the optional "Else" region is expressed as a PHI node.
218/// The incoming values of the PHI node are true for the "If" edge and false
219/// for the "Then" edge.
220///
221/// Additionally to that even complicated loops look like this:
222///
223/// \verbatim
224/// 1
225/// ||
226/// | |
227/// 2 ^ Where:
228/// | / 1 = "Entry" block
229/// |/ 2 = "Loop" optional subregion, with all exits at "Flow" block
230/// 3 3 = "Flow" block, with back edge to entry block
231/// |
232/// \endverbatim
233///
234/// The back edge of the "Flow" block is always on the false side of the branch
235/// while the true side continues the general flow. So the loop condition
236/// consist of a network of PHI nodes where the true incoming values expresses
237/// breaks and the false values expresses continue states.
238
239class StructurizeCFG {
240 Type *Boolean;
241 ConstantInt *BoolTrue;
242 ConstantInt *BoolFalse;
243 Value *BoolPoison;
244
245 Function *Func;
246 Region *ParentRegion;
247
248 UniformityInfo *UA = nullptr;
249 DominatorTree *DT;
250
251 SmallVector<RegionNode *, 8> Order;
252 BBSet Visited;
253 BBSet FlowSet;
254
255 SmallVector<WeakVH, 8> AffectedPhis;
256 BBPhiMap DeletedPhis;
257 BB2BBVecMap AddedPhis;
258
259 PredMap Predicates;
260 BranchVector Conditions;
261
262 BB2BBMap Loops;
263 PredMap LoopPreds;
264 BranchVector LoopConds;
265
266 BranchDebugLocMap TermDL;
267
268 RegionNode *PrevNode;
269
270 void orderNodes();
271
272 void analyzeLoops(RegionNode *N);
273
274 Value *buildCondition(BranchInst *Term, unsigned Idx, bool Invert);
275
276 void gatherPredicates(RegionNode *N);
277
278 void collectInfos();
279
280 void insertConditions(bool Loops);
281
282 void simplifyConditions();
283
284 void delPhiValues(BasicBlock *From, BasicBlock *To);
285
286 void addPhiValues(BasicBlock *From, BasicBlock *To);
287
288 void findUndefBlocks(BasicBlock *PHIBlock,
289 const SmallSet<BasicBlock *, 8> &Incomings,
290 SmallVector<BasicBlock *> &UndefBlks) const;
291 void setPhiValues();
292
293 void simplifyAffectedPhis();
294
295 void killTerminator(BasicBlock *BB);
296
297 void changeExit(RegionNode *Node, BasicBlock *NewExit,
298 bool IncludeDominator);
299
300 BasicBlock *getNextFlow(BasicBlock *Dominator);
301
302 BasicBlock *needPrefix(bool NeedEmpty);
303
304 BasicBlock *needPostfix(BasicBlock *Flow, bool ExitUseAllowed);
305
306 void setPrevNode(BasicBlock *BB);
307
308 bool dominatesPredicates(BasicBlock *BB, RegionNode *Node);
309
310 bool isPredictableTrue(RegionNode *Node);
311
312 void wireFlow(bool ExitUseAllowed, BasicBlock *LoopEnd);
313
314 void handleLoops(bool ExitUseAllowed, BasicBlock *LoopEnd);
315
316 void createFlow();
317
318 void rebuildSSA();
319
320public:
321 void init(Region *R);
322 bool run(Region *R, DominatorTree *DT);
323 bool makeUniformRegion(Region *R, UniformityInfo &UA);
324};
325
326class StructurizeCFGLegacyPass : public RegionPass {
327 bool SkipUniformRegions;
328
329public:
330 static char ID;
331
332 explicit StructurizeCFGLegacyPass(bool SkipUniformRegions_ = false)
333 : RegionPass(ID), SkipUniformRegions(SkipUniformRegions_) {
334 if (ForceSkipUniformRegions.getNumOccurrences())
335 SkipUniformRegions = ForceSkipUniformRegions.getValue();
336 initializeStructurizeCFGLegacyPassPass(*PassRegistry::getPassRegistry());
337 }
338
339 bool runOnRegion(Region *R, RGPassManager &RGM) override {
340 StructurizeCFG SCFG;
341 SCFG.init(R);
342 if (SkipUniformRegions) {
343 UniformityInfo &UA =
344 getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo();
345 if (SCFG.makeUniformRegion(R, UA))
346 return false;
347 }
348 DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
349 return SCFG.run(R, DT);
350 }
351
352 StringRef getPassName() const override { return "Structurize control flow"; }
353
354 void getAnalysisUsage(AnalysisUsage &AU) const override {
355 if (SkipUniformRegions)
356 AU.addRequired<UniformityInfoWrapperPass>();
357 AU.addRequired<DominatorTreeWrapperPass>();
358
359 AU.addPreserved<DominatorTreeWrapperPass>();
360 RegionPass::getAnalysisUsage(AU);
361 }
362};
363
364} // end anonymous namespace
365
366char StructurizeCFGLegacyPass::ID = 0;
367
368INITIALIZE_PASS_BEGIN(StructurizeCFGLegacyPass, "structurizecfg",
369 "Structurize the CFG", false, false)
370INITIALIZE_PASS_DEPENDENCY(UniformityInfoWrapperPass)
371INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
372INITIALIZE_PASS_DEPENDENCY(RegionInfoPass)
373INITIALIZE_PASS_END(StructurizeCFGLegacyPass, "structurizecfg",
374 "Structurize the CFG", false, false)
375
376/// Build up the general order of nodes, by performing a topological sort of the
377/// parent region's nodes, while ensuring that there is no outer cycle node
378/// between any two inner cycle nodes.
379void StructurizeCFG::orderNodes() {
380 Order.resize(N: std::distance(first: GraphTraits<Region *>::nodes_begin(R: ParentRegion),
381 last: GraphTraits<Region *>::nodes_end(R: ParentRegion)));
382 if (Order.empty())
383 return;
384
385 SmallDenseSet<RegionNode *> Nodes;
386 auto EntryNode = SubGraphTraits::getEntryNode(R: ParentRegion);
387
388 // A list of range indices of SCCs in Order, to be processed.
389 SmallVector<std::pair<unsigned, unsigned>, 8> WorkList;
390 unsigned I = 0, E = Order.size();
391 while (true) {
392 // Run through all the SCCs in the subgraph starting with Entry.
393 for (auto SCCI =
394 scc_iterator<SubGraphTraits::NodeRef, SubGraphTraits>::begin(
395 G: EntryNode);
396 !SCCI.isAtEnd(); ++SCCI) {
397 auto &SCC = *SCCI;
398
399 // An SCC up to the size of 2, can be reduced to an entry (the last node),
400 // and a possible additional node. Therefore, it is already in order, and
401 // there is no need to add it to the work-list.
402 unsigned Size = SCC.size();
403 if (Size > 2)
404 WorkList.emplace_back(Args&: I, Args: I + Size);
405
406 // Add the SCC nodes to the Order array.
407 for (const auto &N : SCC) {
408 assert(I < E && "SCC size mismatch!");
409 Order[I++] = N.first;
410 }
411 }
412 assert(I == E && "SCC size mismatch!");
413
414 // If there are no more SCCs to order, then we are done.
415 if (WorkList.empty())
416 break;
417
418 std::tie(args&: I, args&: E) = WorkList.pop_back_val();
419
420 // Collect the set of nodes in the SCC's subgraph. These are only the
421 // possible child nodes; we do not add the entry (last node) otherwise we
422 // will have the same exact SCC all over again.
423 Nodes.clear();
424 Nodes.insert(I: Order.begin() + I, E: Order.begin() + E - 1);
425
426 // Update the entry node.
427 EntryNode.first = Order[E - 1];
428 EntryNode.second = &Nodes;
429 }
430}
431
432/// Determine the end of the loops
433void StructurizeCFG::analyzeLoops(RegionNode *N) {
434 if (N->isSubRegion()) {
435 // Test for exit as back edge
436 BasicBlock *Exit = N->getNodeAs<Region>()->getExit();
437 if (Visited.count(Ptr: Exit))
438 Loops[Exit] = N->getEntry();
439
440 } else {
441 // Test for successors as back edge
442 BasicBlock *BB = N->getNodeAs<BasicBlock>();
443 BranchInst *Term = cast<BranchInst>(Val: BB->getTerminator());
444
445 for (BasicBlock *Succ : Term->successors())
446 if (Visited.count(Ptr: Succ))
447 Loops[Succ] = BB;
448 }
449}
450
451/// Build the condition for one edge
452Value *StructurizeCFG::buildCondition(BranchInst *Term, unsigned Idx,
453 bool Invert) {
454 Value *Cond = Invert ? BoolFalse : BoolTrue;
455 if (Term->isConditional()) {
456 Cond = Term->getCondition();
457
458 if (Idx != (unsigned)Invert)
459 Cond = invertCondition(Condition: Cond);
460 }
461 return Cond;
462}
463
464/// Analyze the predecessors of each block and build up predicates
465void StructurizeCFG::gatherPredicates(RegionNode *N) {
466 RegionInfo *RI = ParentRegion->getRegionInfo();
467 BasicBlock *BB = N->getEntry();
468 BBPredicates &Pred = Predicates[BB];
469 BBPredicates &LPred = LoopPreds[BB];
470
471 for (BasicBlock *P : predecessors(BB)) {
472 // Ignore it if it's a branch from outside into our region entry
473 if (!ParentRegion->contains(BB: P))
474 continue;
475
476 Region *R = RI->getRegionFor(BB: P);
477 if (R == ParentRegion) {
478 // It's a top level block in our region
479 BranchInst *Term = cast<BranchInst>(Val: P->getTerminator());
480 for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) {
481 BasicBlock *Succ = Term->getSuccessor(i);
482 if (Succ != BB)
483 continue;
484
485 if (Visited.count(Ptr: P)) {
486 // Normal forward edge
487 if (Term->isConditional()) {
488 // Try to treat it like an ELSE block
489 BasicBlock *Other = Term->getSuccessor(i: !i);
490 if (Visited.count(Ptr: Other) && !Loops.count(Val: Other) &&
491 !Pred.count(Val: Other) && !Pred.count(Val: P)) {
492
493 Pred[Other] = BoolFalse;
494 Pred[P] = BoolTrue;
495 continue;
496 }
497 }
498 Pred[P] = buildCondition(Term, Idx: i, Invert: false);
499 } else {
500 // Back edge
501 LPred[P] = buildCondition(Term, Idx: i, Invert: true);
502 }
503 }
504 } else {
505 // It's an exit from a sub region
506 while (R->getParent() != ParentRegion)
507 R = R->getParent();
508
509 // Edge from inside a subregion to its entry, ignore it
510 if (*R == *N)
511 continue;
512
513 BasicBlock *Entry = R->getEntry();
514 if (Visited.count(Ptr: Entry))
515 Pred[Entry] = BoolTrue;
516 else
517 LPred[Entry] = BoolFalse;
518 }
519 }
520}
521
522/// Collect various loop and predicate infos
523void StructurizeCFG::collectInfos() {
524 // Reset predicate
525 Predicates.clear();
526
527 // and loop infos
528 Loops.clear();
529 LoopPreds.clear();
530
531 // Reset the visited nodes
532 Visited.clear();
533
534 for (RegionNode *RN : reverse(C&: Order)) {
535 LLVM_DEBUG(dbgs() << "Visiting: "
536 << (RN->isSubRegion() ? "SubRegion with entry: " : "")
537 << RN->getEntry()->getName() << "\n");
538
539 // Analyze all the conditions leading to a node
540 gatherPredicates(N: RN);
541
542 // Remember that we've seen this node
543 Visited.insert(Ptr: RN->getEntry());
544
545 // Find the last back edges
546 analyzeLoops(N: RN);
547 }
548
549 // Reset the collected term debug locations
550 TermDL.clear();
551
552 for (BasicBlock &BB : *Func) {
553 if (const DebugLoc &DL = BB.getTerminator()->getDebugLoc())
554 TermDL[&BB] = DL;
555 }
556}
557
558/// Insert the missing branch conditions
559void StructurizeCFG::insertConditions(bool Loops) {
560 BranchVector &Conds = Loops ? LoopConds : Conditions;
561 Value *Default = Loops ? BoolTrue : BoolFalse;
562 SSAUpdater PhiInserter;
563
564 for (BranchInst *Term : Conds) {
565 assert(Term->isConditional());
566
567 BasicBlock *Parent = Term->getParent();
568 BasicBlock *SuccTrue = Term->getSuccessor(i: 0);
569 BasicBlock *SuccFalse = Term->getSuccessor(i: 1);
570
571 PhiInserter.Initialize(Ty: Boolean, Name: "");
572 PhiInserter.AddAvailableValue(BB: &Func->getEntryBlock(), V: Default);
573 PhiInserter.AddAvailableValue(BB: Loops ? SuccFalse : Parent, V: Default);
574
575 BBPredicates &Preds = Loops ? LoopPreds[SuccFalse] : Predicates[SuccTrue];
576
577 NearestCommonDominator Dominator(DT);
578 Dominator.addBlock(BB: Parent);
579
580 Value *ParentValue = nullptr;
581 for (std::pair<BasicBlock *, Value *> BBAndPred : Preds) {
582 BasicBlock *BB = BBAndPred.first;
583 Value *Pred = BBAndPred.second;
584
585 if (BB == Parent) {
586 ParentValue = Pred;
587 break;
588 }
589 PhiInserter.AddAvailableValue(BB, V: Pred);
590 Dominator.addAndRememberBlock(BB);
591 }
592
593 if (ParentValue) {
594 Term->setCondition(ParentValue);
595 } else {
596 if (!Dominator.resultIsRememberedBlock())
597 PhiInserter.AddAvailableValue(BB: Dominator.result(), V: Default);
598
599 Term->setCondition(PhiInserter.GetValueInMiddleOfBlock(BB: Parent));
600 }
601 }
602}
603
604/// Simplify any inverted conditions that were built by buildConditions.
605void StructurizeCFG::simplifyConditions() {
606 SmallVector<Instruction *> InstToErase;
607 for (auto &I : concat<PredMap::value_type>(Ranges&: Predicates, Ranges&: LoopPreds)) {
608 auto &Preds = I.second;
609 for (auto &J : Preds) {
610 auto &Cond = J.second;
611 Instruction *Inverted;
612 if (match(V: Cond, P: m_Not(V: m_OneUse(SubPattern: m_Instruction(I&: Inverted)))) &&
613 !Cond->use_empty()) {
614 if (auto *InvertedCmp = dyn_cast<CmpInst>(Val: Inverted)) {
615 InvertedCmp->setPredicate(InvertedCmp->getInversePredicate());
616 Cond->replaceAllUsesWith(V: InvertedCmp);
617 InstToErase.push_back(Elt: cast<Instruction>(Val: Cond));
618 }
619 }
620 }
621 }
622 for (auto *I : InstToErase)
623 I->eraseFromParent();
624}
625
626/// Remove all PHI values coming from "From" into "To" and remember
627/// them in DeletedPhis
628void StructurizeCFG::delPhiValues(BasicBlock *From, BasicBlock *To) {
629 PhiMap &Map = DeletedPhis[To];
630 for (PHINode &Phi : To->phis()) {
631 bool Recorded = false;
632 while (Phi.getBasicBlockIndex(BB: From) != -1) {
633 Value *Deleted = Phi.removeIncomingValue(BB: From, DeletePHIIfEmpty: false);
634 Map[&Phi].push_back(Elt: std::make_pair(x&: From, y&: Deleted));
635 if (!Recorded) {
636 AffectedPhis.push_back(Elt: &Phi);
637 Recorded = true;
638 }
639 }
640 }
641}
642
643/// Add a dummy PHI value as soon as we knew the new predecessor
644void StructurizeCFG::addPhiValues(BasicBlock *From, BasicBlock *To) {
645 for (PHINode &Phi : To->phis()) {
646 Value *Undef = UndefValue::get(T: Phi.getType());
647 Phi.addIncoming(V: Undef, BB: From);
648 }
649 AddedPhis[To].push_back(Elt: From);
650}
651
652/// When we are reconstructing a PHI inside \p PHIBlock with incoming values
653/// from predecessors \p Incomings, we have a chance to mark the available value
654/// from some blocks as undefined. The function will find out all such blocks
655/// and return in \p UndefBlks.
656void StructurizeCFG::findUndefBlocks(
657 BasicBlock *PHIBlock, const SmallSet<BasicBlock *, 8> &Incomings,
658 SmallVector<BasicBlock *> &UndefBlks) const {
659 // We may get a post-structured CFG like below:
660 //
661 // | P1
662 // |/
663 // F1
664 // |\
665 // | N
666 // |/
667 // F2
668 // |\
669 // | P2
670 // |/
671 // F3
672 // |\
673 // B
674 //
675 // B is the block that has a PHI being reconstructed. P1/P2 are predecessors
676 // of B before structurization. F1/F2/F3 are flow blocks inserted during
677 // structurization process. Block N is not a predecessor of B before
678 // structurization, but are placed between the predecessors(P1/P2) of B after
679 // structurization. This usually means that threads went to N never take the
680 // path N->F2->F3->B. For example, the threads take the branch F1->N may
681 // always take the branch F2->P2. So, when we are reconstructing a PHI
682 // originally in B, we can safely say the incoming value from N is undefined.
683 SmallSet<BasicBlock *, 8> VisitedBlock;
684 SmallVector<BasicBlock *, 8> Stack;
685 if (PHIBlock == ParentRegion->getExit()) {
686 for (auto P : predecessors(BB: PHIBlock)) {
687 if (ParentRegion->contains(BB: P))
688 Stack.push_back(Elt: P);
689 }
690 } else {
691 append_range(C&: Stack, R: predecessors(BB: PHIBlock));
692 }
693
694 // Do a backward traversal over the CFG, and stop further searching if
695 // the block is not a Flow. If a block is neither flow block nor the
696 // incoming predecessor, then the incoming value from the block is
697 // undefined value for the PHI being reconstructed.
698 while (!Stack.empty()) {
699 BasicBlock *Current = Stack.pop_back_val();
700 if (VisitedBlock.contains(Ptr: Current))
701 continue;
702
703 VisitedBlock.insert(Ptr: Current);
704 if (FlowSet.contains(Ptr: Current)) {
705 for (auto P : predecessors(BB: Current))
706 Stack.push_back(Elt: P);
707 } else if (!Incomings.contains(Ptr: Current)) {
708 UndefBlks.push_back(Elt: Current);
709 }
710 }
711}
712
713/// Add the real PHI value as soon as everything is set up
714void StructurizeCFG::setPhiValues() {
715 SmallVector<PHINode *, 8> InsertedPhis;
716 SSAUpdater Updater(&InsertedPhis);
717 for (const auto &AddedPhi : AddedPhis) {
718 BasicBlock *To = AddedPhi.first;
719 const BBVector &From = AddedPhi.second;
720
721 if (!DeletedPhis.count(Val: To))
722 continue;
723
724 SmallVector<BasicBlock *> UndefBlks;
725 bool CachedUndefs = false;
726 PhiMap &Map = DeletedPhis[To];
727 for (const auto &PI : Map) {
728 PHINode *Phi = PI.first;
729 Value *Undef = UndefValue::get(T: Phi->getType());
730 Updater.Initialize(Ty: Phi->getType(), Name: "");
731 Updater.AddAvailableValue(BB: &Func->getEntryBlock(), V: Undef);
732 Updater.AddAvailableValue(BB: To, V: Undef);
733
734 SmallSet<BasicBlock *, 8> Incomings;
735 SmallVector<BasicBlock *> ConstantPreds;
736 for (const auto &VI : PI.second) {
737 Incomings.insert(Ptr: VI.first);
738 Updater.AddAvailableValue(BB: VI.first, V: VI.second);
739 if (isa<Constant>(Val: VI.second))
740 ConstantPreds.push_back(Elt: VI.first);
741 }
742
743 if (!CachedUndefs) {
744 findUndefBlocks(PHIBlock: To, Incomings, UndefBlks);
745 CachedUndefs = true;
746 }
747
748 for (auto UB : UndefBlks) {
749 // If this undef block is dominated by any predecessor(before
750 // structurization) of reconstructed PHI with constant incoming value,
751 // don't mark the available value as undefined. Setting undef to such
752 // block will stop us from getting optimal phi insertion.
753 if (any_of(Range&: ConstantPreds,
754 P: [&](BasicBlock *CP) { return DT->dominates(A: CP, B: UB); }))
755 continue;
756 Updater.AddAvailableValue(BB: UB, V: Undef);
757 }
758
759 for (BasicBlock *FI : From)
760 Phi->setIncomingValueForBlock(BB: FI, V: Updater.GetValueAtEndOfBlock(BB: FI));
761 AffectedPhis.push_back(Elt: Phi);
762 }
763
764 DeletedPhis.erase(Val: To);
765 }
766 assert(DeletedPhis.empty());
767
768 AffectedPhis.append(in_start: InsertedPhis.begin(), in_end: InsertedPhis.end());
769}
770
771void StructurizeCFG::simplifyAffectedPhis() {
772 bool Changed;
773 do {
774 Changed = false;
775 SimplifyQuery Q(Func->getDataLayout());
776 Q.DT = DT;
777 // Setting CanUseUndef to true might extend value liveness, set it to false
778 // to achieve better register pressure.
779 Q.CanUseUndef = false;
780 for (WeakVH VH : AffectedPhis) {
781 if (auto Phi = dyn_cast_or_null<PHINode>(Val&: VH)) {
782 if (auto NewValue = simplifyInstruction(I: Phi, Q)) {
783 Phi->replaceAllUsesWith(V: NewValue);
784 Phi->eraseFromParent();
785 Changed = true;
786 }
787 }
788 }
789 } while (Changed);
790}
791
792/// Remove phi values from all successors and then remove the terminator.
793void StructurizeCFG::killTerminator(BasicBlock *BB) {
794 Instruction *Term = BB->getTerminator();
795 if (!Term)
796 return;
797
798 for (BasicBlock *Succ : successors(BB))
799 delPhiValues(From: BB, To: Succ);
800
801 Term->eraseFromParent();
802}
803
804/// Let node exit(s) point to NewExit
805void StructurizeCFG::changeExit(RegionNode *Node, BasicBlock *NewExit,
806 bool IncludeDominator) {
807 if (Node->isSubRegion()) {
808 Region *SubRegion = Node->getNodeAs<Region>();
809 BasicBlock *OldExit = SubRegion->getExit();
810 BasicBlock *Dominator = nullptr;
811
812 // Find all the edges from the sub region to the exit.
813 // We use make_early_inc_range here because we modify BB's terminator.
814 for (BasicBlock *BB : llvm::make_early_inc_range(Range: predecessors(BB: OldExit))) {
815 if (!SubRegion->contains(BB))
816 continue;
817
818 // Modify the edges to point to the new exit
819 delPhiValues(From: BB, To: OldExit);
820 BB->getTerminator()->replaceUsesOfWith(From: OldExit, To: NewExit);
821 addPhiValues(From: BB, To: NewExit);
822
823 // Find the new dominator (if requested)
824 if (IncludeDominator) {
825 if (!Dominator)
826 Dominator = BB;
827 else
828 Dominator = DT->findNearestCommonDominator(A: Dominator, B: BB);
829 }
830 }
831
832 // Change the dominator (if requested)
833 if (Dominator)
834 DT->changeImmediateDominator(BB: NewExit, NewBB: Dominator);
835
836 // Update the region info
837 SubRegion->replaceExit(BB: NewExit);
838 } else {
839 BasicBlock *BB = Node->getNodeAs<BasicBlock>();
840 killTerminator(BB);
841 BranchInst *Br = BranchInst::Create(IfTrue: NewExit, InsertBefore: BB);
842 Br->setDebugLoc(TermDL[BB]);
843 addPhiValues(From: BB, To: NewExit);
844 if (IncludeDominator)
845 DT->changeImmediateDominator(BB: NewExit, NewBB: BB);
846 }
847}
848
849/// Create a new flow node and update dominator tree and region info
850BasicBlock *StructurizeCFG::getNextFlow(BasicBlock *Dominator) {
851 LLVMContext &Context = Func->getContext();
852 BasicBlock *Insert = Order.empty() ? ParentRegion->getExit() :
853 Order.back()->getEntry();
854 BasicBlock *Flow = BasicBlock::Create(Context, Name: FlowBlockName,
855 Parent: Func, InsertBefore: Insert);
856 FlowSet.insert(Ptr: Flow);
857
858 // use a temporary variable to avoid a use-after-free if the map's storage is
859 // reallocated
860 DebugLoc DL = TermDL[Dominator];
861 TermDL[Flow] = std::move(DL);
862
863 DT->addNewBlock(BB: Flow, DomBB: Dominator);
864 ParentRegion->getRegionInfo()->setRegionFor(BB: Flow, R: ParentRegion);
865 return Flow;
866}
867
868/// Create a new or reuse the previous node as flow node
869BasicBlock *StructurizeCFG::needPrefix(bool NeedEmpty) {
870 BasicBlock *Entry = PrevNode->getEntry();
871
872 if (!PrevNode->isSubRegion()) {
873 killTerminator(BB: Entry);
874 if (!NeedEmpty || Entry->getFirstInsertionPt() == Entry->end())
875 return Entry;
876 }
877
878 // create a new flow node
879 BasicBlock *Flow = getNextFlow(Dominator: Entry);
880
881 // and wire it up
882 changeExit(Node: PrevNode, NewExit: Flow, IncludeDominator: true);
883 PrevNode = ParentRegion->getBBNode(BB: Flow);
884 return Flow;
885}
886
887/// Returns the region exit if possible, otherwise just a new flow node
888BasicBlock *StructurizeCFG::needPostfix(BasicBlock *Flow,
889 bool ExitUseAllowed) {
890 if (!Order.empty() || !ExitUseAllowed)
891 return getNextFlow(Dominator: Flow);
892
893 BasicBlock *Exit = ParentRegion->getExit();
894 DT->changeImmediateDominator(BB: Exit, NewBB: Flow);
895 addPhiValues(From: Flow, To: Exit);
896 return Exit;
897}
898
899/// Set the previous node
900void StructurizeCFG::setPrevNode(BasicBlock *BB) {
901 PrevNode = ParentRegion->contains(BB) ? ParentRegion->getBBNode(BB)
902 : nullptr;
903}
904
905/// Does BB dominate all the predicates of Node?
906bool StructurizeCFG::dominatesPredicates(BasicBlock *BB, RegionNode *Node) {
907 BBPredicates &Preds = Predicates[Node->getEntry()];
908 return llvm::all_of(Range&: Preds, P: [&](std::pair<BasicBlock *, Value *> Pred) {
909 return DT->dominates(A: BB, B: Pred.first);
910 });
911}
912
913/// Can we predict that this node will always be called?
914bool StructurizeCFG::isPredictableTrue(RegionNode *Node) {
915 BBPredicates &Preds = Predicates[Node->getEntry()];
916 bool Dominated = false;
917
918 // Regionentry is always true
919 if (!PrevNode)
920 return true;
921
922 for (std::pair<BasicBlock*, Value*> Pred : Preds) {
923 BasicBlock *BB = Pred.first;
924 Value *V = Pred.second;
925
926 if (V != BoolTrue)
927 return false;
928
929 if (!Dominated && DT->dominates(A: BB, B: PrevNode->getEntry()))
930 Dominated = true;
931 }
932
933 // TODO: The dominator check is too strict
934 return Dominated;
935}
936
937/// Take one node from the order vector and wire it up
938void StructurizeCFG::wireFlow(bool ExitUseAllowed,
939 BasicBlock *LoopEnd) {
940 RegionNode *Node = Order.pop_back_val();
941 Visited.insert(Ptr: Node->getEntry());
942
943 if (isPredictableTrue(Node)) {
944 // Just a linear flow
945 if (PrevNode) {
946 changeExit(Node: PrevNode, NewExit: Node->getEntry(), IncludeDominator: true);
947 }
948 PrevNode = Node;
949 } else {
950 // Insert extra prefix node (or reuse last one)
951 BasicBlock *Flow = needPrefix(NeedEmpty: false);
952
953 // Insert extra postfix node (or use exit instead)
954 BasicBlock *Entry = Node->getEntry();
955 BasicBlock *Next = needPostfix(Flow, ExitUseAllowed);
956
957 // let it point to entry and next block
958 BranchInst *Br = BranchInst::Create(IfTrue: Entry, IfFalse: Next, Cond: BoolPoison, InsertBefore: Flow);
959 Br->setDebugLoc(TermDL[Flow]);
960 Conditions.push_back(Elt: Br);
961 addPhiValues(From: Flow, To: Entry);
962 DT->changeImmediateDominator(BB: Entry, NewBB: Flow);
963
964 PrevNode = Node;
965 while (!Order.empty() && !Visited.count(Ptr: LoopEnd) &&
966 dominatesPredicates(BB: Entry, Node: Order.back())) {
967 handleLoops(ExitUseAllowed: false, LoopEnd);
968 }
969
970 changeExit(Node: PrevNode, NewExit: Next, IncludeDominator: false);
971 setPrevNode(Next);
972 }
973}
974
975void StructurizeCFG::handleLoops(bool ExitUseAllowed,
976 BasicBlock *LoopEnd) {
977 RegionNode *Node = Order.back();
978 BasicBlock *LoopStart = Node->getEntry();
979
980 if (!Loops.count(Val: LoopStart)) {
981 wireFlow(ExitUseAllowed, LoopEnd);
982 return;
983 }
984
985 if (!isPredictableTrue(Node))
986 LoopStart = needPrefix(NeedEmpty: true);
987
988 LoopEnd = Loops[Node->getEntry()];
989 wireFlow(ExitUseAllowed: false, LoopEnd);
990 while (!Visited.count(Ptr: LoopEnd)) {
991 handleLoops(ExitUseAllowed: false, LoopEnd);
992 }
993
994 assert(LoopStart != &LoopStart->getParent()->getEntryBlock());
995
996 // Create an extra loop end node
997 LoopEnd = needPrefix(NeedEmpty: false);
998 BasicBlock *Next = needPostfix(Flow: LoopEnd, ExitUseAllowed);
999 BranchInst *Br = BranchInst::Create(IfTrue: Next, IfFalse: LoopStart, Cond: BoolPoison, InsertBefore: LoopEnd);
1000 Br->setDebugLoc(TermDL[LoopEnd]);
1001 LoopConds.push_back(Elt: Br);
1002 addPhiValues(From: LoopEnd, To: LoopStart);
1003 setPrevNode(Next);
1004}
1005
1006/// After this function control flow looks like it should be, but
1007/// branches and PHI nodes only have undefined conditions.
1008void StructurizeCFG::createFlow() {
1009 BasicBlock *Exit = ParentRegion->getExit();
1010 bool EntryDominatesExit = DT->dominates(A: ParentRegion->getEntry(), B: Exit);
1011
1012 AffectedPhis.clear();
1013 DeletedPhis.clear();
1014 AddedPhis.clear();
1015 Conditions.clear();
1016 LoopConds.clear();
1017
1018 PrevNode = nullptr;
1019 Visited.clear();
1020
1021 while (!Order.empty()) {
1022 handleLoops(ExitUseAllowed: EntryDominatesExit, LoopEnd: nullptr);
1023 }
1024
1025 if (PrevNode)
1026 changeExit(Node: PrevNode, NewExit: Exit, IncludeDominator: EntryDominatesExit);
1027 else
1028 assert(EntryDominatesExit);
1029}
1030
1031/// Handle a rare case where the disintegrated nodes instructions
1032/// no longer dominate all their uses. Not sure if this is really necessary
1033void StructurizeCFG::rebuildSSA() {
1034 SSAUpdater Updater;
1035 for (BasicBlock *BB : ParentRegion->blocks())
1036 for (Instruction &I : *BB) {
1037 bool Initialized = false;
1038 // We may modify the use list as we iterate over it, so we use
1039 // make_early_inc_range.
1040 for (Use &U : llvm::make_early_inc_range(Range: I.uses())) {
1041 Instruction *User = cast<Instruction>(Val: U.getUser());
1042 if (User->getParent() == BB) {
1043 continue;
1044 } else if (PHINode *UserPN = dyn_cast<PHINode>(Val: User)) {
1045 if (UserPN->getIncomingBlock(U) == BB)
1046 continue;
1047 }
1048
1049 if (DT->dominates(Def: &I, User))
1050 continue;
1051
1052 if (!Initialized) {
1053 Value *Undef = UndefValue::get(T: I.getType());
1054 Updater.Initialize(Ty: I.getType(), Name: "");
1055 Updater.AddAvailableValue(BB: &Func->getEntryBlock(), V: Undef);
1056 Updater.AddAvailableValue(BB, V: &I);
1057 Initialized = true;
1058 }
1059 Updater.RewriteUseAfterInsertions(U);
1060 }
1061 }
1062}
1063
1064static bool hasOnlyUniformBranches(Region *R, unsigned UniformMDKindID,
1065 const UniformityInfo &UA) {
1066 // Bool for if all sub-regions are uniform.
1067 bool SubRegionsAreUniform = true;
1068 // Count of how many direct children are conditional.
1069 unsigned ConditionalDirectChildren = 0;
1070
1071 for (auto *E : R->elements()) {
1072 if (!E->isSubRegion()) {
1073 auto Br = dyn_cast<BranchInst>(Val: E->getEntry()->getTerminator());
1074 if (!Br || !Br->isConditional())
1075 continue;
1076
1077 if (!UA.isUniform(I: Br))
1078 return false;
1079
1080 // One of our direct children is conditional.
1081 ConditionalDirectChildren++;
1082
1083 LLVM_DEBUG(dbgs() << "BB: " << Br->getParent()->getName()
1084 << " has uniform terminator\n");
1085 } else {
1086 // Explicitly refuse to treat regions as uniform if they have non-uniform
1087 // subregions. We cannot rely on UniformityAnalysis for branches in
1088 // subregions because those branches may have been removed and re-created,
1089 // so we look for our metadata instead.
1090 //
1091 // Warning: It would be nice to treat regions as uniform based only on
1092 // their direct child basic blocks' terminators, regardless of whether
1093 // subregions are uniform or not. However, this requires a very careful
1094 // look at SIAnnotateControlFlow to make sure nothing breaks there.
1095 for (auto *BB : E->getNodeAs<Region>()->blocks()) {
1096 auto Br = dyn_cast<BranchInst>(Val: BB->getTerminator());
1097 if (!Br || !Br->isConditional())
1098 continue;
1099
1100 if (!Br->getMetadata(KindID: UniformMDKindID)) {
1101 // Early exit if we cannot have relaxed uniform regions.
1102 if (!RelaxedUniformRegions)
1103 return false;
1104
1105 SubRegionsAreUniform = false;
1106 break;
1107 }
1108 }
1109 }
1110 }
1111
1112 // Our region is uniform if:
1113 // 1. All conditional branches that are direct children are uniform (checked
1114 // above).
1115 // 2. And either:
1116 // a. All sub-regions are uniform.
1117 // b. There is one or less conditional branches among the direct children.
1118 return SubRegionsAreUniform || (ConditionalDirectChildren <= 1);
1119}
1120
1121void StructurizeCFG::init(Region *R) {
1122 LLVMContext &Context = R->getEntry()->getContext();
1123
1124 Boolean = Type::getInt1Ty(C&: Context);
1125 BoolTrue = ConstantInt::getTrue(Context);
1126 BoolFalse = ConstantInt::getFalse(Context);
1127 BoolPoison = PoisonValue::get(T: Boolean);
1128
1129 this->UA = nullptr;
1130}
1131
1132bool StructurizeCFG::makeUniformRegion(Region *R, UniformityInfo &UA) {
1133 if (R->isTopLevelRegion())
1134 return false;
1135
1136 this->UA = &UA;
1137
1138 // TODO: We could probably be smarter here with how we handle sub-regions.
1139 // We currently rely on the fact that metadata is set by earlier invocations
1140 // of the pass on sub-regions, and that this metadata doesn't get lost --
1141 // but we shouldn't rely on metadata for correctness!
1142 unsigned UniformMDKindID =
1143 R->getEntry()->getContext().getMDKindID(Name: "structurizecfg.uniform");
1144
1145 if (hasOnlyUniformBranches(R, UniformMDKindID, UA)) {
1146 LLVM_DEBUG(dbgs() << "Skipping region with uniform control flow: " << *R
1147 << '\n');
1148
1149 // Mark all direct child block terminators as having been treated as
1150 // uniform. To account for a possible future in which non-uniform
1151 // sub-regions are treated more cleverly, indirect children are not
1152 // marked as uniform.
1153 MDNode *MD = MDNode::get(Context&: R->getEntry()->getParent()->getContext(), MDs: {});
1154 for (RegionNode *E : R->elements()) {
1155 if (E->isSubRegion())
1156 continue;
1157
1158 if (Instruction *Term = E->getEntry()->getTerminator())
1159 Term->setMetadata(KindID: UniformMDKindID, Node: MD);
1160 }
1161
1162 return true;
1163 }
1164 return false;
1165}
1166
1167/// Run the transformation for each region found
1168bool StructurizeCFG::run(Region *R, DominatorTree *DT) {
1169 if (R->isTopLevelRegion())
1170 return false;
1171
1172 this->DT = DT;
1173
1174 Func = R->getEntry()->getParent();
1175 assert(hasOnlySimpleTerminator(*Func) && "Unsupported block terminator.");
1176
1177 ParentRegion = R;
1178
1179 orderNodes();
1180 collectInfos();
1181 createFlow();
1182 insertConditions(Loops: false);
1183 insertConditions(Loops: true);
1184 setPhiValues();
1185 simplifyConditions();
1186 simplifyAffectedPhis();
1187 rebuildSSA();
1188
1189 // Cleanup
1190 Order.clear();
1191 Visited.clear();
1192 DeletedPhis.clear();
1193 AddedPhis.clear();
1194 Predicates.clear();
1195 Conditions.clear();
1196 Loops.clear();
1197 LoopPreds.clear();
1198 LoopConds.clear();
1199 FlowSet.clear();
1200 TermDL.clear();
1201
1202 return true;
1203}
1204
1205Pass *llvm::createStructurizeCFGPass(bool SkipUniformRegions) {
1206 return new StructurizeCFGLegacyPass(SkipUniformRegions);
1207}
1208
1209static void addRegionIntoQueue(Region &R, std::vector<Region *> &Regions) {
1210 Regions.push_back(x: &R);
1211 for (const auto &E : R)
1212 addRegionIntoQueue(R&: *E, Regions);
1213}
1214
1215PreservedAnalyses StructurizeCFGPass::run(Function &F,
1216 FunctionAnalysisManager &AM) {
1217
1218 bool Changed = false;
1219 DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(IR&: F);
1220 auto &RI = AM.getResult<RegionInfoAnalysis>(IR&: F);
1221 std::vector<Region *> Regions;
1222 addRegionIntoQueue(R&: *RI.getTopLevelRegion(), Regions);
1223 while (!Regions.empty()) {
1224 Region *R = Regions.back();
1225 StructurizeCFG SCFG;
1226 SCFG.init(R);
1227 Changed |= SCFG.run(R, DT);
1228 Regions.pop_back();
1229 }
1230 if (!Changed)
1231 return PreservedAnalyses::all();
1232 PreservedAnalyses PA;
1233 PA.preserve<DominatorTreeAnalysis>();
1234 return PA;
1235}
1236