1//===- StructurizeCFG.cpp -------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "llvm/Transforms/Scalar/StructurizeCFG.h"
10#include "llvm/ADT/DenseMap.h"
11#include "llvm/ADT/EquivalenceClasses.h"
12#include "llvm/ADT/MapVector.h"
13#include "llvm/ADT/SCCIterator.h"
14#include "llvm/ADT/STLExtras.h"
15#include "llvm/ADT/SmallPtrSet.h"
16#include "llvm/ADT/SmallSet.h"
17#include "llvm/ADT/SmallVector.h"
18#include "llvm/Analysis/InstructionSimplify.h"
19#include "llvm/Analysis/RegionInfo.h"
20#include "llvm/Analysis/RegionIterator.h"
21#include "llvm/Analysis/RegionPass.h"
22#include "llvm/Analysis/UniformityAnalysis.h"
23#include "llvm/IR/BasicBlock.h"
24#include "llvm/IR/CFG.h"
25#include "llvm/IR/Constants.h"
26#include "llvm/IR/Dominators.h"
27#include "llvm/IR/Function.h"
28#include "llvm/IR/InstrTypes.h"
29#include "llvm/IR/Instruction.h"
30#include "llvm/IR/Instructions.h"
31#include "llvm/IR/Metadata.h"
32#include "llvm/IR/PassManager.h"
33#include "llvm/IR/PatternMatch.h"
34#include "llvm/IR/ProfDataUtils.h"
35#include "llvm/IR/Type.h"
36#include "llvm/IR/Use.h"
37#include "llvm/IR/Value.h"
38#include "llvm/IR/ValueHandle.h"
39#include "llvm/InitializePasses.h"
40#include "llvm/Pass.h"
41#include "llvm/Support/Casting.h"
42#include "llvm/Support/CommandLine.h"
43#include "llvm/Support/Debug.h"
44#include "llvm/Support/raw_ostream.h"
45#include "llvm/Transforms/Scalar.h"
46#include "llvm/Transforms/Utils.h"
47#include "llvm/Transforms/Utils/BasicBlockUtils.h"
48#include "llvm/Transforms/Utils/Local.h"
49#include "llvm/Transforms/Utils/SSAUpdater.h"
50#include <cassert>
51#include <utility>
52
53using namespace llvm;
54using namespace llvm::PatternMatch;
55
56#define DEBUG_TYPE "structurizecfg"
57
58// The name for newly created blocks.
59const char FlowBlockName[] = "Flow";
60
61namespace {
62
63static cl::opt<bool> ForceSkipUniformRegions(
64 "structurizecfg-skip-uniform-regions",
65 cl::Hidden,
66 cl::desc("Force whether the StructurizeCFG pass skips uniform regions"),
67 cl::init(Val: false));
68
69static cl::opt<bool>
70 RelaxedUniformRegions("structurizecfg-relaxed-uniform-regions", cl::Hidden,
71 cl::desc("Allow relaxed uniform region checks"),
72 cl::init(Val: true));
73
74// Definition of the complex types used in this pass.
75
76using BBValuePair = std::pair<BasicBlock *, Value *>;
77
78using RNVector = SmallVector<RegionNode *, 8>;
79using BBVector = SmallVector<BasicBlock *, 8>;
80using BranchVector = SmallVector<BranchInst *, 8>;
81using BBValueVector = SmallVector<BBValuePair, 2>;
82
83using BBSet = SmallPtrSet<BasicBlock *, 8>;
84
85using PhiMap = MapVector<PHINode *, BBValueVector>;
86using BB2BBVecMap = MapVector<BasicBlock *, BBVector>;
87
88using BBPhiMap = DenseMap<BasicBlock *, PhiMap>;
89
90using MaybeCondBranchWeights = std::optional<class CondBranchWeights>;
91
92class CondBranchWeights {
93 uint32_t TrueWeight;
94 uint32_t FalseWeight;
95
96 CondBranchWeights(uint32_t T, uint32_t F) : TrueWeight(T), FalseWeight(F) {}
97
98public:
99 static MaybeCondBranchWeights tryParse(const BranchInst &Br) {
100 assert(Br.isConditional());
101
102 uint64_t T, F;
103 if (!extractBranchWeights(I: Br, TrueVal&: T, FalseVal&: F))
104 return std::nullopt;
105
106 return CondBranchWeights(T, F);
107 }
108
109 static void setMetadata(BranchInst &Br,
110 const MaybeCondBranchWeights &Weights) {
111 assert(Br.isConditional());
112 if (!Weights)
113 return;
114 uint32_t Arr[] = {Weights->TrueWeight, Weights->FalseWeight};
115 setBranchWeights(I&: Br, Weights: Arr, IsExpected: false);
116 }
117
118 CondBranchWeights invert() const {
119 return CondBranchWeights{FalseWeight, TrueWeight};
120 }
121};
122
123struct PredInfo {
124 Value *Pred;
125 MaybeCondBranchWeights Weights;
126};
127
128using BBPredicates = DenseMap<BasicBlock *, PredInfo>;
129using PredMap = DenseMap<BasicBlock *, BBPredicates>;
130using BB2BBMap = DenseMap<BasicBlock *, BasicBlock *>;
131
132// A traits type that is intended to be used in graph algorithms. The graph
133// traits starts at an entry node, and traverses the RegionNodes that are in
134// the Nodes set.
135struct SubGraphTraits {
136 using NodeRef = std::pair<RegionNode *, SmallDenseSet<RegionNode *> *>;
137 using BaseSuccIterator = GraphTraits<RegionNode *>::ChildIteratorType;
138
139 // This wraps a set of Nodes into the iterator, so we know which edges to
140 // filter out.
141 class WrappedSuccIterator
142 : public iterator_adaptor_base<
143 WrappedSuccIterator, BaseSuccIterator,
144 typename std::iterator_traits<BaseSuccIterator>::iterator_category,
145 NodeRef, std::ptrdiff_t, NodeRef *, NodeRef> {
146 SmallDenseSet<RegionNode *> *Nodes;
147
148 public:
149 WrappedSuccIterator(BaseSuccIterator It, SmallDenseSet<RegionNode *> *Nodes)
150 : iterator_adaptor_base(It), Nodes(Nodes) {}
151
152 NodeRef operator*() const { return {*I, Nodes}; }
153 };
154
155 static bool filterAll(const NodeRef &N) { return true; }
156 static bool filterSet(const NodeRef &N) { return N.second->count(V: N.first); }
157
158 using ChildIteratorType =
159 filter_iterator<WrappedSuccIterator, bool (*)(const NodeRef &)>;
160
161 static NodeRef getEntryNode(Region *R) {
162 return {GraphTraits<Region *>::getEntryNode(R), nullptr};
163 }
164
165 static NodeRef getEntryNode(NodeRef N) { return N; }
166
167 static iterator_range<ChildIteratorType> children(const NodeRef &N) {
168 auto *filter = N.second ? &filterSet : &filterAll;
169 return make_filter_range(
170 Range: make_range<WrappedSuccIterator>(
171 x: {GraphTraits<RegionNode *>::child_begin(N: N.first), N.second},
172 y: {GraphTraits<RegionNode *>::child_end(N: N.first), N.second}),
173 Pred: filter);
174 }
175
176 static ChildIteratorType child_begin(const NodeRef &N) {
177 return children(N).begin();
178 }
179
180 static ChildIteratorType child_end(const NodeRef &N) {
181 return children(N).end();
182 }
183};
184
185/// Finds the nearest common dominator of a set of BasicBlocks.
186///
187/// For every BB you add to the set, you can specify whether we "remember" the
188/// block. When you get the common dominator, you can also ask whether it's one
189/// of the blocks we remembered.
190class NearestCommonDominator {
191 DominatorTree *DT;
192 BasicBlock *Result = nullptr;
193 bool ResultIsRemembered = false;
194
195 /// Add BB to the resulting dominator.
196 void addBlock(BasicBlock *BB, bool Remember) {
197 if (!Result) {
198 Result = BB;
199 ResultIsRemembered = Remember;
200 return;
201 }
202
203 BasicBlock *NewResult = DT->findNearestCommonDominator(A: Result, B: BB);
204 if (NewResult != Result)
205 ResultIsRemembered = false;
206 if (NewResult == BB)
207 ResultIsRemembered |= Remember;
208 Result = NewResult;
209 }
210
211public:
212 explicit NearestCommonDominator(DominatorTree *DomTree) : DT(DomTree) {}
213
214 void addBlock(BasicBlock *BB) {
215 addBlock(BB, /* Remember = */ false);
216 }
217
218 void addAndRememberBlock(BasicBlock *BB) {
219 addBlock(BB, /* Remember = */ true);
220 }
221
222 /// Get the nearest common dominator of all the BBs added via addBlock() and
223 /// addAndRememberBlock().
224 BasicBlock *result() { return Result; }
225
226 /// Is the BB returned by getResult() one of the blocks we added to the set
227 /// with addAndRememberBlock()?
228 bool resultIsRememberedBlock() { return ResultIsRemembered; }
229};
230
231/// Transforms the control flow graph on one single entry/exit region
232/// at a time.
233///
234/// After the transform all "If"/"Then"/"Else" style control flow looks like
235/// this:
236///
237/// \verbatim
238/// 1
239/// ||
240/// | |
241/// 2 |
242/// | /
243/// |/
244/// 3
245/// || Where:
246/// | | 1 = "If" block, calculates the condition
247/// 4 | 2 = "Then" subregion, runs if the condition is true
248/// | / 3 = "Flow" blocks, newly inserted flow blocks, rejoins the flow
249/// |/ 4 = "Else" optional subregion, runs if the condition is false
250/// 5 5 = "End" block, also rejoins the control flow
251/// \endverbatim
252///
253/// Control flow is expressed as a branch where the true exit goes into the
254/// "Then"/"Else" region, while the false exit skips the region
255/// The condition for the optional "Else" region is expressed as a PHI node.
256/// The incoming values of the PHI node are true for the "If" edge and false
257/// for the "Then" edge.
258///
259/// Additionally to that even complicated loops look like this:
260///
261/// \verbatim
262/// 1
263/// ||
264/// | |
265/// 2 ^ Where:
266/// | / 1 = "Entry" block
267/// |/ 2 = "Loop" optional subregion, with all exits at "Flow" block
268/// 3 3 = "Flow" block, with back edge to entry block
269/// |
270/// \endverbatim
271///
272/// The back edge of the "Flow" block is always on the false side of the branch
273/// while the true side continues the general flow. So the loop condition
274/// consist of a network of PHI nodes where the true incoming values expresses
275/// breaks and the false values expresses continue states.
276
277class StructurizeCFG {
278 Type *Boolean;
279 ConstantInt *BoolTrue;
280 ConstantInt *BoolFalse;
281 Value *BoolPoison;
282
283 Function *Func;
284 Region *ParentRegion;
285
286 UniformityInfo *UA = nullptr;
287 DominatorTree *DT;
288
289 SmallVector<RegionNode *, 8> Order;
290 BBSet Visited;
291 BBSet FlowSet;
292
293 SmallVector<WeakVH, 8> AffectedPhis;
294 BBPhiMap DeletedPhis;
295 BB2BBVecMap AddedPhis;
296
297 PredMap Predicates;
298 BranchVector Conditions;
299
300 BB2BBMap Loops;
301 PredMap LoopPreds;
302 BranchVector LoopConds;
303
304 RegionNode *PrevNode;
305
306 void orderNodes();
307
308 void analyzeLoops(RegionNode *N);
309
310 PredInfo buildCondition(BranchInst *Term, unsigned Idx, bool Invert);
311
312 void gatherPredicates(RegionNode *N);
313
314 void collectInfos();
315
316 void insertConditions(bool Loops);
317
318 void simplifyConditions();
319
320 void delPhiValues(BasicBlock *From, BasicBlock *To);
321
322 void addPhiValues(BasicBlock *From, BasicBlock *To);
323
324 void findUndefBlocks(BasicBlock *PHIBlock,
325 const SmallSet<BasicBlock *, 8> &Incomings,
326 SmallVector<BasicBlock *> &UndefBlks) const;
327
328 void mergeIfCompatible(EquivalenceClasses<PHINode *> &PhiClasses, PHINode *A,
329 PHINode *B);
330
331 void setPhiValues();
332
333 void simplifyAffectedPhis();
334
335 DebugLoc killTerminator(BasicBlock *BB);
336
337 void changeExit(RegionNode *Node, BasicBlock *NewExit,
338 bool IncludeDominator);
339
340 BasicBlock *getNextFlow(BasicBlock *Dominator);
341
342 std::pair<BasicBlock *, DebugLoc> needPrefix(bool NeedEmpty);
343
344 BasicBlock *needPostfix(BasicBlock *Flow, bool ExitUseAllowed);
345
346 void setPrevNode(BasicBlock *BB);
347
348 bool dominatesPredicates(BasicBlock *BB, RegionNode *Node);
349
350 bool isPredictableTrue(RegionNode *Node);
351
352 void wireFlow(bool ExitUseAllowed, BasicBlock *LoopEnd);
353
354 void handleLoops(bool ExitUseAllowed, BasicBlock *LoopEnd);
355
356 void createFlow();
357
358 void rebuildSSA();
359
360public:
361 void init(Region *R);
362 bool run(Region *R, DominatorTree *DT);
363 bool makeUniformRegion(Region *R, UniformityInfo &UA);
364};
365
366class StructurizeCFGLegacyPass : public RegionPass {
367 bool SkipUniformRegions;
368
369public:
370 static char ID;
371
372 explicit StructurizeCFGLegacyPass(bool SkipUniformRegions_ = false)
373 : RegionPass(ID), SkipUniformRegions(SkipUniformRegions_) {
374 if (ForceSkipUniformRegions.getNumOccurrences())
375 SkipUniformRegions = ForceSkipUniformRegions.getValue();
376 initializeStructurizeCFGLegacyPassPass(*PassRegistry::getPassRegistry());
377 }
378
379 bool runOnRegion(Region *R, RGPassManager &RGM) override {
380 StructurizeCFG SCFG;
381 SCFG.init(R);
382 if (SkipUniformRegions) {
383 UniformityInfo &UA =
384 getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo();
385 if (SCFG.makeUniformRegion(R, UA))
386 return false;
387 }
388 DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
389 return SCFG.run(R, DT);
390 }
391
392 StringRef getPassName() const override { return "Structurize control flow"; }
393
394 void getAnalysisUsage(AnalysisUsage &AU) const override {
395 if (SkipUniformRegions)
396 AU.addRequired<UniformityInfoWrapperPass>();
397 AU.addRequired<DominatorTreeWrapperPass>();
398
399 AU.addPreserved<DominatorTreeWrapperPass>();
400 RegionPass::getAnalysisUsage(AU);
401 }
402};
403
404} // end anonymous namespace
405
406char StructurizeCFGLegacyPass::ID = 0;
407
408INITIALIZE_PASS_BEGIN(StructurizeCFGLegacyPass, "structurizecfg",
409 "Structurize the CFG", false, false)
410INITIALIZE_PASS_DEPENDENCY(UniformityInfoWrapperPass)
411INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
412INITIALIZE_PASS_DEPENDENCY(RegionInfoPass)
413INITIALIZE_PASS_END(StructurizeCFGLegacyPass, "structurizecfg",
414 "Structurize the CFG", false, false)
415
416/// Build up the general order of nodes, by performing a topological sort of the
417/// parent region's nodes, while ensuring that there is no outer cycle node
418/// between any two inner cycle nodes.
419void StructurizeCFG::orderNodes() {
420 Order.resize(N: std::distance(first: GraphTraits<Region *>::nodes_begin(R: ParentRegion),
421 last: GraphTraits<Region *>::nodes_end(R: ParentRegion)));
422 if (Order.empty())
423 return;
424
425 SmallDenseSet<RegionNode *> Nodes;
426 auto EntryNode = SubGraphTraits::getEntryNode(R: ParentRegion);
427
428 // A list of range indices of SCCs in Order, to be processed.
429 SmallVector<std::pair<unsigned, unsigned>, 8> WorkList;
430 unsigned I = 0, E = Order.size();
431 while (true) {
432 // Run through all the SCCs in the subgraph starting with Entry.
433 for (auto SCCI =
434 scc_iterator<SubGraphTraits::NodeRef, SubGraphTraits>::begin(
435 G: EntryNode);
436 !SCCI.isAtEnd(); ++SCCI) {
437 auto &SCC = *SCCI;
438
439 // An SCC up to the size of 2, can be reduced to an entry (the last node),
440 // and a possible additional node. Therefore, it is already in order, and
441 // there is no need to add it to the work-list.
442 unsigned Size = SCC.size();
443 if (Size > 2)
444 WorkList.emplace_back(Args&: I, Args: I + Size);
445
446 // Add the SCC nodes to the Order array.
447 for (const auto &N : SCC) {
448 assert(I < E && "SCC size mismatch!");
449 Order[I++] = N.first;
450 }
451 }
452 assert(I == E && "SCC size mismatch!");
453
454 // If there are no more SCCs to order, then we are done.
455 if (WorkList.empty())
456 break;
457
458 std::tie(args&: I, args&: E) = WorkList.pop_back_val();
459
460 // Collect the set of nodes in the SCC's subgraph. These are only the
461 // possible child nodes; we do not add the entry (last node) otherwise we
462 // will have the same exact SCC all over again.
463 Nodes.clear();
464 Nodes.insert(I: Order.begin() + I, E: Order.begin() + E - 1);
465
466 // Update the entry node.
467 EntryNode.first = Order[E - 1];
468 EntryNode.second = &Nodes;
469 }
470}
471
472/// Determine the end of the loops
473void StructurizeCFG::analyzeLoops(RegionNode *N) {
474 if (N->isSubRegion()) {
475 // Test for exit as back edge
476 BasicBlock *Exit = N->getNodeAs<Region>()->getExit();
477 if (Visited.count(Ptr: Exit))
478 Loops[Exit] = N->getEntry();
479
480 } else {
481 // Test for successors as back edge
482 BasicBlock *BB = N->getNodeAs<BasicBlock>();
483 BranchInst *Term = cast<BranchInst>(Val: BB->getTerminator());
484
485 for (BasicBlock *Succ : Term->successors())
486 if (Visited.count(Ptr: Succ))
487 Loops[Succ] = BB;
488 }
489}
490
491/// Build the condition for one edge
492PredInfo StructurizeCFG::buildCondition(BranchInst *Term, unsigned Idx,
493 bool Invert) {
494 Value *Cond = Invert ? BoolFalse : BoolTrue;
495 MaybeCondBranchWeights Weights;
496
497 if (Term->isConditional()) {
498 Cond = Term->getCondition();
499 Weights = CondBranchWeights::tryParse(Br: *Term);
500
501 if (Idx != (unsigned)Invert) {
502 Cond = invertCondition(Condition: Cond);
503 if (Weights)
504 Weights = Weights->invert();
505 }
506 }
507 return {.Pred: Cond, .Weights: Weights};
508}
509
510/// Analyze the predecessors of each block and build up predicates
511void StructurizeCFG::gatherPredicates(RegionNode *N) {
512 RegionInfo *RI = ParentRegion->getRegionInfo();
513 BasicBlock *BB = N->getEntry();
514 BBPredicates &Pred = Predicates[BB];
515 BBPredicates &LPred = LoopPreds[BB];
516
517 for (BasicBlock *P : predecessors(BB)) {
518 // Ignore it if it's a branch from outside into our region entry
519 if (!ParentRegion->contains(BB: P))
520 continue;
521
522 Region *R = RI->getRegionFor(BB: P);
523 if (R == ParentRegion) {
524 // It's a top level block in our region
525 BranchInst *Term = cast<BranchInst>(Val: P->getTerminator());
526 for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) {
527 BasicBlock *Succ = Term->getSuccessor(i);
528 if (Succ != BB)
529 continue;
530
531 if (Visited.count(Ptr: P)) {
532 // Normal forward edge
533 if (Term->isConditional()) {
534 // Try to treat it like an ELSE block
535 BasicBlock *Other = Term->getSuccessor(i: !i);
536 if (Visited.count(Ptr: Other) && !Loops.count(Val: Other) &&
537 !Pred.count(Val: Other) && !Pred.count(Val: P)) {
538
539 Pred[Other] = {.Pred: BoolFalse, .Weights: std::nullopt};
540 Pred[P] = {.Pred: BoolTrue, .Weights: std::nullopt};
541 continue;
542 }
543 }
544 Pred[P] = buildCondition(Term, Idx: i, Invert: false);
545 } else {
546 // Back edge
547 LPred[P] = buildCondition(Term, Idx: i, Invert: true);
548 }
549 }
550 } else {
551 // It's an exit from a sub region
552 while (R->getParent() != ParentRegion)
553 R = R->getParent();
554
555 // Edge from inside a subregion to its entry, ignore it
556 if (*R == *N)
557 continue;
558
559 BasicBlock *Entry = R->getEntry();
560 if (Visited.count(Ptr: Entry))
561 Pred[Entry] = {.Pred: BoolTrue, .Weights: std::nullopt};
562 else
563 LPred[Entry] = {.Pred: BoolFalse, .Weights: std::nullopt};
564 }
565 }
566}
567
568/// Collect various loop and predicate infos
569void StructurizeCFG::collectInfos() {
570 // Reset predicate
571 Predicates.clear();
572
573 // and loop infos
574 Loops.clear();
575 LoopPreds.clear();
576
577 // Reset the visited nodes
578 Visited.clear();
579
580 for (RegionNode *RN : reverse(C&: Order)) {
581 LLVM_DEBUG(dbgs() << "Visiting: "
582 << (RN->isSubRegion() ? "SubRegion with entry: " : "")
583 << RN->getEntry()->getName() << "\n");
584
585 // Analyze all the conditions leading to a node
586 gatherPredicates(N: RN);
587
588 // Remember that we've seen this node
589 Visited.insert(Ptr: RN->getEntry());
590
591 // Find the last back edges
592 analyzeLoops(N: RN);
593 }
594}
595
596/// Insert the missing branch conditions
597void StructurizeCFG::insertConditions(bool Loops) {
598 BranchVector &Conds = Loops ? LoopConds : Conditions;
599 Value *Default = Loops ? BoolTrue : BoolFalse;
600 SSAUpdater PhiInserter;
601
602 for (BranchInst *Term : Conds) {
603 assert(Term->isConditional());
604
605 BasicBlock *Parent = Term->getParent();
606 BasicBlock *SuccTrue = Term->getSuccessor(i: 0);
607 BasicBlock *SuccFalse = Term->getSuccessor(i: 1);
608
609 PhiInserter.Initialize(Ty: Boolean, Name: "");
610 PhiInserter.AddAvailableValue(BB: Loops ? SuccFalse : Parent, V: Default);
611
612 BBPredicates &Preds = Loops ? LoopPreds[SuccFalse] : Predicates[SuccTrue];
613
614 NearestCommonDominator Dominator(DT);
615 Dominator.addBlock(BB: Parent);
616
617 PredInfo ParentInfo{.Pred: nullptr, .Weights: std::nullopt};
618 for (auto [BB, PI] : Preds) {
619 if (BB == Parent) {
620 ParentInfo = PI;
621 break;
622 }
623 PhiInserter.AddAvailableValue(BB, V: PI.Pred);
624 Dominator.addAndRememberBlock(BB);
625 }
626
627 if (ParentInfo.Pred) {
628 Term->setCondition(ParentInfo.Pred);
629 CondBranchWeights::setMetadata(Br&: *Term, Weights: ParentInfo.Weights);
630 } else {
631 if (!Dominator.resultIsRememberedBlock())
632 PhiInserter.AddAvailableValue(BB: Dominator.result(), V: Default);
633
634 Term->setCondition(PhiInserter.GetValueInMiddleOfBlock(BB: Parent));
635 }
636 }
637}
638
639/// Simplify any inverted conditions that were built by buildConditions.
640void StructurizeCFG::simplifyConditions() {
641 SmallVector<Instruction *> InstToErase;
642 for (auto &I : concat<PredMap::value_type>(Ranges&: Predicates, Ranges&: LoopPreds)) {
643 auto &Preds = I.second;
644 for (auto [BB, PI] : Preds) {
645 Instruction *Inverted;
646 if (match(V: PI.Pred, P: m_Not(V: m_OneUse(SubPattern: m_Instruction(I&: Inverted)))) &&
647 !PI.Pred->use_empty()) {
648 if (auto *InvertedCmp = dyn_cast<CmpInst>(Val: Inverted)) {
649 InvertedCmp->setPredicate(InvertedCmp->getInversePredicate());
650 PI.Pred->replaceAllUsesWith(V: InvertedCmp);
651 InstToErase.push_back(Elt: cast<Instruction>(Val: PI.Pred));
652 }
653 }
654 }
655 }
656 for (auto *I : InstToErase)
657 I->eraseFromParent();
658}
659
660/// Remove all PHI values coming from "From" into "To" and remember
661/// them in DeletedPhis
662void StructurizeCFG::delPhiValues(BasicBlock *From, BasicBlock *To) {
663 PhiMap &Map = DeletedPhis[To];
664 for (PHINode &Phi : To->phis()) {
665 bool Recorded = false;
666 while (Phi.getBasicBlockIndex(BB: From) != -1) {
667 Value *Deleted = Phi.removeIncomingValue(BB: From, DeletePHIIfEmpty: false);
668 Map[&Phi].push_back(Elt: std::make_pair(x&: From, y&: Deleted));
669 if (!Recorded) {
670 AffectedPhis.push_back(Elt: &Phi);
671 Recorded = true;
672 }
673 }
674 }
675}
676
677/// Add a dummy PHI value as soon as we knew the new predecessor
678void StructurizeCFG::addPhiValues(BasicBlock *From, BasicBlock *To) {
679 for (PHINode &Phi : To->phis()) {
680 Value *Poison = PoisonValue::get(T: Phi.getType());
681 Phi.addIncoming(V: Poison, BB: From);
682 }
683 AddedPhis[To].push_back(Elt: From);
684}
685
686/// When we are reconstructing a PHI inside \p PHIBlock with incoming values
687/// from predecessors \p Incomings, we have a chance to mark the available value
688/// from some blocks as undefined. The function will find out all such blocks
689/// and return in \p UndefBlks.
690void StructurizeCFG::findUndefBlocks(
691 BasicBlock *PHIBlock, const SmallSet<BasicBlock *, 8> &Incomings,
692 SmallVector<BasicBlock *> &UndefBlks) const {
693 // We may get a post-structured CFG like below:
694 //
695 // | P1
696 // |/
697 // F1
698 // |\
699 // | N
700 // |/
701 // F2
702 // |\
703 // | P2
704 // |/
705 // F3
706 // |\
707 // B
708 //
709 // B is the block that has a PHI being reconstructed. P1/P2 are predecessors
710 // of B before structurization. F1/F2/F3 are flow blocks inserted during
711 // structurization process. Block N is not a predecessor of B before
712 // structurization, but are placed between the predecessors(P1/P2) of B after
713 // structurization. This usually means that threads went to N never take the
714 // path N->F2->F3->B. For example, the threads take the branch F1->N may
715 // always take the branch F2->P2. So, when we are reconstructing a PHI
716 // originally in B, we can safely say the incoming value from N is undefined.
717 SmallSet<BasicBlock *, 8> VisitedBlock;
718 SmallVector<BasicBlock *, 8> Stack;
719 if (PHIBlock == ParentRegion->getExit()) {
720 for (auto P : predecessors(BB: PHIBlock)) {
721 if (ParentRegion->contains(BB: P))
722 Stack.push_back(Elt: P);
723 }
724 } else {
725 append_range(C&: Stack, R: predecessors(BB: PHIBlock));
726 }
727
728 // Do a backward traversal over the CFG, and stop further searching if
729 // the block is not a Flow. If a block is neither flow block nor the
730 // incoming predecessor, then the incoming value from the block is
731 // undefined value for the PHI being reconstructed.
732 while (!Stack.empty()) {
733 BasicBlock *Current = Stack.pop_back_val();
734 if (!VisitedBlock.insert(Ptr: Current).second)
735 continue;
736
737 if (FlowSet.contains(Ptr: Current))
738 llvm::append_range(C&: Stack, R: predecessors(BB: Current));
739 else if (!Incomings.contains(Ptr: Current))
740 UndefBlks.push_back(Elt: Current);
741 }
742}
743
744// If two phi nodes have compatible incoming values (for each
745// incoming block, either they have the same incoming value or only one phi
746// node has an incoming value), let them share the merged incoming values. The
747// merge process is guided by the equivalence information from \p PhiClasses.
748// The function will possibly update the incoming values of leader phi in
749// DeletedPhis.
750void StructurizeCFG::mergeIfCompatible(
751 EquivalenceClasses<PHINode *> &PhiClasses, PHINode *A, PHINode *B) {
752 auto ItA = PhiClasses.findLeader(ECV: PhiClasses.insert(Data: A));
753 auto ItB = PhiClasses.findLeader(ECV: PhiClasses.insert(Data: B));
754 // They are already in the same class, no work needed.
755 if (ItA == ItB)
756 return;
757
758 PHINode *LeaderA = *ItA;
759 PHINode *LeaderB = *ItB;
760 BBValueVector &IncomingA = DeletedPhis[LeaderA->getParent()][LeaderA];
761 BBValueVector &IncomingB = DeletedPhis[LeaderB->getParent()][LeaderB];
762
763 DenseMap<BasicBlock *, Value *> Mergeable(IncomingA.begin(), IncomingA.end());
764 for (auto [BB, V] : IncomingB) {
765 auto BBIt = Mergeable.find(Val: BB);
766 if (BBIt != Mergeable.end() && BBIt->second != V)
767 return;
768 // Either IncomingA does not have this value or IncomingA has the same
769 // value.
770 Mergeable.insert(KV: {BB, V});
771 }
772
773 // Update the incoming value of leaderA.
774 IncomingA.assign(in_start: Mergeable.begin(), in_end: Mergeable.end());
775 PhiClasses.unionSets(L1: ItA, L2: ItB);
776}
777
778/// Add the real PHI value as soon as everything is set up
779void StructurizeCFG::setPhiValues() {
780 SmallVector<PHINode *, 8> InsertedPhis;
781 SSAUpdater Updater(&InsertedPhis);
782 DenseMap<BasicBlock *, SmallVector<BasicBlock *>> UndefBlksMap;
783
784 // Find phi nodes that have compatible incoming values (either they have
785 // the same value for the same block or only one phi node has an incoming
786 // value, see example below). We only search again the phi's that are
787 // referenced by another phi, which is the case we care about.
788 //
789 // For example (-- means no incoming value):
790 // phi1 : BB1:phi2 BB2:v BB3:--
791 // phi2: BB1:-- BB2:v BB3:w
792 //
793 // Then we can merge these incoming values and let phi1, phi2 use the
794 // same set of incoming values:
795 //
796 // phi1&phi2: BB1:phi2 BB2:v BB3:w
797 //
798 // By doing this, phi1 and phi2 would share more intermediate phi nodes.
799 // This would help reduce the number of phi nodes during SSA reconstruction
800 // and ultimately result in fewer COPY instructions.
801 //
802 // This should be correct, because if a phi node does not have incoming
803 // value from certain block, this means the block is not the predecessor
804 // of the parent block, so we actually don't care about its incoming value.
805 EquivalenceClasses<PHINode *> PhiClasses;
806 for (const auto &[To, From] : AddedPhis) {
807 auto OldPhiIt = DeletedPhis.find(Val: To);
808 if (OldPhiIt == DeletedPhis.end())
809 continue;
810
811 PhiMap &BlkPhis = OldPhiIt->second;
812 SmallVector<BasicBlock *> &UndefBlks = UndefBlksMap[To];
813 SmallSet<BasicBlock *, 8> Incomings;
814
815 // Get the undefined blocks shared by all the phi nodes.
816 if (!BlkPhis.empty()) {
817 Incomings.insert_range(R: llvm::make_first_range(c&: BlkPhis.front().second));
818 findUndefBlocks(PHIBlock: To, Incomings, UndefBlks);
819 }
820
821 for (const auto &[Phi, Incomings] : OldPhiIt->second) {
822 SmallVector<PHINode *> IncomingPHIs;
823 for (const auto &[BB, V] : Incomings) {
824 // First, for each phi, check whether it has incoming value which is
825 // another phi.
826 if (PHINode *P = dyn_cast<PHINode>(Val: V))
827 IncomingPHIs.push_back(Elt: P);
828 }
829
830 for (auto *OtherPhi : IncomingPHIs) {
831 // Skip phis that are unrelated to the phi reconstruction for now.
832 if (!DeletedPhis.contains(Val: OtherPhi->getParent()))
833 continue;
834 mergeIfCompatible(PhiClasses, A: Phi, B: OtherPhi);
835 }
836 }
837 }
838
839 for (const auto &AddedPhi : AddedPhis) {
840 BasicBlock *To = AddedPhi.first;
841 const BBVector &From = AddedPhi.second;
842
843 auto It = DeletedPhis.find(Val: To);
844 if (It == DeletedPhis.end())
845 continue;
846
847 PhiMap &Map = It->second;
848 SmallVector<BasicBlock *> &UndefBlks = UndefBlksMap[To];
849 for (const auto &[Phi, Incoming] : Map) {
850 Value *Poison = PoisonValue::get(T: Phi->getType());
851 Updater.Initialize(Ty: Phi->getType(), Name: "");
852 Updater.AddAvailableValue(BB: &Func->getEntryBlock(), V: Poison);
853 Updater.AddAvailableValue(BB: To, V: Poison);
854
855 // Use leader phi's incoming if there is.
856 auto LeaderIt = PhiClasses.findLeader(V: Phi);
857 bool UseIncomingOfLeader =
858 LeaderIt != PhiClasses.member_end() && *LeaderIt != Phi;
859 const auto &IncomingMap =
860 UseIncomingOfLeader ? DeletedPhis[(*LeaderIt)->getParent()][*LeaderIt]
861 : Incoming;
862
863 SmallVector<BasicBlock *> ConstantPreds;
864 for (const auto &[BB, V] : IncomingMap) {
865 Updater.AddAvailableValue(BB, V);
866 if (isa<Constant>(Val: V))
867 ConstantPreds.push_back(Elt: BB);
868 }
869
870 for (auto UB : UndefBlks) {
871 // If this undef block is dominated by any predecessor(before
872 // structurization) of reconstructed PHI with constant incoming value,
873 // don't mark the available value as undefined. Setting undef to such
874 // block will stop us from getting optimal phi insertion.
875 if (any_of(Range&: ConstantPreds,
876 P: [&](BasicBlock *CP) { return DT->dominates(A: CP, B: UB); }))
877 continue;
878 // Maybe already get a value through sharing with other phi nodes.
879 if (Updater.HasValueForBlock(BB: UB))
880 continue;
881
882 Updater.AddAvailableValue(BB: UB, V: Poison);
883 }
884
885 for (BasicBlock *FI : From)
886 Phi->setIncomingValueForBlock(BB: FI, V: Updater.GetValueAtEndOfBlock(BB: FI));
887 AffectedPhis.push_back(Elt: Phi);
888 }
889 }
890
891 AffectedPhis.append(in_start: InsertedPhis.begin(), in_end: InsertedPhis.end());
892}
893
894void StructurizeCFG::simplifyAffectedPhis() {
895 bool Changed;
896 do {
897 Changed = false;
898 SimplifyQuery Q(Func->getDataLayout());
899 Q.DT = DT;
900 // Setting CanUseUndef to true might extend value liveness, set it to false
901 // to achieve better register pressure.
902 Q.CanUseUndef = false;
903 for (WeakVH VH : AffectedPhis) {
904 if (auto Phi = dyn_cast_or_null<PHINode>(Val&: VH)) {
905 if (auto NewValue = simplifyInstruction(I: Phi, Q)) {
906 Phi->replaceAllUsesWith(V: NewValue);
907 Phi->eraseFromParent();
908 Changed = true;
909 }
910 }
911 }
912 } while (Changed);
913}
914
915/// Remove phi values from all successors and then remove the terminator.
916DebugLoc StructurizeCFG::killTerminator(BasicBlock *BB) {
917 Instruction *Term = BB->getTerminator();
918 if (!Term)
919 return DebugLoc();
920
921 for (BasicBlock *Succ : successors(BB))
922 delPhiValues(From: BB, To: Succ);
923
924 DebugLoc DL = Term->getDebugLoc();
925 Term->eraseFromParent();
926 return DL;
927}
928
929/// Let node exit(s) point to NewExit
930void StructurizeCFG::changeExit(RegionNode *Node, BasicBlock *NewExit,
931 bool IncludeDominator) {
932 if (Node->isSubRegion()) {
933 Region *SubRegion = Node->getNodeAs<Region>();
934 BasicBlock *OldExit = SubRegion->getExit();
935 BasicBlock *Dominator = nullptr;
936
937 // Find all the edges from the sub region to the exit.
938 // We use make_early_inc_range here because we modify BB's terminator.
939 for (BasicBlock *BB : llvm::make_early_inc_range(Range: predecessors(BB: OldExit))) {
940 if (!SubRegion->contains(BB))
941 continue;
942
943 // Modify the edges to point to the new exit
944 delPhiValues(From: BB, To: OldExit);
945 BB->getTerminator()->replaceUsesOfWith(From: OldExit, To: NewExit);
946 addPhiValues(From: BB, To: NewExit);
947
948 // Find the new dominator (if requested)
949 if (IncludeDominator) {
950 if (!Dominator)
951 Dominator = BB;
952 else
953 Dominator = DT->findNearestCommonDominator(A: Dominator, B: BB);
954 }
955 }
956
957 // Change the dominator (if requested)
958 if (Dominator)
959 DT->changeImmediateDominator(BB: NewExit, NewBB: Dominator);
960
961 // Update the region info
962 SubRegion->replaceExit(BB: NewExit);
963 } else {
964 BasicBlock *BB = Node->getNodeAs<BasicBlock>();
965 DebugLoc DL = killTerminator(BB);
966 BranchInst *Br = BranchInst::Create(IfTrue: NewExit, InsertBefore: BB);
967 Br->setDebugLoc(DL);
968 addPhiValues(From: BB, To: NewExit);
969 if (IncludeDominator)
970 DT->changeImmediateDominator(BB: NewExit, NewBB: BB);
971 }
972}
973
974/// Create a new flow node and update dominator tree and region info
975BasicBlock *StructurizeCFG::getNextFlow(BasicBlock *Dominator) {
976 LLVMContext &Context = Func->getContext();
977 BasicBlock *Insert = Order.empty() ? ParentRegion->getExit() :
978 Order.back()->getEntry();
979 BasicBlock *Flow = BasicBlock::Create(Context, Name: FlowBlockName,
980 Parent: Func, InsertBefore: Insert);
981 FlowSet.insert(Ptr: Flow);
982 DT->addNewBlock(BB: Flow, DomBB: Dominator);
983 ParentRegion->getRegionInfo()->setRegionFor(BB: Flow, R: ParentRegion);
984 return Flow;
985}
986
987/// Create a new or reuse the previous node as flow node. Returns a block and a
988/// debug location to be used for new instructions in that block.
989std::pair<BasicBlock *, DebugLoc> StructurizeCFG::needPrefix(bool NeedEmpty) {
990 BasicBlock *Entry = PrevNode->getEntry();
991
992 if (!PrevNode->isSubRegion()) {
993 DebugLoc DL = killTerminator(BB: Entry);
994 if (!NeedEmpty || Entry->getFirstInsertionPt() == Entry->end())
995 return {Entry, DL};
996 }
997
998 // create a new flow node
999 BasicBlock *Flow = getNextFlow(Dominator: Entry);
1000
1001 // and wire it up
1002 changeExit(Node: PrevNode, NewExit: Flow, IncludeDominator: true);
1003 PrevNode = ParentRegion->getBBNode(BB: Flow);
1004 return {Flow, DebugLoc()};
1005}
1006
1007/// Returns the region exit if possible, otherwise just a new flow node
1008BasicBlock *StructurizeCFG::needPostfix(BasicBlock *Flow,
1009 bool ExitUseAllowed) {
1010 if (!Order.empty() || !ExitUseAllowed)
1011 return getNextFlow(Dominator: Flow);
1012
1013 BasicBlock *Exit = ParentRegion->getExit();
1014 DT->changeImmediateDominator(BB: Exit, NewBB: Flow);
1015 addPhiValues(From: Flow, To: Exit);
1016 return Exit;
1017}
1018
1019/// Set the previous node
1020void StructurizeCFG::setPrevNode(BasicBlock *BB) {
1021 PrevNode = ParentRegion->contains(BB) ? ParentRegion->getBBNode(BB)
1022 : nullptr;
1023}
1024
1025/// Does BB dominate all the predicates of Node?
1026bool StructurizeCFG::dominatesPredicates(BasicBlock *BB, RegionNode *Node) {
1027 BBPredicates &Preds = Predicates[Node->getEntry()];
1028 return llvm::all_of(Range&: Preds, P: [&](std::pair<BasicBlock *, PredInfo> Pred) {
1029 return DT->dominates(A: BB, B: Pred.first);
1030 });
1031}
1032
1033/// Can we predict that this node will always be called?
1034bool StructurizeCFG::isPredictableTrue(RegionNode *Node) {
1035 BBPredicates &Preds = Predicates[Node->getEntry()];
1036 bool Dominated = false;
1037
1038 // Regionentry is always true
1039 if (!PrevNode)
1040 return true;
1041
1042 for (auto [BB, PI] : Preds) {
1043 if (PI.Pred != BoolTrue)
1044 return false;
1045
1046 if (!Dominated && DT->dominates(A: BB, B: PrevNode->getEntry()))
1047 Dominated = true;
1048 }
1049
1050 // TODO: The dominator check is too strict
1051 return Dominated;
1052}
1053
1054/// Take one node from the order vector and wire it up
1055void StructurizeCFG::wireFlow(bool ExitUseAllowed,
1056 BasicBlock *LoopEnd) {
1057 RegionNode *Node = Order.pop_back_val();
1058 Visited.insert(Ptr: Node->getEntry());
1059
1060 if (isPredictableTrue(Node)) {
1061 // Just a linear flow
1062 if (PrevNode) {
1063 changeExit(Node: PrevNode, NewExit: Node->getEntry(), IncludeDominator: true);
1064 }
1065 PrevNode = Node;
1066 } else {
1067 // Insert extra prefix node (or reuse last one)
1068 auto [Flow, DL] = needPrefix(NeedEmpty: false);
1069
1070 // Insert extra postfix node (or use exit instead)
1071 BasicBlock *Entry = Node->getEntry();
1072 BasicBlock *Next = needPostfix(Flow, ExitUseAllowed);
1073
1074 // let it point to entry and next block
1075 BranchInst *Br = BranchInst::Create(IfTrue: Entry, IfFalse: Next, Cond: BoolPoison, InsertBefore: Flow);
1076 Br->setDebugLoc(DL);
1077 Conditions.push_back(Elt: Br);
1078 addPhiValues(From: Flow, To: Entry);
1079 DT->changeImmediateDominator(BB: Entry, NewBB: Flow);
1080
1081 PrevNode = Node;
1082 while (!Order.empty() && !Visited.count(Ptr: LoopEnd) &&
1083 dominatesPredicates(BB: Entry, Node: Order.back())) {
1084 handleLoops(ExitUseAllowed: false, LoopEnd);
1085 }
1086
1087 changeExit(Node: PrevNode, NewExit: Next, IncludeDominator: false);
1088 setPrevNode(Next);
1089 }
1090}
1091
1092void StructurizeCFG::handleLoops(bool ExitUseAllowed,
1093 BasicBlock *LoopEnd) {
1094 RegionNode *Node = Order.back();
1095 BasicBlock *LoopStart = Node->getEntry();
1096
1097 if (!Loops.count(Val: LoopStart)) {
1098 wireFlow(ExitUseAllowed, LoopEnd);
1099 return;
1100 }
1101
1102 if (!isPredictableTrue(Node))
1103 LoopStart = needPrefix(NeedEmpty: true).first;
1104
1105 LoopEnd = Loops[Node->getEntry()];
1106 wireFlow(ExitUseAllowed: false, LoopEnd);
1107 while (!Visited.count(Ptr: LoopEnd)) {
1108 handleLoops(ExitUseAllowed: false, LoopEnd);
1109 }
1110
1111 assert(LoopStart != &LoopStart->getParent()->getEntryBlock());
1112
1113 // Create an extra loop end node
1114 DebugLoc DL;
1115 std::tie(args&: LoopEnd, args&: DL) = needPrefix(NeedEmpty: false);
1116 BasicBlock *Next = needPostfix(Flow: LoopEnd, ExitUseAllowed);
1117 BranchInst *Br = BranchInst::Create(IfTrue: Next, IfFalse: LoopStart, Cond: BoolPoison, InsertBefore: LoopEnd);
1118 Br->setDebugLoc(DL);
1119 LoopConds.push_back(Elt: Br);
1120 addPhiValues(From: LoopEnd, To: LoopStart);
1121 setPrevNode(Next);
1122}
1123
1124/// After this function control flow looks like it should be, but
1125/// branches and PHI nodes only have undefined conditions.
1126void StructurizeCFG::createFlow() {
1127 BasicBlock *Exit = ParentRegion->getExit();
1128 bool EntryDominatesExit = DT->dominates(A: ParentRegion->getEntry(), B: Exit);
1129
1130 AffectedPhis.clear();
1131 DeletedPhis.clear();
1132 AddedPhis.clear();
1133 Conditions.clear();
1134 LoopConds.clear();
1135
1136 PrevNode = nullptr;
1137 Visited.clear();
1138
1139 while (!Order.empty()) {
1140 handleLoops(ExitUseAllowed: EntryDominatesExit, LoopEnd: nullptr);
1141 }
1142
1143 if (PrevNode)
1144 changeExit(Node: PrevNode, NewExit: Exit, IncludeDominator: EntryDominatesExit);
1145 else
1146 assert(EntryDominatesExit);
1147}
1148
1149/// Handle a rare case where the disintegrated nodes instructions
1150/// no longer dominate all their uses. Not sure if this is really necessary
1151void StructurizeCFG::rebuildSSA() {
1152 SSAUpdater Updater;
1153 for (BasicBlock *BB : ParentRegion->blocks())
1154 for (Instruction &I : *BB) {
1155 bool Initialized = false;
1156 // We may modify the use list as we iterate over it, so we use
1157 // make_early_inc_range.
1158 for (Use &U : llvm::make_early_inc_range(Range: I.uses())) {
1159 Instruction *User = cast<Instruction>(Val: U.getUser());
1160 if (User->getParent() == BB) {
1161 continue;
1162 } else if (PHINode *UserPN = dyn_cast<PHINode>(Val: User)) {
1163 if (UserPN->getIncomingBlock(U) == BB)
1164 continue;
1165 }
1166
1167 if (DT->dominates(Def: &I, User))
1168 continue;
1169
1170 if (!Initialized) {
1171 Value *Poison = PoisonValue::get(T: I.getType());
1172 Updater.Initialize(Ty: I.getType(), Name: "");
1173 Updater.AddAvailableValue(BB: &Func->getEntryBlock(), V: Poison);
1174 Updater.AddAvailableValue(BB, V: &I);
1175 Initialized = true;
1176 }
1177 Updater.RewriteUseAfterInsertions(U);
1178 }
1179 }
1180}
1181
1182static bool hasOnlyUniformBranches(Region *R, unsigned UniformMDKindID,
1183 const UniformityInfo &UA) {
1184 // Bool for if all sub-regions are uniform.
1185 bool SubRegionsAreUniform = true;
1186 // Count of how many direct children are conditional.
1187 unsigned ConditionalDirectChildren = 0;
1188
1189 for (auto *E : R->elements()) {
1190 if (!E->isSubRegion()) {
1191 auto Br = dyn_cast<BranchInst>(Val: E->getEntry()->getTerminator());
1192 if (!Br || !Br->isConditional())
1193 continue;
1194
1195 if (!UA.isUniform(I: Br))
1196 return false;
1197
1198 // One of our direct children is conditional.
1199 ConditionalDirectChildren++;
1200
1201 LLVM_DEBUG(dbgs() << "BB: " << Br->getParent()->getName()
1202 << " has uniform terminator\n");
1203 } else {
1204 // Explicitly refuse to treat regions as uniform if they have non-uniform
1205 // subregions. We cannot rely on UniformityAnalysis for branches in
1206 // subregions because those branches may have been removed and re-created,
1207 // so we look for our metadata instead.
1208 //
1209 // Warning: It would be nice to treat regions as uniform based only on
1210 // their direct child basic blocks' terminators, regardless of whether
1211 // subregions are uniform or not. However, this requires a very careful
1212 // look at SIAnnotateControlFlow to make sure nothing breaks there.
1213 for (auto *BB : E->getNodeAs<Region>()->blocks()) {
1214 auto Br = dyn_cast<BranchInst>(Val: BB->getTerminator());
1215 if (!Br || !Br->isConditional())
1216 continue;
1217
1218 if (!Br->getMetadata(KindID: UniformMDKindID)) {
1219 // Early exit if we cannot have relaxed uniform regions.
1220 if (!RelaxedUniformRegions)
1221 return false;
1222
1223 SubRegionsAreUniform = false;
1224 break;
1225 }
1226 }
1227 }
1228 }
1229
1230 // Our region is uniform if:
1231 // 1. All conditional branches that are direct children are uniform (checked
1232 // above).
1233 // 2. And either:
1234 // a. All sub-regions are uniform.
1235 // b. There is one or less conditional branches among the direct children.
1236 return SubRegionsAreUniform || (ConditionalDirectChildren <= 1);
1237}
1238
1239void StructurizeCFG::init(Region *R) {
1240 LLVMContext &Context = R->getEntry()->getContext();
1241
1242 Boolean = Type::getInt1Ty(C&: Context);
1243 BoolTrue = ConstantInt::getTrue(Context);
1244 BoolFalse = ConstantInt::getFalse(Context);
1245 BoolPoison = PoisonValue::get(T: Boolean);
1246
1247 this->UA = nullptr;
1248}
1249
1250bool StructurizeCFG::makeUniformRegion(Region *R, UniformityInfo &UA) {
1251 if (R->isTopLevelRegion())
1252 return false;
1253
1254 this->UA = &UA;
1255
1256 // TODO: We could probably be smarter here with how we handle sub-regions.
1257 // We currently rely on the fact that metadata is set by earlier invocations
1258 // of the pass on sub-regions, and that this metadata doesn't get lost --
1259 // but we shouldn't rely on metadata for correctness!
1260 unsigned UniformMDKindID =
1261 R->getEntry()->getContext().getMDKindID(Name: "structurizecfg.uniform");
1262
1263 if (hasOnlyUniformBranches(R, UniformMDKindID, UA)) {
1264 LLVM_DEBUG(dbgs() << "Skipping region with uniform control flow: " << *R
1265 << '\n');
1266
1267 // Mark all direct child block terminators as having been treated as
1268 // uniform. To account for a possible future in which non-uniform
1269 // sub-regions are treated more cleverly, indirect children are not
1270 // marked as uniform.
1271 MDNode *MD = MDNode::get(Context&: R->getEntry()->getParent()->getContext(), MDs: {});
1272 for (RegionNode *E : R->elements()) {
1273 if (E->isSubRegion())
1274 continue;
1275
1276 if (Instruction *Term = E->getEntry()->getTerminator())
1277 Term->setMetadata(KindID: UniformMDKindID, Node: MD);
1278 }
1279
1280 return true;
1281 }
1282 return false;
1283}
1284
1285/// Run the transformation for each region found
1286bool StructurizeCFG::run(Region *R, DominatorTree *DT) {
1287 if (R->isTopLevelRegion())
1288 return false;
1289
1290 this->DT = DT;
1291
1292 Func = R->getEntry()->getParent();
1293 assert(hasOnlySimpleTerminator(*Func) && "Unsupported block terminator.");
1294
1295 ParentRegion = R;
1296
1297 orderNodes();
1298 collectInfos();
1299 createFlow();
1300 insertConditions(Loops: false);
1301 insertConditions(Loops: true);
1302 setPhiValues();
1303 simplifyConditions();
1304 simplifyAffectedPhis();
1305 rebuildSSA();
1306
1307 // Cleanup
1308 Order.clear();
1309 Visited.clear();
1310 DeletedPhis.clear();
1311 AddedPhis.clear();
1312 Predicates.clear();
1313 Conditions.clear();
1314 Loops.clear();
1315 LoopPreds.clear();
1316 LoopConds.clear();
1317 FlowSet.clear();
1318
1319 return true;
1320}
1321
1322Pass *llvm::createStructurizeCFGPass(bool SkipUniformRegions) {
1323 return new StructurizeCFGLegacyPass(SkipUniformRegions);
1324}
1325
1326static void addRegionIntoQueue(Region &R, std::vector<Region *> &Regions) {
1327 Regions.push_back(x: &R);
1328 for (const auto &E : R)
1329 addRegionIntoQueue(R&: *E, Regions);
1330}
1331
1332StructurizeCFGPass::StructurizeCFGPass(bool SkipUniformRegions_)
1333 : SkipUniformRegions(SkipUniformRegions_) {
1334 if (ForceSkipUniformRegions.getNumOccurrences())
1335 SkipUniformRegions = ForceSkipUniformRegions.getValue();
1336}
1337
1338void StructurizeCFGPass::printPipeline(
1339 raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
1340 static_cast<PassInfoMixin<StructurizeCFGPass> *>(this)->printPipeline(
1341 OS, MapClassName2PassName);
1342 if (SkipUniformRegions)
1343 OS << "<skip-uniform-regions>";
1344}
1345
1346PreservedAnalyses StructurizeCFGPass::run(Function &F,
1347 FunctionAnalysisManager &AM) {
1348
1349 bool Changed = false;
1350 DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(IR&: F);
1351 auto &RI = AM.getResult<RegionInfoAnalysis>(IR&: F);
1352
1353 UniformityInfo *UI = nullptr;
1354 if (SkipUniformRegions)
1355 UI = &AM.getResult<UniformityInfoAnalysis>(IR&: F);
1356
1357 std::vector<Region *> Regions;
1358 addRegionIntoQueue(R&: *RI.getTopLevelRegion(), Regions);
1359 while (!Regions.empty()) {
1360 Region *R = Regions.back();
1361 Regions.pop_back();
1362
1363 StructurizeCFG SCFG;
1364 SCFG.init(R);
1365
1366 if (SkipUniformRegions && SCFG.makeUniformRegion(R, UA&: *UI)) {
1367 Changed = true; // May have added metadata.
1368 continue;
1369 }
1370
1371 Changed |= SCFG.run(R, DT);
1372 }
1373 if (!Changed)
1374 return PreservedAnalyses::all();
1375 PreservedAnalyses PA;
1376 PA.preserve<DominatorTreeAnalysis>();
1377 return PA;
1378}
1379