1//===- StructurizeCFG.cpp -------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "llvm/Transforms/Scalar/StructurizeCFG.h"
10#include "llvm/ADT/DenseMap.h"
11#include "llvm/ADT/EquivalenceClasses.h"
12#include "llvm/ADT/MapVector.h"
13#include "llvm/ADT/SCCIterator.h"
14#include "llvm/ADT/STLExtras.h"
15#include "llvm/ADT/SmallPtrSet.h"
16#include "llvm/ADT/SmallVector.h"
17#include "llvm/Analysis/InstructionSimplify.h"
18#include "llvm/Analysis/RegionInfo.h"
19#include "llvm/Analysis/RegionIterator.h"
20#include "llvm/Analysis/RegionPass.h"
21#include "llvm/Analysis/TargetTransformInfo.h"
22#include "llvm/Analysis/UniformityAnalysis.h"
23#include "llvm/IR/BasicBlock.h"
24#include "llvm/IR/CFG.h"
25#include "llvm/IR/Constants.h"
26#include "llvm/IR/Dominators.h"
27#include "llvm/IR/Function.h"
28#include "llvm/IR/InstrTypes.h"
29#include "llvm/IR/Instruction.h"
30#include "llvm/IR/Instructions.h"
31#include "llvm/IR/Metadata.h"
32#include "llvm/IR/PassManager.h"
33#include "llvm/IR/PatternMatch.h"
34#include "llvm/IR/ProfDataUtils.h"
35#include "llvm/IR/Type.h"
36#include "llvm/IR/Use.h"
37#include "llvm/IR/Value.h"
38#include "llvm/IR/ValueHandle.h"
39#include "llvm/InitializePasses.h"
40#include "llvm/Pass.h"
41#include "llvm/Support/Casting.h"
42#include "llvm/Support/CommandLine.h"
43#include "llvm/Support/Debug.h"
44#include "llvm/Support/raw_ostream.h"
45#include "llvm/Transforms/Scalar.h"
46#include "llvm/Transforms/Utils.h"
47#include "llvm/Transforms/Utils/BasicBlockUtils.h"
48#include "llvm/Transforms/Utils/Local.h"
49#include "llvm/Transforms/Utils/SSAUpdater.h"
50#include "llvm/Transforms/Utils/SSAUpdaterBulk.h"
51#include <cassert>
52#include <utility>
53
54using namespace llvm;
55using namespace llvm::PatternMatch;
56
57#define DEBUG_TYPE "structurizecfg"
58
59// The name for newly created blocks.
60const char FlowBlockName[] = "Flow";
61
62namespace {
63
64static cl::opt<bool> ForceSkipUniformRegions(
65 "structurizecfg-skip-uniform-regions",
66 cl::Hidden,
67 cl::desc("Force whether the StructurizeCFG pass skips uniform regions"),
68 cl::init(Val: false));
69
70static cl::opt<bool>
71 RelaxedUniformRegions("structurizecfg-relaxed-uniform-regions", cl::Hidden,
72 cl::desc("Allow relaxed uniform region checks"),
73 cl::init(Val: true));
74
75// Definition of the complex types used in this pass.
76
77using BBValuePair = std::pair<BasicBlock *, Value *>;
78
79using RNVector = SmallVector<RegionNode *, 8>;
80using BBVector = SmallVector<BasicBlock *, 8>;
81using BranchVector = SmallVector<CondBrInst *, 8>;
82using BBValueVector = SmallVector<BBValuePair, 2>;
83
84using BBSet = SmallPtrSet<BasicBlock *, 8>;
85
86using PhiMap = MapVector<PHINode *, BBValueVector>;
87using BB2BBVecMap = MapVector<BasicBlock *, BBVector>;
88
89using BBPhiMap = DenseMap<BasicBlock *, PhiMap>;
90
91using MaybeCondBranchWeights = std::optional<class CondBranchWeights>;
92
93class CondBranchWeights {
94 uint32_t TrueWeight;
95 uint32_t FalseWeight;
96
97 CondBranchWeights(uint32_t T, uint32_t F) : TrueWeight(T), FalseWeight(F) {}
98
99public:
100 static MaybeCondBranchWeights tryParse(const CondBrInst &Br) {
101 uint64_t T, F;
102 if (!extractBranchWeights(I: Br, TrueVal&: T, FalseVal&: F))
103 return std::nullopt;
104
105 return CondBranchWeights(T, F);
106 }
107
108 static void setMetadata(CondBrInst &Br,
109 const MaybeCondBranchWeights &Weights) {
110 if (!Weights)
111 return;
112 uint32_t Arr[] = {Weights->TrueWeight, Weights->FalseWeight};
113 setBranchWeights(I&: Br, Weights: Arr, IsExpected: false);
114 }
115
116 CondBranchWeights invert() const {
117 return CondBranchWeights{FalseWeight, TrueWeight};
118 }
119};
120
121struct PredInfo {
122 Value *Pred;
123 MaybeCondBranchWeights Weights;
124};
125
126using BBPredicates = DenseMap<BasicBlock *, PredInfo>;
127using PredMap = DenseMap<BasicBlock *, BBPredicates>;
128using BB2BBMap = DenseMap<BasicBlock *, BasicBlock *>;
129using Val2BBMap = DenseMap<Value *, BasicBlock *>;
130
131// A traits type that is intended to be used in graph algorithms. The graph
132// traits starts at an entry node, and traverses the RegionNodes that are in
133// the Nodes set.
134struct SubGraphTraits {
135 using NodeRef = std::pair<RegionNode *, SmallDenseSet<RegionNode *> *>;
136 using BaseSuccIterator = GraphTraits<RegionNode *>::ChildIteratorType;
137
138 // This wraps a set of Nodes into the iterator, so we know which edges to
139 // filter out.
140 class WrappedSuccIterator
141 : public iterator_adaptor_base<
142 WrappedSuccIterator, BaseSuccIterator,
143 std::iterator_traits<BaseSuccIterator>::iterator_category, NodeRef,
144 std::ptrdiff_t, NodeRef *, NodeRef> {
145 SmallDenseSet<RegionNode *> *Nodes;
146
147 public:
148 WrappedSuccIterator(BaseSuccIterator It, SmallDenseSet<RegionNode *> *Nodes)
149 : iterator_adaptor_base(It), Nodes(Nodes) {}
150
151 NodeRef operator*() const { return {*I, Nodes}; }
152 };
153
154 static bool filterAll(const NodeRef &N) { return true; }
155 static bool filterSet(const NodeRef &N) { return N.second->count(V: N.first); }
156
157 using ChildIteratorType =
158 filter_iterator<WrappedSuccIterator, bool (*)(const NodeRef &)>;
159
160 static NodeRef getEntryNode(Region *R) {
161 return {GraphTraits<Region *>::getEntryNode(R), nullptr};
162 }
163
164 static NodeRef getEntryNode(NodeRef N) { return N; }
165
166 static iterator_range<ChildIteratorType> children(const NodeRef &N) {
167 auto *filter = N.second ? &filterSet : &filterAll;
168 return make_filter_range(
169 Range: make_range<WrappedSuccIterator>(
170 x: {GraphTraits<RegionNode *>::child_begin(N: N.first), N.second},
171 y: {GraphTraits<RegionNode *>::child_end(N: N.first), N.second}),
172 Pred: filter);
173 }
174
175 static ChildIteratorType child_begin(const NodeRef &N) {
176 return children(N).begin();
177 }
178
179 static ChildIteratorType child_end(const NodeRef &N) {
180 return children(N).end();
181 }
182};
183
184/// Finds the nearest common dominator of a set of BasicBlocks.
185///
186/// For every BB you add to the set, you can specify whether we "remember" the
187/// block. When you get the common dominator, you can also ask whether it's one
188/// of the blocks we remembered.
189class NearestCommonDominator {
190 DominatorTree *DT;
191 BasicBlock *Result = nullptr;
192 bool ResultIsRemembered = false;
193
194 /// Add BB to the resulting dominator.
195 void addBlock(BasicBlock *BB, bool Remember) {
196 if (!Result) {
197 Result = BB;
198 ResultIsRemembered = Remember;
199 return;
200 }
201
202 BasicBlock *NewResult = DT->findNearestCommonDominator(A: Result, B: BB);
203 if (NewResult != Result)
204 ResultIsRemembered = false;
205 if (NewResult == BB)
206 ResultIsRemembered |= Remember;
207 Result = NewResult;
208 }
209
210public:
211 explicit NearestCommonDominator(DominatorTree *DomTree) : DT(DomTree) {}
212
213 void addBlock(BasicBlock *BB) {
214 addBlock(BB, /* Remember = */ false);
215 }
216
217 void addAndRememberBlock(BasicBlock *BB) {
218 addBlock(BB, /* Remember = */ true);
219 }
220
221 /// Get the nearest common dominator of all the BBs added via addBlock() and
222 /// addAndRememberBlock().
223 BasicBlock *result() { return Result; }
224
225 /// Is the BB returned by getResult() one of the blocks we added to the set
226 /// with addAndRememberBlock()?
227 bool resultIsRememberedBlock() { return ResultIsRemembered; }
228};
229
230/// Transforms the control flow graph on one single entry/exit region
231/// at a time.
232///
233/// After the transform all "If"/"Then"/"Else" style control flow looks like
234/// this:
235///
236/// \verbatim
237/// 1
238/// ||
239/// | |
240/// 2 |
241/// | /
242/// |/
243/// 3
244/// || Where:
245/// | | 1 = "If" block, calculates the condition
246/// 4 | 2 = "Then" subregion, runs if the condition is true
247/// | / 3 = "Flow" blocks, newly inserted flow blocks, rejoins the flow
248/// |/ 4 = "Else" optional subregion, runs if the condition is false
249/// 5 5 = "End" block, also rejoins the control flow
250/// \endverbatim
251///
252/// Control flow is expressed as a branch where the true exit goes into the
253/// "Then"/"Else" region, while the false exit skips the region
254/// The condition for the optional "Else" region is expressed as a PHI node.
255/// The incoming values of the PHI node are true for the "If" edge and false
256/// for the "Then" edge.
257///
258/// Additionally to that even complicated loops look like this:
259///
260/// \verbatim
261/// 1
262/// ||
263/// | |
264/// 2 ^ Where:
265/// | / 1 = "Entry" block
266/// |/ 2 = "Loop" optional subregion, with all exits at "Flow" block
267/// 3 3 = "Flow" block, with back edge to entry block
268/// |
269/// \endverbatim
270///
271/// The back edge of the "Flow" block is always on the false side of the branch
272/// while the true side continues the general flow. So the loop condition
273/// consist of a network of PHI nodes where the true incoming values expresses
274/// breaks and the false values expresses continue states.
275
276class StructurizeCFG {
277 Type *Boolean;
278 ConstantInt *BoolTrue;
279 ConstantInt *BoolFalse;
280 Value *BoolPoison;
281 const TargetTransformInfo *TTI;
282 Function *Func;
283 Region *ParentRegion;
284
285 UniformityInfo *UA = nullptr;
286 DominatorTree *DT;
287
288 SmallVector<RegionNode *, 8> Order;
289 BBSet Visited;
290 BBSet FlowSet;
291
292 SmallVector<WeakVH, 8> AffectedPhis;
293 BBPhiMap DeletedPhis;
294 BB2BBVecMap AddedPhis;
295
296 PredMap Predicates;
297 BranchVector Conditions;
298
299 BB2BBMap Loops;
300 PredMap LoopPreds;
301 BranchVector LoopConds;
302
303 Val2BBMap HoistedValues;
304
305 RegionNode *PrevNode;
306
307 void hoistZeroCostElseBlockPhiValues(BasicBlock *ElseBB, BasicBlock *ThenBB);
308
309 bool isHoistableInstruction(Instruction *I, BasicBlock *BB,
310 BasicBlock *HoistTo);
311
312 void orderNodes();
313
314 void analyzeLoops(RegionNode *N);
315
316 PredInfo buildCondition(CondBrInst *Term, unsigned Idx, bool Invert);
317
318 void gatherPredicates(RegionNode *N);
319
320 void collectInfos();
321
322 void insertConditions(bool Loops, SSAUpdaterBulk &PhiInserter);
323
324 void simplifyConditions();
325
326 void delPhiValues(BasicBlock *From, BasicBlock *To);
327
328 void addPhiValues(BasicBlock *From, BasicBlock *To);
329
330 void findUndefBlocks(BasicBlock *PHIBlock,
331 const SmallPtrSet<BasicBlock *, 8> &Incomings,
332 SmallVector<BasicBlock *> &UndefBlks) const;
333
334 void mergeIfCompatible(EquivalenceClasses<PHINode *> &PhiClasses, PHINode *A,
335 PHINode *B);
336
337 void setPhiValues();
338
339 void simplifyAffectedPhis();
340
341 void simplifyHoistedPhis();
342
343 DebugLoc killTerminator(BasicBlock *BB);
344
345 void changeExit(RegionNode *Node, BasicBlock *NewExit,
346 bool IncludeDominator);
347
348 BasicBlock *getNextFlow(BasicBlock *Dominator);
349
350 std::pair<BasicBlock *, DebugLoc> needPrefix(bool NeedEmpty);
351
352 BasicBlock *needPostfix(BasicBlock *Flow, bool ExitUseAllowed);
353
354 void setPrevNode(BasicBlock *BB);
355
356 bool dominatesPredicates(BasicBlock *BB, RegionNode *Node);
357
358 bool isPredictableTrue(RegionNode *Node);
359
360 void wireFlow(bool ExitUseAllowed, BasicBlock *LoopEnd);
361
362 void handleLoops(bool ExitUseAllowed, BasicBlock *LoopEnd);
363
364 void createFlow();
365
366 void rebuildSSA();
367
368public:
369 void init(Region *R);
370 bool run(Region *R, DominatorTree *DT, const TargetTransformInfo *TTI);
371 bool makeUniformRegion(Region *R, UniformityInfo &UA);
372};
373
374class StructurizeCFGLegacyPass : public RegionPass {
375 bool SkipUniformRegions;
376
377public:
378 static char ID;
379
380 explicit StructurizeCFGLegacyPass(bool SkipUniformRegions_ = false)
381 : RegionPass(ID), SkipUniformRegions(SkipUniformRegions_) {
382 if (ForceSkipUniformRegions.getNumOccurrences())
383 SkipUniformRegions = ForceSkipUniformRegions.getValue();
384 initializeStructurizeCFGLegacyPassPass(*PassRegistry::getPassRegistry());
385 }
386
387 bool runOnRegion(Region *R, RGPassManager &RGM) override {
388 StructurizeCFG SCFG;
389 SCFG.init(R);
390 if (SkipUniformRegions) {
391 UniformityInfo &UA =
392 getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo();
393 if (SCFG.makeUniformRegion(R, UA))
394 return false;
395 }
396 Function *F = R->getEntry()->getParent();
397 const TargetTransformInfo *TTI =
398 &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F: *F);
399 DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
400 return SCFG.run(R, DT, TTI);
401 }
402
403 StringRef getPassName() const override { return "Structurize control flow"; }
404
405 void getAnalysisUsage(AnalysisUsage &AU) const override {
406 if (SkipUniformRegions)
407 AU.addRequired<UniformityInfoWrapperPass>();
408 AU.addRequired<TargetTransformInfoWrapperPass>();
409 AU.addRequired<DominatorTreeWrapperPass>();
410 AU.addRequired<TargetTransformInfoWrapperPass>();
411
412 AU.addPreserved<DominatorTreeWrapperPass>();
413 RegionPass::getAnalysisUsage(AU);
414 }
415};
416
417} // end anonymous namespace
418
419char StructurizeCFGLegacyPass::ID = 0;
420
421INITIALIZE_PASS_BEGIN(StructurizeCFGLegacyPass, "structurizecfg",
422 "Structurize the CFG", false, false)
423INITIALIZE_PASS_DEPENDENCY(UniformityInfoWrapperPass)
424INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
425INITIALIZE_PASS_DEPENDENCY(RegionInfoPass)
426INITIALIZE_PASS_END(StructurizeCFGLegacyPass, "structurizecfg",
427 "Structurize the CFG", false, false)
428
429/// Checks whether an instruction is zero cost instruction and checks if the
430/// operands are from different BB. If so, this instruction can be coalesced
431/// if its hoisted to predecessor block. So, this returns true.
432bool StructurizeCFG::isHoistableInstruction(Instruction *I, BasicBlock *BB,
433 BasicBlock *HoistTo) {
434 if (I->getParent() != BB || isa<PHINode>(Val: I))
435 return false;
436
437 // If the instruction is not a zero cost instruction, return false.
438 auto Cost = TTI->getInstructionCost(U: I, CostKind: TargetTransformInfo::TCK_Latency);
439 InstructionCost::CostType CostVal =
440 Cost.isValid()
441 ? Cost.getValue()
442 : (InstructionCost::CostType)TargetTransformInfo::TCC_Expensive;
443 if (CostVal != 0)
444 return false;
445
446 // Check if all operands are available at the hoisting destination.
447 for (auto &Op : I->operands()) {
448 if (auto *OpI = dyn_cast<Instruction>(Val&: Op)) {
449 // Operand must dominate the hoisting destination.
450 if (!DT->dominates(A: OpI->getParent(), B: HoistTo))
451 return false;
452 }
453 }
454
455 return true;
456}
457
458/// Structurization can introduce unnecessary VGPR copies due to register
459/// coalescing interference. For example, if the Else block has a zero-cost
460/// instruction and the Then block modifies the VGPR value, only one value is
461/// live at a time in merge block before structurization. After structurization,
462/// the coalescer may incorrectly treat the Then value as live in the Else block
463/// (via the path Then → Flow → Else), leading to unnecessary VGPR copies.
464///
465/// This function examines phi nodes whose incoming values are zero-cost
466/// instructions in the Else block. It identifies such values that can be safely
467/// hoisted and moves them to the nearest common dominator of Then and Else
468/// blocks. A follow-up function after setting PhiNodes assigns the hoisted
469/// value to poison phi nodes along the if→flow edge, aiding register coalescing
470/// and minimizing unnecessary live ranges.
471void StructurizeCFG::hoistZeroCostElseBlockPhiValues(BasicBlock *ElseBB,
472 BasicBlock *ThenBB) {
473
474 BasicBlock *ElseSucc = ElseBB->getSingleSuccessor();
475 BasicBlock *CommonDominator = DT->findNearestCommonDominator(A: ElseBB, B: ThenBB);
476
477 if (!ElseSucc || !CommonDominator)
478 return;
479 // Only hoist in a simple if-else: ThenBB must branch directly to ElseSucc
480 // and ElseSucc must have exactly 2 predecessors (ThenBB and ElseBB).
481 // simplifyHoistedPhis assumes this exact shape; with additional predecessors
482 // the hoisted value leaks into unrelated control-flow paths.
483 if (ThenBB->getSingleSuccessor() != ElseSucc ||
484 !ElseSucc->hasNPredecessors(N: 2))
485 return;
486 Instruction *Term = CommonDominator->getTerminator();
487 for (PHINode &Phi : ElseSucc->phis()) {
488 Value *ElseVal = Phi.getIncomingValueForBlock(BB: ElseBB);
489 auto *Inst = dyn_cast<Instruction>(Val: ElseVal);
490 if (!Inst || !isHoistableInstruction(I: Inst, BB: ElseBB, HoistTo: CommonDominator))
491 continue;
492 Inst->removeFromParent();
493 Inst->insertInto(ParentBB: CommonDominator, It: Term->getIterator());
494 HoistedValues[Inst] = CommonDominator;
495 }
496}
497
498/// Build up the general order of nodes, by performing a topological sort of the
499/// parent region's nodes, while ensuring that there is no outer cycle node
500/// between any two inner cycle nodes.
501void StructurizeCFG::orderNodes() {
502 Order.resize(N: std::distance(first: GraphTraits<Region *>::nodes_begin(R: ParentRegion),
503 last: GraphTraits<Region *>::nodes_end(R: ParentRegion)));
504 if (Order.empty())
505 return;
506
507 SmallDenseSet<RegionNode *> Nodes;
508 auto EntryNode = SubGraphTraits::getEntryNode(R: ParentRegion);
509
510 // A list of range indices of SCCs in Order, to be processed.
511 SmallVector<std::pair<unsigned, unsigned>, 8> WorkList;
512 unsigned I = 0, E = Order.size();
513 while (true) {
514 // Run through all the SCCs in the subgraph starting with Entry.
515 for (auto SCCI =
516 scc_iterator<SubGraphTraits::NodeRef, SubGraphTraits>::begin(
517 G: EntryNode);
518 !SCCI.isAtEnd(); ++SCCI) {
519 auto &SCC = *SCCI;
520
521 // An SCC up to the size of 2, can be reduced to an entry (the last node),
522 // and a possible additional node. Therefore, it is already in order, and
523 // there is no need to add it to the work-list.
524 unsigned Size = SCC.size();
525 if (Size > 2)
526 WorkList.emplace_back(Args&: I, Args: I + Size);
527
528 // Add the SCC nodes to the Order array.
529 for (const auto &N : SCC) {
530 assert(I < E && "SCC size mismatch!");
531 Order[I++] = N.first;
532 }
533 }
534 assert(I == E && "SCC size mismatch!");
535
536 // If there are no more SCCs to order, then we are done.
537 if (WorkList.empty())
538 break;
539
540 std::tie(args&: I, args&: E) = WorkList.pop_back_val();
541
542 // Collect the set of nodes in the SCC's subgraph. These are only the
543 // possible child nodes; we do not add the entry (last node) otherwise we
544 // will have the same exact SCC all over again.
545 Nodes.clear();
546 Nodes.insert(I: Order.begin() + I, E: Order.begin() + E - 1);
547
548 // Update the entry node.
549 EntryNode.first = Order[E - 1];
550 EntryNode.second = &Nodes;
551 }
552}
553
554/// Determine the end of the loops
555void StructurizeCFG::analyzeLoops(RegionNode *N) {
556 if (N->isSubRegion()) {
557 // Test for exit as back edge
558 BasicBlock *Exit = N->getNodeAs<Region>()->getExit();
559 if (Visited.count(Ptr: Exit))
560 Loops[Exit] = N->getEntry();
561
562 } else {
563 // Test for successors as back edge
564 // TODO: support other terminators other than branches.
565 BasicBlock *BB = N->getNodeAs<BasicBlock>();
566 if (isa<UncondBrInst, CondBrInst>(Val: BB->getTerminator()))
567 for (BasicBlock *Succ : successors(BB))
568 if (Visited.count(Ptr: Succ))
569 Loops[Succ] = BB;
570 }
571}
572
573/// Build the condition for one edge
574PredInfo StructurizeCFG::buildCondition(CondBrInst *Term, unsigned Idx,
575 bool Invert) {
576 Value *Cond = Term->getCondition();
577 auto Weights = CondBranchWeights::tryParse(Br: *Term);
578 if (Idx != (unsigned)Invert) {
579 Cond = invertCondition(Condition: Cond);
580 if (Weights)
581 Weights = Weights->invert();
582 }
583 return {.Pred: Cond, .Weights: Weights};
584}
585
586/// Analyze the predecessors of each block and build up predicates
587void StructurizeCFG::gatherPredicates(RegionNode *N) {
588 RegionInfo *RI = ParentRegion->getRegionInfo();
589 BasicBlock *BB = N->getEntry();
590 BBPredicates &Pred = Predicates[BB];
591 BBPredicates &LPred = LoopPreds[BB];
592
593 for (BasicBlock *P : predecessors(BB)) {
594 // Ignore it if it's a branch from outside into our region entry
595 if (!ParentRegion->contains(BB: P))
596 continue;
597
598 Region *R = RI->getRegionFor(BB: P);
599 if (R == ParentRegion) {
600 if (isa<UncondBrInst>(Val: P->getTerminator())) {
601 if (Visited.count(Ptr: P))
602 Pred[P] = {.Pred: BoolTrue, .Weights: std::nullopt};
603 else
604 LPred[P] = {.Pred: BoolFalse, .Weights: std::nullopt};
605 } else if (auto *CondBr = dyn_cast<CondBrInst>(Val: P->getTerminator())) {
606 bool Idx = CondBr->getSuccessor(i: 0) == BB ? 0 : 1;
607 if (Visited.count(Ptr: P)) {
608 // Normal forward edge
609 // Try to treat Other like an ELSE block
610 BasicBlock *Other = CondBr->getSuccessor(i: !Idx);
611 if (Visited.count(Ptr: Other) && !Loops.count(Val: Other) &&
612 !Pred.count(Val: Other) && !Pred.count(Val: P)) {
613 hoistZeroCostElseBlockPhiValues(ElseBB: BB, ThenBB: Other);
614 Pred[Other] = {.Pred: BoolFalse, .Weights: std::nullopt};
615 Pred[P] = {.Pred: BoolTrue, .Weights: std::nullopt};
616 } else
617 Pred[P] = buildCondition(Term: CondBr, Idx, Invert: false);
618 } else {
619 // Back edge
620 LPred[P] = buildCondition(Term: CondBr, Idx, Invert: true);
621 }
622 }
623 } else {
624 // It's an exit from a sub region
625 while (R->getParent() != ParentRegion)
626 R = R->getParent();
627
628 // Edge from inside a subregion to its entry, ignore it
629 if (*R == *N)
630 continue;
631
632 BasicBlock *Entry = R->getEntry();
633 if (Visited.count(Ptr: Entry))
634 Pred[Entry] = {.Pred: BoolTrue, .Weights: std::nullopt};
635 else
636 LPred[Entry] = {.Pred: BoolFalse, .Weights: std::nullopt};
637 }
638 }
639}
640
641/// Collect various loop and predicate infos
642void StructurizeCFG::collectInfos() {
643 // Reset predicate
644 Predicates.clear();
645
646 // and loop infos
647 Loops.clear();
648 LoopPreds.clear();
649
650 // Reset the visited nodes
651 Visited.clear();
652
653 for (RegionNode *RN : reverse(C&: Order)) {
654 LLVM_DEBUG(dbgs() << "Visiting: "
655 << (RN->isSubRegion() ? "SubRegion with entry: " : "")
656 << RN->getEntry()->getName() << "\n");
657
658 // Analyze all the conditions leading to a node
659 gatherPredicates(N: RN);
660
661 // Remember that we've seen this node
662 Visited.insert(Ptr: RN->getEntry());
663
664 // Find the last back edges
665 analyzeLoops(N: RN);
666 }
667}
668
669/// Insert the missing branch conditions
670void StructurizeCFG::insertConditions(bool Loops, SSAUpdaterBulk &PhiInserter) {
671 BranchVector &Conds = Loops ? LoopConds : Conditions;
672 Value *Default = Loops ? BoolTrue : BoolFalse;
673
674 for (CondBrInst *Term : Conds) {
675 BasicBlock *Parent = Term->getParent();
676 BasicBlock *SuccTrue = Term->getSuccessor(i: 0);
677 BasicBlock *SuccFalse = Term->getSuccessor(i: 1);
678
679 unsigned Variable = PhiInserter.AddVariable(Name: "", Ty: Boolean);
680 PhiInserter.AddAvailableValue(Var: Variable, BB: Loops ? SuccFalse : Parent,
681 V: Default);
682
683 BBPredicates &Preds = Loops ? LoopPreds[SuccFalse] : Predicates[SuccTrue];
684
685 NearestCommonDominator Dominator(DT);
686 Dominator.addBlock(BB: Parent);
687
688 PredInfo ParentInfo{.Pred: nullptr, .Weights: std::nullopt};
689 for (auto [BB, PI] : Preds) {
690 if (BB == Parent) {
691 ParentInfo = PI;
692 break;
693 }
694 PhiInserter.AddAvailableValue(Var: Variable, BB, V: PI.Pred);
695 Dominator.addAndRememberBlock(BB);
696 }
697
698 if (ParentInfo.Pred) {
699 Term->setCondition(ParentInfo.Pred);
700 CondBranchWeights::setMetadata(Br&: *Term, Weights: ParentInfo.Weights);
701 } else {
702 if (!Dominator.resultIsRememberedBlock())
703 PhiInserter.AddAvailableValue(Var: Variable, BB: Dominator.result(), V: Default);
704
705 PhiInserter.AddUse(Var: Variable, U: &Term->getOperandUse(i: 0));
706 }
707 }
708}
709
710/// Simplify any inverted conditions that were built by buildConditions.
711void StructurizeCFG::simplifyConditions() {
712 SmallVector<Instruction *> InstToErase;
713 for (auto &I : concat<PredMap::value_type>(Ranges&: Predicates, Ranges&: LoopPreds)) {
714 auto &Preds = I.second;
715 for (auto [BB, PI] : Preds) {
716 Instruction *Inverted;
717 if (match(V: PI.Pred, P: m_Not(V: m_OneUse(SubPattern: m_Instruction(I&: Inverted)))) &&
718 !PI.Pred->use_empty()) {
719 if (auto *InvertedCmp = dyn_cast<CmpInst>(Val: Inverted)) {
720 InvertedCmp->setPredicate(InvertedCmp->getInversePredicate());
721 PI.Pred->replaceAllUsesWith(V: InvertedCmp);
722 InstToErase.push_back(Elt: cast<Instruction>(Val: PI.Pred));
723 }
724 }
725 }
726 }
727 for (auto *I : InstToErase)
728 I->eraseFromParent();
729}
730
731/// Remove all PHI values coming from "From" into "To" and remember
732/// them in DeletedPhis
733void StructurizeCFG::delPhiValues(BasicBlock *From, BasicBlock *To) {
734 PhiMap &Map = DeletedPhis[To];
735 for (PHINode &Phi : To->phis()) {
736 bool Recorded = false;
737 while (Phi.getBasicBlockIndex(BB: From) != -1) {
738 Value *Deleted = Phi.removeIncomingValue(BB: From, DeletePHIIfEmpty: false);
739 Map[&Phi].push_back(Elt: std::make_pair(x&: From, y&: Deleted));
740 if (!Recorded) {
741 AffectedPhis.push_back(Elt: &Phi);
742 Recorded = true;
743 }
744 }
745 }
746}
747
748/// Add a dummy PHI value as soon as we knew the new predecessor
749void StructurizeCFG::addPhiValues(BasicBlock *From, BasicBlock *To) {
750 for (PHINode &Phi : To->phis()) {
751 Value *Poison = PoisonValue::get(T: Phi.getType());
752 Phi.addIncoming(V: Poison, BB: From);
753 }
754 AddedPhis[To].push_back(Elt: From);
755}
756
757/// When we are reconstructing a PHI inside \p PHIBlock with incoming values
758/// from predecessors \p Incomings, we have a chance to mark the available value
759/// from some blocks as undefined. The function will find out all such blocks
760/// and return in \p UndefBlks.
761void StructurizeCFG::findUndefBlocks(
762 BasicBlock *PHIBlock, const SmallPtrSet<BasicBlock *, 8> &Incomings,
763 SmallVector<BasicBlock *> &UndefBlks) const {
764 // We may get a post-structured CFG like below:
765 //
766 // | P1
767 // |/
768 // F1
769 // |\
770 // | N
771 // |/
772 // F2
773 // |\
774 // | P2
775 // |/
776 // F3
777 // |\
778 // B
779 //
780 // B is the block that has a PHI being reconstructed. P1/P2 are predecessors
781 // of B before structurization. F1/F2/F3 are flow blocks inserted during
782 // structurization process. Block N is not a predecessor of B before
783 // structurization, but are placed between the predecessors(P1/P2) of B after
784 // structurization. This usually means that threads went to N never take the
785 // path N->F2->F3->B. For example, the threads take the branch F1->N may
786 // always take the branch F2->P2. So, when we are reconstructing a PHI
787 // originally in B, we can safely say the incoming value from N is undefined.
788 SmallPtrSet<BasicBlock *, 8> VisitedBlock;
789 SmallVector<BasicBlock *, 8> Stack;
790 if (PHIBlock == ParentRegion->getExit()) {
791 for (auto P : predecessors(BB: PHIBlock)) {
792 if (ParentRegion->contains(BB: P))
793 Stack.push_back(Elt: P);
794 }
795 } else {
796 append_range(C&: Stack, R: predecessors(BB: PHIBlock));
797 }
798
799 // Do a backward traversal over the CFG, and stop further searching if
800 // the block is not a Flow. If a block is neither flow block nor the
801 // incoming predecessor, then the incoming value from the block is
802 // undefined value for the PHI being reconstructed.
803 while (!Stack.empty()) {
804 BasicBlock *Current = Stack.pop_back_val();
805 if (!VisitedBlock.insert(Ptr: Current).second)
806 continue;
807
808 if (FlowSet.contains(Ptr: Current))
809 llvm::append_range(C&: Stack, R: predecessors(BB: Current));
810 else if (!Incomings.contains(Ptr: Current))
811 UndefBlks.push_back(Elt: Current);
812 }
813}
814
815// If two phi nodes have compatible incoming values (for each
816// incoming block, either they have the same incoming value or only one phi
817// node has an incoming value), let them share the merged incoming values. The
818// merge process is guided by the equivalence information from \p PhiClasses.
819// The function will possibly update the incoming values of leader phi in
820// DeletedPhis.
821void StructurizeCFG::mergeIfCompatible(
822 EquivalenceClasses<PHINode *> &PhiClasses, PHINode *A, PHINode *B) {
823 auto ItA = PhiClasses.findLeader(ECV: PhiClasses.insert(Data: A));
824 auto ItB = PhiClasses.findLeader(ECV: PhiClasses.insert(Data: B));
825 // They are already in the same class, no work needed.
826 if (ItA == ItB)
827 return;
828
829 PHINode *LeaderA = *ItA;
830 PHINode *LeaderB = *ItB;
831 BBValueVector &IncomingA = DeletedPhis[LeaderA->getParent()][LeaderA];
832 BBValueVector &IncomingB = DeletedPhis[LeaderB->getParent()][LeaderB];
833
834 DenseMap<BasicBlock *, Value *> Mergeable(IncomingA.begin(), IncomingA.end());
835 for (auto [BB, V] : IncomingB) {
836 auto BBIt = Mergeable.find(Val: BB);
837 if (BBIt != Mergeable.end() && BBIt->second != V)
838 return;
839 // Either IncomingA does not have this value or IncomingA has the same
840 // value.
841 Mergeable.insert(KV: {BB, V});
842 }
843
844 // Update the incoming value of leaderA.
845 IncomingA.assign(in_start: Mergeable.begin(), in_end: Mergeable.end());
846 PhiClasses.unionSets(L1: ItA, L2: ItB);
847}
848
849/// Add the real PHI value as soon as everything is set up
850void StructurizeCFG::setPhiValues() {
851 SmallVector<PHINode *, 8> InsertedPhis;
852 SSAUpdater Updater(&InsertedPhis);
853 DenseMap<BasicBlock *, SmallVector<BasicBlock *>> UndefBlksMap;
854
855 // Find phi nodes that have compatible incoming values (either they have
856 // the same value for the same block or only one phi node has an incoming
857 // value, see example below). We only search again the phi's that are
858 // referenced by another phi, which is the case we care about.
859 //
860 // For example (-- means no incoming value):
861 // phi1 : BB1:phi2 BB2:v BB3:--
862 // phi2: BB1:-- BB2:v BB3:w
863 //
864 // Then we can merge these incoming values and let phi1, phi2 use the
865 // same set of incoming values:
866 //
867 // phi1&phi2: BB1:phi2 BB2:v BB3:w
868 //
869 // By doing this, phi1 and phi2 would share more intermediate phi nodes.
870 // This would help reduce the number of phi nodes during SSA reconstruction
871 // and ultimately result in fewer COPY instructions.
872 //
873 // This should be correct, because if a phi node does not have incoming
874 // value from certain block, this means the block is not the predecessor
875 // of the parent block, so we actually don't care about its incoming value.
876 EquivalenceClasses<PHINode *> PhiClasses;
877 for (const auto &[To, From] : AddedPhis) {
878 auto OldPhiIt = DeletedPhis.find(Val: To);
879 if (OldPhiIt == DeletedPhis.end())
880 continue;
881
882 PhiMap &BlkPhis = OldPhiIt->second;
883 SmallVector<BasicBlock *> &UndefBlks = UndefBlksMap[To];
884 SmallPtrSet<BasicBlock *, 8> Incomings;
885
886 // Get the undefined blocks shared by all the phi nodes.
887 if (!BlkPhis.empty()) {
888 Incomings.insert_range(R: llvm::make_first_range(c&: BlkPhis.front().second));
889 findUndefBlocks(PHIBlock: To, Incomings, UndefBlks);
890 }
891
892 for (const auto &[Phi, Incomings] : OldPhiIt->second) {
893 SmallVector<PHINode *> IncomingPHIs;
894 for (const auto &[BB, V] : Incomings) {
895 // First, for each phi, check whether it has incoming value which is
896 // another phi.
897 if (PHINode *P = dyn_cast<PHINode>(Val: V))
898 IncomingPHIs.push_back(Elt: P);
899 }
900
901 for (auto *OtherPhi : IncomingPHIs) {
902 // Skip phis that are unrelated to the phi reconstruction for now.
903 if (!DeletedPhis.contains(Val: OtherPhi->getParent()))
904 continue;
905 mergeIfCompatible(PhiClasses, A: Phi, B: OtherPhi);
906 }
907 }
908 }
909
910 for (const auto &AddedPhi : AddedPhis) {
911 BasicBlock *To = AddedPhi.first;
912 const BBVector &From = AddedPhi.second;
913
914 auto It = DeletedPhis.find(Val: To);
915 if (It == DeletedPhis.end())
916 continue;
917
918 PhiMap &Map = It->second;
919 SmallVector<BasicBlock *> &UndefBlks = UndefBlksMap[To];
920 for (const auto &[Phi, Incoming] : Map) {
921 Value *Poison = PoisonValue::get(T: Phi->getType());
922 Updater.Initialize(Ty: Phi->getType(), Name: "");
923 Updater.AddAvailableValue(BB: &Func->getEntryBlock(), V: Poison);
924 Updater.AddAvailableValue(BB: To, V: Poison);
925
926 // Use leader phi's incoming if there is.
927 auto LeaderIt = PhiClasses.findLeader(V: Phi);
928 bool UseIncomingOfLeader =
929 LeaderIt != PhiClasses.member_end() && *LeaderIt != Phi;
930 const auto &IncomingMap =
931 UseIncomingOfLeader ? DeletedPhis[(*LeaderIt)->getParent()][*LeaderIt]
932 : Incoming;
933
934 SmallVector<BasicBlock *> ConstantPreds;
935 for (const auto &[BB, V] : IncomingMap) {
936 Updater.AddAvailableValue(BB, V);
937 if (isa<Constant>(Val: V))
938 ConstantPreds.push_back(Elt: BB);
939 }
940
941 for (auto UB : UndefBlks) {
942 // If this undef block is dominated by any predecessor(before
943 // structurization) of reconstructed PHI with constant incoming value,
944 // don't mark the available value as undefined. Setting undef to such
945 // block will stop us from getting optimal phi insertion.
946 if (any_of(Range&: ConstantPreds,
947 P: [&](BasicBlock *CP) { return DT->dominates(A: CP, B: UB); }))
948 continue;
949 // Maybe already get a value through sharing with other phi nodes.
950 if (Updater.HasValueForBlock(BB: UB))
951 continue;
952
953 Updater.AddAvailableValue(BB: UB, V: Poison);
954 }
955
956 for (BasicBlock *FI : From)
957 Phi->setIncomingValueForBlock(BB: FI, V: Updater.GetValueAtEndOfBlock(BB: FI));
958 AffectedPhis.push_back(Elt: Phi);
959 }
960 }
961
962 AffectedPhis.append(in_start: InsertedPhis.begin(), in_end: InsertedPhis.end());
963}
964
965/// Updates PHI nodes after hoisted zero cost instructions by replacing poison
966/// entries on Flow nodes with the appropriate hoisted values
967void StructurizeCFG::simplifyHoistedPhis() {
968 for (WeakVH VH : AffectedPhis) {
969 PHINode *Phi = dyn_cast_or_null<PHINode>(Val&: VH);
970 if (!Phi || Phi->getNumIncomingValues() != 2)
971 continue;
972
973 for (int i = 0; i < 2; i++) {
974 Value *V = Phi->getIncomingValue(i);
975 auto BBIt = HoistedValues.find(Val: V);
976
977 if (BBIt == HoistedValues.end())
978 continue;
979
980 Value *OtherV = Phi->getIncomingValue(i: !i);
981 PHINode *OtherPhi = dyn_cast<PHINode>(Val: OtherV);
982 if (!OtherPhi)
983 continue;
984
985 int PoisonValBBIdx = -1;
986 for (size_t i = 0; i < OtherPhi->getNumIncomingValues(); i++) {
987 if (!isa<PoisonValue>(Val: OtherPhi->getIncomingValue(i)))
988 continue;
989 PoisonValBBIdx = i;
990 break;
991 }
992 if (PoisonValBBIdx == -1 ||
993 !DT->dominates(A: BBIt->second,
994 B: OtherPhi->getIncomingBlock(i: PoisonValBBIdx)))
995 continue;
996
997 OtherPhi->setIncomingValue(i: PoisonValBBIdx, V);
998 if (DT->dominates(Def: OtherV, User: Phi))
999 Phi->setIncomingValue(i, V: OtherV);
1000 }
1001 }
1002}
1003
1004void StructurizeCFG::simplifyAffectedPhis() {
1005 bool Changed;
1006 do {
1007 Changed = false;
1008 SimplifyQuery Q(Func->getDataLayout());
1009 Q.DT = DT;
1010 // Setting CanUseUndef to true might extend value liveness, set it to false
1011 // to achieve better register pressure.
1012 Q.CanUseUndef = false;
1013 for (WeakVH VH : AffectedPhis) {
1014 if (auto Phi = dyn_cast_or_null<PHINode>(Val&: VH)) {
1015 if (auto NewValue = simplifyInstruction(I: Phi, Q)) {
1016 Phi->replaceAllUsesWith(V: NewValue);
1017 Phi->eraseFromParent();
1018 Changed = true;
1019 }
1020 }
1021 }
1022 } while (Changed);
1023}
1024
1025/// Remove phi values from all successors and then remove the terminator.
1026DebugLoc StructurizeCFG::killTerminator(BasicBlock *BB) {
1027 Instruction *Term = BB->getTerminator();
1028 if (!Term)
1029 return DebugLoc();
1030
1031 for (BasicBlock *Succ : successors(BB))
1032 delPhiValues(From: BB, To: Succ);
1033
1034 DebugLoc DL = Term->getDebugLoc();
1035 Term->eraseFromParent();
1036 return DL;
1037}
1038
1039/// Let node exit(s) point to NewExit
1040void StructurizeCFG::changeExit(RegionNode *Node, BasicBlock *NewExit,
1041 bool IncludeDominator) {
1042 if (Node->isSubRegion()) {
1043 Region *SubRegion = Node->getNodeAs<Region>();
1044 BasicBlock *OldExit = SubRegion->getExit();
1045 BasicBlock *Dominator = nullptr;
1046
1047 // Find all the edges from the sub region to the exit.
1048 // We use make_early_inc_range here because we modify BB's terminator.
1049 for (BasicBlock *BB : llvm::make_early_inc_range(Range: predecessors(BB: OldExit))) {
1050 if (!SubRegion->contains(BB))
1051 continue;
1052
1053 // Modify the edges to point to the new exit
1054 delPhiValues(From: BB, To: OldExit);
1055 BB->getTerminator()->replaceUsesOfWith(From: OldExit, To: NewExit);
1056 addPhiValues(From: BB, To: NewExit);
1057
1058 // Find the new dominator (if requested)
1059 if (IncludeDominator) {
1060 if (!Dominator)
1061 Dominator = BB;
1062 else
1063 Dominator = DT->findNearestCommonDominator(A: Dominator, B: BB);
1064 }
1065 }
1066
1067 // Change the dominator (if requested)
1068 if (Dominator)
1069 DT->changeImmediateDominator(BB: NewExit, NewBB: Dominator);
1070
1071 // Update the region info
1072 SubRegion->replaceExit(BB: NewExit);
1073 } else {
1074 BasicBlock *BB = Node->getNodeAs<BasicBlock>();
1075 DebugLoc DL = killTerminator(BB);
1076 UncondBrInst *Br = UncondBrInst::Create(IfTrue: NewExit, InsertBefore: BB);
1077 Br->setDebugLoc(DL);
1078 addPhiValues(From: BB, To: NewExit);
1079 if (IncludeDominator)
1080 DT->changeImmediateDominator(BB: NewExit, NewBB: BB);
1081 }
1082}
1083
1084/// Create a new flow node and update dominator tree and region info
1085BasicBlock *StructurizeCFG::getNextFlow(BasicBlock *Dominator) {
1086 LLVMContext &Context = Func->getContext();
1087 BasicBlock *Insert = Order.empty() ? ParentRegion->getExit() :
1088 Order.back()->getEntry();
1089 BasicBlock *Flow = BasicBlock::Create(Context, Name: FlowBlockName,
1090 Parent: Func, InsertBefore: Insert);
1091 FlowSet.insert(Ptr: Flow);
1092 DT->addNewBlock(BB: Flow, DomBB: Dominator);
1093 ParentRegion->getRegionInfo()->setRegionFor(BB: Flow, R: ParentRegion);
1094 return Flow;
1095}
1096
1097/// Create a new or reuse the previous node as flow node. Returns a block and a
1098/// debug location to be used for new instructions in that block.
1099std::pair<BasicBlock *, DebugLoc> StructurizeCFG::needPrefix(bool NeedEmpty) {
1100 BasicBlock *Entry = PrevNode->getEntry();
1101
1102 if (!PrevNode->isSubRegion()) {
1103 DebugLoc DL = killTerminator(BB: Entry);
1104 if (!NeedEmpty || Entry->getFirstInsertionPt() == Entry->end())
1105 return {Entry, DL};
1106 }
1107
1108 // create a new flow node
1109 BasicBlock *Flow = getNextFlow(Dominator: Entry);
1110
1111 // and wire it up
1112 changeExit(Node: PrevNode, NewExit: Flow, IncludeDominator: true);
1113 PrevNode = ParentRegion->getBBNode(BB: Flow);
1114 return {Flow, DebugLoc()};
1115}
1116
1117/// Returns the region exit if possible, otherwise just a new flow node
1118BasicBlock *StructurizeCFG::needPostfix(BasicBlock *Flow,
1119 bool ExitUseAllowed) {
1120 if (!Order.empty() || !ExitUseAllowed)
1121 return getNextFlow(Dominator: Flow);
1122
1123 BasicBlock *Exit = ParentRegion->getExit();
1124 DT->changeImmediateDominator(BB: Exit, NewBB: Flow);
1125 addPhiValues(From: Flow, To: Exit);
1126 return Exit;
1127}
1128
1129/// Set the previous node
1130void StructurizeCFG::setPrevNode(BasicBlock *BB) {
1131 PrevNode = ParentRegion->contains(BB) ? ParentRegion->getBBNode(BB)
1132 : nullptr;
1133}
1134
1135/// Does BB dominate all the predicates of Node?
1136bool StructurizeCFG::dominatesPredicates(BasicBlock *BB, RegionNode *Node) {
1137 BBPredicates &Preds = Predicates[Node->getEntry()];
1138 return llvm::all_of(Range&: Preds, P: [&](std::pair<BasicBlock *, PredInfo> Pred) {
1139 return DT->dominates(A: BB, B: Pred.first);
1140 });
1141}
1142
1143/// Can we predict that this node will always be called?
1144bool StructurizeCFG::isPredictableTrue(RegionNode *Node) {
1145 BBPredicates &Preds = Predicates[Node->getEntry()];
1146 bool Dominated = false;
1147
1148 // Regionentry is always true
1149 if (!PrevNode)
1150 return true;
1151
1152 for (auto [BB, PI] : Preds) {
1153 if (PI.Pred != BoolTrue)
1154 return false;
1155
1156 if (!Dominated && DT->dominates(A: BB, B: PrevNode->getEntry()))
1157 Dominated = true;
1158 }
1159
1160 // TODO: The dominator check is too strict
1161 return Dominated;
1162}
1163
1164/// Take one node from the order vector and wire it up
1165void StructurizeCFG::wireFlow(bool ExitUseAllowed,
1166 BasicBlock *LoopEnd) {
1167 RegionNode *Node = Order.pop_back_val();
1168 Visited.insert(Ptr: Node->getEntry());
1169
1170 if (isPredictableTrue(Node)) {
1171 // Just a linear flow
1172 if (PrevNode) {
1173 changeExit(Node: PrevNode, NewExit: Node->getEntry(), IncludeDominator: true);
1174 }
1175 PrevNode = Node;
1176 } else {
1177 // Insert extra prefix node (or reuse last one)
1178 auto [Flow, DL] = needPrefix(NeedEmpty: false);
1179
1180 // Insert extra postfix node (or use exit instead)
1181 BasicBlock *Entry = Node->getEntry();
1182 BasicBlock *Next = needPostfix(Flow, ExitUseAllowed);
1183
1184 // let it point to entry and next block
1185 CondBrInst *Br = CondBrInst::Create(Cond: BoolPoison, IfTrue: Entry, IfFalse: Next, InsertBefore: Flow);
1186 Br->setDebugLoc(DL);
1187 Conditions.push_back(Elt: Br);
1188 addPhiValues(From: Flow, To: Entry);
1189 DT->changeImmediateDominator(BB: Entry, NewBB: Flow);
1190
1191 PrevNode = Node;
1192 while (!Order.empty() && !Visited.count(Ptr: LoopEnd) &&
1193 dominatesPredicates(BB: Entry, Node: Order.back())) {
1194 handleLoops(ExitUseAllowed: false, LoopEnd);
1195 }
1196
1197 changeExit(Node: PrevNode, NewExit: Next, IncludeDominator: false);
1198 setPrevNode(Next);
1199 }
1200}
1201
1202void StructurizeCFG::handleLoops(bool ExitUseAllowed,
1203 BasicBlock *LoopEnd) {
1204 RegionNode *Node = Order.back();
1205 BasicBlock *LoopStart = Node->getEntry();
1206
1207 if (!Loops.count(Val: LoopStart)) {
1208 wireFlow(ExitUseAllowed, LoopEnd);
1209 return;
1210 }
1211
1212 if (!isPredictableTrue(Node))
1213 LoopStart = needPrefix(NeedEmpty: true).first;
1214
1215 LoopEnd = Loops[Node->getEntry()];
1216 wireFlow(ExitUseAllowed: false, LoopEnd);
1217 while (!Visited.count(Ptr: LoopEnd)) {
1218 handleLoops(ExitUseAllowed: false, LoopEnd);
1219 }
1220
1221 assert(LoopStart != &LoopStart->getParent()->getEntryBlock());
1222
1223 // Create an extra loop end node
1224 DebugLoc DL;
1225 std::tie(args&: LoopEnd, args&: DL) = needPrefix(NeedEmpty: false);
1226 BasicBlock *Next = needPostfix(Flow: LoopEnd, ExitUseAllowed);
1227 CondBrInst *Br = CondBrInst::Create(Cond: BoolPoison, IfTrue: Next, IfFalse: LoopStart, InsertBefore: LoopEnd);
1228 Br->setDebugLoc(DL);
1229 LoopConds.push_back(Elt: Br);
1230 addPhiValues(From: LoopEnd, To: LoopStart);
1231 setPrevNode(Next);
1232}
1233
1234/// After this function control flow looks like it should be, but
1235/// branches and PHI nodes only have undefined conditions.
1236void StructurizeCFG::createFlow() {
1237 BasicBlock *Exit = ParentRegion->getExit();
1238 bool EntryDominatesExit = DT->dominates(A: ParentRegion->getEntry(), B: Exit);
1239
1240 AffectedPhis.clear();
1241 DeletedPhis.clear();
1242 AddedPhis.clear();
1243 Conditions.clear();
1244 LoopConds.clear();
1245
1246 PrevNode = nullptr;
1247 Visited.clear();
1248
1249 while (!Order.empty()) {
1250 handleLoops(ExitUseAllowed: EntryDominatesExit, LoopEnd: nullptr);
1251 }
1252
1253 if (PrevNode)
1254 changeExit(Node: PrevNode, NewExit: Exit, IncludeDominator: EntryDominatesExit);
1255 else
1256 assert(EntryDominatesExit);
1257}
1258
1259/// Handle a rare case where the disintegrated nodes instructions
1260/// no longer dominate all their uses. Not sure if this is really necessary
1261void StructurizeCFG::rebuildSSA() {
1262 SSAUpdater Updater;
1263 for (BasicBlock *BB : ParentRegion->blocks())
1264 for (Instruction &I : *BB) {
1265 bool Initialized = false;
1266 // We may modify the use list as we iterate over it, so we use
1267 // make_early_inc_range.
1268 for (Use &U : llvm::make_early_inc_range(Range: I.uses())) {
1269 Instruction *User = cast<Instruction>(Val: U.getUser());
1270 if (User->getParent() == BB) {
1271 continue;
1272 } else if (PHINode *UserPN = dyn_cast<PHINode>(Val: User)) {
1273 if (UserPN->getIncomingBlock(U) == BB)
1274 continue;
1275 }
1276
1277 if (DT->dominates(Def: &I, User))
1278 continue;
1279
1280 if (!Initialized) {
1281 Value *Poison = PoisonValue::get(T: I.getType());
1282 Updater.Initialize(Ty: I.getType(), Name: "");
1283 Updater.AddAvailableValue(BB: &Func->getEntryBlock(), V: Poison);
1284 Updater.AddAvailableValue(BB, V: &I);
1285 Initialized = true;
1286 }
1287 Updater.RewriteUseAfterInsertions(U);
1288 }
1289 }
1290}
1291
1292static bool hasOnlyUniformBranches(Region *R, unsigned UniformMDKindID,
1293 const UniformityInfo &UA) {
1294 // Bool for if all sub-regions are uniform.
1295 bool SubRegionsAreUniform = true;
1296 // Count of how many direct children are conditional.
1297 unsigned ConditionalDirectChildren = 0;
1298
1299 for (auto *E : R->elements()) {
1300 if (!E->isSubRegion()) {
1301 auto Br = dyn_cast<CondBrInst>(Val: E->getEntry()->getTerminator());
1302 if (!Br)
1303 continue;
1304
1305 if (!UA.isUniform(I: Br))
1306 return false;
1307
1308 // One of our direct children is conditional.
1309 ConditionalDirectChildren++;
1310
1311 LLVM_DEBUG(dbgs() << "BB: " << Br->getParent()->getName()
1312 << " has uniform terminator\n");
1313 } else {
1314 // Explicitly refuse to treat regions as uniform if they have non-uniform
1315 // subregions. We cannot rely on UniformityAnalysis for branches in
1316 // subregions because those branches may have been removed and re-created,
1317 // so we look for our metadata instead.
1318 //
1319 // Warning: It would be nice to treat regions as uniform based only on
1320 // their direct child basic blocks' terminators, regardless of whether
1321 // subregions are uniform or not. However, this requires a very careful
1322 // look at SIAnnotateControlFlow to make sure nothing breaks there.
1323 for (auto *BB : E->getNodeAs<Region>()->blocks()) {
1324 auto Br = dyn_cast<CondBrInst>(Val: BB->getTerminator());
1325 if (!Br)
1326 continue;
1327
1328 if (!Br->getMetadata(KindID: UniformMDKindID)) {
1329 // Early exit if we cannot have relaxed uniform regions.
1330 if (!RelaxedUniformRegions)
1331 return false;
1332
1333 SubRegionsAreUniform = false;
1334 break;
1335 }
1336 }
1337 }
1338 }
1339
1340 // Our region is uniform if:
1341 // 1. All conditional branches that are direct children are uniform (checked
1342 // above).
1343 // 2. And either:
1344 // a. All sub-regions are uniform.
1345 // b. There is one or less conditional branches among the direct children.
1346 return SubRegionsAreUniform || (ConditionalDirectChildren <= 1);
1347}
1348
1349void StructurizeCFG::init(Region *R) {
1350 LLVMContext &Context = R->getEntry()->getContext();
1351
1352 Boolean = Type::getInt1Ty(C&: Context);
1353 BoolTrue = ConstantInt::getTrue(Context);
1354 BoolFalse = ConstantInt::getFalse(Context);
1355 BoolPoison = PoisonValue::get(T: Boolean);
1356
1357 this->UA = nullptr;
1358}
1359
1360bool StructurizeCFG::makeUniformRegion(Region *R, UniformityInfo &UA) {
1361 if (R->isTopLevelRegion())
1362 return false;
1363
1364 this->UA = &UA;
1365
1366 // TODO: We could probably be smarter here with how we handle sub-regions.
1367 // We currently rely on the fact that metadata is set by earlier invocations
1368 // of the pass on sub-regions, and that this metadata doesn't get lost --
1369 // but we shouldn't rely on metadata for correctness!
1370 unsigned UniformMDKindID =
1371 R->getEntry()->getContext().getMDKindID(Name: "structurizecfg.uniform");
1372
1373 if (hasOnlyUniformBranches(R, UniformMDKindID, UA)) {
1374 LLVM_DEBUG(dbgs() << "Skipping region with uniform control flow: " << *R
1375 << '\n');
1376
1377 // Mark all direct child block terminators as having been treated as
1378 // uniform. To account for a possible future in which non-uniform
1379 // sub-regions are treated more cleverly, indirect children are not
1380 // marked as uniform.
1381 MDNode *MD = MDNode::get(Context&: R->getEntry()->getParent()->getContext(), MDs: {});
1382 for (RegionNode *E : R->elements()) {
1383 if (E->isSubRegion())
1384 continue;
1385
1386 if (Instruction *Term = E->getEntry()->getTerminator())
1387 Term->setMetadata(KindID: UniformMDKindID, Node: MD);
1388 }
1389
1390 return true;
1391 }
1392 return false;
1393}
1394
1395/// Run the transformation for each region found
1396bool StructurizeCFG::run(Region *R, DominatorTree *DT,
1397 const TargetTransformInfo *TTI) {
1398 // CallBr and its corresponding direct target blocks are for now ignored by
1399 // this pass. This is not a limitation for the currently intended uses cases
1400 // of callbr in the AMDGPU backend.
1401 // Parent and child regions are not affected by this (current) restriction.
1402 // See `llvm/test/Transforms/StructurizeCFG/callbr.ll` for details.
1403 if (R->isTopLevelRegion() || isa<CallBrInst>(Val: R->getEntry()->getTerminator()))
1404 return false;
1405
1406 this->DT = DT;
1407 this->TTI = TTI;
1408 Func = R->getEntry()->getParent();
1409
1410 ParentRegion = R;
1411
1412 orderNodes();
1413 collectInfos();
1414 createFlow();
1415
1416 SSAUpdaterBulk PhiInserter;
1417 insertConditions(Loops: false, PhiInserter);
1418 insertConditions(Loops: true, PhiInserter);
1419 PhiInserter.RewriteAndOptimizeAllUses(DT&: *DT);
1420
1421 setPhiValues();
1422 simplifyHoistedPhis();
1423 simplifyConditions();
1424 simplifyAffectedPhis();
1425 rebuildSSA();
1426
1427 // Cleanup
1428 Order.clear();
1429 Visited.clear();
1430 DeletedPhis.clear();
1431 AddedPhis.clear();
1432 Predicates.clear();
1433 Conditions.clear();
1434 Loops.clear();
1435 LoopPreds.clear();
1436 LoopConds.clear();
1437 FlowSet.clear();
1438
1439 return true;
1440}
1441
1442Pass *llvm::createStructurizeCFGPass(bool SkipUniformRegions) {
1443 return new StructurizeCFGLegacyPass(SkipUniformRegions);
1444}
1445
1446static void addRegionIntoQueue(Region &R, std::vector<Region *> &Regions) {
1447 Regions.push_back(x: &R);
1448 for (const auto &E : R)
1449 addRegionIntoQueue(R&: *E, Regions);
1450}
1451
1452StructurizeCFGPass::StructurizeCFGPass(bool SkipUniformRegions_)
1453 : SkipUniformRegions(SkipUniformRegions_) {
1454 if (ForceSkipUniformRegions.getNumOccurrences())
1455 SkipUniformRegions = ForceSkipUniformRegions.getValue();
1456}
1457
1458void StructurizeCFGPass::printPipeline(
1459 raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
1460 static_cast<PassInfoMixin<StructurizeCFGPass> *>(this)->printPipeline(
1461 OS, MapClassName2PassName);
1462 if (SkipUniformRegions)
1463 OS << "<skip-uniform-regions>";
1464}
1465
1466PreservedAnalyses StructurizeCFGPass::run(Function &F,
1467 FunctionAnalysisManager &AM) {
1468
1469 bool Changed = false;
1470 DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(IR&: F);
1471 auto &RI = AM.getResult<RegionInfoAnalysis>(IR&: F);
1472 TargetTransformInfo *TTI = &AM.getResult<TargetIRAnalysis>(IR&: F);
1473 UniformityInfo *UI = nullptr;
1474 if (SkipUniformRegions)
1475 UI = &AM.getResult<UniformityInfoAnalysis>(IR&: F);
1476
1477 std::vector<Region *> Regions;
1478 addRegionIntoQueue(R&: *RI.getTopLevelRegion(), Regions);
1479 while (!Regions.empty()) {
1480 Region *R = Regions.back();
1481 Regions.pop_back();
1482
1483 StructurizeCFG SCFG;
1484 SCFG.init(R);
1485
1486 if (SkipUniformRegions && SCFG.makeUniformRegion(R, UA&: *UI)) {
1487 Changed = true; // May have added metadata.
1488 continue;
1489 }
1490
1491 Changed |= SCFG.run(R, DT, TTI);
1492 }
1493 if (!Changed)
1494 return PreservedAnalyses::all();
1495 PreservedAnalyses PA;
1496 PA.preserve<DominatorTreeAnalysis>();
1497 return PA;
1498}
1499