1//===- StructurizeCFG.cpp -------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "llvm/Transforms/Scalar/StructurizeCFG.h"
10#include "llvm/ADT/DenseMap.h"
11#include "llvm/ADT/EquivalenceClasses.h"
12#include "llvm/ADT/MapVector.h"
13#include "llvm/ADT/SCCIterator.h"
14#include "llvm/ADT/STLExtras.h"
15#include "llvm/ADT/SmallPtrSet.h"
16#include "llvm/ADT/SmallVector.h"
17#include "llvm/Analysis/InstructionSimplify.h"
18#include "llvm/Analysis/RegionInfo.h"
19#include "llvm/Analysis/RegionIterator.h"
20#include "llvm/Analysis/RegionPass.h"
21#include "llvm/Analysis/TargetTransformInfo.h"
22#include "llvm/Analysis/UniformityAnalysis.h"
23#include "llvm/IR/BasicBlock.h"
24#include "llvm/IR/CFG.h"
25#include "llvm/IR/Constants.h"
26#include "llvm/IR/Dominators.h"
27#include "llvm/IR/Function.h"
28#include "llvm/IR/InstrTypes.h"
29#include "llvm/IR/Instruction.h"
30#include "llvm/IR/Instructions.h"
31#include "llvm/IR/Metadata.h"
32#include "llvm/IR/PassManager.h"
33#include "llvm/IR/PatternMatch.h"
34#include "llvm/IR/ProfDataUtils.h"
35#include "llvm/IR/Type.h"
36#include "llvm/IR/Use.h"
37#include "llvm/IR/Value.h"
38#include "llvm/IR/ValueHandle.h"
39#include "llvm/InitializePasses.h"
40#include "llvm/Pass.h"
41#include "llvm/Support/Casting.h"
42#include "llvm/Support/CommandLine.h"
43#include "llvm/Support/Debug.h"
44#include "llvm/Support/raw_ostream.h"
45#include "llvm/Transforms/Scalar.h"
46#include "llvm/Transforms/Utils.h"
47#include "llvm/Transforms/Utils/BasicBlockUtils.h"
48#include "llvm/Transforms/Utils/Local.h"
49#include "llvm/Transforms/Utils/SSAUpdater.h"
50#include "llvm/Transforms/Utils/SSAUpdaterBulk.h"
51#include <cassert>
52#include <utility>
53
54using namespace llvm;
55using namespace llvm::PatternMatch;
56
57#define DEBUG_TYPE "structurizecfg"
58
59// The name for newly created blocks.
60const char FlowBlockName[] = "Flow";
61
62namespace {
63
64static cl::opt<bool> ForceSkipUniformRegions(
65 "structurizecfg-skip-uniform-regions",
66 cl::Hidden,
67 cl::desc("Force whether the StructurizeCFG pass skips uniform regions"),
68 cl::init(Val: false));
69
70static cl::opt<bool>
71 RelaxedUniformRegions("structurizecfg-relaxed-uniform-regions", cl::Hidden,
72 cl::desc("Allow relaxed uniform region checks"),
73 cl::init(Val: true));
74
75// Definition of the complex types used in this pass.
76
77using BBValuePair = std::pair<BasicBlock *, Value *>;
78
79using RNVector = SmallVector<RegionNode *, 8>;
80using BBVector = SmallVector<BasicBlock *, 8>;
81using BranchVector = SmallVector<BranchInst *, 8>;
82using BBValueVector = SmallVector<BBValuePair, 2>;
83
84using BBSet = SmallPtrSet<BasicBlock *, 8>;
85
86using PhiMap = MapVector<PHINode *, BBValueVector>;
87using BB2BBVecMap = MapVector<BasicBlock *, BBVector>;
88
89using BBPhiMap = DenseMap<BasicBlock *, PhiMap>;
90
91using MaybeCondBranchWeights = std::optional<class CondBranchWeights>;
92
93class CondBranchWeights {
94 uint32_t TrueWeight;
95 uint32_t FalseWeight;
96
97 CondBranchWeights(uint32_t T, uint32_t F) : TrueWeight(T), FalseWeight(F) {}
98
99public:
100 static MaybeCondBranchWeights tryParse(const BranchInst &Br) {
101 assert(Br.isConditional());
102
103 uint64_t T, F;
104 if (!extractBranchWeights(I: Br, TrueVal&: T, FalseVal&: F))
105 return std::nullopt;
106
107 return CondBranchWeights(T, F);
108 }
109
110 static void setMetadata(BranchInst &Br,
111 const MaybeCondBranchWeights &Weights) {
112 assert(Br.isConditional());
113 if (!Weights)
114 return;
115 uint32_t Arr[] = {Weights->TrueWeight, Weights->FalseWeight};
116 setBranchWeights(I&: Br, Weights: Arr, IsExpected: false);
117 }
118
119 CondBranchWeights invert() const {
120 return CondBranchWeights{FalseWeight, TrueWeight};
121 }
122};
123
124struct PredInfo {
125 Value *Pred;
126 MaybeCondBranchWeights Weights;
127};
128
129using BBPredicates = DenseMap<BasicBlock *, PredInfo>;
130using PredMap = DenseMap<BasicBlock *, BBPredicates>;
131using BB2BBMap = DenseMap<BasicBlock *, BasicBlock *>;
132using Val2BBMap = DenseMap<Value *, BasicBlock *>;
133
134// A traits type that is intended to be used in graph algorithms. The graph
135// traits starts at an entry node, and traverses the RegionNodes that are in
136// the Nodes set.
137struct SubGraphTraits {
138 using NodeRef = std::pair<RegionNode *, SmallDenseSet<RegionNode *> *>;
139 using BaseSuccIterator = GraphTraits<RegionNode *>::ChildIteratorType;
140
141 // This wraps a set of Nodes into the iterator, so we know which edges to
142 // filter out.
143 class WrappedSuccIterator
144 : public iterator_adaptor_base<
145 WrappedSuccIterator, BaseSuccIterator,
146 std::iterator_traits<BaseSuccIterator>::iterator_category, NodeRef,
147 std::ptrdiff_t, NodeRef *, NodeRef> {
148 SmallDenseSet<RegionNode *> *Nodes;
149
150 public:
151 WrappedSuccIterator(BaseSuccIterator It, SmallDenseSet<RegionNode *> *Nodes)
152 : iterator_adaptor_base(It), Nodes(Nodes) {}
153
154 NodeRef operator*() const { return {*I, Nodes}; }
155 };
156
157 static bool filterAll(const NodeRef &N) { return true; }
158 static bool filterSet(const NodeRef &N) { return N.second->count(V: N.first); }
159
160 using ChildIteratorType =
161 filter_iterator<WrappedSuccIterator, bool (*)(const NodeRef &)>;
162
163 static NodeRef getEntryNode(Region *R) {
164 return {GraphTraits<Region *>::getEntryNode(R), nullptr};
165 }
166
167 static NodeRef getEntryNode(NodeRef N) { return N; }
168
169 static iterator_range<ChildIteratorType> children(const NodeRef &N) {
170 auto *filter = N.second ? &filterSet : &filterAll;
171 return make_filter_range(
172 Range: make_range<WrappedSuccIterator>(
173 x: {GraphTraits<RegionNode *>::child_begin(N: N.first), N.second},
174 y: {GraphTraits<RegionNode *>::child_end(N: N.first), N.second}),
175 Pred: filter);
176 }
177
178 static ChildIteratorType child_begin(const NodeRef &N) {
179 return children(N).begin();
180 }
181
182 static ChildIteratorType child_end(const NodeRef &N) {
183 return children(N).end();
184 }
185};
186
187/// Finds the nearest common dominator of a set of BasicBlocks.
188///
189/// For every BB you add to the set, you can specify whether we "remember" the
190/// block. When you get the common dominator, you can also ask whether it's one
191/// of the blocks we remembered.
192class NearestCommonDominator {
193 DominatorTree *DT;
194 BasicBlock *Result = nullptr;
195 bool ResultIsRemembered = false;
196
197 /// Add BB to the resulting dominator.
198 void addBlock(BasicBlock *BB, bool Remember) {
199 if (!Result) {
200 Result = BB;
201 ResultIsRemembered = Remember;
202 return;
203 }
204
205 BasicBlock *NewResult = DT->findNearestCommonDominator(A: Result, B: BB);
206 if (NewResult != Result)
207 ResultIsRemembered = false;
208 if (NewResult == BB)
209 ResultIsRemembered |= Remember;
210 Result = NewResult;
211 }
212
213public:
214 explicit NearestCommonDominator(DominatorTree *DomTree) : DT(DomTree) {}
215
216 void addBlock(BasicBlock *BB) {
217 addBlock(BB, /* Remember = */ false);
218 }
219
220 void addAndRememberBlock(BasicBlock *BB) {
221 addBlock(BB, /* Remember = */ true);
222 }
223
224 /// Get the nearest common dominator of all the BBs added via addBlock() and
225 /// addAndRememberBlock().
226 BasicBlock *result() { return Result; }
227
228 /// Is the BB returned by getResult() one of the blocks we added to the set
229 /// with addAndRememberBlock()?
230 bool resultIsRememberedBlock() { return ResultIsRemembered; }
231};
232
233/// Transforms the control flow graph on one single entry/exit region
234/// at a time.
235///
236/// After the transform all "If"/"Then"/"Else" style control flow looks like
237/// this:
238///
239/// \verbatim
240/// 1
241/// ||
242/// | |
243/// 2 |
244/// | /
245/// |/
246/// 3
247/// || Where:
248/// | | 1 = "If" block, calculates the condition
249/// 4 | 2 = "Then" subregion, runs if the condition is true
250/// | / 3 = "Flow" blocks, newly inserted flow blocks, rejoins the flow
251/// |/ 4 = "Else" optional subregion, runs if the condition is false
252/// 5 5 = "End" block, also rejoins the control flow
253/// \endverbatim
254///
255/// Control flow is expressed as a branch where the true exit goes into the
256/// "Then"/"Else" region, while the false exit skips the region
257/// The condition for the optional "Else" region is expressed as a PHI node.
258/// The incoming values of the PHI node are true for the "If" edge and false
259/// for the "Then" edge.
260///
261/// Additionally to that even complicated loops look like this:
262///
263/// \verbatim
264/// 1
265/// ||
266/// | |
267/// 2 ^ Where:
268/// | / 1 = "Entry" block
269/// |/ 2 = "Loop" optional subregion, with all exits at "Flow" block
270/// 3 3 = "Flow" block, with back edge to entry block
271/// |
272/// \endverbatim
273///
274/// The back edge of the "Flow" block is always on the false side of the branch
275/// while the true side continues the general flow. So the loop condition
276/// consist of a network of PHI nodes where the true incoming values expresses
277/// breaks and the false values expresses continue states.
278
279class StructurizeCFG {
280 Type *Boolean;
281 ConstantInt *BoolTrue;
282 ConstantInt *BoolFalse;
283 Value *BoolPoison;
284 const TargetTransformInfo *TTI;
285 Function *Func;
286 Region *ParentRegion;
287
288 UniformityInfo *UA = nullptr;
289 DominatorTree *DT;
290
291 SmallVector<RegionNode *, 8> Order;
292 BBSet Visited;
293 BBSet FlowSet;
294
295 SmallVector<WeakVH, 8> AffectedPhis;
296 BBPhiMap DeletedPhis;
297 BB2BBVecMap AddedPhis;
298
299 PredMap Predicates;
300 BranchVector Conditions;
301
302 BB2BBMap Loops;
303 PredMap LoopPreds;
304 BranchVector LoopConds;
305
306 Val2BBMap HoistedValues;
307
308 RegionNode *PrevNode;
309
310 void hoistZeroCostElseBlockPhiValues(BasicBlock *ElseBB, BasicBlock *ThenBB);
311
312 bool isHoistableInstruction(Instruction *I, BasicBlock *BB,
313 BasicBlock *HoistTo);
314
315 void orderNodes();
316
317 void analyzeLoops(RegionNode *N);
318
319 PredInfo buildCondition(BranchInst *Term, unsigned Idx, bool Invert);
320
321 void gatherPredicates(RegionNode *N);
322
323 void collectInfos();
324
325 void insertConditions(bool Loops, SSAUpdaterBulk &PhiInserter);
326
327 void simplifyConditions();
328
329 void delPhiValues(BasicBlock *From, BasicBlock *To);
330
331 void addPhiValues(BasicBlock *From, BasicBlock *To);
332
333 void findUndefBlocks(BasicBlock *PHIBlock,
334 const SmallPtrSet<BasicBlock *, 8> &Incomings,
335 SmallVector<BasicBlock *> &UndefBlks) const;
336
337 void mergeIfCompatible(EquivalenceClasses<PHINode *> &PhiClasses, PHINode *A,
338 PHINode *B);
339
340 void setPhiValues();
341
342 void simplifyAffectedPhis();
343
344 void simplifyHoistedPhis();
345
346 DebugLoc killTerminator(BasicBlock *BB);
347
348 void changeExit(RegionNode *Node, BasicBlock *NewExit,
349 bool IncludeDominator);
350
351 BasicBlock *getNextFlow(BasicBlock *Dominator);
352
353 std::pair<BasicBlock *, DebugLoc> needPrefix(bool NeedEmpty);
354
355 BasicBlock *needPostfix(BasicBlock *Flow, bool ExitUseAllowed);
356
357 void setPrevNode(BasicBlock *BB);
358
359 bool dominatesPredicates(BasicBlock *BB, RegionNode *Node);
360
361 bool isPredictableTrue(RegionNode *Node);
362
363 void wireFlow(bool ExitUseAllowed, BasicBlock *LoopEnd);
364
365 void handleLoops(bool ExitUseAllowed, BasicBlock *LoopEnd);
366
367 void createFlow();
368
369 void rebuildSSA();
370
371public:
372 void init(Region *R);
373 bool run(Region *R, DominatorTree *DT, const TargetTransformInfo *TTI);
374 bool makeUniformRegion(Region *R, UniformityInfo &UA);
375};
376
377class StructurizeCFGLegacyPass : public RegionPass {
378 bool SkipUniformRegions;
379
380public:
381 static char ID;
382
383 explicit StructurizeCFGLegacyPass(bool SkipUniformRegions_ = false)
384 : RegionPass(ID), SkipUniformRegions(SkipUniformRegions_) {
385 if (ForceSkipUniformRegions.getNumOccurrences())
386 SkipUniformRegions = ForceSkipUniformRegions.getValue();
387 initializeStructurizeCFGLegacyPassPass(*PassRegistry::getPassRegistry());
388 }
389
390 bool runOnRegion(Region *R, RGPassManager &RGM) override {
391 StructurizeCFG SCFG;
392 SCFG.init(R);
393 if (SkipUniformRegions) {
394 UniformityInfo &UA =
395 getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo();
396 if (SCFG.makeUniformRegion(R, UA))
397 return false;
398 }
399 Function *F = R->getEntry()->getParent();
400 const TargetTransformInfo *TTI =
401 &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F: *F);
402 DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
403 return SCFG.run(R, DT, TTI);
404 }
405
406 StringRef getPassName() const override { return "Structurize control flow"; }
407
408 void getAnalysisUsage(AnalysisUsage &AU) const override {
409 if (SkipUniformRegions)
410 AU.addRequired<UniformityInfoWrapperPass>();
411 AU.addRequired<TargetTransformInfoWrapperPass>();
412 AU.addRequired<DominatorTreeWrapperPass>();
413 AU.addRequired<TargetTransformInfoWrapperPass>();
414
415 AU.addPreserved<DominatorTreeWrapperPass>();
416 RegionPass::getAnalysisUsage(AU);
417 }
418};
419
420} // end anonymous namespace
421
422char StructurizeCFGLegacyPass::ID = 0;
423
424INITIALIZE_PASS_BEGIN(StructurizeCFGLegacyPass, "structurizecfg",
425 "Structurize the CFG", false, false)
426INITIALIZE_PASS_DEPENDENCY(UniformityInfoWrapperPass)
427INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
428INITIALIZE_PASS_DEPENDENCY(RegionInfoPass)
429INITIALIZE_PASS_END(StructurizeCFGLegacyPass, "structurizecfg",
430 "Structurize the CFG", false, false)
431
432/// Checks whether an instruction is zero cost instruction and checks if the
433/// operands are from different BB. If so, this instruction can be coalesced
434/// if its hoisted to predecessor block. So, this returns true.
435bool StructurizeCFG::isHoistableInstruction(Instruction *I, BasicBlock *BB,
436 BasicBlock *HoistTo) {
437 if (I->getParent() != BB || isa<PHINode>(Val: I))
438 return false;
439
440 // If the instruction is not a zero cost instruction, return false.
441 auto Cost = TTI->getInstructionCost(U: I, CostKind: TargetTransformInfo::TCK_Latency);
442 InstructionCost::CostType CostVal =
443 Cost.isValid()
444 ? Cost.getValue()
445 : (InstructionCost::CostType)TargetTransformInfo::TCC_Expensive;
446 if (CostVal != 0)
447 return false;
448
449 // Check if all operands are available at the hoisting destination.
450 for (auto &Op : I->operands()) {
451 if (auto *OpI = dyn_cast<Instruction>(Val&: Op)) {
452 // Operand must dominate the hoisting destination.
453 if (!DT->dominates(A: OpI->getParent(), B: HoistTo))
454 return false;
455 }
456 }
457
458 return true;
459}
460
461/// Structurization can introduce unnecessary VGPR copies due to register
462/// coalescing interference. For example, if the Else block has a zero-cost
463/// instruction and the Then block modifies the VGPR value, only one value is
464/// live at a time in merge block before structurization. After structurization,
465/// the coalescer may incorrectly treat the Then value as live in the Else block
466/// (via the path Then → Flow → Else), leading to unnecessary VGPR copies.
467///
468/// This function examines phi nodes whose incoming values are zero-cost
469/// instructions in the Else block. It identifies such values that can be safely
470/// hoisted and moves them to the nearest common dominator of Then and Else
471/// blocks. A follow-up function after setting PhiNodes assigns the hoisted
472/// value to poison phi nodes along the if→flow edge, aiding register coalescing
473/// and minimizing unnecessary live ranges.
474void StructurizeCFG::hoistZeroCostElseBlockPhiValues(BasicBlock *ElseBB,
475 BasicBlock *ThenBB) {
476
477 BasicBlock *ElseSucc = ElseBB->getSingleSuccessor();
478 BasicBlock *CommonDominator = DT->findNearestCommonDominator(A: ElseBB, B: ThenBB);
479
480 if (!ElseSucc || !CommonDominator)
481 return;
482 Instruction *Term = CommonDominator->getTerminator();
483 for (PHINode &Phi : ElseSucc->phis()) {
484 Value *ElseVal = Phi.getIncomingValueForBlock(BB: ElseBB);
485 auto *Inst = dyn_cast<Instruction>(Val: ElseVal);
486 if (!Inst || !isHoistableInstruction(I: Inst, BB: ElseBB, HoistTo: CommonDominator))
487 continue;
488 Inst->removeFromParent();
489 Inst->insertInto(ParentBB: CommonDominator, It: Term->getIterator());
490 HoistedValues[Inst] = CommonDominator;
491 }
492}
493
494/// Build up the general order of nodes, by performing a topological sort of the
495/// parent region's nodes, while ensuring that there is no outer cycle node
496/// between any two inner cycle nodes.
497void StructurizeCFG::orderNodes() {
498 Order.resize(N: std::distance(first: GraphTraits<Region *>::nodes_begin(R: ParentRegion),
499 last: GraphTraits<Region *>::nodes_end(R: ParentRegion)));
500 if (Order.empty())
501 return;
502
503 SmallDenseSet<RegionNode *> Nodes;
504 auto EntryNode = SubGraphTraits::getEntryNode(R: ParentRegion);
505
506 // A list of range indices of SCCs in Order, to be processed.
507 SmallVector<std::pair<unsigned, unsigned>, 8> WorkList;
508 unsigned I = 0, E = Order.size();
509 while (true) {
510 // Run through all the SCCs in the subgraph starting with Entry.
511 for (auto SCCI =
512 scc_iterator<SubGraphTraits::NodeRef, SubGraphTraits>::begin(
513 G: EntryNode);
514 !SCCI.isAtEnd(); ++SCCI) {
515 auto &SCC = *SCCI;
516
517 // An SCC up to the size of 2, can be reduced to an entry (the last node),
518 // and a possible additional node. Therefore, it is already in order, and
519 // there is no need to add it to the work-list.
520 unsigned Size = SCC.size();
521 if (Size > 2)
522 WorkList.emplace_back(Args&: I, Args: I + Size);
523
524 // Add the SCC nodes to the Order array.
525 for (const auto &N : SCC) {
526 assert(I < E && "SCC size mismatch!");
527 Order[I++] = N.first;
528 }
529 }
530 assert(I == E && "SCC size mismatch!");
531
532 // If there are no more SCCs to order, then we are done.
533 if (WorkList.empty())
534 break;
535
536 std::tie(args&: I, args&: E) = WorkList.pop_back_val();
537
538 // Collect the set of nodes in the SCC's subgraph. These are only the
539 // possible child nodes; we do not add the entry (last node) otherwise we
540 // will have the same exact SCC all over again.
541 Nodes.clear();
542 Nodes.insert(I: Order.begin() + I, E: Order.begin() + E - 1);
543
544 // Update the entry node.
545 EntryNode.first = Order[E - 1];
546 EntryNode.second = &Nodes;
547 }
548}
549
550/// Determine the end of the loops
551void StructurizeCFG::analyzeLoops(RegionNode *N) {
552 if (N->isSubRegion()) {
553 // Test for exit as back edge
554 BasicBlock *Exit = N->getNodeAs<Region>()->getExit();
555 if (Visited.count(Ptr: Exit))
556 Loops[Exit] = N->getEntry();
557
558 } else {
559 // Test for successors as back edge
560 BasicBlock *BB = N->getNodeAs<BasicBlock>();
561 if (BranchInst *Term = dyn_cast<BranchInst>(Val: BB->getTerminator()))
562 for (BasicBlock *Succ : Term->successors())
563 if (Visited.count(Ptr: Succ))
564 Loops[Succ] = BB;
565 }
566}
567
568/// Build the condition for one edge
569PredInfo StructurizeCFG::buildCondition(BranchInst *Term, unsigned Idx,
570 bool Invert) {
571 Value *Cond = Invert ? BoolFalse : BoolTrue;
572 MaybeCondBranchWeights Weights;
573
574 if (Term->isConditional()) {
575 Cond = Term->getCondition();
576 Weights = CondBranchWeights::tryParse(Br: *Term);
577
578 if (Idx != (unsigned)Invert) {
579 Cond = invertCondition(Condition: Cond);
580 if (Weights)
581 Weights = Weights->invert();
582 }
583 }
584 return {.Pred: Cond, .Weights: Weights};
585}
586
587/// Analyze the predecessors of each block and build up predicates
588void StructurizeCFG::gatherPredicates(RegionNode *N) {
589 RegionInfo *RI = ParentRegion->getRegionInfo();
590 BasicBlock *BB = N->getEntry();
591 BBPredicates &Pred = Predicates[BB];
592 BBPredicates &LPred = LoopPreds[BB];
593
594 for (BasicBlock *P : predecessors(BB)) {
595 // Ignore it if it's a branch from outside into our region entry
596 if (!ParentRegion->contains(BB: P) || !dyn_cast<BranchInst>(Val: P->getTerminator()))
597 continue;
598
599 Region *R = RI->getRegionFor(BB: P);
600 if (R == ParentRegion) {
601 // It's a top level block in our region
602 BranchInst *Term = cast<BranchInst>(Val: P->getTerminator());
603 for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) {
604 BasicBlock *Succ = Term->getSuccessor(i);
605 if (Succ != BB)
606 continue;
607
608 if (Visited.count(Ptr: P)) {
609 // Normal forward edge
610 if (Term->isConditional()) {
611 // Try to treat it like an ELSE block
612 BasicBlock *Other = Term->getSuccessor(i: !i);
613 if (Visited.count(Ptr: Other) && !Loops.count(Val: Other) &&
614 !Pred.count(Val: Other) && !Pred.count(Val: P)) {
615 hoistZeroCostElseBlockPhiValues(ElseBB: Succ, ThenBB: Other);
616 Pred[Other] = {.Pred: BoolFalse, .Weights: std::nullopt};
617 Pred[P] = {.Pred: BoolTrue, .Weights: std::nullopt};
618 continue;
619 }
620 }
621 Pred[P] = buildCondition(Term, Idx: i, Invert: false);
622 } else {
623 // Back edge
624 LPred[P] = buildCondition(Term, Idx: i, Invert: true);
625 }
626 }
627 } else {
628 // It's an exit from a sub region
629 while (R->getParent() != ParentRegion)
630 R = R->getParent();
631
632 // Edge from inside a subregion to its entry, ignore it
633 if (*R == *N)
634 continue;
635
636 BasicBlock *Entry = R->getEntry();
637 if (Visited.count(Ptr: Entry))
638 Pred[Entry] = {.Pred: BoolTrue, .Weights: std::nullopt};
639 else
640 LPred[Entry] = {.Pred: BoolFalse, .Weights: std::nullopt};
641 }
642 }
643}
644
645/// Collect various loop and predicate infos
646void StructurizeCFG::collectInfos() {
647 // Reset predicate
648 Predicates.clear();
649
650 // and loop infos
651 Loops.clear();
652 LoopPreds.clear();
653
654 // Reset the visited nodes
655 Visited.clear();
656
657 for (RegionNode *RN : reverse(C&: Order)) {
658 LLVM_DEBUG(dbgs() << "Visiting: "
659 << (RN->isSubRegion() ? "SubRegion with entry: " : "")
660 << RN->getEntry()->getName() << "\n");
661
662 // Analyze all the conditions leading to a node
663 gatherPredicates(N: RN);
664
665 // Remember that we've seen this node
666 Visited.insert(Ptr: RN->getEntry());
667
668 // Find the last back edges
669 analyzeLoops(N: RN);
670 }
671}
672
673/// Insert the missing branch conditions
674void StructurizeCFG::insertConditions(bool Loops, SSAUpdaterBulk &PhiInserter) {
675 BranchVector &Conds = Loops ? LoopConds : Conditions;
676 Value *Default = Loops ? BoolTrue : BoolFalse;
677
678 for (BranchInst *Term : Conds) {
679 assert(Term->isConditional());
680
681 BasicBlock *Parent = Term->getParent();
682 BasicBlock *SuccTrue = Term->getSuccessor(i: 0);
683 BasicBlock *SuccFalse = Term->getSuccessor(i: 1);
684
685 unsigned Variable = PhiInserter.AddVariable(Name: "", Ty: Boolean);
686 PhiInserter.AddAvailableValue(Var: Variable, BB: Loops ? SuccFalse : Parent,
687 V: Default);
688
689 BBPredicates &Preds = Loops ? LoopPreds[SuccFalse] : Predicates[SuccTrue];
690
691 NearestCommonDominator Dominator(DT);
692 Dominator.addBlock(BB: Parent);
693
694 PredInfo ParentInfo{.Pred: nullptr, .Weights: std::nullopt};
695 for (auto [BB, PI] : Preds) {
696 if (BB == Parent) {
697 ParentInfo = PI;
698 break;
699 }
700 PhiInserter.AddAvailableValue(Var: Variable, BB, V: PI.Pred);
701 Dominator.addAndRememberBlock(BB);
702 }
703
704 if (ParentInfo.Pred) {
705 Term->setCondition(ParentInfo.Pred);
706 CondBranchWeights::setMetadata(Br&: *Term, Weights: ParentInfo.Weights);
707 } else {
708 if (!Dominator.resultIsRememberedBlock())
709 PhiInserter.AddAvailableValue(Var: Variable, BB: Dominator.result(), V: Default);
710
711 PhiInserter.AddUse(Var: Variable, U: &Term->getOperandUse(i: 0));
712 }
713 }
714}
715
716/// Simplify any inverted conditions that were built by buildConditions.
717void StructurizeCFG::simplifyConditions() {
718 SmallVector<Instruction *> InstToErase;
719 for (auto &I : concat<PredMap::value_type>(Ranges&: Predicates, Ranges&: LoopPreds)) {
720 auto &Preds = I.second;
721 for (auto [BB, PI] : Preds) {
722 Instruction *Inverted;
723 if (match(V: PI.Pred, P: m_Not(V: m_OneUse(SubPattern: m_Instruction(I&: Inverted)))) &&
724 !PI.Pred->use_empty()) {
725 if (auto *InvertedCmp = dyn_cast<CmpInst>(Val: Inverted)) {
726 InvertedCmp->setPredicate(InvertedCmp->getInversePredicate());
727 PI.Pred->replaceAllUsesWith(V: InvertedCmp);
728 InstToErase.push_back(Elt: cast<Instruction>(Val: PI.Pred));
729 }
730 }
731 }
732 }
733 for (auto *I : InstToErase)
734 I->eraseFromParent();
735}
736
737/// Remove all PHI values coming from "From" into "To" and remember
738/// them in DeletedPhis
739void StructurizeCFG::delPhiValues(BasicBlock *From, BasicBlock *To) {
740 PhiMap &Map = DeletedPhis[To];
741 for (PHINode &Phi : To->phis()) {
742 bool Recorded = false;
743 while (Phi.getBasicBlockIndex(BB: From) != -1) {
744 Value *Deleted = Phi.removeIncomingValue(BB: From, DeletePHIIfEmpty: false);
745 Map[&Phi].push_back(Elt: std::make_pair(x&: From, y&: Deleted));
746 if (!Recorded) {
747 AffectedPhis.push_back(Elt: &Phi);
748 Recorded = true;
749 }
750 }
751 }
752}
753
754/// Add a dummy PHI value as soon as we knew the new predecessor
755void StructurizeCFG::addPhiValues(BasicBlock *From, BasicBlock *To) {
756 for (PHINode &Phi : To->phis()) {
757 Value *Poison = PoisonValue::get(T: Phi.getType());
758 Phi.addIncoming(V: Poison, BB: From);
759 }
760 AddedPhis[To].push_back(Elt: From);
761}
762
763/// When we are reconstructing a PHI inside \p PHIBlock with incoming values
764/// from predecessors \p Incomings, we have a chance to mark the available value
765/// from some blocks as undefined. The function will find out all such blocks
766/// and return in \p UndefBlks.
767void StructurizeCFG::findUndefBlocks(
768 BasicBlock *PHIBlock, const SmallPtrSet<BasicBlock *, 8> &Incomings,
769 SmallVector<BasicBlock *> &UndefBlks) const {
770 // We may get a post-structured CFG like below:
771 //
772 // | P1
773 // |/
774 // F1
775 // |\
776 // | N
777 // |/
778 // F2
779 // |\
780 // | P2
781 // |/
782 // F3
783 // |\
784 // B
785 //
786 // B is the block that has a PHI being reconstructed. P1/P2 are predecessors
787 // of B before structurization. F1/F2/F3 are flow blocks inserted during
788 // structurization process. Block N is not a predecessor of B before
789 // structurization, but are placed between the predecessors(P1/P2) of B after
790 // structurization. This usually means that threads went to N never take the
791 // path N->F2->F3->B. For example, the threads take the branch F1->N may
792 // always take the branch F2->P2. So, when we are reconstructing a PHI
793 // originally in B, we can safely say the incoming value from N is undefined.
794 SmallPtrSet<BasicBlock *, 8> VisitedBlock;
795 SmallVector<BasicBlock *, 8> Stack;
796 if (PHIBlock == ParentRegion->getExit()) {
797 for (auto P : predecessors(BB: PHIBlock)) {
798 if (ParentRegion->contains(BB: P))
799 Stack.push_back(Elt: P);
800 }
801 } else {
802 append_range(C&: Stack, R: predecessors(BB: PHIBlock));
803 }
804
805 // Do a backward traversal over the CFG, and stop further searching if
806 // the block is not a Flow. If a block is neither flow block nor the
807 // incoming predecessor, then the incoming value from the block is
808 // undefined value for the PHI being reconstructed.
809 while (!Stack.empty()) {
810 BasicBlock *Current = Stack.pop_back_val();
811 if (!VisitedBlock.insert(Ptr: Current).second)
812 continue;
813
814 if (FlowSet.contains(Ptr: Current))
815 llvm::append_range(C&: Stack, R: predecessors(BB: Current));
816 else if (!Incomings.contains(Ptr: Current))
817 UndefBlks.push_back(Elt: Current);
818 }
819}
820
821// If two phi nodes have compatible incoming values (for each
822// incoming block, either they have the same incoming value or only one phi
823// node has an incoming value), let them share the merged incoming values. The
824// merge process is guided by the equivalence information from \p PhiClasses.
825// The function will possibly update the incoming values of leader phi in
826// DeletedPhis.
827void StructurizeCFG::mergeIfCompatible(
828 EquivalenceClasses<PHINode *> &PhiClasses, PHINode *A, PHINode *B) {
829 auto ItA = PhiClasses.findLeader(ECV: PhiClasses.insert(Data: A));
830 auto ItB = PhiClasses.findLeader(ECV: PhiClasses.insert(Data: B));
831 // They are already in the same class, no work needed.
832 if (ItA == ItB)
833 return;
834
835 PHINode *LeaderA = *ItA;
836 PHINode *LeaderB = *ItB;
837 BBValueVector &IncomingA = DeletedPhis[LeaderA->getParent()][LeaderA];
838 BBValueVector &IncomingB = DeletedPhis[LeaderB->getParent()][LeaderB];
839
840 DenseMap<BasicBlock *, Value *> Mergeable(IncomingA.begin(), IncomingA.end());
841 for (auto [BB, V] : IncomingB) {
842 auto BBIt = Mergeable.find(Val: BB);
843 if (BBIt != Mergeable.end() && BBIt->second != V)
844 return;
845 // Either IncomingA does not have this value or IncomingA has the same
846 // value.
847 Mergeable.insert(KV: {BB, V});
848 }
849
850 // Update the incoming value of leaderA.
851 IncomingA.assign(in_start: Mergeable.begin(), in_end: Mergeable.end());
852 PhiClasses.unionSets(L1: ItA, L2: ItB);
853}
854
855/// Add the real PHI value as soon as everything is set up
856void StructurizeCFG::setPhiValues() {
857 SmallVector<PHINode *, 8> InsertedPhis;
858 SSAUpdater Updater(&InsertedPhis);
859 DenseMap<BasicBlock *, SmallVector<BasicBlock *>> UndefBlksMap;
860
861 // Find phi nodes that have compatible incoming values (either they have
862 // the same value for the same block or only one phi node has an incoming
863 // value, see example below). We only search again the phi's that are
864 // referenced by another phi, which is the case we care about.
865 //
866 // For example (-- means no incoming value):
867 // phi1 : BB1:phi2 BB2:v BB3:--
868 // phi2: BB1:-- BB2:v BB3:w
869 //
870 // Then we can merge these incoming values and let phi1, phi2 use the
871 // same set of incoming values:
872 //
873 // phi1&phi2: BB1:phi2 BB2:v BB3:w
874 //
875 // By doing this, phi1 and phi2 would share more intermediate phi nodes.
876 // This would help reduce the number of phi nodes during SSA reconstruction
877 // and ultimately result in fewer COPY instructions.
878 //
879 // This should be correct, because if a phi node does not have incoming
880 // value from certain block, this means the block is not the predecessor
881 // of the parent block, so we actually don't care about its incoming value.
882 EquivalenceClasses<PHINode *> PhiClasses;
883 for (const auto &[To, From] : AddedPhis) {
884 auto OldPhiIt = DeletedPhis.find(Val: To);
885 if (OldPhiIt == DeletedPhis.end())
886 continue;
887
888 PhiMap &BlkPhis = OldPhiIt->second;
889 SmallVector<BasicBlock *> &UndefBlks = UndefBlksMap[To];
890 SmallPtrSet<BasicBlock *, 8> Incomings;
891
892 // Get the undefined blocks shared by all the phi nodes.
893 if (!BlkPhis.empty()) {
894 Incomings.insert_range(R: llvm::make_first_range(c&: BlkPhis.front().second));
895 findUndefBlocks(PHIBlock: To, Incomings, UndefBlks);
896 }
897
898 for (const auto &[Phi, Incomings] : OldPhiIt->second) {
899 SmallVector<PHINode *> IncomingPHIs;
900 for (const auto &[BB, V] : Incomings) {
901 // First, for each phi, check whether it has incoming value which is
902 // another phi.
903 if (PHINode *P = dyn_cast<PHINode>(Val: V))
904 IncomingPHIs.push_back(Elt: P);
905 }
906
907 for (auto *OtherPhi : IncomingPHIs) {
908 // Skip phis that are unrelated to the phi reconstruction for now.
909 if (!DeletedPhis.contains(Val: OtherPhi->getParent()))
910 continue;
911 mergeIfCompatible(PhiClasses, A: Phi, B: OtherPhi);
912 }
913 }
914 }
915
916 for (const auto &AddedPhi : AddedPhis) {
917 BasicBlock *To = AddedPhi.first;
918 const BBVector &From = AddedPhi.second;
919
920 auto It = DeletedPhis.find(Val: To);
921 if (It == DeletedPhis.end())
922 continue;
923
924 PhiMap &Map = It->second;
925 SmallVector<BasicBlock *> &UndefBlks = UndefBlksMap[To];
926 for (const auto &[Phi, Incoming] : Map) {
927 Value *Poison = PoisonValue::get(T: Phi->getType());
928 Updater.Initialize(Ty: Phi->getType(), Name: "");
929 Updater.AddAvailableValue(BB: &Func->getEntryBlock(), V: Poison);
930 Updater.AddAvailableValue(BB: To, V: Poison);
931
932 // Use leader phi's incoming if there is.
933 auto LeaderIt = PhiClasses.findLeader(V: Phi);
934 bool UseIncomingOfLeader =
935 LeaderIt != PhiClasses.member_end() && *LeaderIt != Phi;
936 const auto &IncomingMap =
937 UseIncomingOfLeader ? DeletedPhis[(*LeaderIt)->getParent()][*LeaderIt]
938 : Incoming;
939
940 SmallVector<BasicBlock *> ConstantPreds;
941 for (const auto &[BB, V] : IncomingMap) {
942 Updater.AddAvailableValue(BB, V);
943 if (isa<Constant>(Val: V))
944 ConstantPreds.push_back(Elt: BB);
945 }
946
947 for (auto UB : UndefBlks) {
948 // If this undef block is dominated by any predecessor(before
949 // structurization) of reconstructed PHI with constant incoming value,
950 // don't mark the available value as undefined. Setting undef to such
951 // block will stop us from getting optimal phi insertion.
952 if (any_of(Range&: ConstantPreds,
953 P: [&](BasicBlock *CP) { return DT->dominates(A: CP, B: UB); }))
954 continue;
955 // Maybe already get a value through sharing with other phi nodes.
956 if (Updater.HasValueForBlock(BB: UB))
957 continue;
958
959 Updater.AddAvailableValue(BB: UB, V: Poison);
960 }
961
962 for (BasicBlock *FI : From)
963 Phi->setIncomingValueForBlock(BB: FI, V: Updater.GetValueAtEndOfBlock(BB: FI));
964 AffectedPhis.push_back(Elt: Phi);
965 }
966 }
967
968 AffectedPhis.append(in_start: InsertedPhis.begin(), in_end: InsertedPhis.end());
969}
970
971/// Updates PHI nodes after hoisted zero cost instructions by replacing poison
972/// entries on Flow nodes with the appropriate hoisted values
973void StructurizeCFG::simplifyHoistedPhis() {
974 for (WeakVH VH : AffectedPhis) {
975 PHINode *Phi = dyn_cast_or_null<PHINode>(Val&: VH);
976 if (!Phi || Phi->getNumIncomingValues() != 2)
977 continue;
978
979 for (int i = 0; i < 2; i++) {
980 Value *V = Phi->getIncomingValue(i);
981 auto BBIt = HoistedValues.find(Val: V);
982
983 if (BBIt == HoistedValues.end())
984 continue;
985
986 Value *OtherV = Phi->getIncomingValue(i: !i);
987 PHINode *OtherPhi = dyn_cast<PHINode>(Val: OtherV);
988 if (!OtherPhi)
989 continue;
990
991 int PoisonValBBIdx = -1;
992 for (size_t i = 0; i < OtherPhi->getNumIncomingValues(); i++) {
993 if (!isa<PoisonValue>(Val: OtherPhi->getIncomingValue(i)))
994 continue;
995 PoisonValBBIdx = i;
996 break;
997 }
998 if (PoisonValBBIdx == -1 ||
999 !DT->dominates(A: BBIt->second,
1000 B: OtherPhi->getIncomingBlock(i: PoisonValBBIdx)))
1001 continue;
1002
1003 OtherPhi->setIncomingValue(i: PoisonValBBIdx, V);
1004 if (DT->dominates(Def: OtherV, User: Phi))
1005 Phi->setIncomingValue(i, V: OtherV);
1006 }
1007 }
1008}
1009
1010void StructurizeCFG::simplifyAffectedPhis() {
1011 bool Changed;
1012 do {
1013 Changed = false;
1014 SimplifyQuery Q(Func->getDataLayout());
1015 Q.DT = DT;
1016 // Setting CanUseUndef to true might extend value liveness, set it to false
1017 // to achieve better register pressure.
1018 Q.CanUseUndef = false;
1019 for (WeakVH VH : AffectedPhis) {
1020 if (auto Phi = dyn_cast_or_null<PHINode>(Val&: VH)) {
1021 if (auto NewValue = simplifyInstruction(I: Phi, Q)) {
1022 Phi->replaceAllUsesWith(V: NewValue);
1023 Phi->eraseFromParent();
1024 Changed = true;
1025 }
1026 }
1027 }
1028 } while (Changed);
1029}
1030
1031/// Remove phi values from all successors and then remove the terminator.
1032DebugLoc StructurizeCFG::killTerminator(BasicBlock *BB) {
1033 Instruction *Term = BB->getTerminator();
1034 if (!Term)
1035 return DebugLoc();
1036
1037 for (BasicBlock *Succ : successors(BB))
1038 delPhiValues(From: BB, To: Succ);
1039
1040 DebugLoc DL = Term->getDebugLoc();
1041 Term->eraseFromParent();
1042 return DL;
1043}
1044
1045/// Let node exit(s) point to NewExit
1046void StructurizeCFG::changeExit(RegionNode *Node, BasicBlock *NewExit,
1047 bool IncludeDominator) {
1048 if (Node->isSubRegion()) {
1049 Region *SubRegion = Node->getNodeAs<Region>();
1050 BasicBlock *OldExit = SubRegion->getExit();
1051 BasicBlock *Dominator = nullptr;
1052
1053 // Find all the edges from the sub region to the exit.
1054 // We use make_early_inc_range here because we modify BB's terminator.
1055 for (BasicBlock *BB : llvm::make_early_inc_range(Range: predecessors(BB: OldExit))) {
1056 if (!SubRegion->contains(BB))
1057 continue;
1058
1059 // Modify the edges to point to the new exit
1060 delPhiValues(From: BB, To: OldExit);
1061 BB->getTerminator()->replaceUsesOfWith(From: OldExit, To: NewExit);
1062 addPhiValues(From: BB, To: NewExit);
1063
1064 // Find the new dominator (if requested)
1065 if (IncludeDominator) {
1066 if (!Dominator)
1067 Dominator = BB;
1068 else
1069 Dominator = DT->findNearestCommonDominator(A: Dominator, B: BB);
1070 }
1071 }
1072
1073 // Change the dominator (if requested)
1074 if (Dominator)
1075 DT->changeImmediateDominator(BB: NewExit, NewBB: Dominator);
1076
1077 // Update the region info
1078 SubRegion->replaceExit(BB: NewExit);
1079 } else {
1080 BasicBlock *BB = Node->getNodeAs<BasicBlock>();
1081 DebugLoc DL = killTerminator(BB);
1082 BranchInst *Br = BranchInst::Create(IfTrue: NewExit, InsertBefore: BB);
1083 Br->setDebugLoc(DL);
1084 addPhiValues(From: BB, To: NewExit);
1085 if (IncludeDominator)
1086 DT->changeImmediateDominator(BB: NewExit, NewBB: BB);
1087 }
1088}
1089
1090/// Create a new flow node and update dominator tree and region info
1091BasicBlock *StructurizeCFG::getNextFlow(BasicBlock *Dominator) {
1092 LLVMContext &Context = Func->getContext();
1093 BasicBlock *Insert = Order.empty() ? ParentRegion->getExit() :
1094 Order.back()->getEntry();
1095 BasicBlock *Flow = BasicBlock::Create(Context, Name: FlowBlockName,
1096 Parent: Func, InsertBefore: Insert);
1097 FlowSet.insert(Ptr: Flow);
1098 DT->addNewBlock(BB: Flow, DomBB: Dominator);
1099 ParentRegion->getRegionInfo()->setRegionFor(BB: Flow, R: ParentRegion);
1100 return Flow;
1101}
1102
1103/// Create a new or reuse the previous node as flow node. Returns a block and a
1104/// debug location to be used for new instructions in that block.
1105std::pair<BasicBlock *, DebugLoc> StructurizeCFG::needPrefix(bool NeedEmpty) {
1106 BasicBlock *Entry = PrevNode->getEntry();
1107
1108 if (!PrevNode->isSubRegion()) {
1109 DebugLoc DL = killTerminator(BB: Entry);
1110 if (!NeedEmpty || Entry->getFirstInsertionPt() == Entry->end())
1111 return {Entry, DL};
1112 }
1113
1114 // create a new flow node
1115 BasicBlock *Flow = getNextFlow(Dominator: Entry);
1116
1117 // and wire it up
1118 changeExit(Node: PrevNode, NewExit: Flow, IncludeDominator: true);
1119 PrevNode = ParentRegion->getBBNode(BB: Flow);
1120 return {Flow, DebugLoc()};
1121}
1122
1123/// Returns the region exit if possible, otherwise just a new flow node
1124BasicBlock *StructurizeCFG::needPostfix(BasicBlock *Flow,
1125 bool ExitUseAllowed) {
1126 if (!Order.empty() || !ExitUseAllowed)
1127 return getNextFlow(Dominator: Flow);
1128
1129 BasicBlock *Exit = ParentRegion->getExit();
1130 DT->changeImmediateDominator(BB: Exit, NewBB: Flow);
1131 addPhiValues(From: Flow, To: Exit);
1132 return Exit;
1133}
1134
1135/// Set the previous node
1136void StructurizeCFG::setPrevNode(BasicBlock *BB) {
1137 PrevNode = ParentRegion->contains(BB) ? ParentRegion->getBBNode(BB)
1138 : nullptr;
1139}
1140
1141/// Does BB dominate all the predicates of Node?
1142bool StructurizeCFG::dominatesPredicates(BasicBlock *BB, RegionNode *Node) {
1143 BBPredicates &Preds = Predicates[Node->getEntry()];
1144 return llvm::all_of(Range&: Preds, P: [&](std::pair<BasicBlock *, PredInfo> Pred) {
1145 return DT->dominates(A: BB, B: Pred.first);
1146 });
1147}
1148
1149/// Can we predict that this node will always be called?
1150bool StructurizeCFG::isPredictableTrue(RegionNode *Node) {
1151 BBPredicates &Preds = Predicates[Node->getEntry()];
1152 bool Dominated = false;
1153
1154 // Regionentry is always true
1155 if (!PrevNode)
1156 return true;
1157
1158 for (auto [BB, PI] : Preds) {
1159 if (PI.Pred != BoolTrue)
1160 return false;
1161
1162 if (!Dominated && DT->dominates(A: BB, B: PrevNode->getEntry()))
1163 Dominated = true;
1164 }
1165
1166 // TODO: The dominator check is too strict
1167 return Dominated;
1168}
1169
1170/// Take one node from the order vector and wire it up
1171void StructurizeCFG::wireFlow(bool ExitUseAllowed,
1172 BasicBlock *LoopEnd) {
1173 RegionNode *Node = Order.pop_back_val();
1174 Visited.insert(Ptr: Node->getEntry());
1175
1176 if (isPredictableTrue(Node)) {
1177 // Just a linear flow
1178 if (PrevNode) {
1179 changeExit(Node: PrevNode, NewExit: Node->getEntry(), IncludeDominator: true);
1180 }
1181 PrevNode = Node;
1182 } else {
1183 // Insert extra prefix node (or reuse last one)
1184 auto [Flow, DL] = needPrefix(NeedEmpty: false);
1185
1186 // Insert extra postfix node (or use exit instead)
1187 BasicBlock *Entry = Node->getEntry();
1188 BasicBlock *Next = needPostfix(Flow, ExitUseAllowed);
1189
1190 // let it point to entry and next block
1191 BranchInst *Br = BranchInst::Create(IfTrue: Entry, IfFalse: Next, Cond: BoolPoison, InsertBefore: Flow);
1192 Br->setDebugLoc(DL);
1193 Conditions.push_back(Elt: Br);
1194 addPhiValues(From: Flow, To: Entry);
1195 DT->changeImmediateDominator(BB: Entry, NewBB: Flow);
1196
1197 PrevNode = Node;
1198 while (!Order.empty() && !Visited.count(Ptr: LoopEnd) &&
1199 dominatesPredicates(BB: Entry, Node: Order.back())) {
1200 handleLoops(ExitUseAllowed: false, LoopEnd);
1201 }
1202
1203 changeExit(Node: PrevNode, NewExit: Next, IncludeDominator: false);
1204 setPrevNode(Next);
1205 }
1206}
1207
1208void StructurizeCFG::handleLoops(bool ExitUseAllowed,
1209 BasicBlock *LoopEnd) {
1210 RegionNode *Node = Order.back();
1211 BasicBlock *LoopStart = Node->getEntry();
1212
1213 if (!Loops.count(Val: LoopStart)) {
1214 wireFlow(ExitUseAllowed, LoopEnd);
1215 return;
1216 }
1217
1218 if (!isPredictableTrue(Node))
1219 LoopStart = needPrefix(NeedEmpty: true).first;
1220
1221 LoopEnd = Loops[Node->getEntry()];
1222 wireFlow(ExitUseAllowed: false, LoopEnd);
1223 while (!Visited.count(Ptr: LoopEnd)) {
1224 handleLoops(ExitUseAllowed: false, LoopEnd);
1225 }
1226
1227 assert(LoopStart != &LoopStart->getParent()->getEntryBlock());
1228
1229 // Create an extra loop end node
1230 DebugLoc DL;
1231 std::tie(args&: LoopEnd, args&: DL) = needPrefix(NeedEmpty: false);
1232 BasicBlock *Next = needPostfix(Flow: LoopEnd, ExitUseAllowed);
1233 BranchInst *Br = BranchInst::Create(IfTrue: Next, IfFalse: LoopStart, Cond: BoolPoison, InsertBefore: LoopEnd);
1234 Br->setDebugLoc(DL);
1235 LoopConds.push_back(Elt: Br);
1236 addPhiValues(From: LoopEnd, To: LoopStart);
1237 setPrevNode(Next);
1238}
1239
1240/// After this function control flow looks like it should be, but
1241/// branches and PHI nodes only have undefined conditions.
1242void StructurizeCFG::createFlow() {
1243 BasicBlock *Exit = ParentRegion->getExit();
1244 bool EntryDominatesExit = DT->dominates(A: ParentRegion->getEntry(), B: Exit);
1245
1246 AffectedPhis.clear();
1247 DeletedPhis.clear();
1248 AddedPhis.clear();
1249 Conditions.clear();
1250 LoopConds.clear();
1251
1252 PrevNode = nullptr;
1253 Visited.clear();
1254
1255 while (!Order.empty()) {
1256 handleLoops(ExitUseAllowed: EntryDominatesExit, LoopEnd: nullptr);
1257 }
1258
1259 if (PrevNode)
1260 changeExit(Node: PrevNode, NewExit: Exit, IncludeDominator: EntryDominatesExit);
1261 else
1262 assert(EntryDominatesExit);
1263}
1264
1265/// Handle a rare case where the disintegrated nodes instructions
1266/// no longer dominate all their uses. Not sure if this is really necessary
1267void StructurizeCFG::rebuildSSA() {
1268 SSAUpdater Updater;
1269 for (BasicBlock *BB : ParentRegion->blocks())
1270 for (Instruction &I : *BB) {
1271 bool Initialized = false;
1272 // We may modify the use list as we iterate over it, so we use
1273 // make_early_inc_range.
1274 for (Use &U : llvm::make_early_inc_range(Range: I.uses())) {
1275 Instruction *User = cast<Instruction>(Val: U.getUser());
1276 if (User->getParent() == BB) {
1277 continue;
1278 } else if (PHINode *UserPN = dyn_cast<PHINode>(Val: User)) {
1279 if (UserPN->getIncomingBlock(U) == BB)
1280 continue;
1281 }
1282
1283 if (DT->dominates(Def: &I, User))
1284 continue;
1285
1286 if (!Initialized) {
1287 Value *Poison = PoisonValue::get(T: I.getType());
1288 Updater.Initialize(Ty: I.getType(), Name: "");
1289 Updater.AddAvailableValue(BB: &Func->getEntryBlock(), V: Poison);
1290 Updater.AddAvailableValue(BB, V: &I);
1291 Initialized = true;
1292 }
1293 Updater.RewriteUseAfterInsertions(U);
1294 }
1295 }
1296}
1297
1298static bool hasOnlyUniformBranches(Region *R, unsigned UniformMDKindID,
1299 const UniformityInfo &UA) {
1300 // Bool for if all sub-regions are uniform.
1301 bool SubRegionsAreUniform = true;
1302 // Count of how many direct children are conditional.
1303 unsigned ConditionalDirectChildren = 0;
1304
1305 for (auto *E : R->elements()) {
1306 if (!E->isSubRegion()) {
1307 auto Br = dyn_cast<BranchInst>(Val: E->getEntry()->getTerminator());
1308 if (!Br || !Br->isConditional())
1309 continue;
1310
1311 if (!UA.isUniform(I: Br))
1312 return false;
1313
1314 // One of our direct children is conditional.
1315 ConditionalDirectChildren++;
1316
1317 LLVM_DEBUG(dbgs() << "BB: " << Br->getParent()->getName()
1318 << " has uniform terminator\n");
1319 } else {
1320 // Explicitly refuse to treat regions as uniform if they have non-uniform
1321 // subregions. We cannot rely on UniformityAnalysis for branches in
1322 // subregions because those branches may have been removed and re-created,
1323 // so we look for our metadata instead.
1324 //
1325 // Warning: It would be nice to treat regions as uniform based only on
1326 // their direct child basic blocks' terminators, regardless of whether
1327 // subregions are uniform or not. However, this requires a very careful
1328 // look at SIAnnotateControlFlow to make sure nothing breaks there.
1329 for (auto *BB : E->getNodeAs<Region>()->blocks()) {
1330 auto Br = dyn_cast<BranchInst>(Val: BB->getTerminator());
1331 if (!Br || !Br->isConditional())
1332 continue;
1333
1334 if (!Br->getMetadata(KindID: UniformMDKindID)) {
1335 // Early exit if we cannot have relaxed uniform regions.
1336 if (!RelaxedUniformRegions)
1337 return false;
1338
1339 SubRegionsAreUniform = false;
1340 break;
1341 }
1342 }
1343 }
1344 }
1345
1346 // Our region is uniform if:
1347 // 1. All conditional branches that are direct children are uniform (checked
1348 // above).
1349 // 2. And either:
1350 // a. All sub-regions are uniform.
1351 // b. There is one or less conditional branches among the direct children.
1352 return SubRegionsAreUniform || (ConditionalDirectChildren <= 1);
1353}
1354
1355void StructurizeCFG::init(Region *R) {
1356 LLVMContext &Context = R->getEntry()->getContext();
1357
1358 Boolean = Type::getInt1Ty(C&: Context);
1359 BoolTrue = ConstantInt::getTrue(Context);
1360 BoolFalse = ConstantInt::getFalse(Context);
1361 BoolPoison = PoisonValue::get(T: Boolean);
1362
1363 this->UA = nullptr;
1364}
1365
1366bool StructurizeCFG::makeUniformRegion(Region *R, UniformityInfo &UA) {
1367 if (R->isTopLevelRegion())
1368 return false;
1369
1370 this->UA = &UA;
1371
1372 // TODO: We could probably be smarter here with how we handle sub-regions.
1373 // We currently rely on the fact that metadata is set by earlier invocations
1374 // of the pass on sub-regions, and that this metadata doesn't get lost --
1375 // but we shouldn't rely on metadata for correctness!
1376 unsigned UniformMDKindID =
1377 R->getEntry()->getContext().getMDKindID(Name: "structurizecfg.uniform");
1378
1379 if (hasOnlyUniformBranches(R, UniformMDKindID, UA)) {
1380 LLVM_DEBUG(dbgs() << "Skipping region with uniform control flow: " << *R
1381 << '\n');
1382
1383 // Mark all direct child block terminators as having been treated as
1384 // uniform. To account for a possible future in which non-uniform
1385 // sub-regions are treated more cleverly, indirect children are not
1386 // marked as uniform.
1387 MDNode *MD = MDNode::get(Context&: R->getEntry()->getParent()->getContext(), MDs: {});
1388 for (RegionNode *E : R->elements()) {
1389 if (E->isSubRegion())
1390 continue;
1391
1392 if (Instruction *Term = E->getEntry()->getTerminator())
1393 Term->setMetadata(KindID: UniformMDKindID, Node: MD);
1394 }
1395
1396 return true;
1397 }
1398 return false;
1399}
1400
1401/// Run the transformation for each region found
1402bool StructurizeCFG::run(Region *R, DominatorTree *DT,
1403 const TargetTransformInfo *TTI) {
1404 // CallBr and its corresponding direct target blocks are for now ignored by
1405 // this pass. This is not a limitation for the currently intended uses cases
1406 // of callbr in the AMDGPU backend.
1407 // Parent and child regions are not affected by this (current) restriction.
1408 // See `llvm/test/Transforms/StructurizeCFG/callbr.ll` for details.
1409 if (R->isTopLevelRegion() || isa<CallBrInst>(Val: R->getEntry()->getTerminator()))
1410 return false;
1411
1412 this->DT = DT;
1413 this->TTI = TTI;
1414 Func = R->getEntry()->getParent();
1415
1416 ParentRegion = R;
1417
1418 orderNodes();
1419 collectInfos();
1420 createFlow();
1421
1422 SSAUpdaterBulk PhiInserter;
1423 insertConditions(Loops: false, PhiInserter);
1424 insertConditions(Loops: true, PhiInserter);
1425 PhiInserter.RewriteAndOptimizeAllUses(DT&: *DT);
1426
1427 setPhiValues();
1428 simplifyHoistedPhis();
1429 simplifyConditions();
1430 simplifyAffectedPhis();
1431 rebuildSSA();
1432
1433 // Cleanup
1434 Order.clear();
1435 Visited.clear();
1436 DeletedPhis.clear();
1437 AddedPhis.clear();
1438 Predicates.clear();
1439 Conditions.clear();
1440 Loops.clear();
1441 LoopPreds.clear();
1442 LoopConds.clear();
1443 FlowSet.clear();
1444
1445 return true;
1446}
1447
1448Pass *llvm::createStructurizeCFGPass(bool SkipUniformRegions) {
1449 return new StructurizeCFGLegacyPass(SkipUniformRegions);
1450}
1451
1452static void addRegionIntoQueue(Region &R, std::vector<Region *> &Regions) {
1453 Regions.push_back(x: &R);
1454 for (const auto &E : R)
1455 addRegionIntoQueue(R&: *E, Regions);
1456}
1457
1458StructurizeCFGPass::StructurizeCFGPass(bool SkipUniformRegions_)
1459 : SkipUniformRegions(SkipUniformRegions_) {
1460 if (ForceSkipUniformRegions.getNumOccurrences())
1461 SkipUniformRegions = ForceSkipUniformRegions.getValue();
1462}
1463
1464void StructurizeCFGPass::printPipeline(
1465 raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
1466 static_cast<PassInfoMixin<StructurizeCFGPass> *>(this)->printPipeline(
1467 OS, MapClassName2PassName);
1468 if (SkipUniformRegions)
1469 OS << "<skip-uniform-regions>";
1470}
1471
1472PreservedAnalyses StructurizeCFGPass::run(Function &F,
1473 FunctionAnalysisManager &AM) {
1474
1475 bool Changed = false;
1476 DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(IR&: F);
1477 auto &RI = AM.getResult<RegionInfoAnalysis>(IR&: F);
1478 TargetTransformInfo *TTI = &AM.getResult<TargetIRAnalysis>(IR&: F);
1479 UniformityInfo *UI = nullptr;
1480 if (SkipUniformRegions)
1481 UI = &AM.getResult<UniformityInfoAnalysis>(IR&: F);
1482
1483 std::vector<Region *> Regions;
1484 addRegionIntoQueue(R&: *RI.getTopLevelRegion(), Regions);
1485 while (!Regions.empty()) {
1486 Region *R = Regions.back();
1487 Regions.pop_back();
1488
1489 StructurizeCFG SCFG;
1490 SCFG.init(R);
1491
1492 if (SkipUniformRegions && SCFG.makeUniformRegion(R, UA&: *UI)) {
1493 Changed = true; // May have added metadata.
1494 continue;
1495 }
1496
1497 Changed |= SCFG.run(R, DT, TTI);
1498 }
1499 if (!Changed)
1500 return PreservedAnalyses::all();
1501 PreservedAnalyses PA;
1502 PA.preserve<DominatorTreeAnalysis>();
1503 return PA;
1504}
1505