1//===- DFAJumpThreading.cpp - Threads a switch statement inside a loop ----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Transform each threading path to effectively jump thread the DFA. For
10// example, the CFG below could be transformed as follows, where the cloned
11// blocks unconditionally branch to the next correct case based on what is
12// identified in the analysis.
13//
14// sw.bb sw.bb
15// / | \ / | \
16// case1 case2 case3 case1 case2 case3
17// \ | / | | |
18// determinator det.2 det.3 det.1
19// br sw.bb / | \
20// sw.bb.2 sw.bb.3 sw.bb.1
21// br case2 br case3 br case1ยง
22//
23// Definitions and Terminology:
24//
25// * Threading path:
26// a list of basic blocks, the exit state, and the block that determines
27// the next state, for which the following notation will be used:
28// < path of BBs that form a cycle > [ state, determinator ]
29//
30// * Predictable switch:
31// The switch variable is always a known constant so that all conditional
32// jumps based on switch variable can be converted to unconditional jump.
33//
34// * Determinator:
35// The basic block that determines the next state of the DFA.
36//
37// Representing the optimization in C-like pseudocode: the code pattern on the
38// left could functionally be transformed to the right pattern if the switch
39// condition is predictable.
40//
41// X = A goto A
42// for (...) A:
43// switch (X) ...
44// case A goto B
45// X = B B:
46// case B ...
47// X = C goto C
48//
49// The pass first checks that switch variable X is decided by the control flow
50// path taken in the loop; for example, in case B, the next value of X is
51// decided to be C. It then enumerates through all paths in the loop and labels
52// the basic blocks where the next state is decided.
53//
54// Using this information it creates new paths that unconditionally branch to
55// the next case. This involves cloning code, so it only gets triggered if the
56// amount of code duplicated is below a threshold.
57//
58//===----------------------------------------------------------------------===//
59
60#include "llvm/Transforms/Scalar/DFAJumpThreading.h"
61#include "llvm/ADT/APInt.h"
62#include "llvm/ADT/DenseMap.h"
63#include "llvm/ADT/SmallSet.h"
64#include "llvm/ADT/Statistic.h"
65#include "llvm/Analysis/AssumptionCache.h"
66#include "llvm/Analysis/CodeMetrics.h"
67#include "llvm/Analysis/DomTreeUpdater.h"
68#include "llvm/Analysis/LoopInfo.h"
69#include "llvm/Analysis/OptimizationRemarkEmitter.h"
70#include "llvm/Analysis/TargetTransformInfo.h"
71#include "llvm/IR/CFG.h"
72#include "llvm/IR/Constants.h"
73#include "llvm/IR/IntrinsicInst.h"
74#include "llvm/Support/CommandLine.h"
75#include "llvm/Support/Debug.h"
76#include "llvm/Transforms/Utils/Cloning.h"
77#include "llvm/Transforms/Utils/SSAUpdaterBulk.h"
78#include "llvm/Transforms/Utils/ValueMapper.h"
79#include <deque>
80
81#ifdef EXPENSIVE_CHECKS
82#include "llvm/IR/Verifier.h"
83#endif
84
85using namespace llvm;
86
87#define DEBUG_TYPE "dfa-jump-threading"
88
89STATISTIC(NumTransforms, "Number of transformations done");
90STATISTIC(NumCloned, "Number of blocks cloned");
91STATISTIC(NumPaths, "Number of individual paths threaded");
92
93static cl::opt<bool>
94 ClViewCfgBefore("dfa-jump-view-cfg-before",
95 cl::desc("View the CFG before DFA Jump Threading"),
96 cl::Hidden, cl::init(Val: false));
97
98static cl::opt<bool> EarlyExitHeuristic(
99 "dfa-early-exit-heuristic",
100 cl::desc("Exit early if an unpredictable value come from the same loop"),
101 cl::Hidden, cl::init(Val: true));
102
103static cl::opt<unsigned> MaxPathLength(
104 "dfa-max-path-length",
105 cl::desc("Max number of blocks searched to find a threading path"),
106 cl::Hidden, cl::init(Val: 20));
107
108static cl::opt<unsigned> MaxNumVisitiedPaths(
109 "dfa-max-num-visited-paths",
110 cl::desc(
111 "Max number of blocks visited while enumerating paths around a switch"),
112 cl::Hidden, cl::init(Val: 2500));
113
114static cl::opt<unsigned>
115 MaxNumPaths("dfa-max-num-paths",
116 cl::desc("Max number of paths enumerated around a switch"),
117 cl::Hidden, cl::init(Val: 200));
118
119static cl::opt<unsigned>
120 CostThreshold("dfa-cost-threshold",
121 cl::desc("Maximum cost accepted for the transformation"),
122 cl::Hidden, cl::init(Val: 50));
123
124namespace {
125
126class SelectInstToUnfold {
127 SelectInst *SI;
128 PHINode *SIUse;
129
130public:
131 SelectInstToUnfold(SelectInst *SI, PHINode *SIUse) : SI(SI), SIUse(SIUse) {}
132
133 SelectInst *getInst() { return SI; }
134 PHINode *getUse() { return SIUse; }
135
136 explicit operator bool() const { return SI && SIUse; }
137};
138
139void unfold(DomTreeUpdater *DTU, LoopInfo *LI, SelectInstToUnfold SIToUnfold,
140 std::vector<SelectInstToUnfold> *NewSIsToUnfold,
141 std::vector<BasicBlock *> *NewBBs);
142
143class DFAJumpThreading {
144public:
145 DFAJumpThreading(AssumptionCache *AC, DominatorTree *DT, LoopInfo *LI,
146 TargetTransformInfo *TTI, OptimizationRemarkEmitter *ORE)
147 : AC(AC), DT(DT), LI(LI), TTI(TTI), ORE(ORE) {}
148
149 bool run(Function &F);
150 bool LoopInfoBroken;
151
152private:
153 void
154 unfoldSelectInstrs(DominatorTree *DT,
155 const SmallVector<SelectInstToUnfold, 4> &SelectInsts) {
156 DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
157 SmallVector<SelectInstToUnfold, 4> Stack(SelectInsts);
158
159 while (!Stack.empty()) {
160 SelectInstToUnfold SIToUnfold = Stack.pop_back_val();
161
162 std::vector<SelectInstToUnfold> NewSIsToUnfold;
163 std::vector<BasicBlock *> NewBBs;
164 unfold(DTU: &DTU, LI, SIToUnfold, NewSIsToUnfold: &NewSIsToUnfold, NewBBs: &NewBBs);
165
166 // Put newly discovered select instructions into the work list.
167 llvm::append_range(C&: Stack, R&: NewSIsToUnfold);
168 }
169 }
170
171 AssumptionCache *AC;
172 DominatorTree *DT;
173 LoopInfo *LI;
174 TargetTransformInfo *TTI;
175 OptimizationRemarkEmitter *ORE;
176};
177
178} // end anonymous namespace
179
180namespace {
181
182/// Unfold the select instruction held in \p SIToUnfold by replacing it with
183/// control flow.
184///
185/// Put newly discovered select instructions into \p NewSIsToUnfold. Put newly
186/// created basic blocks into \p NewBBs.
187///
188/// TODO: merge it with CodeGenPrepare::optimizeSelectInst() if possible.
189void unfold(DomTreeUpdater *DTU, LoopInfo *LI, SelectInstToUnfold SIToUnfold,
190 std::vector<SelectInstToUnfold> *NewSIsToUnfold,
191 std::vector<BasicBlock *> *NewBBs) {
192 SelectInst *SI = SIToUnfold.getInst();
193 PHINode *SIUse = SIToUnfold.getUse();
194 BasicBlock *StartBlock = SI->getParent();
195 BranchInst *StartBlockTerm =
196 dyn_cast<BranchInst>(Val: StartBlock->getTerminator());
197
198 assert(StartBlockTerm);
199 assert(SI->hasOneUse());
200
201 if (StartBlockTerm->isUnconditional()) {
202 BasicBlock *EndBlock = StartBlock->getUniqueSuccessor();
203 // Arbitrarily choose the 'false' side for a new input value to the PHI.
204 BasicBlock *NewBlock = BasicBlock::Create(
205 Context&: SI->getContext(), Name: Twine(SI->getName(), ".si.unfold.false"),
206 Parent: EndBlock->getParent(), InsertBefore: EndBlock);
207 NewBBs->push_back(x: NewBlock);
208 BranchInst::Create(IfTrue: EndBlock, InsertBefore: NewBlock);
209 DTU->applyUpdates(Updates: {{DominatorTree::Insert, NewBlock, EndBlock}});
210
211 // StartBlock
212 // | \
213 // | NewBlock
214 // | /
215 // EndBlock
216 Value *SIOp1 = SI->getTrueValue();
217 Value *SIOp2 = SI->getFalseValue();
218
219 PHINode *NewPhi = PHINode::Create(Ty: SIUse->getType(), NumReservedValues: 1,
220 NameStr: Twine(SIOp2->getName(), ".si.unfold.phi"),
221 InsertBefore: NewBlock->getFirstInsertionPt());
222 NewPhi->addIncoming(V: SIOp2, BB: StartBlock);
223
224 // Update any other PHI nodes in EndBlock.
225 for (PHINode &Phi : EndBlock->phis()) {
226 if (SIUse == &Phi)
227 continue;
228 Phi.addIncoming(V: Phi.getIncomingValueForBlock(BB: StartBlock), BB: NewBlock);
229 }
230
231 // Update the phi node of SI, which is its only use.
232 if (EndBlock == SIUse->getParent()) {
233 SIUse->addIncoming(V: NewPhi, BB: NewBlock);
234 SIUse->replaceUsesOfWith(From: SI, To: SIOp1);
235 } else {
236 PHINode *EndPhi = PHINode::Create(Ty: SIUse->getType(), NumReservedValues: pred_size(BB: EndBlock),
237 NameStr: Twine(SI->getName(), ".si.unfold.phi"),
238 InsertBefore: EndBlock->getFirstInsertionPt());
239 for (BasicBlock *Pred : predecessors(BB: EndBlock)) {
240 if (Pred != StartBlock && Pred != NewBlock)
241 EndPhi->addIncoming(V: EndPhi, BB: Pred);
242 }
243
244 EndPhi->addIncoming(V: SIOp1, BB: StartBlock);
245 EndPhi->addIncoming(V: NewPhi, BB: NewBlock);
246 SIUse->replaceUsesOfWith(From: SI, To: EndPhi);
247 SIUse = EndPhi;
248 }
249
250 if (auto *OpSi = dyn_cast<SelectInst>(Val: SIOp1))
251 NewSIsToUnfold->push_back(x: SelectInstToUnfold(OpSi, SIUse));
252 if (auto *OpSi = dyn_cast<SelectInst>(Val: SIOp2))
253 NewSIsToUnfold->push_back(x: SelectInstToUnfold(OpSi, NewPhi));
254
255 // Insert the real conditional branch based on the original condition.
256 StartBlockTerm->eraseFromParent();
257 BranchInst::Create(IfTrue: EndBlock, IfFalse: NewBlock, Cond: SI->getCondition(), InsertBefore: StartBlock);
258 DTU->applyUpdates(Updates: {{DominatorTree::Insert, StartBlock, EndBlock},
259 {DominatorTree::Insert, StartBlock, NewBlock}});
260 } else {
261 BasicBlock *EndBlock = SIUse->getParent();
262 BasicBlock *NewBlockT = BasicBlock::Create(
263 Context&: SI->getContext(), Name: Twine(SI->getName(), ".si.unfold.true"),
264 Parent: EndBlock->getParent(), InsertBefore: EndBlock);
265 BasicBlock *NewBlockF = BasicBlock::Create(
266 Context&: SI->getContext(), Name: Twine(SI->getName(), ".si.unfold.false"),
267 Parent: EndBlock->getParent(), InsertBefore: EndBlock);
268
269 NewBBs->push_back(x: NewBlockT);
270 NewBBs->push_back(x: NewBlockF);
271
272 // Def only has one use in EndBlock.
273 // Before transformation:
274 // StartBlock(Def)
275 // | \
276 // EndBlock OtherBlock
277 // (Use)
278 //
279 // After transformation:
280 // StartBlock(Def)
281 // | \
282 // | OtherBlock
283 // NewBlockT
284 // | \
285 // | NewBlockF
286 // | /
287 // | /
288 // EndBlock
289 // (Use)
290 BranchInst::Create(IfTrue: EndBlock, InsertBefore: NewBlockF);
291 // Insert the real conditional branch based on the original condition.
292 BranchInst::Create(IfTrue: EndBlock, IfFalse: NewBlockF, Cond: SI->getCondition(), InsertBefore: NewBlockT);
293 DTU->applyUpdates(Updates: {{DominatorTree::Insert, NewBlockT, NewBlockF},
294 {DominatorTree::Insert, NewBlockT, EndBlock},
295 {DominatorTree::Insert, NewBlockF, EndBlock}});
296
297 Value *TrueVal = SI->getTrueValue();
298 Value *FalseVal = SI->getFalseValue();
299
300 PHINode *NewPhiT = PHINode::Create(
301 Ty: SIUse->getType(), NumReservedValues: 1, NameStr: Twine(TrueVal->getName(), ".si.unfold.phi"),
302 InsertBefore: NewBlockT->getFirstInsertionPt());
303 PHINode *NewPhiF = PHINode::Create(
304 Ty: SIUse->getType(), NumReservedValues: 1, NameStr: Twine(FalseVal->getName(), ".si.unfold.phi"),
305 InsertBefore: NewBlockF->getFirstInsertionPt());
306 NewPhiT->addIncoming(V: TrueVal, BB: StartBlock);
307 NewPhiF->addIncoming(V: FalseVal, BB: NewBlockT);
308
309 if (auto *TrueSI = dyn_cast<SelectInst>(Val: TrueVal))
310 NewSIsToUnfold->push_back(x: SelectInstToUnfold(TrueSI, NewPhiT));
311 if (auto *FalseSi = dyn_cast<SelectInst>(Val: FalseVal))
312 NewSIsToUnfold->push_back(x: SelectInstToUnfold(FalseSi, NewPhiF));
313
314 SIUse->addIncoming(V: NewPhiT, BB: NewBlockT);
315 SIUse->addIncoming(V: NewPhiF, BB: NewBlockF);
316 SIUse->removeIncomingValue(BB: StartBlock);
317
318 // Update any other PHI nodes in EndBlock.
319 for (PHINode &Phi : EndBlock->phis()) {
320 if (SIUse == &Phi)
321 continue;
322 Phi.addIncoming(V: Phi.getIncomingValueForBlock(BB: StartBlock), BB: NewBlockT);
323 Phi.addIncoming(V: Phi.getIncomingValueForBlock(BB: StartBlock), BB: NewBlockF);
324 Phi.removeIncomingValue(BB: StartBlock);
325 }
326
327 // Update the appropriate successor of the start block to point to the new
328 // unfolded block.
329 unsigned SuccNum = StartBlockTerm->getSuccessor(i: 1) == EndBlock ? 1 : 0;
330 StartBlockTerm->setSuccessor(idx: SuccNum, NewSucc: NewBlockT);
331 DTU->applyUpdates(Updates: {{DominatorTree::Delete, StartBlock, EndBlock},
332 {DominatorTree::Insert, StartBlock, NewBlockT}});
333 }
334
335 // Preserve loop info
336 if (Loop *L = LI->getLoopFor(BB: SI->getParent())) {
337 for (BasicBlock *NewBB : *NewBBs)
338 L->addBasicBlockToLoop(NewBB, LI&: *LI);
339 }
340
341 // The select is now dead.
342 assert(SI->use_empty() && "Select must be dead now");
343 SI->eraseFromParent();
344}
345
346struct ClonedBlock {
347 BasicBlock *BB;
348 APInt State; ///< \p State corresponds to the next value of a switch stmnt.
349};
350
351typedef std::deque<BasicBlock *> PathType;
352typedef std::vector<PathType> PathsType;
353typedef SmallPtrSet<const BasicBlock *, 8> VisitedBlocks;
354typedef std::vector<ClonedBlock> CloneList;
355
356// This data structure keeps track of all blocks that have been cloned. If two
357// different ThreadingPaths clone the same block for a certain state it should
358// be reused, and it can be looked up in this map.
359typedef DenseMap<BasicBlock *, CloneList> DuplicateBlockMap;
360
361// This map keeps track of all the new definitions for an instruction. This
362// information is needed when restoring SSA form after cloning blocks.
363typedef MapVector<Instruction *, std::vector<Instruction *>> DefMap;
364
365inline raw_ostream &operator<<(raw_ostream &OS, const PathType &Path) {
366 OS << "< ";
367 for (const BasicBlock *BB : Path) {
368 std::string BBName;
369 if (BB->hasName())
370 raw_string_ostream(BBName) << BB->getName();
371 else
372 raw_string_ostream(BBName) << BB;
373 OS << BBName << " ";
374 }
375 OS << ">";
376 return OS;
377}
378
379/// ThreadingPath is a path in the control flow of a loop that can be threaded
380/// by cloning necessary basic blocks and replacing conditional branches with
381/// unconditional ones. A threading path includes a list of basic blocks, the
382/// exit state, and the block that determines the next state.
383struct ThreadingPath {
384 /// Exit value is DFA's exit state for the given path.
385 APInt getExitValue() const { return ExitVal; }
386 void setExitValue(const ConstantInt *V) {
387 ExitVal = V->getValue();
388 IsExitValSet = true;
389 }
390 bool isExitValueSet() const { return IsExitValSet; }
391
392 /// Determinator is the basic block that determines the next state of the DFA.
393 const BasicBlock *getDeterminatorBB() const { return DBB; }
394 void setDeterminator(const BasicBlock *BB) { DBB = BB; }
395
396 /// Path is a list of basic blocks.
397 const PathType &getPath() const { return Path; }
398 void setPath(const PathType &NewPath) { Path = NewPath; }
399 void push_back(BasicBlock *BB) { Path.push_back(x: BB); }
400 void push_front(BasicBlock *BB) { Path.push_front(x: BB); }
401 void appendExcludingFirst(const PathType &OtherPath) {
402 llvm::append_range(C&: Path, R: llvm::drop_begin(RangeOrContainer: OtherPath));
403 }
404
405 void print(raw_ostream &OS) const {
406 OS << Path << " [ " << ExitVal << ", " << DBB->getName() << " ]";
407 }
408
409private:
410 PathType Path;
411 APInt ExitVal;
412 const BasicBlock *DBB = nullptr;
413 bool IsExitValSet = false;
414};
415
416#ifndef NDEBUG
417inline raw_ostream &operator<<(raw_ostream &OS, const ThreadingPath &TPath) {
418 TPath.print(OS);
419 return OS;
420}
421#endif
422
423struct MainSwitch {
424 MainSwitch(SwitchInst *SI, LoopInfo *LI, OptimizationRemarkEmitter *ORE)
425 : LI(LI) {
426 if (isCandidate(SI)) {
427 Instr = SI;
428 } else {
429 ORE->emit(RemarkBuilder: [&]() {
430 return OptimizationRemarkMissed(DEBUG_TYPE, "SwitchNotPredictable", SI)
431 << "Switch instruction is not predictable.";
432 });
433 }
434 }
435
436 virtual ~MainSwitch() = default;
437
438 SwitchInst *getInstr() const { return Instr; }
439 const SmallVector<SelectInstToUnfold, 4> getSelectInsts() {
440 return SelectInsts;
441 }
442
443private:
444 /// Do a use-def chain traversal starting from the switch condition to see if
445 /// \p SI is a potential condidate.
446 ///
447 /// Also, collect select instructions to unfold.
448 bool isCandidate(const SwitchInst *SI) {
449 std::deque<std::pair<Value *, BasicBlock *>> Q;
450 SmallSet<Value *, 16> SeenValues;
451 SelectInsts.clear();
452
453 Value *SICond = SI->getCondition();
454 LLVM_DEBUG(dbgs() << "\tSICond: " << *SICond << "\n");
455 if (!isa<PHINode>(Val: SICond))
456 return false;
457
458 // The switch must be in a loop.
459 const Loop *L = LI->getLoopFor(BB: SI->getParent());
460 if (!L)
461 return false;
462
463 addToQueue(Val: SICond, BB: nullptr, Q, SeenValues);
464
465 while (!Q.empty()) {
466 Value *Current = Q.front().first;
467 BasicBlock *CurrentIncomingBB = Q.front().second;
468 Q.pop_front();
469
470 if (auto *Phi = dyn_cast<PHINode>(Val: Current)) {
471 for (BasicBlock *IncomingBB : Phi->blocks()) {
472 Value *Incoming = Phi->getIncomingValueForBlock(BB: IncomingBB);
473 addToQueue(Val: Incoming, BB: IncomingBB, Q, SeenValues);
474 }
475 LLVM_DEBUG(dbgs() << "\tphi: " << *Phi << "\n");
476 } else if (SelectInst *SelI = dyn_cast<SelectInst>(Val: Current)) {
477 if (!isValidSelectInst(SI: SelI))
478 return false;
479 addToQueue(Val: SelI->getTrueValue(), BB: CurrentIncomingBB, Q, SeenValues);
480 addToQueue(Val: SelI->getFalseValue(), BB: CurrentIncomingBB, Q, SeenValues);
481 LLVM_DEBUG(dbgs() << "\tselect: " << *SelI << "\n");
482 if (auto *SelIUse = dyn_cast<PHINode>(Val: SelI->user_back()))
483 SelectInsts.push_back(Elt: SelectInstToUnfold(SelI, SelIUse));
484 } else if (isa<Constant>(Val: Current)) {
485 LLVM_DEBUG(dbgs() << "\tconst: " << *Current << "\n");
486 continue;
487 } else {
488 LLVM_DEBUG(dbgs() << "\tother: " << *Current << "\n");
489 // Allow unpredictable values. The hope is that those will be the
490 // initial switch values that can be ignored (they will hit the
491 // unthreaded switch) but this assumption will get checked later after
492 // paths have been enumerated (in function getStateDefMap).
493
494 // If the unpredictable value comes from the same inner loop it is
495 // likely that it will also be on the enumerated paths, causing us to
496 // exit after we have enumerated all the paths. This heuristic save
497 // compile time because a search for all the paths can become expensive.
498 if (EarlyExitHeuristic &&
499 L->contains(L: LI->getLoopFor(BB: CurrentIncomingBB))) {
500 LLVM_DEBUG(dbgs()
501 << "\tExiting early due to unpredictability heuristic.\n");
502 return false;
503 }
504
505 continue;
506 }
507 }
508
509 return true;
510 }
511
512 void addToQueue(Value *Val, BasicBlock *BB,
513 std::deque<std::pair<Value *, BasicBlock *>> &Q,
514 SmallSet<Value *, 16> &SeenValues) {
515 if (SeenValues.insert(Ptr: Val).second)
516 Q.push_back(x: {Val, BB});
517 }
518
519 bool isValidSelectInst(SelectInst *SI) {
520 if (!SI->hasOneUse())
521 return false;
522
523 Instruction *SIUse = dyn_cast<Instruction>(Val: SI->user_back());
524 // The use of the select inst should be either a phi or another select.
525 if (!SIUse && !(isa<PHINode>(Val: SIUse) || isa<SelectInst>(Val: SIUse)))
526 return false;
527
528 BasicBlock *SIBB = SI->getParent();
529
530 // Currently, we can only expand select instructions in basic blocks with
531 // one successor.
532 BranchInst *SITerm = dyn_cast<BranchInst>(Val: SIBB->getTerminator());
533 if (!SITerm || !SITerm->isUnconditional())
534 return false;
535
536 // Only fold the select coming from directly where it is defined.
537 PHINode *PHIUser = dyn_cast<PHINode>(Val: SIUse);
538 if (PHIUser && PHIUser->getIncomingBlock(U: *SI->use_begin()) != SIBB)
539 return false;
540
541 // If select will not be sunk during unfolding, and it is in the same basic
542 // block as another state defining select, then cannot unfold both.
543 for (SelectInstToUnfold SIToUnfold : SelectInsts) {
544 SelectInst *PrevSI = SIToUnfold.getInst();
545 if (PrevSI->getTrueValue() != SI && PrevSI->getFalseValue() != SI &&
546 PrevSI->getParent() == SI->getParent())
547 return false;
548 }
549
550 return true;
551 }
552
553 LoopInfo *LI;
554 SwitchInst *Instr = nullptr;
555 SmallVector<SelectInstToUnfold, 4> SelectInsts;
556};
557
558struct AllSwitchPaths {
559 AllSwitchPaths(const MainSwitch *MSwitch, OptimizationRemarkEmitter *ORE,
560 LoopInfo *LI, Loop *L)
561 : Switch(MSwitch->getInstr()), SwitchBlock(Switch->getParent()), ORE(ORE),
562 LI(LI), SwitchOuterLoop(L) {}
563
564 std::vector<ThreadingPath> &getThreadingPaths() { return TPaths; }
565 unsigned getNumThreadingPaths() { return TPaths.size(); }
566 SwitchInst *getSwitchInst() { return Switch; }
567 BasicBlock *getSwitchBlock() { return SwitchBlock; }
568
569 void run() {
570 StateDefMap StateDef = getStateDefMap();
571 if (StateDef.empty()) {
572 ORE->emit(RemarkBuilder: [&]() {
573 return OptimizationRemarkMissed(DEBUG_TYPE, "SwitchNotPredictable",
574 Switch)
575 << "Switch instruction is not predictable.";
576 });
577 return;
578 }
579
580 auto *SwitchPhi = cast<PHINode>(Val: Switch->getOperand(i_nocapture: 0));
581 auto *SwitchPhiDefBB = SwitchPhi->getParent();
582 VisitedBlocks VB;
583 // Get paths from the determinator BBs to SwitchPhiDefBB
584 std::vector<ThreadingPath> PathsToPhiDef =
585 getPathsFromStateDefMap(StateDef, Phi: SwitchPhi, VB);
586 if (SwitchPhiDefBB == SwitchBlock) {
587 TPaths = std::move(PathsToPhiDef);
588 return;
589 }
590
591 // Find and append paths from SwitchPhiDefBB to SwitchBlock.
592 PathsType PathsToSwitchBB =
593 paths(BB: SwitchPhiDefBB, ToBB: SwitchBlock, Visited&: VB, /* PathDepth = */ 1);
594 if (PathsToSwitchBB.empty())
595 return;
596
597 std::vector<ThreadingPath> TempList;
598 for (const ThreadingPath &Path : PathsToPhiDef) {
599 for (const PathType &PathToSw : PathsToSwitchBB) {
600 ThreadingPath PathCopy(Path);
601 PathCopy.appendExcludingFirst(OtherPath: PathToSw);
602 TempList.push_back(x: PathCopy);
603 }
604 }
605 TPaths = std::move(TempList);
606 }
607
608private:
609 // Value: an instruction that defines a switch state;
610 // Key: the parent basic block of that instruction.
611 typedef DenseMap<const BasicBlock *, const PHINode *> StateDefMap;
612 std::vector<ThreadingPath> getPathsFromStateDefMap(StateDefMap &StateDef,
613 PHINode *Phi,
614 VisitedBlocks &VB) {
615 std::vector<ThreadingPath> Res;
616 auto *PhiBB = Phi->getParent();
617 VB.insert(Ptr: PhiBB);
618
619 VisitedBlocks UniqueBlocks;
620 for (auto *IncomingBB : Phi->blocks()) {
621 if (!UniqueBlocks.insert(Ptr: IncomingBB).second)
622 continue;
623 if (!SwitchOuterLoop->contains(BB: IncomingBB))
624 continue;
625
626 Value *IncomingValue = Phi->getIncomingValueForBlock(BB: IncomingBB);
627 // We found the determinator. This is the start of our path.
628 if (auto *C = dyn_cast<ConstantInt>(Val: IncomingValue)) {
629 // SwitchBlock is the determinator, unsupported unless its also the def.
630 if (PhiBB == SwitchBlock &&
631 SwitchBlock != cast<PHINode>(Val: Switch->getOperand(i_nocapture: 0))->getParent())
632 continue;
633 ThreadingPath NewPath;
634 NewPath.setDeterminator(PhiBB);
635 NewPath.setExitValue(C);
636 // Don't add SwitchBlock at the start, this is handled later.
637 if (IncomingBB != SwitchBlock)
638 NewPath.push_back(BB: IncomingBB);
639 NewPath.push_back(BB: PhiBB);
640 Res.push_back(x: NewPath);
641 continue;
642 }
643 // Don't get into a cycle.
644 if (VB.contains(Ptr: IncomingBB) || IncomingBB == SwitchBlock)
645 continue;
646 // Recurse up the PHI chain.
647 auto *IncomingPhi = dyn_cast<PHINode>(Val: IncomingValue);
648 if (!IncomingPhi)
649 continue;
650 auto *IncomingPhiDefBB = IncomingPhi->getParent();
651 if (!StateDef.contains(Val: IncomingPhiDefBB))
652 continue;
653
654 // Direct predecessor, just add to the path.
655 if (IncomingPhiDefBB == IncomingBB) {
656 std::vector<ThreadingPath> PredPaths =
657 getPathsFromStateDefMap(StateDef, Phi: IncomingPhi, VB);
658 for (ThreadingPath &Path : PredPaths) {
659 Path.push_back(BB: PhiBB);
660 Res.push_back(x: std::move(Path));
661 }
662 continue;
663 }
664 // Not a direct predecessor, find intermediate paths to append to the
665 // existing path.
666 if (VB.contains(Ptr: IncomingPhiDefBB))
667 continue;
668
669 PathsType IntermediatePaths;
670 IntermediatePaths =
671 paths(BB: IncomingPhiDefBB, ToBB: IncomingBB, Visited&: VB, /* PathDepth = */ 1);
672 if (IntermediatePaths.empty())
673 continue;
674
675 std::vector<ThreadingPath> PredPaths =
676 getPathsFromStateDefMap(StateDef, Phi: IncomingPhi, VB);
677 for (const ThreadingPath &Path : PredPaths) {
678 for (const PathType &IPath : IntermediatePaths) {
679 ThreadingPath NewPath(Path);
680 NewPath.appendExcludingFirst(OtherPath: IPath);
681 NewPath.push_back(BB: PhiBB);
682 Res.push_back(x: NewPath);
683 }
684 }
685 }
686 VB.erase(Ptr: PhiBB);
687 return Res;
688 }
689
690 PathsType paths(BasicBlock *BB, BasicBlock *ToBB, VisitedBlocks &Visited,
691 unsigned PathDepth) {
692 PathsType Res;
693
694 // Stop exploring paths after visiting MaxPathLength blocks
695 if (PathDepth > MaxPathLength) {
696 ORE->emit(RemarkBuilder: [&]() {
697 return OptimizationRemarkAnalysis(DEBUG_TYPE, "MaxPathLengthReached",
698 Switch)
699 << "Exploration stopped after visiting MaxPathLength="
700 << ore::NV("MaxPathLength", MaxPathLength) << " blocks.";
701 });
702 return Res;
703 }
704
705 Visited.insert(Ptr: BB);
706 if (++NumVisited > MaxNumVisitiedPaths)
707 return Res;
708
709 // Stop if we have reached the BB out of loop, since its successors have no
710 // impact on the DFA.
711 if (!SwitchOuterLoop->contains(BB))
712 return Res;
713
714 // Some blocks have multiple edges to the same successor, and this set
715 // is used to prevent a duplicate path from being generated
716 SmallSet<BasicBlock *, 4> Successors;
717 for (BasicBlock *Succ : successors(BB)) {
718 if (!Successors.insert(Ptr: Succ).second)
719 continue;
720
721 // Found a cycle through the final block.
722 if (Succ == ToBB) {
723 Res.push_back(x: {BB, ToBB});
724 continue;
725 }
726
727 // We have encountered a cycle, do not get caught in it
728 if (Visited.contains(Ptr: Succ))
729 continue;
730
731 auto *CurrLoop = LI->getLoopFor(BB);
732 // Unlikely to be beneficial.
733 if (Succ == CurrLoop->getHeader())
734 continue;
735 // Skip for now, revisit this condition later to see the impact on
736 // coverage and compile time.
737 if (LI->getLoopFor(BB: Succ) != CurrLoop)
738 continue;
739
740 PathsType SuccPaths = paths(BB: Succ, ToBB, Visited, PathDepth: PathDepth + 1);
741 for (PathType &Path : SuccPaths) {
742 Path.push_front(x: BB);
743 Res.push_back(x: Path);
744 if (Res.size() >= MaxNumPaths) {
745 return Res;
746 }
747 }
748 }
749 // This block could now be visited again from a different predecessor. Note
750 // that this will result in exponential runtime. Subpaths could possibly be
751 // cached but it takes a lot of memory to store them.
752 Visited.erase(Ptr: BB);
753 return Res;
754 }
755
756 /// Walk the use-def chain and collect all the state-defining blocks and the
757 /// PHI nodes in those blocks that define the state.
758 StateDefMap getStateDefMap() const {
759 StateDefMap Res;
760 PHINode *FirstDef = dyn_cast<PHINode>(Val: Switch->getOperand(i_nocapture: 0));
761 assert(FirstDef && "The first definition must be a phi.");
762
763 SmallVector<PHINode *, 8> Stack;
764 Stack.push_back(Elt: FirstDef);
765 SmallSet<Value *, 16> SeenValues;
766
767 while (!Stack.empty()) {
768 PHINode *CurPhi = Stack.pop_back_val();
769
770 Res[CurPhi->getParent()] = CurPhi;
771 SeenValues.insert(Ptr: CurPhi);
772
773 for (BasicBlock *IncomingBB : CurPhi->blocks()) {
774 PHINode *IncomingPhi =
775 dyn_cast<PHINode>(Val: CurPhi->getIncomingValueForBlock(BB: IncomingBB));
776 if (!IncomingPhi)
777 continue;
778 bool IsOutsideLoops = !SwitchOuterLoop->contains(BB: IncomingBB);
779 if (SeenValues.contains(Ptr: IncomingPhi) || IsOutsideLoops)
780 continue;
781
782 Stack.push_back(Elt: IncomingPhi);
783 }
784 }
785
786 return Res;
787 }
788
789 unsigned NumVisited = 0;
790 SwitchInst *Switch;
791 BasicBlock *SwitchBlock;
792 OptimizationRemarkEmitter *ORE;
793 std::vector<ThreadingPath> TPaths;
794 LoopInfo *LI;
795 Loop *SwitchOuterLoop;
796};
797
798struct TransformDFA {
799 TransformDFA(AllSwitchPaths *SwitchPaths, DominatorTree *DT,
800 AssumptionCache *AC, TargetTransformInfo *TTI,
801 OptimizationRemarkEmitter *ORE,
802 SmallPtrSet<const Value *, 32> EphValues)
803 : SwitchPaths(SwitchPaths), DT(DT), AC(AC), TTI(TTI), ORE(ORE),
804 EphValues(EphValues) {}
805
806 void run() {
807 if (isLegalAndProfitableToTransform()) {
808 createAllExitPaths();
809 NumTransforms++;
810 }
811 }
812
813private:
814 /// This function performs both a legality check and profitability check at
815 /// the same time since it is convenient to do so. It iterates through all
816 /// blocks that will be cloned, and keeps track of the duplication cost. It
817 /// also returns false if it is illegal to clone some required block.
818 bool isLegalAndProfitableToTransform() {
819 CodeMetrics Metrics;
820 SwitchInst *Switch = SwitchPaths->getSwitchInst();
821
822 // Don't thread switch without multiple successors.
823 if (Switch->getNumSuccessors() <= 1)
824 return false;
825
826 // Note that DuplicateBlockMap is not being used as intended here. It is
827 // just being used to ensure (BB, State) pairs are only counted once.
828 DuplicateBlockMap DuplicateMap;
829
830 for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) {
831 PathType PathBBs = TPath.getPath();
832 APInt NextState = TPath.getExitValue();
833 const BasicBlock *Determinator = TPath.getDeterminatorBB();
834
835 // Update Metrics for the Switch block, this is always cloned
836 BasicBlock *BB = SwitchPaths->getSwitchBlock();
837 BasicBlock *VisitedBB = getClonedBB(BB, NextState, DuplicateMap);
838 if (!VisitedBB) {
839 Metrics.analyzeBasicBlock(BB, TTI: *TTI, EphValues);
840 DuplicateMap[BB].push_back(x: {.BB: BB, .State: NextState});
841 }
842
843 // If the Switch block is the Determinator, then we can continue since
844 // this is the only block that is cloned and we already counted for it.
845 if (PathBBs.front() == Determinator)
846 continue;
847
848 // Otherwise update Metrics for all blocks that will be cloned. If any
849 // block is already cloned and would be reused, don't double count it.
850 auto DetIt = llvm::find(Range&: PathBBs, Val: Determinator);
851 for (auto BBIt = DetIt; BBIt != PathBBs.end(); BBIt++) {
852 BB = *BBIt;
853 VisitedBB = getClonedBB(BB, NextState, DuplicateMap);
854 if (VisitedBB)
855 continue;
856 Metrics.analyzeBasicBlock(BB, TTI: *TTI, EphValues);
857 DuplicateMap[BB].push_back(x: {.BB: BB, .State: NextState});
858 }
859
860 if (Metrics.notDuplicatable) {
861 LLVM_DEBUG(dbgs() << "DFA Jump Threading: Not jump threading, contains "
862 << "non-duplicatable instructions.\n");
863 ORE->emit(RemarkBuilder: [&]() {
864 return OptimizationRemarkMissed(DEBUG_TYPE, "NonDuplicatableInst",
865 Switch)
866 << "Contains non-duplicatable instructions.";
867 });
868 return false;
869 }
870
871 // FIXME: Allow jump threading with controlled convergence.
872 if (Metrics.Convergence != ConvergenceKind::None) {
873 LLVM_DEBUG(dbgs() << "DFA Jump Threading: Not jump threading, contains "
874 << "convergent instructions.\n");
875 ORE->emit(RemarkBuilder: [&]() {
876 return OptimizationRemarkMissed(DEBUG_TYPE, "ConvergentInst", Switch)
877 << "Contains convergent instructions.";
878 });
879 return false;
880 }
881
882 if (!Metrics.NumInsts.isValid()) {
883 LLVM_DEBUG(dbgs() << "DFA Jump Threading: Not jump threading, contains "
884 << "instructions with invalid cost.\n");
885 ORE->emit(RemarkBuilder: [&]() {
886 return OptimizationRemarkMissed(DEBUG_TYPE, "ConvergentInst", Switch)
887 << "Contains instructions with invalid cost.";
888 });
889 return false;
890 }
891 }
892
893 InstructionCost DuplicationCost = 0;
894
895 unsigned JumpTableSize = 0;
896 TTI->getEstimatedNumberOfCaseClusters(SI: *Switch, JTSize&: JumpTableSize, PSI: nullptr,
897 BFI: nullptr);
898 if (JumpTableSize == 0) {
899 // Factor in the number of conditional branches reduced from jump
900 // threading. Assume that lowering the switch block is implemented by
901 // using binary search, hence the LogBase2().
902 unsigned CondBranches =
903 APInt(32, Switch->getNumSuccessors()).ceilLogBase2();
904 assert(CondBranches > 0 &&
905 "The threaded switch must have multiple branches");
906 DuplicationCost = Metrics.NumInsts / CondBranches;
907 } else {
908 // Compared with jump tables, the DFA optimizer removes an indirect branch
909 // on each loop iteration, thus making branch prediction more precise. The
910 // more branch targets there are, the more likely it is for the branch
911 // predictor to make a mistake, and the more benefit there is in the DFA
912 // optimizer. Thus, the more branch targets there are, the lower is the
913 // cost of the DFA opt.
914 DuplicationCost = Metrics.NumInsts / JumpTableSize;
915 }
916
917 LLVM_DEBUG(dbgs() << "\nDFA Jump Threading: Cost to jump thread block "
918 << SwitchPaths->getSwitchBlock()->getName()
919 << " is: " << DuplicationCost << "\n\n");
920
921 if (DuplicationCost > CostThreshold) {
922 LLVM_DEBUG(dbgs() << "Not jump threading, duplication cost exceeds the "
923 << "cost threshold.\n");
924 ORE->emit(RemarkBuilder: [&]() {
925 return OptimizationRemarkMissed(DEBUG_TYPE, "NotProfitable", Switch)
926 << "Duplication cost exceeds the cost threshold (cost="
927 << ore::NV("Cost", DuplicationCost)
928 << ", threshold=" << ore::NV("Threshold", CostThreshold) << ").";
929 });
930 return false;
931 }
932
933 ORE->emit(RemarkBuilder: [&]() {
934 return OptimizationRemark(DEBUG_TYPE, "JumpThreaded", Switch)
935 << "Switch statement jump-threaded.";
936 });
937
938 return true;
939 }
940
941 /// Transform each threading path to effectively jump thread the DFA.
942 void createAllExitPaths() {
943 DomTreeUpdater DTU(*DT, DomTreeUpdater::UpdateStrategy::Eager);
944
945 // Move the switch block to the end of the path, since it will be duplicated
946 BasicBlock *SwitchBlock = SwitchPaths->getSwitchBlock();
947 for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) {
948 LLVM_DEBUG(dbgs() << TPath << "\n");
949 // TODO: Fix exit path creation logic so that we dont need this
950 // placeholder.
951 TPath.push_front(BB: SwitchBlock);
952 }
953
954 // Transform the ThreadingPaths and keep track of the cloned values
955 DuplicateBlockMap DuplicateMap;
956 DefMap NewDefs;
957
958 SmallSet<BasicBlock *, 16> BlocksToClean;
959 BlocksToClean.insert_range(R: successors(BB: SwitchBlock));
960
961 for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) {
962 createExitPath(NewDefs, Path&: TPath, DuplicateMap, BlocksToClean, DTU: &DTU);
963 NumPaths++;
964 }
965
966 // After all paths are cloned, now update the last successor of the cloned
967 // path so it skips over the switch statement
968 for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths())
969 updateLastSuccessor(TPath, DuplicateMap, DTU: &DTU);
970
971 // For each instruction that was cloned and used outside, update its uses
972 updateSSA(NewDefs);
973
974 // Clean PHI Nodes for the newly created blocks
975 for (BasicBlock *BB : BlocksToClean)
976 cleanPhiNodes(BB);
977 }
978
979 /// For a specific ThreadingPath \p Path, create an exit path starting from
980 /// the determinator block.
981 ///
982 /// To remember the correct destination, we have to duplicate blocks
983 /// corresponding to each state. Also update the terminating instruction of
984 /// the predecessors, and phis in the successor blocks.
985 void createExitPath(DefMap &NewDefs, ThreadingPath &Path,
986 DuplicateBlockMap &DuplicateMap,
987 SmallSet<BasicBlock *, 16> &BlocksToClean,
988 DomTreeUpdater *DTU) {
989 APInt NextState = Path.getExitValue();
990 const BasicBlock *Determinator = Path.getDeterminatorBB();
991 PathType PathBBs = Path.getPath();
992
993 // Don't select the placeholder block in front
994 if (PathBBs.front() == Determinator)
995 PathBBs.pop_front();
996
997 auto DetIt = llvm::find(Range&: PathBBs, Val: Determinator);
998 // When there is only one BB in PathBBs, the determinator takes itself as a
999 // direct predecessor.
1000 BasicBlock *PrevBB = PathBBs.size() == 1 ? *DetIt : *std::prev(x: DetIt);
1001 for (auto BBIt = DetIt; BBIt != PathBBs.end(); BBIt++) {
1002 BasicBlock *BB = *BBIt;
1003 BlocksToClean.insert(Ptr: BB);
1004
1005 // We already cloned BB for this NextState, now just update the branch
1006 // and continue.
1007 BasicBlock *NextBB = getClonedBB(BB, NextState, DuplicateMap);
1008 if (NextBB) {
1009 updatePredecessor(PrevBB, OldBB: BB, NewBB: NextBB, DTU);
1010 PrevBB = NextBB;
1011 continue;
1012 }
1013
1014 // Clone the BB and update the successor of Prev to jump to the new block
1015 BasicBlock *NewBB = cloneBlockAndUpdatePredecessor(
1016 BB, PrevBB, NextState, DuplicateMap, NewDefs, DTU);
1017 DuplicateMap[BB].push_back(x: {.BB: NewBB, .State: NextState});
1018 BlocksToClean.insert(Ptr: NewBB);
1019 PrevBB = NewBB;
1020 }
1021 }
1022
1023 /// Restore SSA form after cloning blocks.
1024 ///
1025 /// Each cloned block creates new defs for a variable, and the uses need to be
1026 /// updated to reflect this. The uses may be replaced with a cloned value, or
1027 /// some derived phi instruction. Note that all uses of a value defined in the
1028 /// same block were already remapped when cloning the block.
1029 void updateSSA(DefMap &NewDefs) {
1030 SSAUpdaterBulk SSAUpdate;
1031 SmallVector<Use *, 16> UsesToRename;
1032
1033 for (const auto &KV : NewDefs) {
1034 Instruction *I = KV.first;
1035 BasicBlock *BB = I->getParent();
1036 std::vector<Instruction *> Cloned = KV.second;
1037
1038 // Scan all uses of this instruction to see if it is used outside of its
1039 // block, and if so, record them in UsesToRename.
1040 for (Use &U : I->uses()) {
1041 Instruction *User = cast<Instruction>(Val: U.getUser());
1042 if (PHINode *UserPN = dyn_cast<PHINode>(Val: User)) {
1043 if (UserPN->getIncomingBlock(U) == BB)
1044 continue;
1045 } else if (User->getParent() == BB) {
1046 continue;
1047 }
1048
1049 UsesToRename.push_back(Elt: &U);
1050 }
1051
1052 // If there are no uses outside the block, we're done with this
1053 // instruction.
1054 if (UsesToRename.empty())
1055 continue;
1056 LLVM_DEBUG(dbgs() << "DFA-JT: Renaming non-local uses of: " << *I
1057 << "\n");
1058
1059 // We found a use of I outside of BB. Rename all uses of I that are
1060 // outside its block to be uses of the appropriate PHI node etc. See
1061 // ValuesInBlocks with the values we know.
1062 unsigned VarNum = SSAUpdate.AddVariable(Name: I->getName(), Ty: I->getType());
1063 SSAUpdate.AddAvailableValue(Var: VarNum, BB, V: I);
1064 for (Instruction *New : Cloned)
1065 SSAUpdate.AddAvailableValue(Var: VarNum, BB: New->getParent(), V: New);
1066
1067 while (!UsesToRename.empty())
1068 SSAUpdate.AddUse(Var: VarNum, U: UsesToRename.pop_back_val());
1069
1070 LLVM_DEBUG(dbgs() << "\n");
1071 }
1072 // SSAUpdater handles phi placement and renaming uses with the appropriate
1073 // value.
1074 SSAUpdate.RewriteAllUses(DT);
1075 }
1076
1077 /// Clones a basic block, and adds it to the CFG.
1078 ///
1079 /// This function also includes updating phi nodes in the successors of the
1080 /// BB, and remapping uses that were defined locally in the cloned BB.
1081 BasicBlock *cloneBlockAndUpdatePredecessor(BasicBlock *BB, BasicBlock *PrevBB,
1082 const APInt &NextState,
1083 DuplicateBlockMap &DuplicateMap,
1084 DefMap &NewDefs,
1085 DomTreeUpdater *DTU) {
1086 ValueToValueMapTy VMap;
1087 BasicBlock *NewBB = CloneBasicBlock(
1088 BB, VMap, NameSuffix: ".jt" + std::to_string(val: NextState.getLimitedValue()),
1089 F: BB->getParent());
1090 NewBB->moveAfter(MovePos: BB);
1091 NumCloned++;
1092
1093 for (Instruction &I : *NewBB) {
1094 // Do not remap operands of PHINode in case a definition in BB is an
1095 // incoming value to a phi in the same block. This incoming value will
1096 // be renamed later while restoring SSA.
1097 if (isa<PHINode>(Val: &I))
1098 continue;
1099 RemapInstruction(I: &I, VM&: VMap,
1100 Flags: RF_IgnoreMissingLocals | RF_NoModuleLevelChanges);
1101 if (AssumeInst *II = dyn_cast<AssumeInst>(Val: &I))
1102 AC->registerAssumption(CI: II);
1103 }
1104
1105 updateSuccessorPhis(BB, ClonedBB: NewBB, NextState, VMap, DuplicateMap);
1106 updatePredecessor(PrevBB, OldBB: BB, NewBB, DTU);
1107 updateDefMap(NewDefs, VMap);
1108
1109 // Add all successors to the DominatorTree
1110 SmallPtrSet<BasicBlock *, 4> SuccSet;
1111 for (auto *SuccBB : successors(BB: NewBB)) {
1112 if (SuccSet.insert(Ptr: SuccBB).second)
1113 DTU->applyUpdates(Updates: {{DominatorTree::Insert, NewBB, SuccBB}});
1114 }
1115 SuccSet.clear();
1116 return NewBB;
1117 }
1118
1119 /// Update the phi nodes in BB's successors.
1120 ///
1121 /// This means creating a new incoming value from NewBB with the new
1122 /// instruction wherever there is an incoming value from BB.
1123 void updateSuccessorPhis(BasicBlock *BB, BasicBlock *ClonedBB,
1124 const APInt &NextState, ValueToValueMapTy &VMap,
1125 DuplicateBlockMap &DuplicateMap) {
1126 std::vector<BasicBlock *> BlocksToUpdate;
1127
1128 // If BB is the last block in the path, we can simply update the one case
1129 // successor that will be reached.
1130 if (BB == SwitchPaths->getSwitchBlock()) {
1131 SwitchInst *Switch = SwitchPaths->getSwitchInst();
1132 BasicBlock *NextCase = getNextCaseSuccessor(Switch, NextState);
1133 BlocksToUpdate.push_back(x: NextCase);
1134 BasicBlock *ClonedSucc = getClonedBB(BB: NextCase, NextState, DuplicateMap);
1135 if (ClonedSucc)
1136 BlocksToUpdate.push_back(x: ClonedSucc);
1137 }
1138 // Otherwise update phis in all successors.
1139 else {
1140 for (BasicBlock *Succ : successors(BB)) {
1141 BlocksToUpdate.push_back(x: Succ);
1142
1143 // Check if a successor has already been cloned for the particular exit
1144 // value. In this case if a successor was already cloned, the phi nodes
1145 // in the cloned block should be updated directly.
1146 BasicBlock *ClonedSucc = getClonedBB(BB: Succ, NextState, DuplicateMap);
1147 if (ClonedSucc)
1148 BlocksToUpdate.push_back(x: ClonedSucc);
1149 }
1150 }
1151
1152 // If there is a phi with an incoming value from BB, create a new incoming
1153 // value for the new predecessor ClonedBB. The value will either be the same
1154 // value from BB or a cloned value.
1155 for (BasicBlock *Succ : BlocksToUpdate) {
1156 for (auto II = Succ->begin(); PHINode *Phi = dyn_cast<PHINode>(Val&: II);
1157 ++II) {
1158 Value *Incoming = Phi->getIncomingValueForBlock(BB);
1159 if (Incoming) {
1160 if (isa<Constant>(Val: Incoming)) {
1161 Phi->addIncoming(V: Incoming, BB: ClonedBB);
1162 continue;
1163 }
1164 Value *ClonedVal = VMap[Incoming];
1165 if (ClonedVal)
1166 Phi->addIncoming(V: ClonedVal, BB: ClonedBB);
1167 else
1168 Phi->addIncoming(V: Incoming, BB: ClonedBB);
1169 }
1170 }
1171 }
1172 }
1173
1174 /// Sets the successor of PrevBB to be NewBB instead of OldBB. Note that all
1175 /// other successors are kept as well.
1176 void updatePredecessor(BasicBlock *PrevBB, BasicBlock *OldBB,
1177 BasicBlock *NewBB, DomTreeUpdater *DTU) {
1178 // When a path is reused, there is a chance that predecessors were already
1179 // updated before. Check if the predecessor needs to be updated first.
1180 if (!isPredecessor(BB: OldBB, IncomingBB: PrevBB))
1181 return;
1182
1183 Instruction *PrevTerm = PrevBB->getTerminator();
1184 for (unsigned Idx = 0; Idx < PrevTerm->getNumSuccessors(); Idx++) {
1185 if (PrevTerm->getSuccessor(Idx) == OldBB) {
1186 OldBB->removePredecessor(Pred: PrevBB, /* KeepOneInputPHIs = */ true);
1187 PrevTerm->setSuccessor(Idx, BB: NewBB);
1188 }
1189 }
1190 DTU->applyUpdates(Updates: {{DominatorTree::Delete, PrevBB, OldBB},
1191 {DominatorTree::Insert, PrevBB, NewBB}});
1192 }
1193
1194 /// Add new value mappings to the DefMap to keep track of all new definitions
1195 /// for a particular instruction. These will be used while updating SSA form.
1196 void updateDefMap(DefMap &NewDefs, ValueToValueMapTy &VMap) {
1197 SmallVector<std::pair<Instruction *, Instruction *>> NewDefsVector;
1198 NewDefsVector.reserve(N: VMap.size());
1199
1200 for (auto Entry : VMap) {
1201 Instruction *Inst =
1202 dyn_cast<Instruction>(Val: const_cast<Value *>(Entry.first));
1203 if (!Inst || !Entry.second || isa<BranchInst>(Val: Inst) ||
1204 isa<SwitchInst>(Val: Inst)) {
1205 continue;
1206 }
1207
1208 Instruction *Cloned = dyn_cast<Instruction>(Val&: Entry.second);
1209 if (!Cloned)
1210 continue;
1211
1212 NewDefsVector.push_back(Elt: {Inst, Cloned});
1213 }
1214
1215 // Sort the defs to get deterministic insertion order into NewDefs.
1216 sort(C&: NewDefsVector, Comp: [](const auto &LHS, const auto &RHS) {
1217 if (LHS.first == RHS.first)
1218 return LHS.second->comesBefore(RHS.second);
1219 return LHS.first->comesBefore(RHS.first);
1220 });
1221
1222 for (const auto &KV : NewDefsVector)
1223 NewDefs[KV.first].push_back(x: KV.second);
1224 }
1225
1226 /// Update the last branch of a particular cloned path to point to the correct
1227 /// case successor.
1228 ///
1229 /// Note that this is an optional step and would have been done in later
1230 /// optimizations, but it makes the CFG significantly easier to work with.
1231 void updateLastSuccessor(ThreadingPath &TPath,
1232 DuplicateBlockMap &DuplicateMap,
1233 DomTreeUpdater *DTU) {
1234 APInt NextState = TPath.getExitValue();
1235 BasicBlock *BB = TPath.getPath().back();
1236 BasicBlock *LastBlock = getClonedBB(BB, NextState, DuplicateMap);
1237
1238 // Note multiple paths can end at the same block so check that it is not
1239 // updated yet
1240 if (!isa<SwitchInst>(Val: LastBlock->getTerminator()))
1241 return;
1242 SwitchInst *Switch = cast<SwitchInst>(Val: LastBlock->getTerminator());
1243 BasicBlock *NextCase = getNextCaseSuccessor(Switch, NextState);
1244
1245 std::vector<DominatorTree::UpdateType> DTUpdates;
1246 SmallPtrSet<BasicBlock *, 4> SuccSet;
1247 for (BasicBlock *Succ : successors(BB: LastBlock)) {
1248 if (Succ != NextCase && SuccSet.insert(Ptr: Succ).second)
1249 DTUpdates.push_back(x: {DominatorTree::Delete, LastBlock, Succ});
1250 }
1251
1252 Switch->eraseFromParent();
1253 BranchInst::Create(IfTrue: NextCase, InsertBefore: LastBlock);
1254
1255 DTU->applyUpdates(Updates: DTUpdates);
1256 }
1257
1258 /// After cloning blocks, some of the phi nodes have extra incoming values
1259 /// that are no longer used. This function removes them.
1260 void cleanPhiNodes(BasicBlock *BB) {
1261 // If BB is no longer reachable, remove any remaining phi nodes
1262 if (pred_empty(BB)) {
1263 std::vector<PHINode *> PhiToRemove;
1264 for (auto II = BB->begin(); PHINode *Phi = dyn_cast<PHINode>(Val&: II); ++II) {
1265 PhiToRemove.push_back(x: Phi);
1266 }
1267 for (PHINode *PN : PhiToRemove) {
1268 PN->replaceAllUsesWith(V: PoisonValue::get(T: PN->getType()));
1269 PN->eraseFromParent();
1270 }
1271 return;
1272 }
1273
1274 // Remove any incoming values that come from an invalid predecessor
1275 for (auto II = BB->begin(); PHINode *Phi = dyn_cast<PHINode>(Val&: II); ++II) {
1276 std::vector<BasicBlock *> BlocksToRemove;
1277 for (BasicBlock *IncomingBB : Phi->blocks()) {
1278 if (!isPredecessor(BB, IncomingBB))
1279 BlocksToRemove.push_back(x: IncomingBB);
1280 }
1281 for (BasicBlock *BB : BlocksToRemove)
1282 Phi->removeIncomingValue(BB);
1283 }
1284 }
1285
1286 /// Checks if BB was already cloned for a particular next state value. If it
1287 /// was then it returns this cloned block, and otherwise null.
1288 BasicBlock *getClonedBB(BasicBlock *BB, const APInt &NextState,
1289 DuplicateBlockMap &DuplicateMap) {
1290 CloneList ClonedBBs = DuplicateMap[BB];
1291
1292 // Find an entry in the CloneList with this NextState. If it exists then
1293 // return the corresponding BB
1294 auto It = llvm::find_if(Range&: ClonedBBs, P: [NextState](const ClonedBlock &C) {
1295 return C.State == NextState;
1296 });
1297 return It != ClonedBBs.end() ? (*It).BB : nullptr;
1298 }
1299
1300 /// Helper to get the successor corresponding to a particular case value for
1301 /// a switch statement.
1302 BasicBlock *getNextCaseSuccessor(SwitchInst *Switch, const APInt &NextState) {
1303 BasicBlock *NextCase = nullptr;
1304 for (auto Case : Switch->cases()) {
1305 if (Case.getCaseValue()->getValue() == NextState) {
1306 NextCase = Case.getCaseSuccessor();
1307 break;
1308 }
1309 }
1310 if (!NextCase)
1311 NextCase = Switch->getDefaultDest();
1312 return NextCase;
1313 }
1314
1315 /// Returns true if IncomingBB is a predecessor of BB.
1316 bool isPredecessor(BasicBlock *BB, BasicBlock *IncomingBB) {
1317 return llvm::is_contained(Range: predecessors(BB), Element: IncomingBB);
1318 }
1319
1320 AllSwitchPaths *SwitchPaths;
1321 DominatorTree *DT;
1322 AssumptionCache *AC;
1323 TargetTransformInfo *TTI;
1324 OptimizationRemarkEmitter *ORE;
1325 SmallPtrSet<const Value *, 32> EphValues;
1326 std::vector<ThreadingPath> TPaths;
1327};
1328
1329bool DFAJumpThreading::run(Function &F) {
1330 LLVM_DEBUG(dbgs() << "\nDFA Jump threading: " << F.getName() << "\n");
1331
1332 if (F.hasOptSize()) {
1333 LLVM_DEBUG(dbgs() << "Skipping due to the 'minsize' attribute\n");
1334 return false;
1335 }
1336
1337 if (ClViewCfgBefore)
1338 F.viewCFG();
1339
1340 SmallVector<AllSwitchPaths, 2> ThreadableLoops;
1341 bool MadeChanges = false;
1342 LoopInfoBroken = false;
1343
1344 for (BasicBlock &BB : F) {
1345 auto *SI = dyn_cast<SwitchInst>(Val: BB.getTerminator());
1346 if (!SI)
1347 continue;
1348
1349 LLVM_DEBUG(dbgs() << "\nCheck if SwitchInst in BB " << BB.getName()
1350 << " is a candidate\n");
1351 MainSwitch Switch(SI, LI, ORE);
1352
1353 if (!Switch.getInstr()) {
1354 LLVM_DEBUG(dbgs() << "\nSwitchInst in BB " << BB.getName() << " is not a "
1355 << "candidate for jump threading\n");
1356 continue;
1357 }
1358
1359 LLVM_DEBUG(dbgs() << "\nSwitchInst in BB " << BB.getName() << " is a "
1360 << "candidate for jump threading\n");
1361 LLVM_DEBUG(SI->dump());
1362
1363 unfoldSelectInstrs(DT, SelectInsts: Switch.getSelectInsts());
1364 if (!Switch.getSelectInsts().empty())
1365 MadeChanges = true;
1366
1367 AllSwitchPaths SwitchPaths(&Switch, ORE, LI,
1368 LI->getLoopFor(BB: &BB)->getOutermostLoop());
1369 SwitchPaths.run();
1370
1371 if (SwitchPaths.getNumThreadingPaths() > 0) {
1372 ThreadableLoops.push_back(Elt: SwitchPaths);
1373
1374 // For the time being limit this optimization to occurring once in a
1375 // function since it can change the CFG significantly. This is not a
1376 // strict requirement but it can cause buggy behavior if there is an
1377 // overlap of blocks in different opportunities. There is a lot of room to
1378 // experiment with catching more opportunities here.
1379 // NOTE: To release this contraint, we must handle LoopInfo invalidation
1380 break;
1381 }
1382 }
1383
1384#ifdef NDEBUG
1385 LI->verify(DomTree: *DT);
1386#endif
1387
1388 SmallPtrSet<const Value *, 32> EphValues;
1389 if (ThreadableLoops.size() > 0)
1390 CodeMetrics::collectEphemeralValues(L: &F, AC, EphValues);
1391
1392 for (AllSwitchPaths SwitchPaths : ThreadableLoops) {
1393 TransformDFA Transform(&SwitchPaths, DT, AC, TTI, ORE, EphValues);
1394 Transform.run();
1395 MadeChanges = true;
1396 LoopInfoBroken = true;
1397 }
1398
1399#ifdef EXPENSIVE_CHECKS
1400 assert(DT->verify(DominatorTree::VerificationLevel::Full));
1401 verifyFunction(F, &dbgs());
1402#endif
1403
1404 return MadeChanges;
1405}
1406
1407} // end anonymous namespace
1408
1409/// Integrate with the new Pass Manager
1410PreservedAnalyses DFAJumpThreadingPass::run(Function &F,
1411 FunctionAnalysisManager &AM) {
1412 AssumptionCache &AC = AM.getResult<AssumptionAnalysis>(IR&: F);
1413 DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(IR&: F);
1414 LoopInfo &LI = AM.getResult<LoopAnalysis>(IR&: F);
1415 TargetTransformInfo &TTI = AM.getResult<TargetIRAnalysis>(IR&: F);
1416 OptimizationRemarkEmitter ORE(&F);
1417 DFAJumpThreading ThreadImpl(&AC, &DT, &LI, &TTI, &ORE);
1418 if (!ThreadImpl.run(F))
1419 return PreservedAnalyses::all();
1420
1421 PreservedAnalyses PA;
1422 PA.preserve<DominatorTreeAnalysis>();
1423 if (!ThreadImpl.LoopInfoBroken)
1424 PA.preserve<LoopAnalysis>();
1425 return PA;
1426}
1427