DFAJumpThreading.cpp source code [llvm_projects/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp]

1	//===- DFAJumpThreading.cpp - Threads a switch statement inside a loop ----===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// Transform each threading path to effectively jump thread the DFA. For
10	// example, the CFG below could be transformed as follows, where the cloned
11	// blocks unconditionally branch to the next correct case based on what is
12	// identified in the analysis.
13	//
14	// sw.bb sw.bb
15	// / \| \ / \| \
16	// case1 case2 case3 case1 case2 case3
17	// \ \| / \| \| \|
18	// determinator det.2 det.3 det.1
19	// br sw.bb / \| \
20	// sw.bb.2 sw.bb.3 sw.bb.1
21	// br case2 br case3 br case1§
22	//
23	// Definitions and Terminology:
24	//
25	// Threading path:*
26	// a list of basic blocks, the exit state, and the block that determines
27	// the next state, for which the following notation will be used:
28	// < path of BBs that form a cycle > [ state, determinator ]
29	//
30	// Predictable switch:*
31	// The switch variable is always a known constant so that all conditional
32	// jumps based on switch variable can be converted to unconditional jump.
33	//
34	// Determinator:*
35	// The basic block that determines the next state of the DFA.
36	//
37	// Representing the optimization in C-like pseudocode: the code pattern on the
38	// left could functionally be transformed to the right pattern if the switch
39	// condition is predictable.
40	//
41	// X = A goto A
42	// for (...) A:
43	// switch (X) ...
44	// case A goto B
45	// X = B B:
46	// case B ...
47	// X = C goto C
48	//
49	// The pass first checks that switch variable X is decided by the control flow
50	// path taken in the loop; for example, in case B, the next value of X is
51	// decided to be C. It then enumerates through all paths in the loop and labels
52	// the basic blocks where the next state is decided.
53	//
54	// Using this information it creates new paths that unconditionally branch to
55	// the next case. This involves cloning code, so it only gets triggered if the
56	// amount of code duplicated is below a threshold.
57	//
58	//===----------------------------------------------------------------------===//
59
60	#include "llvm/Transforms/Scalar/DFAJumpThreading.h"
61	#include "llvm/ADT/APInt.h"
62	#include "llvm/ADT/DenseMap.h"
63	#include "llvm/ADT/SmallSet.h"
64	#include "llvm/ADT/Statistic.h"
65	#include "llvm/Analysis/AssumptionCache.h"
66	#include "llvm/Analysis/CodeMetrics.h"
67	#include "llvm/Analysis/DomTreeUpdater.h"
68	#include "llvm/Analysis/LoopInfo.h"
69	#include "llvm/Analysis/OptimizationRemarkEmitter.h"
70	#include "llvm/Analysis/TargetTransformInfo.h"
71	#include "llvm/IR/CFG.h"
72	#include "llvm/IR/Constants.h"
73	#include "llvm/IR/IntrinsicInst.h"
74	#include "llvm/Support/CommandLine.h"
75	#include "llvm/Support/Debug.h"
76	#include "llvm/Transforms/Utils/Cloning.h"
77	#include "llvm/Transforms/Utils/SSAUpdaterBulk.h"
78	#include "llvm/Transforms/Utils/ValueMapper.h"
79	#include <algorithm>
80	#include <deque>
81
82	#ifdef EXPENSIVE_CHECKS
83	#include "llvm/IR/Verifier.h"
84	#endif
85
86	using namespace llvm;
87
88	#define DEBUG_TYPE "dfa-jump-threading"
89
90	STATISTIC(NumTransforms, "Number of transformations done");
91	STATISTIC(NumCloned, "Number of blocks cloned");
92	STATISTIC(NumPaths, "Number of individual paths threaded");
93
94	static cl::opt<bool>
95	ClViewCfgBefore("dfa-jump-view-cfg-before",
96	cl::desc ("View the CFG before DFA Jump Threading"),
97	cl::Hidden, cl::init(Val: false));
98
99	static cl::opt<bool> EarlyExitHeuristic(
100	"dfa-early-exit-heuristic",
101	cl::desc ("Exit early if an unpredictable value come from the same loop"),
102	cl::Hidden, cl::init(Val: true));
103
104	static cl::opt<unsigned> MaxPathLength(
105	"dfa-max-path-length",
106	cl::desc ("Max number of blocks searched to find a threading path"),
107	cl::Hidden, cl::init(Val: `20`));
108
109	static cl::opt<unsigned>
110	MaxNumPaths("dfa-max-num-paths",
111	cl::desc ("Max number of paths enumerated around a switch"),
112	cl::Hidden, cl::init(Val: `200`));
113
114	static cl::opt<unsigned>
115	CostThreshold("dfa-cost-threshold",
116	cl::desc ("Maximum cost accepted for the transformation"),
117	cl::Hidden, cl::init(Val: `50`));
118
119	namespace {
120
121	class SelectInstToUnfold {
122	SelectInst *SI;
123	PHINode *SIUse;
124
125	public:
126	SelectInstToUnfold(SelectInst SI, PHINode SIUse) : SI(SI), SIUse(SIUse) {}
127
128	SelectInst getInst() { return* SI; }
129	PHINode getUse() { return* SIUse; }
130
131	explicit operator bool() const { return SI && SIUse; }
132	};
133
134	void unfold(DomTreeUpdater DTU, LoopInfo LI, SelectInstToUnfold SIToUnfold,
135	std::vector<SelectInstToUnfold> *NewSIsToUnfold,
136	std::vector<BasicBlock > NewBBs);
137
138	class DFAJumpThreading {
139	public:
140	DFAJumpThreading(AssumptionCache AC, DominatorTree DT, LoopInfo *LI,
141	TargetTransformInfo TTI, OptimizationRemarkEmitter ORE)
142	: AC(AC), DT(DT), LI(LI), TTI(TTI), ORE(ORE) {}
143
144	bool run(Function &F);
145	bool LoopInfoBroken;
146
147	private:
148	void
149	unfoldSelectInstrs(DominatorTree *DT,
150	const SmallVector<SelectInstToUnfold, `4`> &SelectInsts) {
151	DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
152	SmallVector<SelectInstToUnfold, `4`> Stack;
153	for (SelectInstToUnfold SIToUnfold : SelectInsts)
154	Stack.push_back(Elt: SIToUnfold);
155
156	while (!Stack.empty()) {
157	SelectInstToUnfold SIToUnfold = Stack.pop_back_val();
158
159	std::vector<SelectInstToUnfold> NewSIsToUnfold;
160	std::vector<BasicBlock *> NewBBs;
161	unfold(DTU: &DTU, LI, SIToUnfold, NewSIsToUnfold: &NewSIsToUnfold, NewBBs: &NewBBs);
162
163	// Put newly discovered select instructions into the work list.
164	for (const SelectInstToUnfold &NewSIToUnfold : NewSIsToUnfold)
165	Stack.push_back(Elt: NewSIToUnfold);
166	}
167	}
168
169	AssumptionCache *AC;
170	DominatorTree *DT;
171	LoopInfo *LI;
172	TargetTransformInfo *TTI;
173	OptimizationRemarkEmitter *ORE;
174	};
175
176	} // end anonymous namespace
177
178	namespace {
179
180	/// Create a new basic block and sink \p SIToSink into it.
181	void createBasicBlockAndSinkSelectInst(
182	DomTreeUpdater DTU, SelectInst SI, PHINode SIUse, SelectInst SIToSink,
183	BasicBlock EndBlock, StringRef NewBBName, BasicBlock *NewBlock,
184	BranchInst *NewBranch, std::vector<SelectInstToUnfold> NewSIsToUnfold,
185	std::vector<BasicBlock > NewBBs) {
186	assert(SIToSink->hasOneUse());
187	assert(NewBlock);
188	assert(NewBranch);
189	*NewBlock = BasicBlock::Create(Context&: SI->getContext(), Name: NewBBName,
190	Parent: EndBlock->getParent(), InsertBefore: EndBlock);
191	NewBBs->push_back(x: *NewBlock);
192	NewBranch = BranchInst::Create(IfTrue: EndBlock, InsertBefore: NewBlock);
193	SIToSink->moveBefore(MovePos: *NewBranch);
194	NewSIsToUnfold->push_back(x: SelectInstToUnfold (SIToSink, SIUse));
195	DTU->applyUpdates(Updates: {{DominatorTree::Insert, *NewBlock, EndBlock}});
196	}
197
198	/// Unfold the select instruction held in \p SIToUnfold by replacing it with
199	/// control flow.
200	///
201	/// Put newly discovered select instructions into \p NewSIsToUnfold. Put newly
202	/// created basic blocks into \p NewBBs.
203	///
204	/// TODO: merge it with CodeGenPrepare::optimizeSelectInst() if possible.
205	void unfold(DomTreeUpdater DTU, LoopInfo LI, SelectInstToUnfold SIToUnfold,
206	std::vector<SelectInstToUnfold> *NewSIsToUnfold,
207	std::vector<BasicBlock > NewBBs) {
208	SelectInst *SI = SIToUnfold.getInst();
209	PHINode *SIUse = SIToUnfold.getUse();
210	BasicBlock *StartBlock = SI->getParent();
211	BasicBlock *EndBlock = SIUse->getParent();
212	BranchInst *StartBlockTerm =
213	dyn_cast<BranchInst>(Val: StartBlock->getTerminator());
214
215	assert(StartBlockTerm && StartBlockTerm->isUnconditional());
216	assert(SI->hasOneUse());
217
218	// These are the new basic blocks for the conditional branch.
219	// At least one will become an actual new basic block.
220	BasicBlock TrueBlock = nullptr*;
221	BasicBlock FalseBlock = nullptr*;
222	BranchInst TrueBranch = nullptr*;
223	BranchInst FalseBranch = nullptr*;
224
225	// Sink select instructions to be able to unfold them later.
226	if (SelectInst *SIOp = dyn_cast<SelectInst>(Val: SI->getTrueValue())) {
227	createBasicBlockAndSinkSelectInst(DTU, SI, SIUse, SIToSink: SIOp, EndBlock,
228	NewBBName: "si.unfold.true", NewBlock: &TrueBlock, NewBranch: &TrueBranch,
229	NewSIsToUnfold, NewBBs);
230	}
231	if (SelectInst *SIOp = dyn_cast<SelectInst>(Val: SI->getFalseValue())) {
232	createBasicBlockAndSinkSelectInst(DTU, SI, SIUse, SIToSink: SIOp, EndBlock,
233	NewBBName: "si.unfold.false", NewBlock: &FalseBlock,
234	NewBranch: &FalseBranch, NewSIsToUnfold, NewBBs);
235	}
236
237	// If there was nothing to sink, then arbitrarily choose the 'false' side
238	// for a new input value to the PHI.
239	if (!TrueBlock && !FalseBlock) {
240	FalseBlock = BasicBlock::Create(Context&: SI->getContext(), Name: "si.unfold.false",
241	Parent: EndBlock->getParent(), InsertBefore: EndBlock);
242	NewBBs->push_back(x: FalseBlock);
243	BranchInst::Create(IfTrue: EndBlock, InsertBefore: FalseBlock);
244	DTU->applyUpdates(Updates: {{DominatorTree::Insert, FalseBlock, EndBlock}});
245	}
246
247	// Insert the real conditional branch based on the original condition.
248	// If we did not create a new block for one of the 'true' or 'false' paths
249	// of the condition, it means that side of the branch goes to the end block
250	// directly and the path originates from the start block from the point of
251	// view of the new PHI.
252	BasicBlock *TT = EndBlock;
253	BasicBlock *FT = EndBlock;
254	if (TrueBlock && FalseBlock) {
255	// A diamond.
256	TT = TrueBlock;
257	FT = FalseBlock;
258
259	// Update the phi node of SI.
260	SIUse->addIncoming(V: SI->getTrueValue(), BB: TrueBlock);
261	SIUse->addIncoming(V: SI->getFalseValue(), BB: FalseBlock);
262
263	// Update any other PHI nodes in EndBlock.
264	for (PHINode &Phi : EndBlock->phis()) {
265	if (&Phi != SIUse) {
266	Value *OrigValue = Phi.getIncomingValueForBlock(BB: StartBlock);
267	Phi.addIncoming(V: OrigValue, BB: TrueBlock);
268	Phi.addIncoming(V: OrigValue, BB: FalseBlock);
269	}
270
271	// Remove incoming place of original StartBlock, which comes in a indirect
272	// way (through TrueBlock and FalseBlock) now.
273	Phi.removeIncomingValue(BB: StartBlock, / DeletePHIIfEmpty = / false);
274	}
275	} else {
276	BasicBlock NewBlock = nullptr*;
277	Value *SIOp1 = SI->getTrueValue();
278	Value *SIOp2 = SI->getFalseValue();
279
280	// A triangle pointing right.
281	if (!TrueBlock) {
282	NewBlock = FalseBlock;
283	FT = FalseBlock;
284	}
285	// A triangle pointing left.
286	else {
287	NewBlock = TrueBlock;
288	TT = TrueBlock;
289	std::swap(a&: SIOp1, b&: SIOp2);
290	}
291
292	// Update the phi node of SI.
293	for (unsigned Idx = `0`; Idx < SIUse->getNumIncomingValues(); ++Idx) {
294	if (SIUse->getIncomingBlock(i: Idx) == StartBlock)
295	SIUse->setIncomingValue(i: Idx, V: SIOp1);
296	}
297	SIUse->addIncoming(V: SIOp2, BB: NewBlock);
298
299	// Update any other PHI nodes in EndBlock.
300	for (auto II = EndBlock->begin(); PHINode *Phi = dyn_cast<PHINode>(Val&: II);
301	++II) {
302	if (Phi != SIUse)
303	Phi->addIncoming(V: Phi->getIncomingValueForBlock(BB: StartBlock), BB: NewBlock);
304	}
305	}
306	StartBlockTerm->eraseFromParent();
307	BranchInst::Create(IfTrue: TT, IfFalse: FT, Cond: SI->getCondition(), InsertBefore: StartBlock);
308	DTU->applyUpdates(Updates: {{DominatorTree::Insert, StartBlock, TT},
309	{DominatorTree::Insert, StartBlock, FT}});
310
311	// Preserve loop info
312	if (Loop *L = LI->getLoopFor(BB: SI->getParent())) {
313	for (BasicBlock NewBB : NewBBs)
314	L->addBasicBlockToLoop(NewBB, LI&: *LI);
315	}
316
317	// The select is now dead.
318	assert(SI->use_empty() && "Select must be dead now");
319	SI->eraseFromParent();
320	}
321
322	struct ClonedBlock {
323	BasicBlock *BB;
324	APInt State; ///< \p State corresponds to the next value of a switch stmnt.
325	};
326
327	typedef std::deque<BasicBlock *> PathType;
328	typedef std::vector<PathType> PathsType;
329	typedef SmallPtrSet<const BasicBlock *, `8`> VisitedBlocks;
330	typedef std::vector<ClonedBlock> CloneList;
331
332	// This data structure keeps track of all blocks that have been cloned. If two
333	// different ThreadingPaths clone the same block for a certain state it should
334	// be reused, and it can be looked up in this map.
335	typedef DenseMap<BasicBlock *, CloneList> DuplicateBlockMap;
336
337	// This map keeps track of all the new definitions for an instruction. This
338	// information is needed when restoring SSA form after cloning blocks.
339	typedef MapVector<Instruction , std::vector<Instruction >> DefMap;
340
341	inline raw_ostream &operator<<(raw_ostream &OS, const PathType &Path) {
342	OS << "< ";
343	for (const BasicBlock *BB : Path) {
344	std::string BBName;
345	if (BB->hasName())
346	raw_string_ostream (BBName) << BB->getName();
347	else
348	raw_string_ostream (BBName) << BB;
349	OS << BBName << " ";
350	}
351	OS << ">";
352	return OS;
353	}
354
355	/// ThreadingPath is a path in the control flow of a loop that can be threaded
356	/// by cloning necessary basic blocks and replacing conditional branches with
357	/// unconditional ones. A threading path includes a list of basic blocks, the
358	/// exit state, and the block that determines the next state.
359	struct ThreadingPath {
360	/// Exit value is DFA's exit state for the given path.
361	APInt getExitValue() const { return ExitVal; }
362	void setExitValue(const ConstantInt *V) {
363	ExitVal = V->getValue();
364	IsExitValSet = true;
365	}
366	bool isExitValueSet() const { return IsExitValSet; }
367
368	/// Determinator is the basic block that determines the next state of the DFA.
369	const BasicBlock getDeterminatorBB() const* { return DBB; }
370	void setDeterminator(const BasicBlock *BB) { DBB = BB; }
371
372	/// Path is a list of basic blocks.
373	const PathType &getPath() const { return Path; }
374	void setPath(const PathType &NewPath) { Path = NewPath; }
375
376	void print(raw_ostream &OS) const {
377	OS << Path << " [ " << ExitVal << ", " << DBB->getName() << " ]";
378	}
379
380	private:
381	PathType Path;
382	APInt ExitVal;
383	const BasicBlock DBB = nullptr*;
384	bool IsExitValSet = false;
385	};
386
387	#ifndef NDEBUG
388	inline raw_ostream &operator<<(raw_ostream &OS, const ThreadingPath &TPath) {
389	TPath.print(OS);
390	return OS;
391	}
392	#endif
393
394	struct MainSwitch {
395	MainSwitch(SwitchInst SI, LoopInfo LI, OptimizationRemarkEmitter *ORE)
396	: LI(LI) {
397	if (isCandidate(SI)) {
398	Instr = SI;
399	} else {
400	ORE->emit(RemarkBuilder: [&]() {
401	return OptimizationRemarkMissed (DEBUG_TYPE, "SwitchNotPredictable", SI)
402	<< "Switch instruction is not predictable.";
403	});
404	}
405	}
406
407	virtual ~MainSwitch() = default;
408
409	SwitchInst getInstr() const* { return Instr; }
410	const SmallVector<SelectInstToUnfold, `4`> getSelectInsts() {
411	return SelectInsts;
412	}
413
414	private:
415	/// Do a use-def chain traversal starting from the switch condition to see if
416	/// \p SI is a potential condidate.
417	///
418	/// Also, collect select instructions to unfold.
419	bool isCandidate(const SwitchInst *SI) {
420	std::deque<std::pair<Value , BasicBlock >> Q;
421	SmallSet<Value *, `16`> SeenValues;
422	SelectInsts.clear();
423
424	Value *SICond = SI->getCondition();
425	LLVM_DEBUG(dbgs() << "\tSICond: " << *SICond << "\n");
426	if (!isa<PHINode>(Val: SICond))
427	return false;
428
429	// The switch must be in a loop.
430	const Loop *L = LI->getLoopFor(BB: SI->getParent());
431	if (!L)
432	return false;
433
434	addToQueue(Val: SICond, BB: nullptr, Q, SeenValues);
435
436	while (!Q.empty()) {
437	Value *Current = Q.front().first;
438	BasicBlock *CurrentIncomingBB = Q.front().second;
439	Q.pop_front();
440
441	if (auto *Phi = dyn_cast<PHINode>(Val: Current)) {
442	for (BasicBlock *IncomingBB : Phi->blocks()) {
443	Value *Incoming = Phi->getIncomingValueForBlock(BB: IncomingBB);
444	addToQueue(Val: Incoming, BB: IncomingBB, Q, SeenValues);
445	}
446	LLVM_DEBUG(dbgs() << "\tphi: " << *Phi << "\n");
447	} else if (SelectInst *SelI = dyn_cast<SelectInst>(Val: Current)) {
448	if (!isValidSelectInst(SI: SelI))
449	return false;
450	addToQueue(Val: SelI->getTrueValue(), BB: CurrentIncomingBB, Q, SeenValues);
451	addToQueue(Val: SelI->getFalseValue(), BB: CurrentIncomingBB, Q, SeenValues);
452	LLVM_DEBUG(dbgs() << "\tselect: " << *SelI << "\n");
453	if (auto *SelIUse = dyn_cast<PHINode>(Val: SelI->user_back()))
454	SelectInsts.push_back(Elt: SelectInstToUnfold (SelI, SelIUse));
455	} else if (isa<Constant>(Val: Current)) {
456	LLVM_DEBUG(dbgs() << "\tconst: " << *Current << "\n");
457	continue;
458	} else {
459	LLVM_DEBUG(dbgs() << "\tother: " << *Current << "\n");
460	// Allow unpredictable values. The hope is that those will be the
461	// initial switch values that can be ignored (they will hit the
462	// unthreaded switch) but this assumption will get checked later after
463	// paths have been enumerated (in function getStateDefMap).
464
465	// If the unpredictable value comes from the same inner loop it is
466	// likely that it will also be on the enumerated paths, causing us to
467	// exit after we have enumerated all the paths. This heuristic save
468	// compile time because a search for all the paths can become expensive.
469	if (EarlyExitHeuristic &&
470	L->contains(L: LI->getLoopFor(BB: CurrentIncomingBB))) {
471	LLVM_DEBUG(dbgs()
472	<< "\tExiting early due to unpredictability heuristic.\n");
473	return false;
474	}
475
476	continue;
477	}
478	}
479
480	return true;
481	}
482
483	void addToQueue(Value Val, BasicBlock BB,
484	std::deque<std::pair<Value , BasicBlock >> &Q,
485	SmallSet<Value *, `16`> &SeenValues) {
486	if (SeenValues.contains(Ptr: Val))
487	return;
488	Q.push_back(x: {Val, BB});
489	SeenValues.insert(Ptr: Val);
490	}
491
492	bool isValidSelectInst(SelectInst *SI) {
493	if (!SI->hasOneUse())
494	return false;
495
496	Instruction *SIUse = dyn_cast<Instruction>(Val: SI->user_back());
497	// The use of the select inst should be either a phi or another select.
498	if (!SIUse && !(isa<PHINode>(Val: SIUse) \|\| isa<SelectInst>(Val: SIUse)))
499	return false;
500
501	BasicBlock *SIBB = SI->getParent();
502
503	// Currently, we can only expand select instructions in basic blocks with
504	// one successor.
505	BranchInst *SITerm = dyn_cast<BranchInst>(Val: SIBB->getTerminator());
506	if (!SITerm \|\| !SITerm->isUnconditional())
507	return false;
508
509	// Only fold the select coming from directly where it is defined.
510	PHINode *PHIUser = dyn_cast<PHINode>(Val: SIUse);
511	if (PHIUser && PHIUser->getIncomingBlock(U: *SI->use_begin()) != SIBB)
512	return false;
513
514	// If select will not be sunk during unfolding, and it is in the same basic
515	// block as another state defining select, then cannot unfold both.
516	for (SelectInstToUnfold SIToUnfold : SelectInsts) {
517	SelectInst *PrevSI = SIToUnfold.getInst();
518	if (PrevSI->getTrueValue() != SI && PrevSI->getFalseValue() != SI &&
519	PrevSI->getParent() == SI->getParent())
520	return false;
521	}
522
523	return true;
524	}
525
526	LoopInfo *LI;
527	SwitchInst Instr = nullptr*;
528	SmallVector<SelectInstToUnfold, `4`> SelectInsts;
529	};
530
531	struct AllSwitchPaths {
532	AllSwitchPaths(const MainSwitch MSwitch, OptimizationRemarkEmitter ORE,
533	LoopInfo *LI)
534	: Switch(MSwitch->getInstr()), SwitchBlock(Switch->getParent()), ORE(ORE),
535	LI(LI) {}
536
537	std::vector<ThreadingPath> &getThreadingPaths() { return TPaths; }
538	unsigned getNumThreadingPaths() { return TPaths.size(); }
539	SwitchInst getSwitchInst() { return* Switch; }
540	BasicBlock getSwitchBlock() { return* SwitchBlock; }
541
542	void run() {
543	VisitedBlocks Visited;
544	PathsType LoopPaths = paths(BB: SwitchBlock, Visited, / PathDepth = / `1`);
545	StateDefMap StateDef = getStateDefMap(LoopPaths);
546
547	if (StateDef.empty()) {
548	ORE->emit(RemarkBuilder: [&]() {
549	return OptimizationRemarkMissed (DEBUG_TYPE, "SwitchNotPredictable",
550	Switch)
551	<< "Switch instruction is not predictable.";
552	});
553	return;
554	}
555
556	for (const PathType &Path : LoopPaths) {
557	ThreadingPath TPath;
558
559	const BasicBlock *PrevBB = Path.back();
560	for (const BasicBlock *BB : Path) {
561	if (StateDef.contains(Val: BB)) {
562	const PHINode *Phi = dyn_cast<PHINode>(Val: StateDef [BB]);
563	assert(Phi && "Expected a state-defining instr to be a phi node.");
564
565	const Value *V = Phi->getIncomingValueForBlock(BB: PrevBB);
566	if (const ConstantInt C = dyn_cast<const* ConstantInt>(Val: V)) {
567	TPath.setExitValue(C);
568	TPath.setDeterminator(BB);
569	TPath.setPath(Path);
570	}
571	}
572
573	// Switch block is the determinator, this is the final exit value.
574	if (TPath.isExitValueSet() && BB == Path.front())
575	break;
576
577	PrevBB = BB;
578	}
579
580	if (TPath.isExitValueSet() && isSupported(TPath))
581	TPaths.push_back(x: TPath);
582	}
583	}
584
585	private:
586	// Value: an instruction that defines a switch state;
587	// Key: the parent basic block of that instruction.
588	typedef DenseMap<const BasicBlock , const* PHINode *> StateDefMap;
589
590	PathsType paths(BasicBlock *BB, VisitedBlocks &Visited,
591	unsigned PathDepth) const {
592	PathsType Res;
593
594	// Stop exploring paths after visiting MaxPathLength blocks
595	if (PathDepth > MaxPathLength) {
596	ORE->emit(RemarkBuilder: [&]() {
597	return OptimizationRemarkAnalysis (DEBUG_TYPE, "MaxPathLengthReached",
598	Switch)
599	<< "Exploration stopped after visiting MaxPathLength="
600	<< ore::NV ("MaxPathLength", MaxPathLength) << " blocks.";
601	});
602	return Res;
603	}
604
605	Visited.insert(Ptr: BB);
606
607	// Stop if we have reached the BB out of loop, since its successors have no
608	// impact on the DFA.
609	// TODO: Do we need to stop exploring if BB is the outer loop of the switch?
610	if (!LI->getLoopFor(BB))
611	return Res;
612
613	// Some blocks have multiple edges to the same successor, and this set
614	// is used to prevent a duplicate path from being generated
615	SmallSet<BasicBlock *, `4`> Successors;
616	for (BasicBlock *Succ : successors(BB)) {
617	if (!Successors.insert(Ptr: Succ).second)
618	continue;
619
620	// Found a cycle through the SwitchBlock
621	if (Succ == SwitchBlock) {
622	Res.push_back(x: {BB});
623	continue;
624	}
625
626	// We have encountered a cycle, do not get caught in it
627	if (Visited.contains(Ptr: Succ))
628	continue;
629
630	PathsType SuccPaths = paths(BB: Succ, Visited, PathDepth: PathDepth + `1`);
631	for (const PathType &Path : SuccPaths) {
632	PathType NewPath(Path);
633	NewPath.push_front(x: BB);
634	Res.push_back(x: NewPath);
635	if (Res.size() >= MaxNumPaths) {
636	return Res;
637	}
638	}
639	}
640	// This block could now be visited again from a different predecessor. Note
641	// that this will result in exponential runtime. Subpaths could possibly be
642	// cached but it takes a lot of memory to store them.
643	Visited.erase(Ptr: BB);
644	return Res;
645	}
646
647	/// Walk the use-def chain and collect all the state-defining instructions.
648	///
649	/// Return an empty map if unpredictable values encountered inside the basic
650	/// blocks of \p LoopPaths.
651	StateDefMap getStateDefMap(const PathsType &LoopPaths) const {
652	StateDefMap Res;
653
654	// Basic blocks belonging to any of the loops around the switch statement.
655	SmallPtrSet<BasicBlock *, `16`> LoopBBs;
656	for (const PathType &Path : LoopPaths) {
657	for (BasicBlock *BB : Path)
658	LoopBBs.insert(Ptr: BB);
659	}
660
661	Value *FirstDef = Switch->getOperand(i_nocapture: `0`);
662
663	assert(isa<PHINode>(FirstDef) && "The first definition must be a phi.");
664
665	SmallVector<PHINode *, `8`> Stack;
666	Stack.push_back(Elt: dyn_cast<PHINode>(Val: FirstDef));
667	SmallSet<Value *, `16`> SeenValues;
668
669	while (!Stack.empty()) {
670	PHINode *CurPhi = Stack.pop_back_val();
671
672	Res [CurPhi->getParent()] = CurPhi;
673	SeenValues.insert(Ptr: CurPhi);
674
675	for (BasicBlock *IncomingBB : CurPhi->blocks()) {
676	Value *Incoming = CurPhi->getIncomingValueForBlock(BB: IncomingBB);
677	bool IsOutsideLoops = LoopBBs.count(Ptr: IncomingBB) == `0`;
678	if (Incoming == FirstDef \|\| isa<ConstantInt>(Val: Incoming) \|\|
679	SeenValues.contains(Ptr: Incoming) \|\| IsOutsideLoops) {
680	continue;
681	}
682
683	// Any unpredictable value inside the loops means we must bail out.
684	if (!isa<PHINode>(Val: Incoming))
685	return StateDefMap ();
686
687	Stack.push_back(Elt: cast<PHINode>(Val: Incoming));
688	}
689	}
690
691	return Res;
692	}
693
694	/// The determinator BB should precede the switch-defining BB.
695	///
696	/// Otherwise, it is possible that the state defined in the determinator block
697	/// defines the state for the next iteration of the loop, rather than for the
698	/// current one.
699	///
700	/// Currently supported paths:
701	/// \code
702	/// < switch bb1 determ def > [ 42, determ ]
703	/// < switch_and_def bb1 determ > [ 42, determ ]
704	/// < switch_and_def_and_determ bb1 > [ 42, switch_and_def_and_determ ]
705	/// \endcode
706	///
707	/// Unsupported paths:
708	/// \code
709	/// < switch bb1 def determ > [ 43, determ ]
710	/// < switch_and_determ bb1 def > [ 43, switch_and_determ ]
711	/// \endcode
712	bool isSupported(const ThreadingPath &TPath) {
713	Instruction *SwitchCondI = dyn_cast<Instruction>(Val: Switch->getCondition());
714	assert(SwitchCondI);
715	if (!SwitchCondI)
716	return false;
717
718	const BasicBlock *SwitchCondDefBB = SwitchCondI->getParent();
719	const BasicBlock *SwitchCondUseBB = Switch->getParent();
720	const BasicBlock *DeterminatorBB = TPath.getDeterminatorBB();
721
722	assert(
723	SwitchCondUseBB == TPath.getPath().front() &&
724	"The first BB in a threading path should have the switch instruction");
725	if (SwitchCondUseBB != TPath.getPath().front())
726	return false;
727
728	// Make DeterminatorBB the first element in Path.
729	PathType Path = TPath.getPath();
730	auto ItDet = llvm::find(Range&: Path, Val: DeterminatorBB);
731	std::rotate(first: Path.begin(), middle: ItDet, last: Path.end());
732
733	bool IsDetBBSeen = false;
734	bool IsDefBBSeen = false;
735	bool IsUseBBSeen = false;
736	for (BasicBlock *BB : Path) {
737	if (BB == DeterminatorBB)
738	IsDetBBSeen = true;
739	if (BB == SwitchCondDefBB)
740	IsDefBBSeen = true;
741	if (BB == SwitchCondUseBB)
742	IsUseBBSeen = true;
743	if (IsDetBBSeen && IsUseBBSeen && !IsDefBBSeen)
744	return false;
745	}
746
747	return true;
748	}
749
750	SwitchInst *Switch;
751	BasicBlock *SwitchBlock;
752	OptimizationRemarkEmitter *ORE;
753	std::vector<ThreadingPath> TPaths;
754	LoopInfo *LI;
755	};
756
757	struct TransformDFA {
758	TransformDFA(AllSwitchPaths SwitchPaths, DominatorTree DT,
759	AssumptionCache AC, TargetTransformInfo TTI,
760	OptimizationRemarkEmitter *ORE,
761	SmallPtrSet<const Value *, `32`> EphValues)
762	: SwitchPaths(SwitchPaths), DT(DT), AC(AC), TTI(TTI), ORE(ORE),
763	EphValues (EphValues) {}
764
765	void run() {
766	if (isLegalAndProfitableToTransform()) {
767	createAllExitPaths();
768	NumTransforms ++;
769	}
770	}
771
772	private:
773	/// This function performs both a legality check and profitability check at
774	/// the same time since it is convenient to do so. It iterates through all
775	/// blocks that will be cloned, and keeps track of the duplication cost. It
776	/// also returns false if it is illegal to clone some required block.
777	bool isLegalAndProfitableToTransform() {
778	CodeMetrics Metrics;
779	SwitchInst *Switch = SwitchPaths->getSwitchInst();
780
781	// Don't thread switch without multiple successors.
782	if (Switch->getNumSuccessors() <= `1`)
783	return false;
784
785	// Note that DuplicateBlockMap is not being used as intended here. It is
786	// just being used to ensure (BB, State) pairs are only counted once.
787	DuplicateBlockMap DuplicateMap;
788
789	for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) {
790	PathType PathBBs = TPath.getPath();
791	APInt NextState = TPath.getExitValue();
792	const BasicBlock *Determinator = TPath.getDeterminatorBB();
793
794	// Update Metrics for the Switch block, this is always cloned
795	BasicBlock *BB = SwitchPaths->getSwitchBlock();
796	BasicBlock *VisitedBB = getClonedBB(BB, NextState, DuplicateMap);
797	if (!VisitedBB) {
798	Metrics.analyzeBasicBlock(BB, TTI: *TTI, EphValues);
799	DuplicateMap [BB].push_back(x: {.BB: BB, .State: NextState});
800	}
801
802	// If the Switch block is the Determinator, then we can continue since
803	// this is the only block that is cloned and we already counted for it.
804	if (PathBBs.front() == Determinator)
805	continue;
806
807	// Otherwise update Metrics for all blocks that will be cloned. If any
808	// block is already cloned and would be reused, don't double count it.
809	auto DetIt = llvm::find(Range&: PathBBs, Val: Determinator);
810	for (auto BBIt = DetIt; BBIt != PathBBs.end(); BBIt ++) {
811	BB = *BBIt;
812	VisitedBB = getClonedBB(BB, NextState, DuplicateMap);
813	if (VisitedBB)
814	continue;
815	Metrics.analyzeBasicBlock(BB, TTI: *TTI, EphValues);
816	DuplicateMap [BB].push_back(x: {.BB: BB, .State: NextState});
817	}
818
819	if (Metrics.notDuplicatable) {
820	LLVM_DEBUG(dbgs() << "DFA Jump Threading: Not jump threading, contains "
821	<< "non-duplicatable instructions.\n");
822	ORE->emit(RemarkBuilder: [&]() {
823	return OptimizationRemarkMissed (DEBUG_TYPE, "NonDuplicatableInst",
824	Switch)
825	<< "Contains non-duplicatable instructions.";
826	});
827	return false;
828	}
829
830	// FIXME: Allow jump threading with controlled convergence.
831	if (Metrics.Convergence != ConvergenceKind::None) {
832	LLVM_DEBUG(dbgs() << "DFA Jump Threading: Not jump threading, contains "
833	<< "convergent instructions.\n");
834	ORE->emit(RemarkBuilder: [&]() {
835	return OptimizationRemarkMissed (DEBUG_TYPE, "ConvergentInst", Switch)
836	<< "Contains convergent instructions.";
837	});
838	return false;
839	}
840
841	if (!Metrics.NumInsts.isValid()) {
842	LLVM_DEBUG(dbgs() << "DFA Jump Threading: Not jump threading, contains "
843	<< "instructions with invalid cost.\n");
844	ORE->emit(RemarkBuilder: [&]() {
845	return OptimizationRemarkMissed (DEBUG_TYPE, "ConvergentInst", Switch)
846	<< "Contains instructions with invalid cost.";
847	});
848	return false;
849	}
850	}
851
852	InstructionCost DuplicationCost = `0`;
853
854	unsigned JumpTableSize = `0`;
855	TTI->getEstimatedNumberOfCaseClusters(SI: Switch, JTSize&: JumpTableSize, PSI: nullptr*,
856	BFI: nullptr);
857	if (JumpTableSize == `0`) {
858	// Factor in the number of conditional branches reduced from jump
859	// threading. Assume that lowering the switch block is implemented by
860	// using binary search, hence the LogBase2().
861	unsigned CondBranches =
862	APInt (`32`, Switch->getNumSuccessors()).ceilLogBase2();
863	assert(CondBranches > `0` &&
864	"The threaded switch must have multiple branches");
865	DuplicationCost = Metrics.NumInsts / CondBranches;
866	} else {
867	// Compared with jump tables, the DFA optimizer removes an indirect branch
868	// on each loop iteration, thus making branch prediction more precise. The
869	// more branch targets there are, the more likely it is for the branch
870	// predictor to make a mistake, and the more benefit there is in the DFA
871	// optimizer. Thus, the more branch targets there are, the lower is the
872	// cost of the DFA opt.
873	DuplicationCost = Metrics.NumInsts / JumpTableSize;
874	}
875
876	LLVM_DEBUG(dbgs() << "\nDFA Jump Threading: Cost to jump thread block "
877	<< SwitchPaths->getSwitchBlock()->getName()
878	<< " is: " << DuplicationCost << "\n\n");
879
880	if (DuplicationCost > CostThreshold) {
881	LLVM_DEBUG(dbgs() << "Not jump threading, duplication cost exceeds the "
882	<< "cost threshold.\n");
883	ORE->emit(RemarkBuilder: [&]() {
884	return OptimizationRemarkMissed (DEBUG_TYPE, "NotProfitable", Switch)
885	<< "Duplication cost exceeds the cost threshold (cost="
886	<< ore::NV ("Cost", DuplicationCost)
887	<< ", threshold=" << ore::NV ("Threshold", CostThreshold) << ").";
888	});
889	return false;
890	}
891
892	ORE->emit(RemarkBuilder: [&]() {
893	return OptimizationRemark (DEBUG_TYPE, "JumpThreaded", Switch)
894	<< "Switch statement jump-threaded.";
895	});
896
897	return true;
898	}
899
900	/// Transform each threading path to effectively jump thread the DFA.
901	void createAllExitPaths() {
902	DomTreeUpdater DTU(*DT, DomTreeUpdater::UpdateStrategy::Eager);
903
904	// Move the switch block to the end of the path, since it will be duplicated
905	BasicBlock *SwitchBlock = SwitchPaths->getSwitchBlock();
906	for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) {
907	LLVM_DEBUG(dbgs() << TPath << "\n");
908	PathType NewPath(TPath.getPath());
909	NewPath.push_back(x: SwitchBlock);
910	TPath.setPath(NewPath);
911	}
912
913	// Transform the ThreadingPaths and keep track of the cloned values
914	DuplicateBlockMap DuplicateMap;
915	DefMap NewDefs;
916
917	SmallSet<BasicBlock *, `16`> BlocksToClean;
918	for (BasicBlock *BB : successors(BB: SwitchBlock))
919	BlocksToClean.insert(Ptr: BB);
920
921	for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) {
922	createExitPath(NewDefs, Path&: TPath, DuplicateMap, BlocksToClean, DTU: &DTU);
923	NumPaths ++;
924	}
925
926	// After all paths are cloned, now update the last successor of the cloned
927	// path so it skips over the switch statement
928	for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths())
929	updateLastSuccessor(TPath, DuplicateMap, DTU: &DTU);
930
931	// For each instruction that was cloned and used outside, update its uses
932	updateSSA(NewDefs);
933
934	// Clean PHI Nodes for the newly created blocks
935	for (BasicBlock *BB : BlocksToClean)
936	cleanPhiNodes(BB);
937	}
938
939	/// For a specific ThreadingPath \p Path, create an exit path starting from
940	/// the determinator block.
941	///
942	/// To remember the correct destination, we have to duplicate blocks
943	/// corresponding to each state. Also update the terminating instruction of
944	/// the predecessors, and phis in the successor blocks.
945	void createExitPath(DefMap &NewDefs, ThreadingPath &Path,
946	DuplicateBlockMap &DuplicateMap,
947	SmallSet<BasicBlock *, `16`> &BlocksToClean,
948	DomTreeUpdater *DTU) {
949	APInt NextState = Path.getExitValue();
950	const BasicBlock *Determinator = Path.getDeterminatorBB();
951	PathType PathBBs = Path.getPath();
952
953	// Don't select the placeholder block in front
954	if (PathBBs.front() == Determinator)
955	PathBBs.pop_front();
956
957	auto DetIt = llvm::find(Range&: PathBBs, Val: Determinator);
958	// When there is only one BB in PathBBs, the determinator takes itself as a
959	// direct predecessor.
960	BasicBlock PrevBB = PathBBs.size() == `1` ? DetIt : *std::prev(x: DetIt);
961	for (auto BBIt = DetIt; BBIt != PathBBs.end(); BBIt ++) {
962	BasicBlock BB = BBIt;
963	BlocksToClean.insert(Ptr: BB);
964
965	// We already cloned BB for this NextState, now just update the branch
966	// and continue.
967	BasicBlock *NextBB = getClonedBB(BB, NextState, DuplicateMap);
968	if (NextBB) {
969	updatePredecessor(PrevBB, OldBB: BB, NewBB: NextBB, DTU);
970	PrevBB = NextBB;
971	continue;
972	}
973
974	// Clone the BB and update the successor of Prev to jump to the new block
975	BasicBlock *NewBB = cloneBlockAndUpdatePredecessor(
976	BB, PrevBB, NextState, DuplicateMap, NewDefs, DTU);
977	DuplicateMap [BB].push_back(x: {.BB: NewBB, .State: NextState});
978	BlocksToClean.insert(Ptr: NewBB);
979	PrevBB = NewBB;
980	}
981	}
982
983	/// Restore SSA form after cloning blocks.
984	///
985	/// Each cloned block creates new defs for a variable, and the uses need to be
986	/// updated to reflect this. The uses may be replaced with a cloned value, or
987	/// some derived phi instruction. Note that all uses of a value defined in the
988	/// same block were already remapped when cloning the block.
989	void updateSSA(DefMap &NewDefs) {
990	SSAUpdaterBulk SSAUpdate;
991	SmallVector<Use *, `16`> UsesToRename;
992
993	for (const auto &KV : NewDefs) {
994	Instruction *I = KV.first;
995	BasicBlock *BB = I->getParent();
996	std::vector<Instruction *> Cloned = KV.second;
997
998	// Scan all uses of this instruction to see if it is used outside of its
999	// block, and if so, record them in UsesToRename.
1000	for (Use &U : I->uses()) {
1001	Instruction *User = cast<Instruction>(Val: U.getUser());
1002	if (PHINode *UserPN = dyn_cast<PHINode>(Val: User)) {
1003	if (UserPN->getIncomingBlock(U) == BB)
1004	continue;
1005	} else if (User->getParent() == BB) {
1006	continue;
1007	}
1008
1009	UsesToRename.push_back(Elt: &U);
1010	}
1011
1012	// If there are no uses outside the block, we're done with this
1013	// instruction.
1014	if (UsesToRename.empty())
1015	continue;
1016	LLVM_DEBUG(dbgs() << "DFA-JT: Renaming non-local uses of: " << *I
1017	<< "\n");
1018
1019	// We found a use of I outside of BB. Rename all uses of I that are
1020	// outside its block to be uses of the appropriate PHI node etc. See
1021	// ValuesInBlocks with the values we know.
1022	unsigned VarNum = SSAUpdate.AddVariable(Name: I->getName(), Ty: I->getType());
1023	SSAUpdate.AddAvailableValue(Var: VarNum, BB, V: I);
1024	for (Instruction *New : Cloned)
1025	SSAUpdate.AddAvailableValue(Var: VarNum, BB: New->getParent(), V: New);
1026
1027	while (!UsesToRename.empty())
1028	SSAUpdate.AddUse(Var: VarNum, U: UsesToRename.pop_back_val());
1029
1030	LLVM_DEBUG(dbgs() << "\n");
1031	}
1032	// SSAUpdater handles phi placement and renaming uses with the appropriate
1033	// value.
1034	SSAUpdate.RewriteAllUses(DT);
1035	}
1036
1037	/// Clones a basic block, and adds it to the CFG.
1038	///
1039	/// This function also includes updating phi nodes in the successors of the
1040	/// BB, and remapping uses that were defined locally in the cloned BB.
1041	BasicBlock cloneBlockAndUpdatePredecessor(BasicBlock BB, BasicBlock *PrevBB,
1042	const APInt &NextState,
1043	DuplicateBlockMap &DuplicateMap,
1044	DefMap &NewDefs,
1045	DomTreeUpdater *DTU) {
1046	ValueToValueMapTy VMap;
1047	BasicBlock *NewBB = CloneBasicBlock(
1048	BB, VMap, NameSuffix: ".jt" + std::to_string(val: NextState.getLimitedValue()),
1049	F: BB->getParent());
1050	NewBB->moveAfter(MovePos: BB);
1051	NumCloned ++;
1052
1053	for (Instruction &I : *NewBB) {
1054	// Do not remap operands of PHINode in case a definition in BB is an
1055	// incoming value to a phi in the same block. This incoming value will
1056	// be renamed later while restoring SSA.
1057	if (isa<PHINode>(Val: &I))
1058	continue;
1059	RemapInstruction(I: &I, VM&: VMap,
1060	Flags: RF_IgnoreMissingLocals \| RF_NoModuleLevelChanges);
1061	if (AssumeInst *II = dyn_cast<AssumeInst>(Val: &I))
1062	AC->registerAssumption(CI: II);
1063	}
1064
1065	updateSuccessorPhis(BB, ClonedBB: NewBB, NextState, VMap, DuplicateMap);
1066	updatePredecessor(PrevBB, OldBB: BB, NewBB, DTU);
1067	updateDefMap(NewDefs, VMap);
1068
1069	// Add all successors to the DominatorTree
1070	SmallPtrSet<BasicBlock *, `4`> SuccSet;
1071	for (auto *SuccBB : successors(BB: NewBB)) {
1072	if (SuccSet.insert(Ptr: SuccBB).second)
1073	DTU->applyUpdates(Updates: {{DominatorTree::Insert, NewBB, SuccBB}});
1074	}
1075	SuccSet.clear();
1076	return NewBB;
1077	}
1078
1079	/// Update the phi nodes in BB's successors.
1080	///
1081	/// This means creating a new incoming value from NewBB with the new
1082	/// instruction wherever there is an incoming value from BB.
1083	void updateSuccessorPhis(BasicBlock BB, BasicBlock ClonedBB,
1084	const APInt &NextState, ValueToValueMapTy &VMap,
1085	DuplicateBlockMap &DuplicateMap) {
1086	std::vector<BasicBlock *> BlocksToUpdate;
1087
1088	// If BB is the last block in the path, we can simply update the one case
1089	// successor that will be reached.
1090	if (BB == SwitchPaths->getSwitchBlock()) {
1091	SwitchInst *Switch = SwitchPaths->getSwitchInst();
1092	BasicBlock *NextCase = getNextCaseSuccessor(Switch, NextState);
1093	BlocksToUpdate.push_back(x: NextCase);
1094	BasicBlock *ClonedSucc = getClonedBB(BB: NextCase, NextState, DuplicateMap);
1095	if (ClonedSucc)
1096	BlocksToUpdate.push_back(x: ClonedSucc);
1097	}
1098	// Otherwise update phis in all successors.
1099	else {
1100	for (BasicBlock *Succ : successors(BB)) {
1101	BlocksToUpdate.push_back(x: Succ);
1102
1103	// Check if a successor has already been cloned for the particular exit
1104	// value. In this case if a successor was already cloned, the phi nodes
1105	// in the cloned block should be updated directly.
1106	BasicBlock *ClonedSucc = getClonedBB(BB: Succ, NextState, DuplicateMap);
1107	if (ClonedSucc)
1108	BlocksToUpdate.push_back(x: ClonedSucc);
1109	}
1110	}
1111
1112	// If there is a phi with an incoming value from BB, create a new incoming
1113	// value for the new predecessor ClonedBB. The value will either be the same
1114	// value from BB or a cloned value.
1115	for (BasicBlock *Succ : BlocksToUpdate) {
1116	for (auto II = Succ->begin(); PHINode *Phi = dyn_cast<PHINode>(Val&: II);
1117	++II) {
1118	Value *Incoming = Phi->getIncomingValueForBlock(BB);
1119	if (Incoming) {
1120	if (isa<Constant>(Val: Incoming)) {
1121	Phi->addIncoming(V: Incoming, BB: ClonedBB);
1122	continue;
1123	}
1124	Value *ClonedVal = VMap [Incoming];
1125	if (ClonedVal)
1126	Phi->addIncoming(V: ClonedVal, BB: ClonedBB);
1127	else
1128	Phi->addIncoming(V: Incoming, BB: ClonedBB);
1129	}
1130	}
1131	}
1132	}
1133
1134	/// Sets the successor of PrevBB to be NewBB instead of OldBB. Note that all
1135	/// other successors are kept as well.
1136	void updatePredecessor(BasicBlock PrevBB, BasicBlock OldBB,
1137	BasicBlock NewBB, DomTreeUpdater DTU) {
1138	// When a path is reused, there is a chance that predecessors were already
1139	// updated before. Check if the predecessor needs to be updated first.
1140	if (!isPredecessor(BB: OldBB, IncomingBB: PrevBB))
1141	return;
1142
1143	Instruction *PrevTerm = PrevBB->getTerminator();
1144	for (unsigned Idx = `0`; Idx < PrevTerm->getNumSuccessors(); Idx++) {
1145	if (PrevTerm->getSuccessor(Idx) == OldBB) {
1146	OldBB->removePredecessor(Pred: PrevBB, / KeepOneInputPHIs = / true);
1147	PrevTerm->setSuccessor(Idx, BB: NewBB);
1148	}
1149	}
1150	DTU->applyUpdates(Updates: {{DominatorTree::Delete, PrevBB, OldBB},
1151	{DominatorTree::Insert, PrevBB, NewBB}});
1152	}
1153
1154	/// Add new value mappings to the DefMap to keep track of all new definitions
1155	/// for a particular instruction. These will be used while updating SSA form.
1156	void updateDefMap(DefMap &NewDefs, ValueToValueMapTy &VMap) {
1157	SmallVector<std::pair<Instruction , Instruction >> NewDefsVector;
1158	NewDefsVector.reserve(N: VMap.size());
1159
1160	for (auto Entry : VMap) {
1161	Instruction *Inst =
1162	dyn_cast<Instruction>(Val: const_cast<Value *>(Entry.first));
1163	if (!Inst \|\| !Entry.second \|\| isa<BranchInst>(Val: Inst) \|\|
1164	isa<SwitchInst>(Val: Inst)) {
1165	continue;
1166	}
1167
1168	Instruction *Cloned = dyn_cast<Instruction>(Val&: Entry.second);
1169	if (!Cloned)
1170	continue;
1171
1172	NewDefsVector.push_back(Elt: {Inst, Cloned});
1173	}
1174
1175	// Sort the defs to get deterministic insertion order into NewDefs.
1176	sort(C&: NewDefsVector, Comp: [](const auto &LHS, const auto &RHS) {
1177	if (LHS.first == RHS.first)
1178	return LHS.second->comesBefore(RHS.second);
1179	return LHS.first->comesBefore(RHS.first);
1180	});
1181
1182	for (const auto &KV : NewDefsVector)
1183	NewDefs [KV.first].push_back(x: KV.second);
1184	}
1185
1186	/// Update the last branch of a particular cloned path to point to the correct
1187	/// case successor.
1188	///
1189	/// Note that this is an optional step and would have been done in later
1190	/// optimizations, but it makes the CFG significantly easier to work with.
1191	void updateLastSuccessor(ThreadingPath &TPath,
1192	DuplicateBlockMap &DuplicateMap,
1193	DomTreeUpdater *DTU) {
1194	APInt NextState = TPath.getExitValue();
1195	BasicBlock *BB = TPath.getPath().back();
1196	BasicBlock *LastBlock = getClonedBB(BB, NextState, DuplicateMap);
1197
1198	// Note multiple paths can end at the same block so check that it is not
1199	// updated yet
1200	if (!isa<SwitchInst>(Val: LastBlock->getTerminator()))
1201	return;
1202	SwitchInst *Switch = cast<SwitchInst>(Val: LastBlock->getTerminator());
1203	BasicBlock *NextCase = getNextCaseSuccessor(Switch, NextState);
1204
1205	std::vector<DominatorTree::UpdateType> DTUpdates;
1206	SmallPtrSet<BasicBlock *, `4`> SuccSet;
1207	for (BasicBlock *Succ : successors(BB: LastBlock)) {
1208	if (Succ != NextCase && SuccSet.insert(Ptr: Succ).second)
1209	DTUpdates.push_back(x: {DominatorTree::Delete, LastBlock, Succ});
1210	}
1211
1212	Switch->eraseFromParent();
1213	BranchInst::Create(IfTrue: NextCase, InsertBefore: LastBlock);
1214
1215	DTU->applyUpdates(Updates: DTUpdates);
1216	}
1217
1218	/// After cloning blocks, some of the phi nodes have extra incoming values
1219	/// that are no longer used. This function removes them.
1220	void cleanPhiNodes(BasicBlock *BB) {
1221	// If BB is no longer reachable, remove any remaining phi nodes
1222	if (pred_empty(BB)) {
1223	std::vector<PHINode *> PhiToRemove;
1224	for (auto II = BB->begin(); PHINode *Phi = dyn_cast<PHINode>(Val&: II); ++II) {
1225	PhiToRemove.push_back(x: Phi);
1226	}
1227	for (PHINode *PN : PhiToRemove) {
1228	PN->replaceAllUsesWith(V: PoisonValue::get(T: PN->getType()));
1229	PN->eraseFromParent();
1230	}
1231	return;
1232	}
1233
1234	// Remove any incoming values that come from an invalid predecessor
1235	for (auto II = BB->begin(); PHINode *Phi = dyn_cast<PHINode>(Val&: II); ++II) {
1236	std::vector<BasicBlock *> BlocksToRemove;
1237	for (BasicBlock *IncomingBB : Phi->blocks()) {
1238	if (!isPredecessor(BB, IncomingBB))
1239	BlocksToRemove.push_back(x: IncomingBB);
1240	}
1241	for (BasicBlock *BB : BlocksToRemove)
1242	Phi->removeIncomingValue(BB);
1243	}
1244	}
1245
1246	/// Checks if BB was already cloned for a particular next state value. If it
1247	/// was then it returns this cloned block, and otherwise null.
1248	BasicBlock getClonedBB(BasicBlock BB, const APInt &NextState,
1249	DuplicateBlockMap &DuplicateMap) {
1250	CloneList ClonedBBs = DuplicateMap [BB];
1251
1252	// Find an entry in the CloneList with this NextState. If it exists then
1253	// return the corresponding BB
1254	auto It = llvm::find_if(Range&: ClonedBBs, P: [NextState](const ClonedBlock &C) {
1255	return C.State == NextState;
1256	});
1257	return It != ClonedBBs.end() ? (It).BB : nullptr*;
1258	}
1259
1260	/// Helper to get the successor corresponding to a particular case value for
1261	/// a switch statement.
1262	BasicBlock getNextCaseSuccessor(SwitchInst Switch, const APInt &NextState) {
1263	BasicBlock NextCase = nullptr*;
1264	for (auto Case : Switch->cases()) {
1265	if (Case.getCaseValue()->getValue() == NextState) {
1266	NextCase = Case.getCaseSuccessor();
1267	break;
1268	}
1269	}
1270	if (!NextCase)
1271	NextCase = Switch->getDefaultDest();
1272	return NextCase;
1273	}
1274
1275	/// Returns true if IncomingBB is a predecessor of BB.
1276	bool isPredecessor(BasicBlock BB, BasicBlock IncomingBB) {
1277	return llvm::is_contained(Range: predecessors(BB), Element: IncomingBB);
1278	}
1279
1280	AllSwitchPaths *SwitchPaths;
1281	DominatorTree *DT;
1282	AssumptionCache *AC;
1283	TargetTransformInfo *TTI;
1284	OptimizationRemarkEmitter *ORE;
1285	SmallPtrSet<const Value *, `32`> EphValues;
1286	std::vector<ThreadingPath> TPaths;
1287	};
1288
1289	bool DFAJumpThreading::run(Function &F) {
1290	LLVM_DEBUG(dbgs() << "\nDFA Jump threading: " << F.getName() << "\n");
1291
1292	if (F.hasOptSize()) {
1293	LLVM_DEBUG(dbgs() << "Skipping due to the 'minsize' attribute\n");
1294	return false;
1295	}
1296
1297	if (ClViewCfgBefore)
1298	F.viewCFG();
1299
1300	SmallVector<AllSwitchPaths, `2`> ThreadableLoops;
1301	bool MadeChanges = false;
1302	LoopInfoBroken = false;
1303
1304	for (BasicBlock &BB : F) {
1305	auto *SI = dyn_cast<SwitchInst>(Val: BB.getTerminator());
1306	if (!SI)
1307	continue;
1308
1309	LLVM_DEBUG(dbgs() << "\nCheck if SwitchInst in BB " << BB.getName()
1310	<< " is a candidate\n");
1311	MainSwitch Switch(SI, LI, ORE);
1312
1313	if (!Switch.getInstr())
1314	continue;
1315
1316	LLVM_DEBUG(dbgs() << "\nSwitchInst in BB " << BB.getName() << " is a "
1317	<< "candidate for jump threading\n");
1318	LLVM_DEBUG(SI->dump());
1319
1320	unfoldSelectInstrs(DT, SelectInsts: Switch.getSelectInsts());
1321	if (!Switch.getSelectInsts().empty())
1322	MadeChanges = true;
1323
1324	AllSwitchPaths SwitchPaths(&Switch, ORE, LI);
1325	SwitchPaths.run();
1326
1327	if (SwitchPaths.getNumThreadingPaths() > `0`) {
1328	ThreadableLoops.push_back(Elt: SwitchPaths);
1329
1330	// For the time being limit this optimization to occurring once in a
1331	// function since it can change the CFG significantly. This is not a
1332	// strict requirement but it can cause buggy behavior if there is an
1333	// overlap of blocks in different opportunities. There is a lot of room to
1334	// experiment with catching more opportunities here.
1335	// NOTE: To release this contraint, we must handle LoopInfo invalidation
1336	break;
1337	}
1338	}
1339
1340	#ifdef NDEBUG
1341	LI->verify(DomTree: *DT);
1342	#endif
1343
1344	SmallPtrSet<const Value *, `32`> EphValues;
1345	if (ThreadableLoops.size() > `0`)
1346	CodeMetrics::collectEphemeralValues(L: &F, AC, EphValues);
1347
1348	for (AllSwitchPaths SwitchPaths : ThreadableLoops) {
1349	TransformDFA Transform(&SwitchPaths, DT, AC, TTI, ORE, EphValues);
1350	Transform.run();
1351	MadeChanges = true;
1352	LoopInfoBroken = true;
1353	}
1354
1355	#ifdef EXPENSIVE_CHECKS
1356	assert(DT->verify(DominatorTree::VerificationLevel::Full));
1357	verifyFunction(F, &dbgs());
1358	#endif
1359
1360	return MadeChanges;
1361	}
1362
1363	} // end anonymous namespace
1364
1365	/// Integrate with the new Pass Manager
1366	PreservedAnalyses DFAJumpThreadingPass::run(Function &F,
1367	FunctionAnalysisManager &AM) {
1368	AssumptionCache &AC = AM.getResult<AssumptionAnalysis>(IR&: F);
1369	DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(IR&: F);
1370	LoopInfo &LI = AM.getResult<LoopAnalysis>(IR&: F);
1371	TargetTransformInfo &TTI = AM.getResult<TargetIRAnalysis>(IR&: F);
1372	OptimizationRemarkEmitter ORE(&F);
1373	DFAJumpThreading ThreadImpl(&AC, &DT, &LI, &TTI, &ORE);
1374	if (!ThreadImpl.run(F))
1375	return PreservedAnalyses::all();
1376
1377	PreservedAnalyses PA;
1378	PA.preserve<DominatorTreeAnalysis>();
1379	if (!ThreadImpl.LoopInfoBroken)
1380	PA.preserve<LoopAnalysis>();
1381	return PA;
1382	}
1383

Browse the source code of llvm_projects/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp