DFAJumpThreading.cpp source code [llvm_projects/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp]

1	//===- DFAJumpThreading.cpp - Threads a switch statement inside a loop ----===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// Transform each threading path to effectively jump thread the DFA. For
10	// example, the CFG below could be transformed as follows, where the cloned
11	// blocks unconditionally branch to the next correct case based on what is
12	// identified in the analysis.
13	//
14	// sw.bb sw.bb
15	// / \| \ / \| \
16	// case1 case2 case3 case1 case2 case3
17	// \ \| / \| \| \|
18	// determinator det.2 det.3 det.1
19	// br sw.bb / \| \
20	// sw.bb.2 sw.bb.3 sw.bb.1
21	// br case2 br case3 br case1§
22	//
23	// Definitions and Terminology:
24	//
25	// Threading path:*
26	// a list of basic blocks, the exit state, and the block that determines
27	// the next state, for which the following notation will be used:
28	// < path of BBs that form a cycle > [ state, determinator ]
29	//
30	// Predictable switch:*
31	// The switch variable is always a known constant so that all conditional
32	// jumps based on switch variable can be converted to unconditional jump.
33	//
34	// Determinator:*
35	// The basic block that determines the next state of the DFA.
36	//
37	// Representing the optimization in C-like pseudocode: the code pattern on the
38	// left could functionally be transformed to the right pattern if the switch
39	// condition is predictable.
40	//
41	// X = A goto A
42	// for (...) A:
43	// switch (X) ...
44	// case A goto B
45	// X = B B:
46	// case B ...
47	// X = C goto C
48	//
49	// The pass first checks that switch variable X is decided by the control flow
50	// path taken in the loop; for example, in case B, the next value of X is
51	// decided to be C. It then enumerates through all paths in the loop and labels
52	// the basic blocks where the next state is decided.
53	//
54	// Using this information it creates new paths that unconditionally branch to
55	// the next case. This involves cloning code, so it only gets triggered if the
56	// amount of code duplicated is below a threshold.
57	//
58	//===----------------------------------------------------------------------===//
59
60	#include "llvm/Transforms/Scalar/DFAJumpThreading.h"
61	#include "llvm/ADT/APInt.h"
62	#include "llvm/ADT/DenseMap.h"
63	#include "llvm/ADT/SmallSet.h"
64	#include "llvm/ADT/Statistic.h"
65	#include "llvm/Analysis/AssumptionCache.h"
66	#include "llvm/Analysis/CodeMetrics.h"
67	#include "llvm/Analysis/DomTreeUpdater.h"
68	#include "llvm/Analysis/LoopInfo.h"
69	#include "llvm/Analysis/OptimizationRemarkEmitter.h"
70	#include "llvm/Analysis/TargetTransformInfo.h"
71	#include "llvm/IR/CFG.h"
72	#include "llvm/IR/Constants.h"
73	#include "llvm/IR/IntrinsicInst.h"
74	#include "llvm/Support/CommandLine.h"
75	#include "llvm/Support/Debug.h"
76	#include "llvm/Transforms/Utils/Cloning.h"
77	#include "llvm/Transforms/Utils/SSAUpdaterBulk.h"
78	#include "llvm/Transforms/Utils/ValueMapper.h"
79	#include <deque>
80
81	#ifdef EXPENSIVE_CHECKS
82	#include "llvm/IR/Verifier.h"
83	#endif
84
85	using namespace llvm;
86
87	#define DEBUG_TYPE "dfa-jump-threading"
88
89	STATISTIC(NumTransforms, "Number of transformations done");
90	STATISTIC(NumCloned, "Number of blocks cloned");
91	STATISTIC(NumPaths, "Number of individual paths threaded");
92
93	static cl::opt<bool>
94	ClViewCfgBefore("dfa-jump-view-cfg-before",
95	cl::desc ("View the CFG before DFA Jump Threading"),
96	cl::Hidden, cl::init(Val: false));
97
98	static cl::opt<bool> EarlyExitHeuristic(
99	"dfa-early-exit-heuristic",
100	cl::desc ("Exit early if an unpredictable value come from the same loop"),
101	cl::Hidden, cl::init(Val: true));
102
103	static cl::opt<unsigned> MaxPathLength(
104	"dfa-max-path-length",
105	cl::desc ("Max number of blocks searched to find a threading path"),
106	cl::Hidden, cl::init(Val: `20`));
107
108	static cl::opt<unsigned> MaxNumVisitiedPaths(
109	"dfa-max-num-visited-paths",
110	cl::desc (
111	"Max number of blocks visited while enumerating paths around a switch"),
112	cl::Hidden, cl::init(Val: `2500`));
113
114	static cl::opt<unsigned>
115	MaxNumPaths("dfa-max-num-paths",
116	cl::desc ("Max number of paths enumerated around a switch"),
117	cl::Hidden, cl::init(Val: `200`));
118
119	static cl::opt<unsigned>
120	CostThreshold("dfa-cost-threshold",
121	cl::desc ("Maximum cost accepted for the transformation"),
122	cl::Hidden, cl::init(Val: `50`));
123
124	namespace {
125
126	class SelectInstToUnfold {
127	SelectInst *SI;
128	PHINode *SIUse;
129
130	public:
131	SelectInstToUnfold(SelectInst SI, PHINode SIUse) : SI(SI), SIUse(SIUse) {}
132
133	SelectInst getInst() { return* SI; }
134	PHINode getUse() { return* SIUse; }
135
136	explicit operator bool() const { return SI && SIUse; }
137	};
138
139	void unfold(DomTreeUpdater DTU, LoopInfo LI, SelectInstToUnfold SIToUnfold,
140	std::vector<SelectInstToUnfold> *NewSIsToUnfold,
141	std::vector<BasicBlock > NewBBs);
142
143	class DFAJumpThreading {
144	public:
145	DFAJumpThreading(AssumptionCache AC, DominatorTree DT, LoopInfo *LI,
146	TargetTransformInfo TTI, OptimizationRemarkEmitter ORE)
147	: AC(AC), DT(DT), LI(LI), TTI(TTI), ORE(ORE) {}
148
149	bool run(Function &F);
150	bool LoopInfoBroken;
151
152	private:
153	void
154	unfoldSelectInstrs(DominatorTree *DT,
155	const SmallVector<SelectInstToUnfold, `4`> &SelectInsts) {
156	DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
157	SmallVector<SelectInstToUnfold, `4`> Stack(SelectInsts);
158
159	while (!Stack.empty()) {
160	SelectInstToUnfold SIToUnfold = Stack.pop_back_val();
161
162	std::vector<SelectInstToUnfold> NewSIsToUnfold;
163	std::vector<BasicBlock *> NewBBs;
164	unfold(DTU: &DTU, LI, SIToUnfold, NewSIsToUnfold: &NewSIsToUnfold, NewBBs: &NewBBs);
165
166	// Put newly discovered select instructions into the work list.
167	llvm::append_range(C&: Stack, R&: NewSIsToUnfold);
168	}
169	}
170
171	AssumptionCache *AC;
172	DominatorTree *DT;
173	LoopInfo *LI;
174	TargetTransformInfo *TTI;
175	OptimizationRemarkEmitter *ORE;
176	};
177
178	} // end anonymous namespace
179
180	namespace {
181
182	/// Unfold the select instruction held in \p SIToUnfold by replacing it with
183	/// control flow.
184	///
185	/// Put newly discovered select instructions into \p NewSIsToUnfold. Put newly
186	/// created basic blocks into \p NewBBs.
187	///
188	/// TODO: merge it with CodeGenPrepare::optimizeSelectInst() if possible.
189	void unfold(DomTreeUpdater DTU, LoopInfo LI, SelectInstToUnfold SIToUnfold,
190	std::vector<SelectInstToUnfold> *NewSIsToUnfold,
191	std::vector<BasicBlock > NewBBs) {
192	SelectInst *SI = SIToUnfold.getInst();
193	PHINode *SIUse = SIToUnfold.getUse();
194	BasicBlock *StartBlock = SI->getParent();
195	BranchInst *StartBlockTerm =
196	dyn_cast<BranchInst>(Val: StartBlock->getTerminator());
197
198	assert(StartBlockTerm);
199	assert(SI->hasOneUse());
200
201	if (StartBlockTerm->isUnconditional()) {
202	BasicBlock *EndBlock = StartBlock->getUniqueSuccessor();
203	// Arbitrarily choose the 'false' side for a new input value to the PHI.
204	BasicBlock *NewBlock = BasicBlock::Create(
205	Context&: SI->getContext(), Name: Twine (SI->getName(), ".si.unfold.false"),
206	Parent: EndBlock->getParent(), InsertBefore: EndBlock);
207	NewBBs->push_back(x: NewBlock);
208	BranchInst::Create(IfTrue: EndBlock, InsertBefore: NewBlock);
209	DTU->applyUpdates(Updates: {{DominatorTree::Insert, NewBlock, EndBlock}});
210
211	// StartBlock
212	// \| \
213	// \| NewBlock
214	// \| /
215	// EndBlock
216	Value *SIOp1 = SI->getTrueValue();
217	Value *SIOp2 = SI->getFalseValue();
218
219	PHINode *NewPhi = PHINode::Create(Ty: SIUse->getType(), NumReservedValues: `1`,
220	NameStr: Twine (SIOp2->getName(), ".si.unfold.phi"),
221	InsertBefore: NewBlock->getFirstInsertionPt());
222	NewPhi->addIncoming(V: SIOp2, BB: StartBlock);
223
224	// Update any other PHI nodes in EndBlock.
225	for (PHINode &Phi : EndBlock->phis()) {
226	if (SIUse == &Phi)
227	continue;
228	Phi.addIncoming(V: Phi.getIncomingValueForBlock(BB: StartBlock), BB: NewBlock);
229	}
230
231	// Update the phi node of SI, which is its only use.
232	if (EndBlock == SIUse->getParent()) {
233	SIUse->addIncoming(V: NewPhi, BB: NewBlock);
234	SIUse->replaceUsesOfWith(From: SI, To: SIOp1);
235	} else {
236	PHINode *EndPhi = PHINode::Create(Ty: SIUse->getType(), NumReservedValues: pred_size(BB: EndBlock),
237	NameStr: Twine (SI->getName(), ".si.unfold.phi"),
238	InsertBefore: EndBlock->getFirstInsertionPt());
239	for (BasicBlock *Pred : predecessors(BB: EndBlock)) {
240	if (Pred != StartBlock && Pred != NewBlock)
241	EndPhi->addIncoming(V: EndPhi, BB: Pred);
242	}
243
244	EndPhi->addIncoming(V: SIOp1, BB: StartBlock);
245	EndPhi->addIncoming(V: NewPhi, BB: NewBlock);
246	SIUse->replaceUsesOfWith(From: SI, To: EndPhi);
247	SIUse = EndPhi;
248	}
249
250	if (auto *OpSi = dyn_cast<SelectInst>(Val: SIOp1))
251	NewSIsToUnfold->push_back(x: SelectInstToUnfold (OpSi, SIUse));
252	if (auto *OpSi = dyn_cast<SelectInst>(Val: SIOp2))
253	NewSIsToUnfold->push_back(x: SelectInstToUnfold (OpSi, NewPhi));
254
255	// Insert the real conditional branch based on the original condition.
256	StartBlockTerm->eraseFromParent();
257	BranchInst::Create(IfTrue: EndBlock, IfFalse: NewBlock, Cond: SI->getCondition(), InsertBefore: StartBlock);
258	DTU->applyUpdates(Updates: {{DominatorTree::Insert, StartBlock, EndBlock},
259	{DominatorTree::Insert, StartBlock, NewBlock}});
260	} else {
261	BasicBlock *EndBlock = SIUse->getParent();
262	BasicBlock *NewBlockT = BasicBlock::Create(
263	Context&: SI->getContext(), Name: Twine (SI->getName(), ".si.unfold.true"),
264	Parent: EndBlock->getParent(), InsertBefore: EndBlock);
265	BasicBlock *NewBlockF = BasicBlock::Create(
266	Context&: SI->getContext(), Name: Twine (SI->getName(), ".si.unfold.false"),
267	Parent: EndBlock->getParent(), InsertBefore: EndBlock);
268
269	NewBBs->push_back(x: NewBlockT);
270	NewBBs->push_back(x: NewBlockF);
271
272	// Def only has one use in EndBlock.
273	// Before transformation:
274	// StartBlock(Def)
275	// \| \
276	// EndBlock OtherBlock
277	// (Use)
278	//
279	// After transformation:
280	// StartBlock(Def)
281	// \| \
282	// \| OtherBlock
283	// NewBlockT
284	// \| \
285	// \| NewBlockF
286	// \| /
287	// \| /
288	// EndBlock
289	// (Use)
290	BranchInst::Create(IfTrue: EndBlock, InsertBefore: NewBlockF);
291	// Insert the real conditional branch based on the original condition.
292	BranchInst::Create(IfTrue: EndBlock, IfFalse: NewBlockF, Cond: SI->getCondition(), InsertBefore: NewBlockT);
293	DTU->applyUpdates(Updates: {{DominatorTree::Insert, NewBlockT, NewBlockF},
294	{DominatorTree::Insert, NewBlockT, EndBlock},
295	{DominatorTree::Insert, NewBlockF, EndBlock}});
296
297	Value *TrueVal = SI->getTrueValue();
298	Value *FalseVal = SI->getFalseValue();
299
300	PHINode *NewPhiT = PHINode::Create(
301	Ty: SIUse->getType(), NumReservedValues: `1`, NameStr: Twine (TrueVal->getName(), ".si.unfold.phi"),
302	InsertBefore: NewBlockT->getFirstInsertionPt());
303	PHINode *NewPhiF = PHINode::Create(
304	Ty: SIUse->getType(), NumReservedValues: `1`, NameStr: Twine (FalseVal->getName(), ".si.unfold.phi"),
305	InsertBefore: NewBlockF->getFirstInsertionPt());
306	NewPhiT->addIncoming(V: TrueVal, BB: StartBlock);
307	NewPhiF->addIncoming(V: FalseVal, BB: NewBlockT);
308
309	if (auto *TrueSI = dyn_cast<SelectInst>(Val: TrueVal))
310	NewSIsToUnfold->push_back(x: SelectInstToUnfold (TrueSI, NewPhiT));
311	if (auto *FalseSi = dyn_cast<SelectInst>(Val: FalseVal))
312	NewSIsToUnfold->push_back(x: SelectInstToUnfold (FalseSi, NewPhiF));
313
314	SIUse->addIncoming(V: NewPhiT, BB: NewBlockT);
315	SIUse->addIncoming(V: NewPhiF, BB: NewBlockF);
316	SIUse->removeIncomingValue(BB: StartBlock);
317
318	// Update any other PHI nodes in EndBlock.
319	for (PHINode &Phi : EndBlock->phis()) {
320	if (SIUse == &Phi)
321	continue;
322	Phi.addIncoming(V: Phi.getIncomingValueForBlock(BB: StartBlock), BB: NewBlockT);
323	Phi.addIncoming(V: Phi.getIncomingValueForBlock(BB: StartBlock), BB: NewBlockF);
324	Phi.removeIncomingValue(BB: StartBlock);
325	}
326
327	// Update the appropriate successor of the start block to point to the new
328	// unfolded block.
329	unsigned SuccNum = StartBlockTerm->getSuccessor(i: `1`) == EndBlock ? `1` : `0`;
330	StartBlockTerm->setSuccessor(idx: SuccNum, NewSucc: NewBlockT);
331	DTU->applyUpdates(Updates: {{DominatorTree::Delete, StartBlock, EndBlock},
332	{DominatorTree::Insert, StartBlock, NewBlockT}});
333	}
334
335	// Preserve loop info
336	if (Loop *L = LI->getLoopFor(BB: SI->getParent())) {
337	for (BasicBlock NewBB : NewBBs)
338	L->addBasicBlockToLoop(NewBB, LI&: *LI);
339	}
340
341	// The select is now dead.
342	assert(SI->use_empty() && "Select must be dead now");
343	SI->eraseFromParent();
344	}
345
346	struct ClonedBlock {
347	BasicBlock *BB;
348	APInt State; ///< \p State corresponds to the next value of a switch stmnt.
349	};
350
351	typedef std::deque<BasicBlock *> PathType;
352	typedef std::vector<PathType> PathsType;
353	typedef SmallPtrSet<const BasicBlock *, `8`> VisitedBlocks;
354	typedef std::vector<ClonedBlock> CloneList;
355
356	// This data structure keeps track of all blocks that have been cloned. If two
357	// different ThreadingPaths clone the same block for a certain state it should
358	// be reused, and it can be looked up in this map.
359	typedef DenseMap<BasicBlock *, CloneList> DuplicateBlockMap;
360
361	// This map keeps track of all the new definitions for an instruction. This
362	// information is needed when restoring SSA form after cloning blocks.
363	typedef MapVector<Instruction , std::vector<Instruction >> DefMap;
364
365	inline raw_ostream &operator<<(raw_ostream &OS, const PathType &Path) {
366	OS << "< ";
367	for (const BasicBlock *BB : Path) {
368	std::string BBName;
369	if (BB->hasName())
370	raw_string_ostream (BBName) << BB->getName();
371	else
372	raw_string_ostream (BBName) << BB;
373	OS << BBName << " ";
374	}
375	OS << ">";
376	return OS;
377	}
378
379	/// ThreadingPath is a path in the control flow of a loop that can be threaded
380	/// by cloning necessary basic blocks and replacing conditional branches with
381	/// unconditional ones. A threading path includes a list of basic blocks, the
382	/// exit state, and the block that determines the next state.
383	struct ThreadingPath {
384	/// Exit value is DFA's exit state for the given path.
385	APInt getExitValue() const { return ExitVal; }
386	void setExitValue(const ConstantInt *V) {
387	ExitVal = V->getValue();
388	IsExitValSet = true;
389	}
390	bool isExitValueSet() const { return IsExitValSet; }
391
392	/// Determinator is the basic block that determines the next state of the DFA.
393	const BasicBlock getDeterminatorBB() const* { return DBB; }
394	void setDeterminator(const BasicBlock *BB) { DBB = BB; }
395
396	/// Path is a list of basic blocks.
397	const PathType &getPath() const { return Path; }
398	void setPath(const PathType &NewPath) { Path = NewPath; }
399	void push_back(BasicBlock *BB) { Path.push_back(x: BB); }
400	void push_front(BasicBlock *BB) { Path.push_front(x: BB); }
401	void appendExcludingFirst(const PathType &OtherPath) {
402	llvm::append_range(C&: Path, R: llvm::drop_begin(RangeOrContainer: OtherPath));
403	}
404
405	void print(raw_ostream &OS) const {
406	OS << Path << " [ " << ExitVal << ", " << DBB->getName() << " ]";
407	}
408
409	private:
410	PathType Path;
411	APInt ExitVal;
412	const BasicBlock DBB = nullptr*;
413	bool IsExitValSet = false;
414	};
415
416	#ifndef NDEBUG
417	inline raw_ostream &operator<<(raw_ostream &OS, const ThreadingPath &TPath) {
418	TPath.print(OS);
419	return OS;
420	}
421	#endif
422
423	struct MainSwitch {
424	MainSwitch(SwitchInst SI, LoopInfo LI, OptimizationRemarkEmitter *ORE)
425	: LI(LI) {
426	if (isCandidate(SI)) {
427	Instr = SI;
428	} else {
429	ORE->emit(RemarkBuilder: [&]() {
430	return OptimizationRemarkMissed (DEBUG_TYPE, "SwitchNotPredictable", SI)
431	<< "Switch instruction is not predictable.";
432	});
433	}
434	}
435
436	virtual ~MainSwitch() = default;
437
438	SwitchInst getInstr() const* { return Instr; }
439	const SmallVector<SelectInstToUnfold, `4`> getSelectInsts() {
440	return SelectInsts;
441	}
442
443	private:
444	/// Do a use-def chain traversal starting from the switch condition to see if
445	/// \p SI is a potential condidate.
446	///
447	/// Also, collect select instructions to unfold.
448	bool isCandidate(const SwitchInst *SI) {
449	std::deque<std::pair<Value , BasicBlock >> Q;
450	SmallSet<Value *, `16`> SeenValues;
451	SelectInsts.clear();
452
453	Value *SICond = SI->getCondition();
454	LLVM_DEBUG(dbgs() << "\tSICond: " << *SICond << "\n");
455	if (!isa<PHINode>(Val: SICond))
456	return false;
457
458	// The switch must be in a loop.
459	const Loop *L = LI->getLoopFor(BB: SI->getParent());
460	if (!L)
461	return false;
462
463	addToQueue(Val: SICond, BB: nullptr, Q, SeenValues);
464
465	while (!Q.empty()) {
466	Value *Current = Q.front().first;
467	BasicBlock *CurrentIncomingBB = Q.front().second;
468	Q.pop_front();
469
470	if (auto *Phi = dyn_cast<PHINode>(Val: Current)) {
471	for (BasicBlock *IncomingBB : Phi->blocks()) {
472	Value *Incoming = Phi->getIncomingValueForBlock(BB: IncomingBB);
473	addToQueue(Val: Incoming, BB: IncomingBB, Q, SeenValues);
474	}
475	LLVM_DEBUG(dbgs() << "\tphi: " << *Phi << "\n");
476	} else if (SelectInst *SelI = dyn_cast<SelectInst>(Val: Current)) {
477	if (!isValidSelectInst(SI: SelI))
478	return false;
479	addToQueue(Val: SelI->getTrueValue(), BB: CurrentIncomingBB, Q, SeenValues);
480	addToQueue(Val: SelI->getFalseValue(), BB: CurrentIncomingBB, Q, SeenValues);
481	LLVM_DEBUG(dbgs() << "\tselect: " << *SelI << "\n");
482	if (auto *SelIUse = dyn_cast<PHINode>(Val: SelI->user_back()))
483	SelectInsts.push_back(Elt: SelectInstToUnfold (SelI, SelIUse));
484	} else if (isa<Constant>(Val: Current)) {
485	LLVM_DEBUG(dbgs() << "\tconst: " << *Current << "\n");
486	continue;
487	} else {
488	LLVM_DEBUG(dbgs() << "\tother: " << *Current << "\n");
489	// Allow unpredictable values. The hope is that those will be the
490	// initial switch values that can be ignored (they will hit the
491	// unthreaded switch) but this assumption will get checked later after
492	// paths have been enumerated (in function getStateDefMap).
493
494	// If the unpredictable value comes from the same inner loop it is
495	// likely that it will also be on the enumerated paths, causing us to
496	// exit after we have enumerated all the paths. This heuristic save
497	// compile time because a search for all the paths can become expensive.
498	if (EarlyExitHeuristic &&
499	L->contains(L: LI->getLoopFor(BB: CurrentIncomingBB))) {
500	LLVM_DEBUG(dbgs()
501	<< "\tExiting early due to unpredictability heuristic.\n");
502	return false;
503	}
504
505	continue;
506	}
507	}
508
509	return true;
510	}
511
512	void addToQueue(Value Val, BasicBlock BB,
513	std::deque<std::pair<Value , BasicBlock >> &Q,
514	SmallSet<Value *, `16`> &SeenValues) {
515	if (SeenValues.insert(Ptr: Val).second)
516	Q.push_back(x: {Val, BB});
517	}
518
519	bool isValidSelectInst(SelectInst *SI) {
520	if (!SI->hasOneUse())
521	return false;
522
523	Instruction *SIUse = dyn_cast<Instruction>(Val: SI->user_back());
524	// The use of the select inst should be either a phi or another select.
525	if (!SIUse && !(isa<PHINode>(Val: SIUse) \|\| isa<SelectInst>(Val: SIUse)))
526	return false;
527
528	BasicBlock *SIBB = SI->getParent();
529
530	// Currently, we can only expand select instructions in basic blocks with
531	// one successor.
532	BranchInst *SITerm = dyn_cast<BranchInst>(Val: SIBB->getTerminator());
533	if (!SITerm \|\| !SITerm->isUnconditional())
534	return false;
535
536	// Only fold the select coming from directly where it is defined.
537	PHINode *PHIUser = dyn_cast<PHINode>(Val: SIUse);
538	if (PHIUser && PHIUser->getIncomingBlock(U: *SI->use_begin()) != SIBB)
539	return false;
540
541	// If select will not be sunk during unfolding, and it is in the same basic
542	// block as another state defining select, then cannot unfold both.
543	for (SelectInstToUnfold SIToUnfold : SelectInsts) {
544	SelectInst *PrevSI = SIToUnfold.getInst();
545	if (PrevSI->getTrueValue() != SI && PrevSI->getFalseValue() != SI &&
546	PrevSI->getParent() == SI->getParent())
547	return false;
548	}
549
550	return true;
551	}
552
553	LoopInfo *LI;
554	SwitchInst Instr = nullptr*;
555	SmallVector<SelectInstToUnfold, `4`> SelectInsts;
556	};
557
558	struct AllSwitchPaths {
559	AllSwitchPaths(const MainSwitch MSwitch, OptimizationRemarkEmitter ORE,
560	LoopInfo LI, Loop L)
561	: Switch(MSwitch->getInstr()), SwitchBlock(Switch->getParent()), ORE(ORE),
562	LI(LI), SwitchOuterLoop(L) {}
563
564	std::vector<ThreadingPath> &getThreadingPaths() { return TPaths; }
565	unsigned getNumThreadingPaths() { return TPaths.size(); }
566	SwitchInst getSwitchInst() { return* Switch; }
567	BasicBlock getSwitchBlock() { return* SwitchBlock; }
568
569	void run() {
570	StateDefMap StateDef = getStateDefMap();
571	if (StateDef.empty()) {
572	ORE->emit(RemarkBuilder: [&]() {
573	return OptimizationRemarkMissed (DEBUG_TYPE, "SwitchNotPredictable",
574	Switch)
575	<< "Switch instruction is not predictable.";
576	});
577	return;
578	}
579
580	auto *SwitchPhi = cast<PHINode>(Val: Switch->getOperand(i_nocapture: `0`));
581	auto *SwitchPhiDefBB = SwitchPhi->getParent();
582	VisitedBlocks VB;
583	// Get paths from the determinator BBs to SwitchPhiDefBB
584	std::vector<ThreadingPath> PathsToPhiDef =
585	getPathsFromStateDefMap(StateDef, Phi: SwitchPhi, VB);
586	if (SwitchPhiDefBB == SwitchBlock) {
587	TPaths = std::move(PathsToPhiDef);
588	return;
589	}
590
591	// Find and append paths from SwitchPhiDefBB to SwitchBlock.
592	PathsType PathsToSwitchBB =
593	paths(BB: SwitchPhiDefBB, ToBB: SwitchBlock, Visited&: VB, / PathDepth = / `1`);
594	if (PathsToSwitchBB.empty())
595	return;
596
597	std::vector<ThreadingPath> TempList;
598	for (const ThreadingPath &Path : PathsToPhiDef) {
599	for (const PathType &PathToSw : PathsToSwitchBB) {
600	ThreadingPath PathCopy(Path);
601	PathCopy.appendExcludingFirst(OtherPath: PathToSw);
602	TempList.push_back(x: PathCopy);
603	}
604	}
605	TPaths = std::move(TempList);
606	}
607
608	private:
609	// Value: an instruction that defines a switch state;
610	// Key: the parent basic block of that instruction.
611	typedef DenseMap<const BasicBlock , const* PHINode *> StateDefMap;
612	std::vector<ThreadingPath> getPathsFromStateDefMap(StateDefMap &StateDef,
613	PHINode *Phi,
614	VisitedBlocks &VB) {
615	std::vector<ThreadingPath> Res;
616	auto *PhiBB = Phi->getParent();
617	VB.insert(Ptr: PhiBB);
618
619	VisitedBlocks UniqueBlocks;
620	for (auto *IncomingBB : Phi->blocks()) {
621	if (!UniqueBlocks.insert(Ptr: IncomingBB).second)
622	continue;
623	if (!SwitchOuterLoop->contains(BB: IncomingBB))
624	continue;
625
626	Value *IncomingValue = Phi->getIncomingValueForBlock(BB: IncomingBB);
627	// We found the determinator. This is the start of our path.
628	if (auto *C = dyn_cast<ConstantInt>(Val: IncomingValue)) {
629	// SwitchBlock is the determinator, unsupported unless its also the def.
630	if (PhiBB == SwitchBlock &&
631	SwitchBlock != cast<PHINode>(Val: Switch->getOperand(i_nocapture: `0`))->getParent())
632	continue;
633	ThreadingPath NewPath;
634	NewPath.setDeterminator(PhiBB);
635	NewPath.setExitValue(C);
636	// Don't add SwitchBlock at the start, this is handled later.
637	if (IncomingBB != SwitchBlock)
638	NewPath.push_back(BB: IncomingBB);
639	NewPath.push_back(BB: PhiBB);
640	Res.push_back(x: NewPath);
641	continue;
642	}
643	// Don't get into a cycle.
644	if (VB.contains(Ptr: IncomingBB) \|\| IncomingBB == SwitchBlock)
645	continue;
646	// Recurse up the PHI chain.
647	auto *IncomingPhi = dyn_cast<PHINode>(Val: IncomingValue);
648	if (!IncomingPhi)
649	continue;
650	auto *IncomingPhiDefBB = IncomingPhi->getParent();
651	if (!StateDef.contains(Val: IncomingPhiDefBB))
652	continue;
653
654	// Direct predecessor, just add to the path.
655	if (IncomingPhiDefBB == IncomingBB) {
656	std::vector<ThreadingPath> PredPaths =
657	getPathsFromStateDefMap(StateDef, Phi: IncomingPhi, VB);
658	for (ThreadingPath &Path : PredPaths) {
659	Path.push_back(BB: PhiBB);
660	Res.push_back(x: std::move(Path));
661	}
662	continue;
663	}
664	// Not a direct predecessor, find intermediate paths to append to the
665	// existing path.
666	if (VB.contains(Ptr: IncomingPhiDefBB))
667	continue;
668
669	PathsType IntermediatePaths;
670	IntermediatePaths =
671	paths(BB: IncomingPhiDefBB, ToBB: IncomingBB, Visited&: VB, / PathDepth = / `1`);
672	if (IntermediatePaths.empty())
673	continue;
674
675	std::vector<ThreadingPath> PredPaths =
676	getPathsFromStateDefMap(StateDef, Phi: IncomingPhi, VB);
677	for (const ThreadingPath &Path : PredPaths) {
678	for (const PathType &IPath : IntermediatePaths) {
679	ThreadingPath NewPath(Path);
680	NewPath.appendExcludingFirst(OtherPath: IPath);
681	NewPath.push_back(BB: PhiBB);
682	Res.push_back(x: NewPath);
683	}
684	}
685	}
686	VB.erase(Ptr: PhiBB);
687	return Res;
688	}
689
690	PathsType paths(BasicBlock BB, BasicBlock ToBB, VisitedBlocks &Visited,
691	unsigned PathDepth) {
692	PathsType Res;
693
694	// Stop exploring paths after visiting MaxPathLength blocks
695	if (PathDepth > MaxPathLength) {
696	ORE->emit(RemarkBuilder: [&]() {
697	return OptimizationRemarkAnalysis (DEBUG_TYPE, "MaxPathLengthReached",
698	Switch)
699	<< "Exploration stopped after visiting MaxPathLength="
700	<< ore::NV ("MaxPathLength", MaxPathLength) << " blocks.";
701	});
702	return Res;
703	}
704
705	Visited.insert(Ptr: BB);
706	if (++NumVisited > MaxNumVisitiedPaths)
707	return Res;
708
709	// Stop if we have reached the BB out of loop, since its successors have no
710	// impact on the DFA.
711	if (!SwitchOuterLoop->contains(BB))
712	return Res;
713
714	// Some blocks have multiple edges to the same successor, and this set
715	// is used to prevent a duplicate path from being generated
716	SmallSet<BasicBlock *, `4`> Successors;
717	for (BasicBlock *Succ : successors(BB)) {
718	if (!Successors.insert(Ptr: Succ).second)
719	continue;
720
721	// Found a cycle through the final block.
722	if (Succ == ToBB) {
723	Res.push_back(x: {BB, ToBB});
724	continue;
725	}
726
727	// We have encountered a cycle, do not get caught in it
728	if (Visited.contains(Ptr: Succ))
729	continue;
730
731	auto *CurrLoop = LI->getLoopFor(BB);
732	// Unlikely to be beneficial.
733	if (Succ == CurrLoop->getHeader())
734	continue;
735	// Skip for now, revisit this condition later to see the impact on
736	// coverage and compile time.
737	if (LI->getLoopFor(BB: Succ) != CurrLoop)
738	continue;
739
740	PathsType SuccPaths = paths(BB: Succ, ToBB, Visited, PathDepth: PathDepth + `1`);
741	for (PathType &Path : SuccPaths) {
742	Path.push_front(x: BB);
743	Res.push_back(x: Path);
744	if (Res.size() >= MaxNumPaths) {
745	return Res;
746	}
747	}
748	}
749	// This block could now be visited again from a different predecessor. Note
750	// that this will result in exponential runtime. Subpaths could possibly be
751	// cached but it takes a lot of memory to store them.
752	Visited.erase(Ptr: BB);
753	return Res;
754	}
755
756	/// Walk the use-def chain and collect all the state-defining blocks and the
757	/// PHI nodes in those blocks that define the state.
758	StateDefMap getStateDefMap() const {
759	StateDefMap Res;
760	PHINode *FirstDef = dyn_cast<PHINode>(Val: Switch->getOperand(i_nocapture: `0`));
761	assert(FirstDef && "The first definition must be a phi.");
762
763	SmallVector<PHINode *, `8`> Stack;
764	Stack.push_back(Elt: FirstDef);
765	SmallSet<Value *, `16`> SeenValues;
766
767	while (!Stack.empty()) {
768	PHINode *CurPhi = Stack.pop_back_val();
769
770	Res [CurPhi->getParent()] = CurPhi;
771	SeenValues.insert(Ptr: CurPhi);
772
773	for (BasicBlock *IncomingBB : CurPhi->blocks()) {
774	PHINode *IncomingPhi =
775	dyn_cast<PHINode>(Val: CurPhi->getIncomingValueForBlock(BB: IncomingBB));
776	if (!IncomingPhi)
777	continue;
778	bool IsOutsideLoops = !SwitchOuterLoop->contains(BB: IncomingBB);
779	if (SeenValues.contains(Ptr: IncomingPhi) \|\| IsOutsideLoops)
780	continue;
781
782	Stack.push_back(Elt: IncomingPhi);
783	}
784	}
785
786	return Res;
787	}
788
789	unsigned NumVisited = `0`;
790	SwitchInst *Switch;
791	BasicBlock *SwitchBlock;
792	OptimizationRemarkEmitter *ORE;
793	std::vector<ThreadingPath> TPaths;
794	LoopInfo *LI;
795	Loop *SwitchOuterLoop;
796	};
797
798	struct TransformDFA {
799	TransformDFA(AllSwitchPaths SwitchPaths, DominatorTree DT,
800	AssumptionCache AC, TargetTransformInfo TTI,
801	OptimizationRemarkEmitter *ORE,
802	SmallPtrSet<const Value *, `32`> EphValues)
803	: SwitchPaths(SwitchPaths), DT(DT), AC(AC), TTI(TTI), ORE(ORE),
804	EphValues (EphValues) {}
805
806	void run() {
807	if (isLegalAndProfitableToTransform()) {
808	createAllExitPaths();
809	NumTransforms ++;
810	}
811	}
812
813	private:
814	/// This function performs both a legality check and profitability check at
815	/// the same time since it is convenient to do so. It iterates through all
816	/// blocks that will be cloned, and keeps track of the duplication cost. It
817	/// also returns false if it is illegal to clone some required block.
818	bool isLegalAndProfitableToTransform() {
819	CodeMetrics Metrics;
820	SwitchInst *Switch = SwitchPaths->getSwitchInst();
821
822	// Don't thread switch without multiple successors.
823	if (Switch->getNumSuccessors() <= `1`)
824	return false;
825
826	// Note that DuplicateBlockMap is not being used as intended here. It is
827	// just being used to ensure (BB, State) pairs are only counted once.
828	DuplicateBlockMap DuplicateMap;
829
830	for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) {
831	PathType PathBBs = TPath.getPath();
832	APInt NextState = TPath.getExitValue();
833	const BasicBlock *Determinator = TPath.getDeterminatorBB();
834
835	// Update Metrics for the Switch block, this is always cloned
836	BasicBlock *BB = SwitchPaths->getSwitchBlock();
837	BasicBlock *VisitedBB = getClonedBB(BB, NextState, DuplicateMap);
838	if (!VisitedBB) {
839	Metrics.analyzeBasicBlock(BB, TTI: *TTI, EphValues);
840	DuplicateMap [BB].push_back(x: {.BB: BB, .State: NextState});
841	}
842
843	// If the Switch block is the Determinator, then we can continue since
844	// this is the only block that is cloned and we already counted for it.
845	if (PathBBs.front() == Determinator)
846	continue;
847
848	// Otherwise update Metrics for all blocks that will be cloned. If any
849	// block is already cloned and would be reused, don't double count it.
850	auto DetIt = llvm::find(Range&: PathBBs, Val: Determinator);
851	for (auto BBIt = DetIt; BBIt != PathBBs.end(); BBIt ++) {
852	BB = *BBIt;
853	VisitedBB = getClonedBB(BB, NextState, DuplicateMap);
854	if (VisitedBB)
855	continue;
856	Metrics.analyzeBasicBlock(BB, TTI: *TTI, EphValues);
857	DuplicateMap [BB].push_back(x: {.BB: BB, .State: NextState});
858	}
859
860	if (Metrics.notDuplicatable) {
861	LLVM_DEBUG(dbgs() << "DFA Jump Threading: Not jump threading, contains "
862	<< "non-duplicatable instructions.\n");
863	ORE->emit(RemarkBuilder: [&]() {
864	return OptimizationRemarkMissed (DEBUG_TYPE, "NonDuplicatableInst",
865	Switch)
866	<< "Contains non-duplicatable instructions.";
867	});
868	return false;
869	}
870
871	// FIXME: Allow jump threading with controlled convergence.
872	if (Metrics.Convergence != ConvergenceKind::None) {
873	LLVM_DEBUG(dbgs() << "DFA Jump Threading: Not jump threading, contains "
874	<< "convergent instructions.\n");
875	ORE->emit(RemarkBuilder: [&]() {
876	return OptimizationRemarkMissed (DEBUG_TYPE, "ConvergentInst", Switch)
877	<< "Contains convergent instructions.";
878	});
879	return false;
880	}
881
882	if (!Metrics.NumInsts.isValid()) {
883	LLVM_DEBUG(dbgs() << "DFA Jump Threading: Not jump threading, contains "
884	<< "instructions with invalid cost.\n");
885	ORE->emit(RemarkBuilder: [&]() {
886	return OptimizationRemarkMissed (DEBUG_TYPE, "ConvergentInst", Switch)
887	<< "Contains instructions with invalid cost.";
888	});
889	return false;
890	}
891	}
892
893	InstructionCost DuplicationCost = `0`;
894
895	unsigned JumpTableSize = `0`;
896	TTI->getEstimatedNumberOfCaseClusters(SI: Switch, JTSize&: JumpTableSize, PSI: nullptr*,
897	BFI: nullptr);
898	if (JumpTableSize == `0`) {
899	// Factor in the number of conditional branches reduced from jump
900	// threading. Assume that lowering the switch block is implemented by
901	// using binary search, hence the LogBase2().
902	unsigned CondBranches =
903	APInt (`32`, Switch->getNumSuccessors()).ceilLogBase2();
904	assert(CondBranches > `0` &&
905	"The threaded switch must have multiple branches");
906	DuplicationCost = Metrics.NumInsts / CondBranches;
907	} else {
908	// Compared with jump tables, the DFA optimizer removes an indirect branch
909	// on each loop iteration, thus making branch prediction more precise. The
910	// more branch targets there are, the more likely it is for the branch
911	// predictor to make a mistake, and the more benefit there is in the DFA
912	// optimizer. Thus, the more branch targets there are, the lower is the
913	// cost of the DFA opt.
914	DuplicationCost = Metrics.NumInsts / JumpTableSize;
915	}
916
917	LLVM_DEBUG(dbgs() << "\nDFA Jump Threading: Cost to jump thread block "
918	<< SwitchPaths->getSwitchBlock()->getName()
919	<< " is: " << DuplicationCost << "\n\n");
920
921	if (DuplicationCost > CostThreshold) {
922	LLVM_DEBUG(dbgs() << "Not jump threading, duplication cost exceeds the "
923	<< "cost threshold.\n");
924	ORE->emit(RemarkBuilder: [&]() {
925	return OptimizationRemarkMissed (DEBUG_TYPE, "NotProfitable", Switch)
926	<< "Duplication cost exceeds the cost threshold (cost="
927	<< ore::NV ("Cost", DuplicationCost)
928	<< ", threshold=" << ore::NV ("Threshold", CostThreshold) << ").";
929	});
930	return false;
931	}
932
933	ORE->emit(RemarkBuilder: [&]() {
934	return OptimizationRemark (DEBUG_TYPE, "JumpThreaded", Switch)
935	<< "Switch statement jump-threaded.";
936	});
937
938	return true;
939	}
940
941	/// Transform each threading path to effectively jump thread the DFA.
942	void createAllExitPaths() {
943	DomTreeUpdater DTU(*DT, DomTreeUpdater::UpdateStrategy::Eager);
944
945	// Move the switch block to the end of the path, since it will be duplicated
946	BasicBlock *SwitchBlock = SwitchPaths->getSwitchBlock();
947	for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) {
948	LLVM_DEBUG(dbgs() << TPath << "\n");
949	// TODO: Fix exit path creation logic so that we dont need this
950	// placeholder.
951	TPath.push_front(BB: SwitchBlock);
952	}
953
954	// Transform the ThreadingPaths and keep track of the cloned values
955	DuplicateBlockMap DuplicateMap;
956	DefMap NewDefs;
957
958	SmallSet<BasicBlock *, `16`> BlocksToClean;
959	BlocksToClean.insert_range(R: successors(BB: SwitchBlock));
960
961	for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) {
962	createExitPath(NewDefs, Path&: TPath, DuplicateMap, BlocksToClean, DTU: &DTU);
963	NumPaths ++;
964	}
965
966	// After all paths are cloned, now update the last successor of the cloned
967	// path so it skips over the switch statement
968	for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths())
969	updateLastSuccessor(TPath, DuplicateMap, DTU: &DTU);
970
971	// For each instruction that was cloned and used outside, update its uses
972	updateSSA(NewDefs);
973
974	// Clean PHI Nodes for the newly created blocks
975	for (BasicBlock *BB : BlocksToClean)
976	cleanPhiNodes(BB);
977	}
978
979	/// For a specific ThreadingPath \p Path, create an exit path starting from
980	/// the determinator block.
981	///
982	/// To remember the correct destination, we have to duplicate blocks
983	/// corresponding to each state. Also update the terminating instruction of
984	/// the predecessors, and phis in the successor blocks.
985	void createExitPath(DefMap &NewDefs, ThreadingPath &Path,
986	DuplicateBlockMap &DuplicateMap,
987	SmallSet<BasicBlock *, `16`> &BlocksToClean,
988	DomTreeUpdater *DTU) {
989	APInt NextState = Path.getExitValue();
990	const BasicBlock *Determinator = Path.getDeterminatorBB();
991	PathType PathBBs = Path.getPath();
992
993	// Don't select the placeholder block in front
994	if (PathBBs.front() == Determinator)
995	PathBBs.pop_front();
996
997	auto DetIt = llvm::find(Range&: PathBBs, Val: Determinator);
998	// When there is only one BB in PathBBs, the determinator takes itself as a
999	// direct predecessor.
1000	BasicBlock PrevBB = PathBBs.size() == `1` ? DetIt : *std::prev(x: DetIt);
1001	for (auto BBIt = DetIt; BBIt != PathBBs.end(); BBIt ++) {
1002	BasicBlock BB = BBIt;
1003	BlocksToClean.insert(Ptr: BB);
1004
1005	// We already cloned BB for this NextState, now just update the branch
1006	// and continue.
1007	BasicBlock *NextBB = getClonedBB(BB, NextState, DuplicateMap);
1008	if (NextBB) {
1009	updatePredecessor(PrevBB, OldBB: BB, NewBB: NextBB, DTU);
1010	PrevBB = NextBB;
1011	continue;
1012	}
1013
1014	// Clone the BB and update the successor of Prev to jump to the new block
1015	BasicBlock *NewBB = cloneBlockAndUpdatePredecessor(
1016	BB, PrevBB, NextState, DuplicateMap, NewDefs, DTU);
1017	DuplicateMap [BB].push_back(x: {.BB: NewBB, .State: NextState});
1018	BlocksToClean.insert(Ptr: NewBB);
1019	PrevBB = NewBB;
1020	}
1021	}
1022
1023	/// Restore SSA form after cloning blocks.
1024	///
1025	/// Each cloned block creates new defs for a variable, and the uses need to be
1026	/// updated to reflect this. The uses may be replaced with a cloned value, or
1027	/// some derived phi instruction. Note that all uses of a value defined in the
1028	/// same block were already remapped when cloning the block.
1029	void updateSSA(DefMap &NewDefs) {
1030	SSAUpdaterBulk SSAUpdate;
1031	SmallVector<Use *, `16`> UsesToRename;
1032
1033	for (const auto &KV : NewDefs) {
1034	Instruction *I = KV.first;
1035	BasicBlock *BB = I->getParent();
1036	std::vector<Instruction *> Cloned = KV.second;
1037
1038	// Scan all uses of this instruction to see if it is used outside of its
1039	// block, and if so, record them in UsesToRename.
1040	for (Use &U : I->uses()) {
1041	Instruction *User = cast<Instruction>(Val: U.getUser());
1042	if (PHINode *UserPN = dyn_cast<PHINode>(Val: User)) {
1043	if (UserPN->getIncomingBlock(U) == BB)
1044	continue;
1045	} else if (User->getParent() == BB) {
1046	continue;
1047	}
1048
1049	UsesToRename.push_back(Elt: &U);
1050	}
1051
1052	// If there are no uses outside the block, we're done with this
1053	// instruction.
1054	if (UsesToRename.empty())
1055	continue;
1056	LLVM_DEBUG(dbgs() << "DFA-JT: Renaming non-local uses of: " << *I
1057	<< "\n");
1058
1059	// We found a use of I outside of BB. Rename all uses of I that are
1060	// outside its block to be uses of the appropriate PHI node etc. See
1061	// ValuesInBlocks with the values we know.
1062	unsigned VarNum = SSAUpdate.AddVariable(Name: I->getName(), Ty: I->getType());
1063	SSAUpdate.AddAvailableValue(Var: VarNum, BB, V: I);
1064	for (Instruction *New : Cloned)
1065	SSAUpdate.AddAvailableValue(Var: VarNum, BB: New->getParent(), V: New);
1066
1067	while (!UsesToRename.empty())
1068	SSAUpdate.AddUse(Var: VarNum, U: UsesToRename.pop_back_val());
1069
1070	LLVM_DEBUG(dbgs() << "\n");
1071	}
1072	// SSAUpdater handles phi placement and renaming uses with the appropriate
1073	// value.
1074	SSAUpdate.RewriteAllUses(DT);
1075	}
1076
1077	/// Clones a basic block, and adds it to the CFG.
1078	///
1079	/// This function also includes updating phi nodes in the successors of the
1080	/// BB, and remapping uses that were defined locally in the cloned BB.
1081	BasicBlock cloneBlockAndUpdatePredecessor(BasicBlock BB, BasicBlock *PrevBB,
1082	const APInt &NextState,
1083	DuplicateBlockMap &DuplicateMap,
1084	DefMap &NewDefs,
1085	DomTreeUpdater *DTU) {
1086	ValueToValueMapTy VMap;
1087	BasicBlock *NewBB = CloneBasicBlock(
1088	BB, VMap, NameSuffix: ".jt" + std::to_string(val: NextState.getLimitedValue()),
1089	F: BB->getParent());
1090	NewBB->moveAfter(MovePos: BB);
1091	NumCloned ++;
1092
1093	for (Instruction &I : *NewBB) {
1094	// Do not remap operands of PHINode in case a definition in BB is an
1095	// incoming value to a phi in the same block. This incoming value will
1096	// be renamed later while restoring SSA.
1097	if (isa<PHINode>(Val: &I))
1098	continue;
1099	RemapInstruction(I: &I, VM&: VMap,
1100	Flags: RF_IgnoreMissingLocals \| RF_NoModuleLevelChanges);
1101	if (AssumeInst *II = dyn_cast<AssumeInst>(Val: &I))
1102	AC->registerAssumption(CI: II);
1103	}
1104
1105	updateSuccessorPhis(BB, ClonedBB: NewBB, NextState, VMap, DuplicateMap);
1106	updatePredecessor(PrevBB, OldBB: BB, NewBB, DTU);
1107	updateDefMap(NewDefs, VMap);
1108
1109	// Add all successors to the DominatorTree
1110	SmallPtrSet<BasicBlock *, `4`> SuccSet;
1111	for (auto *SuccBB : successors(BB: NewBB)) {
1112	if (SuccSet.insert(Ptr: SuccBB).second)
1113	DTU->applyUpdates(Updates: {{DominatorTree::Insert, NewBB, SuccBB}});
1114	}
1115	SuccSet.clear();
1116	return NewBB;
1117	}
1118
1119	/// Update the phi nodes in BB's successors.
1120	///
1121	/// This means creating a new incoming value from NewBB with the new
1122	/// instruction wherever there is an incoming value from BB.
1123	void updateSuccessorPhis(BasicBlock BB, BasicBlock ClonedBB,
1124	const APInt &NextState, ValueToValueMapTy &VMap,
1125	DuplicateBlockMap &DuplicateMap) {
1126	std::vector<BasicBlock *> BlocksToUpdate;
1127
1128	// If BB is the last block in the path, we can simply update the one case
1129	// successor that will be reached.
1130	if (BB == SwitchPaths->getSwitchBlock()) {
1131	SwitchInst *Switch = SwitchPaths->getSwitchInst();
1132	BasicBlock *NextCase = getNextCaseSuccessor(Switch, NextState);
1133	BlocksToUpdate.push_back(x: NextCase);
1134	BasicBlock *ClonedSucc = getClonedBB(BB: NextCase, NextState, DuplicateMap);
1135	if (ClonedSucc)
1136	BlocksToUpdate.push_back(x: ClonedSucc);
1137	}
1138	// Otherwise update phis in all successors.
1139	else {
1140	for (BasicBlock *Succ : successors(BB)) {
1141	BlocksToUpdate.push_back(x: Succ);
1142
1143	// Check if a successor has already been cloned for the particular exit
1144	// value. In this case if a successor was already cloned, the phi nodes
1145	// in the cloned block should be updated directly.
1146	BasicBlock *ClonedSucc = getClonedBB(BB: Succ, NextState, DuplicateMap);
1147	if (ClonedSucc)
1148	BlocksToUpdate.push_back(x: ClonedSucc);
1149	}
1150	}
1151
1152	// If there is a phi with an incoming value from BB, create a new incoming
1153	// value for the new predecessor ClonedBB. The value will either be the same
1154	// value from BB or a cloned value.
1155	for (BasicBlock *Succ : BlocksToUpdate) {
1156	for (auto II = Succ->begin(); PHINode *Phi = dyn_cast<PHINode>(Val&: II);
1157	++II) {
1158	Value *Incoming = Phi->getIncomingValueForBlock(BB);
1159	if (Incoming) {
1160	if (isa<Constant>(Val: Incoming)) {
1161	Phi->addIncoming(V: Incoming, BB: ClonedBB);
1162	continue;
1163	}
1164	Value *ClonedVal = VMap [Incoming];
1165	if (ClonedVal)
1166	Phi->addIncoming(V: ClonedVal, BB: ClonedBB);
1167	else
1168	Phi->addIncoming(V: Incoming, BB: ClonedBB);
1169	}
1170	}
1171	}
1172	}
1173
1174	/// Sets the successor of PrevBB to be NewBB instead of OldBB. Note that all
1175	/// other successors are kept as well.
1176	void updatePredecessor(BasicBlock PrevBB, BasicBlock OldBB,
1177	BasicBlock NewBB, DomTreeUpdater DTU) {
1178	// When a path is reused, there is a chance that predecessors were already
1179	// updated before. Check if the predecessor needs to be updated first.
1180	if (!isPredecessor(BB: OldBB, IncomingBB: PrevBB))
1181	return;
1182
1183	Instruction *PrevTerm = PrevBB->getTerminator();
1184	for (unsigned Idx = `0`; Idx < PrevTerm->getNumSuccessors(); Idx++) {
1185	if (PrevTerm->getSuccessor(Idx) == OldBB) {
1186	OldBB->removePredecessor(Pred: PrevBB, / KeepOneInputPHIs = / true);
1187	PrevTerm->setSuccessor(Idx, BB: NewBB);
1188	}
1189	}
1190	DTU->applyUpdates(Updates: {{DominatorTree::Delete, PrevBB, OldBB},
1191	{DominatorTree::Insert, PrevBB, NewBB}});
1192	}
1193
1194	/// Add new value mappings to the DefMap to keep track of all new definitions
1195	/// for a particular instruction. These will be used while updating SSA form.
1196	void updateDefMap(DefMap &NewDefs, ValueToValueMapTy &VMap) {
1197	SmallVector<std::pair<Instruction , Instruction >> NewDefsVector;
1198	NewDefsVector.reserve(N: VMap.size());
1199
1200	for (auto Entry : VMap) {
1201	Instruction *Inst =
1202	dyn_cast<Instruction>(Val: const_cast<Value *>(Entry.first));
1203	if (!Inst \|\| !Entry.second \|\| isa<BranchInst>(Val: Inst) \|\|
1204	isa<SwitchInst>(Val: Inst)) {
1205	continue;
1206	}
1207
1208	Instruction *Cloned = dyn_cast<Instruction>(Val&: Entry.second);
1209	if (!Cloned)
1210	continue;
1211
1212	NewDefsVector.push_back(Elt: {Inst, Cloned});
1213	}
1214
1215	// Sort the defs to get deterministic insertion order into NewDefs.
1216	sort(C&: NewDefsVector, Comp: [](const auto &LHS, const auto &RHS) {
1217	if (LHS.first == RHS.first)
1218	return LHS.second->comesBefore(RHS.second);
1219	return LHS.first->comesBefore(RHS.first);
1220	});
1221
1222	for (const auto &KV : NewDefsVector)
1223	NewDefs [KV.first].push_back(x: KV.second);
1224	}
1225
1226	/// Update the last branch of a particular cloned path to point to the correct
1227	/// case successor.
1228	///
1229	/// Note that this is an optional step and would have been done in later
1230	/// optimizations, but it makes the CFG significantly easier to work with.
1231	void updateLastSuccessor(ThreadingPath &TPath,
1232	DuplicateBlockMap &DuplicateMap,
1233	DomTreeUpdater *DTU) {
1234	APInt NextState = TPath.getExitValue();
1235	BasicBlock *BB = TPath.getPath().back();
1236	BasicBlock *LastBlock = getClonedBB(BB, NextState, DuplicateMap);
1237
1238	// Note multiple paths can end at the same block so check that it is not
1239	// updated yet
1240	if (!isa<SwitchInst>(Val: LastBlock->getTerminator()))
1241	return;
1242	SwitchInst *Switch = cast<SwitchInst>(Val: LastBlock->getTerminator());
1243	BasicBlock *NextCase = getNextCaseSuccessor(Switch, NextState);
1244
1245	std::vector<DominatorTree::UpdateType> DTUpdates;
1246	SmallPtrSet<BasicBlock *, `4`> SuccSet;
1247	for (BasicBlock *Succ : successors(BB: LastBlock)) {
1248	if (Succ != NextCase && SuccSet.insert(Ptr: Succ).second)
1249	DTUpdates.push_back(x: {DominatorTree::Delete, LastBlock, Succ});
1250	}
1251
1252	Switch->eraseFromParent();
1253	BranchInst::Create(IfTrue: NextCase, InsertBefore: LastBlock);
1254
1255	DTU->applyUpdates(Updates: DTUpdates);
1256	}
1257
1258	/// After cloning blocks, some of the phi nodes have extra incoming values
1259	/// that are no longer used. This function removes them.
1260	void cleanPhiNodes(BasicBlock *BB) {
1261	// If BB is no longer reachable, remove any remaining phi nodes
1262	if (pred_empty(BB)) {
1263	std::vector<PHINode *> PhiToRemove;
1264	for (auto II = BB->begin(); PHINode *Phi = dyn_cast<PHINode>(Val&: II); ++II) {
1265	PhiToRemove.push_back(x: Phi);
1266	}
1267	for (PHINode *PN : PhiToRemove) {
1268	PN->replaceAllUsesWith(V: PoisonValue::get(T: PN->getType()));
1269	PN->eraseFromParent();
1270	}
1271	return;
1272	}
1273
1274	// Remove any incoming values that come from an invalid predecessor
1275	for (auto II = BB->begin(); PHINode *Phi = dyn_cast<PHINode>(Val&: II); ++II) {
1276	std::vector<BasicBlock *> BlocksToRemove;
1277	for (BasicBlock *IncomingBB : Phi->blocks()) {
1278	if (!isPredecessor(BB, IncomingBB))
1279	BlocksToRemove.push_back(x: IncomingBB);
1280	}
1281	for (BasicBlock *BB : BlocksToRemove)
1282	Phi->removeIncomingValue(BB);
1283	}
1284	}
1285
1286	/// Checks if BB was already cloned for a particular next state value. If it
1287	/// was then it returns this cloned block, and otherwise null.
1288	BasicBlock getClonedBB(BasicBlock BB, const APInt &NextState,
1289	DuplicateBlockMap &DuplicateMap) {
1290	CloneList ClonedBBs = DuplicateMap [BB];
1291
1292	// Find an entry in the CloneList with this NextState. If it exists then
1293	// return the corresponding BB
1294	auto It = llvm::find_if(Range&: ClonedBBs, P: [NextState](const ClonedBlock &C) {
1295	return C.State == NextState;
1296	});
1297	return It != ClonedBBs.end() ? (It).BB : nullptr*;
1298	}
1299
1300	/// Helper to get the successor corresponding to a particular case value for
1301	/// a switch statement.
1302	BasicBlock getNextCaseSuccessor(SwitchInst Switch, const APInt &NextState) {
1303	BasicBlock NextCase = nullptr*;
1304	for (auto Case : Switch->cases()) {
1305	if (Case.getCaseValue()->getValue() == NextState) {
1306	NextCase = Case.getCaseSuccessor();
1307	break;
1308	}
1309	}
1310	if (!NextCase)
1311	NextCase = Switch->getDefaultDest();
1312	return NextCase;
1313	}
1314
1315	/// Returns true if IncomingBB is a predecessor of BB.
1316	bool isPredecessor(BasicBlock BB, BasicBlock IncomingBB) {
1317	return llvm::is_contained(Range: predecessors(BB), Element: IncomingBB);
1318	}
1319
1320	AllSwitchPaths *SwitchPaths;
1321	DominatorTree *DT;
1322	AssumptionCache *AC;
1323	TargetTransformInfo *TTI;
1324	OptimizationRemarkEmitter *ORE;
1325	SmallPtrSet<const Value *, `32`> EphValues;
1326	std::vector<ThreadingPath> TPaths;
1327	};
1328
1329	bool DFAJumpThreading::run(Function &F) {
1330	LLVM_DEBUG(dbgs() << "\nDFA Jump threading: " << F.getName() << "\n");
1331
1332	if (F.hasOptSize()) {
1333	LLVM_DEBUG(dbgs() << "Skipping due to the 'minsize' attribute\n");
1334	return false;
1335	}
1336
1337	if (ClViewCfgBefore)
1338	F.viewCFG();
1339
1340	SmallVector<AllSwitchPaths, `2`> ThreadableLoops;
1341	bool MadeChanges = false;
1342	LoopInfoBroken = false;
1343
1344	for (BasicBlock &BB : F) {
1345	auto *SI = dyn_cast<SwitchInst>(Val: BB.getTerminator());
1346	if (!SI)
1347	continue;
1348
1349	LLVM_DEBUG(dbgs() << "\nCheck if SwitchInst in BB " << BB.getName()
1350	<< " is a candidate\n");
1351	MainSwitch Switch(SI, LI, ORE);
1352
1353	if (!Switch.getInstr()) {
1354	LLVM_DEBUG(dbgs() << "\nSwitchInst in BB " << BB.getName() << " is not a "
1355	<< "candidate for jump threading\n");
1356	continue;
1357	}
1358
1359	LLVM_DEBUG(dbgs() << "\nSwitchInst in BB " << BB.getName() << " is a "
1360	<< "candidate for jump threading\n");
1361	LLVM_DEBUG(SI->dump());
1362
1363	unfoldSelectInstrs(DT, SelectInsts: Switch.getSelectInsts());
1364	if (!Switch.getSelectInsts().empty())
1365	MadeChanges = true;
1366
1367	AllSwitchPaths SwitchPaths(&Switch, ORE, LI,
1368	LI->getLoopFor(BB: &BB)->getOutermostLoop());
1369	SwitchPaths.run();
1370
1371	if (SwitchPaths.getNumThreadingPaths() > `0`) {
1372	ThreadableLoops.push_back(Elt: SwitchPaths);
1373
1374	// For the time being limit this optimization to occurring once in a
1375	// function since it can change the CFG significantly. This is not a
1376	// strict requirement but it can cause buggy behavior if there is an
1377	// overlap of blocks in different opportunities. There is a lot of room to
1378	// experiment with catching more opportunities here.
1379	// NOTE: To release this contraint, we must handle LoopInfo invalidation
1380	break;
1381	}
1382	}
1383
1384	#ifdef NDEBUG
1385	LI->verify(DomTree: *DT);
1386	#endif
1387
1388	SmallPtrSet<const Value *, `32`> EphValues;
1389	if (ThreadableLoops.size() > `0`)
1390	CodeMetrics::collectEphemeralValues(L: &F, AC, EphValues);
1391
1392	for (AllSwitchPaths SwitchPaths : ThreadableLoops) {
1393	TransformDFA Transform(&SwitchPaths, DT, AC, TTI, ORE, EphValues);
1394	Transform.run();
1395	MadeChanges = true;
1396	LoopInfoBroken = true;
1397	}
1398
1399	#ifdef EXPENSIVE_CHECKS
1400	assert(DT->verify(DominatorTree::VerificationLevel::Full));
1401	verifyFunction(F, &dbgs());
1402	#endif
1403
1404	return MadeChanges;
1405	}
1406
1407	} // end anonymous namespace
1408
1409	/// Integrate with the new Pass Manager
1410	PreservedAnalyses DFAJumpThreadingPass::run(Function &F,
1411	FunctionAnalysisManager &AM) {
1412	AssumptionCache &AC = AM.getResult<AssumptionAnalysis>(IR&: F);
1413	DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(IR&: F);
1414	LoopInfo &LI = AM.getResult<LoopAnalysis>(IR&: F);
1415	TargetTransformInfo &TTI = AM.getResult<TargetIRAnalysis>(IR&: F);
1416	OptimizationRemarkEmitter ORE(&F);
1417	DFAJumpThreading ThreadImpl(&AC, &DT, &LI, &TTI, &ORE);
1418	if (!ThreadImpl.run(F))
1419	return PreservedAnalyses::all();
1420
1421	PreservedAnalyses PA;
1422	PA.preserve<DominatorTreeAnalysis>();
1423	if (!ThreadImpl.LoopInfoBroken)
1424	PA.preserve<LoopAnalysis>();
1425	return PA;
1426	}
1427

Browse the source code of llvm_projects/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp