| 1 | //===- SimplifyCFG.cpp - Code to perform CFG simplification ---------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // Peephole optimize the CFG. |
| 10 | // |
| 11 | //===----------------------------------------------------------------------===// |
| 12 | |
| 13 | #include "llvm/ADT/APInt.h" |
| 14 | #include "llvm/ADT/ArrayRef.h" |
| 15 | #include "llvm/ADT/DenseMap.h" |
| 16 | #include "llvm/ADT/MapVector.h" |
| 17 | #include "llvm/ADT/STLExtras.h" |
| 18 | #include "llvm/ADT/Sequence.h" |
| 19 | #include "llvm/ADT/SetOperations.h" |
| 20 | #include "llvm/ADT/SetVector.h" |
| 21 | #include "llvm/ADT/SmallPtrSet.h" |
| 22 | #include "llvm/ADT/SmallVector.h" |
| 23 | #include "llvm/ADT/Statistic.h" |
| 24 | #include "llvm/ADT/StringRef.h" |
| 25 | #include "llvm/Analysis/AssumptionCache.h" |
| 26 | #include "llvm/Analysis/CaptureTracking.h" |
| 27 | #include "llvm/Analysis/ConstantFolding.h" |
| 28 | #include "llvm/Analysis/DomTreeUpdater.h" |
| 29 | #include "llvm/Analysis/GuardUtils.h" |
| 30 | #include "llvm/Analysis/InstructionSimplify.h" |
| 31 | #include "llvm/Analysis/Loads.h" |
| 32 | #include "llvm/Analysis/MemorySSA.h" |
| 33 | #include "llvm/Analysis/MemorySSAUpdater.h" |
| 34 | #include "llvm/Analysis/TargetTransformInfo.h" |
| 35 | #include "llvm/Analysis/ValueTracking.h" |
| 36 | #include "llvm/IR/Attributes.h" |
| 37 | #include "llvm/IR/BasicBlock.h" |
| 38 | #include "llvm/IR/CFG.h" |
| 39 | #include "llvm/IR/Constant.h" |
| 40 | #include "llvm/IR/ConstantRange.h" |
| 41 | #include "llvm/IR/Constants.h" |
| 42 | #include "llvm/IR/DataLayout.h" |
| 43 | #include "llvm/IR/DebugInfo.h" |
| 44 | #include "llvm/IR/DerivedTypes.h" |
| 45 | #include "llvm/IR/Function.h" |
| 46 | #include "llvm/IR/GlobalValue.h" |
| 47 | #include "llvm/IR/GlobalVariable.h" |
| 48 | #include "llvm/IR/IRBuilder.h" |
| 49 | #include "llvm/IR/InstrTypes.h" |
| 50 | #include "llvm/IR/Instruction.h" |
| 51 | #include "llvm/IR/Instructions.h" |
| 52 | #include "llvm/IR/IntrinsicInst.h" |
| 53 | #include "llvm/IR/LLVMContext.h" |
| 54 | #include "llvm/IR/MDBuilder.h" |
| 55 | #include "llvm/IR/MemoryModelRelaxationAnnotations.h" |
| 56 | #include "llvm/IR/Metadata.h" |
| 57 | #include "llvm/IR/Module.h" |
| 58 | #include "llvm/IR/NoFolder.h" |
| 59 | #include "llvm/IR/Operator.h" |
| 60 | #include "llvm/IR/PatternMatch.h" |
| 61 | #include "llvm/IR/ProfDataUtils.h" |
| 62 | #include "llvm/IR/Type.h" |
| 63 | #include "llvm/IR/Use.h" |
| 64 | #include "llvm/IR/User.h" |
| 65 | #include "llvm/IR/Value.h" |
| 66 | #include "llvm/IR/ValueHandle.h" |
| 67 | #include "llvm/Support/BranchProbability.h" |
| 68 | #include "llvm/Support/Casting.h" |
| 69 | #include "llvm/Support/CommandLine.h" |
| 70 | #include "llvm/Support/Debug.h" |
| 71 | #include "llvm/Support/ErrorHandling.h" |
| 72 | #include "llvm/Support/KnownBits.h" |
| 73 | #include "llvm/Support/MathExtras.h" |
| 74 | #include "llvm/Support/raw_ostream.h" |
| 75 | #include "llvm/Transforms/Utils/BasicBlockUtils.h" |
| 76 | #include "llvm/Transforms/Utils/Cloning.h" |
| 77 | #include "llvm/Transforms/Utils/Local.h" |
| 78 | #include "llvm/Transforms/Utils/LockstepReverseIterator.h" |
| 79 | #include "llvm/Transforms/Utils/ValueMapper.h" |
| 80 | #include <algorithm> |
| 81 | #include <cassert> |
| 82 | #include <climits> |
| 83 | #include <cstddef> |
| 84 | #include <cstdint> |
| 85 | #include <iterator> |
| 86 | #include <map> |
| 87 | #include <optional> |
| 88 | #include <set> |
| 89 | #include <tuple> |
| 90 | #include <utility> |
| 91 | #include <vector> |
| 92 | |
| 93 | using namespace llvm; |
| 94 | using namespace PatternMatch; |
| 95 | |
| 96 | #define DEBUG_TYPE "simplifycfg" |
| 97 | |
| 98 | cl::opt<bool> llvm::RequireAndPreserveDomTree( |
| 99 | "simplifycfg-require-and-preserve-domtree" , cl::Hidden, |
| 100 | |
| 101 | cl::desc( |
| 102 | "Temporary development switch used to gradually uplift SimplifyCFG " |
| 103 | "into preserving DomTree," )); |
| 104 | |
| 105 | // Chosen as 2 so as to be cheap, but still to have enough power to fold |
| 106 | // a select, so the "clamp" idiom (of a min followed by a max) will be caught. |
| 107 | // To catch this, we need to fold a compare and a select, hence '2' being the |
| 108 | // minimum reasonable default. |
| 109 | static cl::opt<unsigned> PHINodeFoldingThreshold( |
| 110 | "phi-node-folding-threshold" , cl::Hidden, cl::init(Val: 2), |
| 111 | cl::desc( |
| 112 | "Control the amount of phi node folding to perform (default = 2)" )); |
| 113 | |
| 114 | static cl::opt<unsigned> TwoEntryPHINodeFoldingThreshold( |
| 115 | "two-entry-phi-node-folding-threshold" , cl::Hidden, cl::init(Val: 4), |
| 116 | cl::desc("Control the maximal total instruction cost that we are willing " |
| 117 | "to speculatively execute to fold a 2-entry PHI node into a " |
| 118 | "select (default = 4)" )); |
| 119 | |
| 120 | static cl::opt<bool> |
| 121 | HoistCommon("simplifycfg-hoist-common" , cl::Hidden, cl::init(Val: true), |
| 122 | cl::desc("Hoist common instructions up to the parent block" )); |
| 123 | |
| 124 | static cl::opt<bool> HoistLoadsWithCondFaulting( |
| 125 | "simplifycfg-hoist-loads-with-cond-faulting" , cl::Hidden, cl::init(Val: true), |
| 126 | cl::desc("Hoist loads if the target supports conditional faulting" )); |
| 127 | |
| 128 | static cl::opt<bool> HoistStoresWithCondFaulting( |
| 129 | "simplifycfg-hoist-stores-with-cond-faulting" , cl::Hidden, cl::init(Val: true), |
| 130 | cl::desc("Hoist stores if the target supports conditional faulting" )); |
| 131 | |
| 132 | static cl::opt<unsigned> HoistLoadsStoresWithCondFaultingThreshold( |
| 133 | "hoist-loads-stores-with-cond-faulting-threshold" , cl::Hidden, cl::init(Val: 6), |
| 134 | cl::desc("Control the maximal conditional load/store that we are willing " |
| 135 | "to speculatively execute to eliminate conditional branch " |
| 136 | "(default = 6)" )); |
| 137 | |
| 138 | static cl::opt<unsigned> |
| 139 | HoistCommonSkipLimit("simplifycfg-hoist-common-skip-limit" , cl::Hidden, |
| 140 | cl::init(Val: 20), |
| 141 | cl::desc("Allow reordering across at most this many " |
| 142 | "instructions when hoisting" )); |
| 143 | |
| 144 | static cl::opt<bool> |
| 145 | SinkCommon("simplifycfg-sink-common" , cl::Hidden, cl::init(Val: true), |
| 146 | cl::desc("Sink common instructions down to the end block" )); |
| 147 | |
| 148 | static cl::opt<bool> HoistCondStores( |
| 149 | "simplifycfg-hoist-cond-stores" , cl::Hidden, cl::init(Val: true), |
| 150 | cl::desc("Hoist conditional stores if an unconditional store precedes" )); |
| 151 | |
| 152 | static cl::opt<bool> MergeCondStores( |
| 153 | "simplifycfg-merge-cond-stores" , cl::Hidden, cl::init(Val: true), |
| 154 | cl::desc("Hoist conditional stores even if an unconditional store does not " |
| 155 | "precede - hoist multiple conditional stores into a single " |
| 156 | "predicated store" )); |
| 157 | |
| 158 | static cl::opt<bool> MergeCondStoresAggressively( |
| 159 | "simplifycfg-merge-cond-stores-aggressively" , cl::Hidden, cl::init(Val: false), |
| 160 | cl::desc("When merging conditional stores, do so even if the resultant " |
| 161 | "basic blocks are unlikely to be if-converted as a result" )); |
| 162 | |
| 163 | static cl::opt<bool> SpeculateOneExpensiveInst( |
| 164 | "speculate-one-expensive-inst" , cl::Hidden, cl::init(Val: true), |
| 165 | cl::desc("Allow exactly one expensive instruction to be speculatively " |
| 166 | "executed" )); |
| 167 | |
| 168 | static cl::opt<unsigned> MaxSpeculationDepth( |
| 169 | "max-speculation-depth" , cl::Hidden, cl::init(Val: 10), |
| 170 | cl::desc("Limit maximum recursion depth when calculating costs of " |
| 171 | "speculatively executed instructions" )); |
| 172 | |
| 173 | static cl::opt<int> |
| 174 | MaxSmallBlockSize("simplifycfg-max-small-block-size" , cl::Hidden, |
| 175 | cl::init(Val: 10), |
| 176 | cl::desc("Max size of a block which is still considered " |
| 177 | "small enough to thread through" )); |
| 178 | |
| 179 | // Two is chosen to allow one negation and a logical combine. |
| 180 | static cl::opt<unsigned> |
| 181 | BranchFoldThreshold("simplifycfg-branch-fold-threshold" , cl::Hidden, |
| 182 | cl::init(Val: 2), |
| 183 | cl::desc("Maximum cost of combining conditions when " |
| 184 | "folding branches" )); |
| 185 | |
| 186 | static cl::opt<unsigned> BranchFoldToCommonDestVectorMultiplier( |
| 187 | "simplifycfg-branch-fold-common-dest-vector-multiplier" , cl::Hidden, |
| 188 | cl::init(Val: 2), |
| 189 | cl::desc("Multiplier to apply to threshold when determining whether or not " |
| 190 | "to fold branch to common destination when vector operations are " |
| 191 | "present" )); |
| 192 | |
| 193 | static cl::opt<bool> EnableMergeCompatibleInvokes( |
| 194 | "simplifycfg-merge-compatible-invokes" , cl::Hidden, cl::init(Val: true), |
| 195 | cl::desc("Allow SimplifyCFG to merge invokes together when appropriate" )); |
| 196 | |
| 197 | static cl::opt<unsigned> MaxSwitchCasesPerResult( |
| 198 | "max-switch-cases-per-result" , cl::Hidden, cl::init(Val: 16), |
| 199 | cl::desc("Limit cases to analyze when converting a switch to select" )); |
| 200 | |
| 201 | STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps" ); |
| 202 | STATISTIC(NumLinearMaps, |
| 203 | "Number of switch instructions turned into linear mapping" ); |
| 204 | STATISTIC(NumLookupTables, |
| 205 | "Number of switch instructions turned into lookup tables" ); |
| 206 | STATISTIC( |
| 207 | NumLookupTablesHoles, |
| 208 | "Number of switch instructions turned into lookup tables (holes checked)" ); |
| 209 | STATISTIC(NumTableCmpReuses, "Number of reused switch table lookup compares" ); |
| 210 | STATISTIC(NumFoldValueComparisonIntoPredecessors, |
| 211 | "Number of value comparisons folded into predecessor basic blocks" ); |
| 212 | STATISTIC(NumFoldBranchToCommonDest, |
| 213 | "Number of branches folded into predecessor basic block" ); |
| 214 | STATISTIC( |
| 215 | NumHoistCommonCode, |
| 216 | "Number of common instruction 'blocks' hoisted up to the begin block" ); |
| 217 | STATISTIC(NumHoistCommonInstrs, |
| 218 | "Number of common instructions hoisted up to the begin block" ); |
| 219 | STATISTIC(NumSinkCommonCode, |
| 220 | "Number of common instruction 'blocks' sunk down to the end block" ); |
| 221 | STATISTIC(NumSinkCommonInstrs, |
| 222 | "Number of common instructions sunk down to the end block" ); |
| 223 | STATISTIC(NumSpeculations, "Number of speculative executed instructions" ); |
| 224 | STATISTIC(NumInvokes, |
| 225 | "Number of invokes with empty resume blocks simplified into calls" ); |
| 226 | STATISTIC(NumInvokesMerged, "Number of invokes that were merged together" ); |
| 227 | STATISTIC(NumInvokeSetsFormed, "Number of invoke sets that were formed" ); |
| 228 | |
| 229 | namespace { |
| 230 | |
| 231 | // The first field contains the value that the switch produces when a certain |
| 232 | // case group is selected, and the second field is a vector containing the |
| 233 | // cases composing the case group. |
| 234 | using SwitchCaseResultVectorTy = |
| 235 | SmallVector<std::pair<Constant *, SmallVector<ConstantInt *, 4>>, 2>; |
| 236 | |
| 237 | // The first field contains the phi node that generates a result of the switch |
| 238 | // and the second field contains the value generated for a certain case in the |
| 239 | // switch for that PHI. |
| 240 | using SwitchCaseResultsTy = SmallVector<std::pair<PHINode *, Constant *>, 4>; |
| 241 | |
| 242 | /// ValueEqualityComparisonCase - Represents a case of a switch. |
| 243 | struct ValueEqualityComparisonCase { |
| 244 | ConstantInt *Value; |
| 245 | BasicBlock *Dest; |
| 246 | |
| 247 | ValueEqualityComparisonCase(ConstantInt *Value, BasicBlock *Dest) |
| 248 | : Value(Value), Dest(Dest) {} |
| 249 | |
| 250 | bool operator<(ValueEqualityComparisonCase RHS) const { |
| 251 | // Comparing pointers is ok as we only rely on the order for uniquing. |
| 252 | return Value < RHS.Value; |
| 253 | } |
| 254 | |
| 255 | bool operator==(BasicBlock *RHSDest) const { return Dest == RHSDest; } |
| 256 | }; |
| 257 | |
| 258 | class SimplifyCFGOpt { |
| 259 | const TargetTransformInfo &TTI; |
| 260 | DomTreeUpdater *DTU; |
| 261 | const DataLayout &DL; |
| 262 | ArrayRef<WeakVH> ; |
| 263 | const SimplifyCFGOptions &Options; |
| 264 | bool Resimplify; |
| 265 | |
| 266 | Value *isValueEqualityComparison(Instruction *TI); |
| 267 | BasicBlock *getValueEqualityComparisonCases( |
| 268 | Instruction *TI, std::vector<ValueEqualityComparisonCase> &Cases); |
| 269 | bool simplifyEqualityComparisonWithOnlyPredecessor(Instruction *TI, |
| 270 | BasicBlock *Pred, |
| 271 | IRBuilder<> &Builder); |
| 272 | bool performValueComparisonIntoPredecessorFolding(Instruction *TI, Value *&CV, |
| 273 | Instruction *PTI, |
| 274 | IRBuilder<> &Builder); |
| 275 | bool foldValueComparisonIntoPredecessors(Instruction *TI, |
| 276 | IRBuilder<> &Builder); |
| 277 | |
| 278 | bool simplifyResume(ResumeInst *RI, IRBuilder<> &Builder); |
| 279 | bool simplifySingleResume(ResumeInst *RI); |
| 280 | bool simplifyCommonResume(ResumeInst *RI); |
| 281 | bool simplifyCleanupReturn(CleanupReturnInst *RI); |
| 282 | bool simplifyUnreachable(UnreachableInst *UI); |
| 283 | bool simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder); |
| 284 | bool simplifyDuplicateSwitchArms(SwitchInst *SI, DomTreeUpdater *DTU); |
| 285 | bool simplifyIndirectBr(IndirectBrInst *IBI); |
| 286 | bool simplifyBranch(BranchInst *Branch, IRBuilder<> &Builder); |
| 287 | bool simplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder); |
| 288 | bool simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder); |
| 289 | |
| 290 | bool tryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI, |
| 291 | IRBuilder<> &Builder); |
| 292 | |
| 293 | bool hoistCommonCodeFromSuccessors(Instruction *TI, bool AllInstsEqOnly); |
| 294 | bool hoistSuccIdenticalTerminatorToSwitchOrIf( |
| 295 | Instruction *TI, Instruction *I1, |
| 296 | SmallVectorImpl<Instruction *> &OtherSuccTIs); |
| 297 | bool speculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB); |
| 298 | bool simplifyTerminatorOnSelect(Instruction *OldTerm, Value *Cond, |
| 299 | BasicBlock *TrueBB, BasicBlock *FalseBB, |
| 300 | uint32_t TrueWeight, uint32_t FalseWeight); |
| 301 | bool simplifyBranchOnICmpChain(BranchInst *BI, IRBuilder<> &Builder, |
| 302 | const DataLayout &DL); |
| 303 | bool simplifySwitchOnSelect(SwitchInst *SI, SelectInst *Select); |
| 304 | bool simplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI); |
| 305 | bool turnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder); |
| 306 | |
| 307 | public: |
| 308 | SimplifyCFGOpt(const TargetTransformInfo &TTI, DomTreeUpdater *DTU, |
| 309 | const DataLayout &DL, ArrayRef<WeakVH> , |
| 310 | const SimplifyCFGOptions &Opts) |
| 311 | : TTI(TTI), DTU(DTU), DL(DL), LoopHeaders(LoopHeaders), Options(Opts) { |
| 312 | assert((!DTU || !DTU->hasPostDomTree()) && |
| 313 | "SimplifyCFG is not yet capable of maintaining validity of a " |
| 314 | "PostDomTree, so don't ask for it." ); |
| 315 | } |
| 316 | |
| 317 | bool simplifyOnce(BasicBlock *BB); |
| 318 | bool run(BasicBlock *BB); |
| 319 | |
| 320 | // Helper to set Resimplify and return change indication. |
| 321 | bool requestResimplify() { |
| 322 | Resimplify = true; |
| 323 | return true; |
| 324 | } |
| 325 | }; |
| 326 | |
| 327 | } // end anonymous namespace |
| 328 | |
| 329 | /// Return true if all the PHI nodes in the basic block \p BB |
| 330 | /// receive compatible (identical) incoming values when coming from |
| 331 | /// all of the predecessor blocks that are specified in \p IncomingBlocks. |
| 332 | /// |
| 333 | /// Note that if the values aren't exactly identical, but \p EquivalenceSet |
| 334 | /// is provided, and *both* of the values are present in the set, |
| 335 | /// then they are considered equal. |
| 336 | static bool incomingValuesAreCompatible( |
| 337 | BasicBlock *BB, ArrayRef<BasicBlock *> IncomingBlocks, |
| 338 | SmallPtrSetImpl<Value *> *EquivalenceSet = nullptr) { |
| 339 | assert(IncomingBlocks.size() == 2 && |
| 340 | "Only for a pair of incoming blocks at the time!" ); |
| 341 | |
| 342 | // FIXME: it is okay if one of the incoming values is an `undef` value, |
| 343 | // iff the other incoming value is guaranteed to be a non-poison value. |
| 344 | // FIXME: it is okay if one of the incoming values is a `poison` value. |
| 345 | return all_of(Range: BB->phis(), P: [IncomingBlocks, EquivalenceSet](PHINode &PN) { |
| 346 | Value *IV0 = PN.getIncomingValueForBlock(BB: IncomingBlocks[0]); |
| 347 | Value *IV1 = PN.getIncomingValueForBlock(BB: IncomingBlocks[1]); |
| 348 | if (IV0 == IV1) |
| 349 | return true; |
| 350 | if (EquivalenceSet && EquivalenceSet->contains(Ptr: IV0) && |
| 351 | EquivalenceSet->contains(Ptr: IV1)) |
| 352 | return true; |
| 353 | return false; |
| 354 | }); |
| 355 | } |
| 356 | |
| 357 | /// Return true if it is safe to merge these two |
| 358 | /// terminator instructions together. |
| 359 | static bool |
| 360 | safeToMergeTerminators(Instruction *SI1, Instruction *SI2, |
| 361 | SmallSetVector<BasicBlock *, 4> *FailBlocks = nullptr) { |
| 362 | if (SI1 == SI2) |
| 363 | return false; // Can't merge with self! |
| 364 | |
| 365 | // It is not safe to merge these two switch instructions if they have a common |
| 366 | // successor, and if that successor has a PHI node, and if *that* PHI node has |
| 367 | // conflicting incoming values from the two switch blocks. |
| 368 | BasicBlock *SI1BB = SI1->getParent(); |
| 369 | BasicBlock *SI2BB = SI2->getParent(); |
| 370 | |
| 371 | SmallPtrSet<BasicBlock *, 16> SI1Succs(llvm::from_range, successors(BB: SI1BB)); |
| 372 | bool Fail = false; |
| 373 | for (BasicBlock *Succ : successors(BB: SI2BB)) { |
| 374 | if (!SI1Succs.count(Ptr: Succ)) |
| 375 | continue; |
| 376 | if (incomingValuesAreCompatible(BB: Succ, IncomingBlocks: {SI1BB, SI2BB})) |
| 377 | continue; |
| 378 | Fail = true; |
| 379 | if (FailBlocks) |
| 380 | FailBlocks->insert(X: Succ); |
| 381 | else |
| 382 | break; |
| 383 | } |
| 384 | |
| 385 | return !Fail; |
| 386 | } |
| 387 | |
| 388 | /// Update PHI nodes in Succ to indicate that there will now be entries in it |
| 389 | /// from the 'NewPred' block. The values that will be flowing into the PHI nodes |
| 390 | /// will be the same as those coming in from ExistPred, an existing predecessor |
| 391 | /// of Succ. |
| 392 | static void addPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred, |
| 393 | BasicBlock *ExistPred, |
| 394 | MemorySSAUpdater *MSSAU = nullptr) { |
| 395 | for (PHINode &PN : Succ->phis()) |
| 396 | PN.addIncoming(V: PN.getIncomingValueForBlock(BB: ExistPred), BB: NewPred); |
| 397 | if (MSSAU) |
| 398 | if (auto *MPhi = MSSAU->getMemorySSA()->getMemoryAccess(BB: Succ)) |
| 399 | MPhi->addIncoming(V: MPhi->getIncomingValueForBlock(BB: ExistPred), BB: NewPred); |
| 400 | } |
| 401 | |
| 402 | /// Compute an abstract "cost" of speculating the given instruction, |
| 403 | /// which is assumed to be safe to speculate. TCC_Free means cheap, |
| 404 | /// TCC_Basic means less cheap, and TCC_Expensive means prohibitively |
| 405 | /// expensive. |
| 406 | static InstructionCost computeSpeculationCost(const User *I, |
| 407 | const TargetTransformInfo &TTI) { |
| 408 | return TTI.getInstructionCost(U: I, CostKind: TargetTransformInfo::TCK_SizeAndLatency); |
| 409 | } |
| 410 | |
| 411 | /// If we have a merge point of an "if condition" as accepted above, |
| 412 | /// return true if the specified value dominates the block. We don't handle |
| 413 | /// the true generality of domination here, just a special case which works |
| 414 | /// well enough for us. |
| 415 | /// |
| 416 | /// If AggressiveInsts is non-null, and if V does not dominate BB, we check to |
| 417 | /// see if V (which must be an instruction) and its recursive operands |
| 418 | /// that do not dominate BB have a combined cost lower than Budget and |
| 419 | /// are non-trapping. If both are true, the instruction is inserted into the |
| 420 | /// set and true is returned. |
| 421 | /// |
| 422 | /// The cost for most non-trapping instructions is defined as 1 except for |
| 423 | /// Select whose cost is 2. |
| 424 | /// |
| 425 | /// After this function returns, Cost is increased by the cost of |
| 426 | /// V plus its non-dominating operands. If that cost is greater than |
| 427 | /// Budget, false is returned and Cost is undefined. |
| 428 | static bool dominatesMergePoint( |
| 429 | Value *V, BasicBlock *BB, Instruction *InsertPt, |
| 430 | SmallPtrSetImpl<Instruction *> &AggressiveInsts, InstructionCost &Cost, |
| 431 | InstructionCost Budget, const TargetTransformInfo &TTI, AssumptionCache *AC, |
| 432 | SmallPtrSetImpl<Instruction *> &ZeroCostInstructions, unsigned Depth = 0) { |
| 433 | // It is possible to hit a zero-cost cycle (phi/gep instructions for example), |
| 434 | // so limit the recursion depth. |
| 435 | // TODO: While this recursion limit does prevent pathological behavior, it |
| 436 | // would be better to track visited instructions to avoid cycles. |
| 437 | if (Depth == MaxSpeculationDepth) |
| 438 | return false; |
| 439 | |
| 440 | Instruction *I = dyn_cast<Instruction>(Val: V); |
| 441 | if (!I) { |
| 442 | // Non-instructions dominate all instructions and can be executed |
| 443 | // unconditionally. |
| 444 | return true; |
| 445 | } |
| 446 | BasicBlock *PBB = I->getParent(); |
| 447 | |
| 448 | // We don't want to allow weird loops that might have the "if condition" in |
| 449 | // the bottom of this block. |
| 450 | if (PBB == BB) |
| 451 | return false; |
| 452 | |
| 453 | // If this instruction is defined in a block that contains an unconditional |
| 454 | // branch to BB, then it must be in the 'conditional' part of the "if |
| 455 | // statement". If not, it definitely dominates the region. |
| 456 | BranchInst *BI = dyn_cast<BranchInst>(Val: PBB->getTerminator()); |
| 457 | if (!BI || BI->isConditional() || BI->getSuccessor(i: 0) != BB) |
| 458 | return true; |
| 459 | |
| 460 | // If we have seen this instruction before, don't count it again. |
| 461 | if (AggressiveInsts.count(Ptr: I)) |
| 462 | return true; |
| 463 | |
| 464 | // Okay, it looks like the instruction IS in the "condition". Check to |
| 465 | // see if it's a cheap instruction to unconditionally compute, and if it |
| 466 | // only uses stuff defined outside of the condition. If so, hoist it out. |
| 467 | if (!isSafeToSpeculativelyExecute(I, CtxI: InsertPt, AC)) |
| 468 | return false; |
| 469 | |
| 470 | // Overflow arithmetic instruction plus extract value are usually generated |
| 471 | // when a division is being replaced. But, in this case, the zero check may |
| 472 | // still be kept in the code. In that case it would be worth to hoist these |
| 473 | // two instruction out of the basic block. Let's treat this pattern as one |
| 474 | // single cheap instruction here! |
| 475 | WithOverflowInst *OverflowInst; |
| 476 | if (match(V: I, P: m_ExtractValue<1>(V: m_OneUse(SubPattern: m_WithOverflowInst(I&: OverflowInst))))) { |
| 477 | ZeroCostInstructions.insert(Ptr: OverflowInst); |
| 478 | Cost += 1; |
| 479 | } else if (!ZeroCostInstructions.contains(Ptr: I)) |
| 480 | Cost += computeSpeculationCost(I, TTI); |
| 481 | |
| 482 | // Allow exactly one instruction to be speculated regardless of its cost |
| 483 | // (as long as it is safe to do so). |
| 484 | // This is intended to flatten the CFG even if the instruction is a division |
| 485 | // or other expensive operation. The speculation of an expensive instruction |
| 486 | // is expected to be undone in CodeGenPrepare if the speculation has not |
| 487 | // enabled further IR optimizations. |
| 488 | if (Cost > Budget && |
| 489 | (!SpeculateOneExpensiveInst || !AggressiveInsts.empty() || Depth > 0 || |
| 490 | !Cost.isValid())) |
| 491 | return false; |
| 492 | |
| 493 | // Okay, we can only really hoist these out if their operands do |
| 494 | // not take us over the cost threshold. |
| 495 | for (Use &Op : I->operands()) |
| 496 | if (!dominatesMergePoint(V: Op, BB, InsertPt, AggressiveInsts, Cost, Budget, |
| 497 | TTI, AC, ZeroCostInstructions, Depth: Depth + 1)) |
| 498 | return false; |
| 499 | // Okay, it's safe to do this! Remember this instruction. |
| 500 | AggressiveInsts.insert(Ptr: I); |
| 501 | return true; |
| 502 | } |
| 503 | |
| 504 | /// Extract ConstantInt from value, looking through IntToPtr |
| 505 | /// and PointerNullValue. Return NULL if value is not a constant int. |
| 506 | static ConstantInt *getConstantInt(Value *V, const DataLayout &DL) { |
| 507 | // Normal constant int. |
| 508 | ConstantInt *CI = dyn_cast<ConstantInt>(Val: V); |
| 509 | if (CI || !isa<Constant>(Val: V) || !V->getType()->isPointerTy() || |
| 510 | DL.isNonIntegralPointerType(Ty: V->getType())) |
| 511 | return CI; |
| 512 | |
| 513 | // This is some kind of pointer constant. Turn it into a pointer-sized |
| 514 | // ConstantInt if possible. |
| 515 | IntegerType *PtrTy = cast<IntegerType>(Val: DL.getIntPtrType(V->getType())); |
| 516 | |
| 517 | // Null pointer means 0, see SelectionDAGBuilder::getValue(const Value*). |
| 518 | if (isa<ConstantPointerNull>(Val: V)) |
| 519 | return ConstantInt::get(Ty: PtrTy, V: 0); |
| 520 | |
| 521 | // IntToPtr const int. |
| 522 | if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Val: V)) |
| 523 | if (CE->getOpcode() == Instruction::IntToPtr) |
| 524 | if (ConstantInt *CI = dyn_cast<ConstantInt>(Val: CE->getOperand(i_nocapture: 0))) { |
| 525 | // The constant is very likely to have the right type already. |
| 526 | if (CI->getType() == PtrTy) |
| 527 | return CI; |
| 528 | else |
| 529 | return cast<ConstantInt>( |
| 530 | Val: ConstantFoldIntegerCast(C: CI, DestTy: PtrTy, /*isSigned=*/IsSigned: false, DL)); |
| 531 | } |
| 532 | return nullptr; |
| 533 | } |
| 534 | |
| 535 | namespace { |
| 536 | |
| 537 | /// Given a chain of or (||) or and (&&) comparison of a value against a |
| 538 | /// constant, this will try to recover the information required for a switch |
| 539 | /// structure. |
| 540 | /// It will depth-first traverse the chain of comparison, seeking for patterns |
| 541 | /// like %a == 12 or %a < 4 and combine them to produce a set of integer |
| 542 | /// representing the different cases for the switch. |
| 543 | /// Note that if the chain is composed of '||' it will build the set of elements |
| 544 | /// that matches the comparisons (i.e. any of this value validate the chain) |
| 545 | /// while for a chain of '&&' it will build the set elements that make the test |
| 546 | /// fail. |
| 547 | struct ConstantComparesGatherer { |
| 548 | const DataLayout &DL; |
| 549 | |
| 550 | /// Value found for the switch comparison |
| 551 | Value *CompValue = nullptr; |
| 552 | |
| 553 | /// Extra clause to be checked before the switch |
| 554 | Value * = nullptr; |
| 555 | |
| 556 | /// Set of integers to match in switch |
| 557 | SmallVector<ConstantInt *, 8> Vals; |
| 558 | |
| 559 | /// Number of comparisons matched in the and/or chain |
| 560 | unsigned UsedICmps = 0; |
| 561 | |
| 562 | /// Construct and compute the result for the comparison instruction Cond |
| 563 | ConstantComparesGatherer(Instruction *Cond, const DataLayout &DL) : DL(DL) { |
| 564 | gather(V: Cond); |
| 565 | } |
| 566 | |
| 567 | ConstantComparesGatherer(const ConstantComparesGatherer &) = delete; |
| 568 | ConstantComparesGatherer & |
| 569 | operator=(const ConstantComparesGatherer &) = delete; |
| 570 | |
| 571 | private: |
| 572 | /// Try to set the current value used for the comparison, it succeeds only if |
| 573 | /// it wasn't set before or if the new value is the same as the old one |
| 574 | bool setValueOnce(Value *NewVal) { |
| 575 | if (CompValue && CompValue != NewVal) |
| 576 | return false; |
| 577 | CompValue = NewVal; |
| 578 | return (CompValue != nullptr); |
| 579 | } |
| 580 | |
| 581 | /// Try to match Instruction "I" as a comparison against a constant and |
| 582 | /// populates the array Vals with the set of values that match (or do not |
| 583 | /// match depending on isEQ). |
| 584 | /// Return false on failure. On success, the Value the comparison matched |
| 585 | /// against is placed in CompValue. |
| 586 | /// If CompValue is already set, the function is expected to fail if a match |
| 587 | /// is found but the value compared to is different. |
| 588 | bool matchInstruction(Instruction *I, bool isEQ) { |
| 589 | // If this is an icmp against a constant, handle this as one of the cases. |
| 590 | ICmpInst *ICI; |
| 591 | ConstantInt *C; |
| 592 | if (!((ICI = dyn_cast<ICmpInst>(Val: I)) && |
| 593 | (C = getConstantInt(V: I->getOperand(i: 1), DL)))) { |
| 594 | return false; |
| 595 | } |
| 596 | |
| 597 | Value *RHSVal; |
| 598 | const APInt *RHSC; |
| 599 | |
| 600 | // Pattern match a special case |
| 601 | // (x & ~2^z) == y --> x == y || x == y|2^z |
| 602 | // This undoes a transformation done by instcombine to fuse 2 compares. |
| 603 | if (ICI->getPredicate() == (isEQ ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE)) { |
| 604 | // It's a little bit hard to see why the following transformations are |
| 605 | // correct. Here is a CVC3 program to verify them for 64-bit values: |
| 606 | |
| 607 | /* |
| 608 | ONE : BITVECTOR(64) = BVZEROEXTEND(0bin1, 63); |
| 609 | x : BITVECTOR(64); |
| 610 | y : BITVECTOR(64); |
| 611 | z : BITVECTOR(64); |
| 612 | mask : BITVECTOR(64) = BVSHL(ONE, z); |
| 613 | QUERY( (y & ~mask = y) => |
| 614 | ((x & ~mask = y) <=> (x = y OR x = (y | mask))) |
| 615 | ); |
| 616 | QUERY( (y | mask = y) => |
| 617 | ((x | mask = y) <=> (x = y OR x = (y & ~mask))) |
| 618 | ); |
| 619 | */ |
| 620 | |
| 621 | // Please note that each pattern must be a dual implication (<--> or |
| 622 | // iff). One directional implication can create spurious matches. If the |
| 623 | // implication is only one-way, an unsatisfiable condition on the left |
| 624 | // side can imply a satisfiable condition on the right side. Dual |
| 625 | // implication ensures that satisfiable conditions are transformed to |
| 626 | // other satisfiable conditions and unsatisfiable conditions are |
| 627 | // transformed to other unsatisfiable conditions. |
| 628 | |
| 629 | // Here is a concrete example of a unsatisfiable condition on the left |
| 630 | // implying a satisfiable condition on the right: |
| 631 | // |
| 632 | // mask = (1 << z) |
| 633 | // (x & ~mask) == y --> (x == y || x == (y | mask)) |
| 634 | // |
| 635 | // Substituting y = 3, z = 0 yields: |
| 636 | // (x & -2) == 3 --> (x == 3 || x == 2) |
| 637 | |
| 638 | // Pattern match a special case: |
| 639 | /* |
| 640 | QUERY( (y & ~mask = y) => |
| 641 | ((x & ~mask = y) <=> (x = y OR x = (y | mask))) |
| 642 | ); |
| 643 | */ |
| 644 | if (match(V: ICI->getOperand(i_nocapture: 0), |
| 645 | P: m_And(L: m_Value(V&: RHSVal), R: m_APInt(Res&: RHSC)))) { |
| 646 | APInt Mask = ~*RHSC; |
| 647 | if (Mask.isPowerOf2() && (C->getValue() & ~Mask) == C->getValue()) { |
| 648 | // If we already have a value for the switch, it has to match! |
| 649 | if (!setValueOnce(RHSVal)) |
| 650 | return false; |
| 651 | |
| 652 | Vals.push_back(Elt: C); |
| 653 | Vals.push_back( |
| 654 | Elt: ConstantInt::get(Context&: C->getContext(), |
| 655 | V: C->getValue() | Mask)); |
| 656 | UsedICmps++; |
| 657 | return true; |
| 658 | } |
| 659 | } |
| 660 | |
| 661 | // Pattern match a special case: |
| 662 | /* |
| 663 | QUERY( (y | mask = y) => |
| 664 | ((x | mask = y) <=> (x = y OR x = (y & ~mask))) |
| 665 | ); |
| 666 | */ |
| 667 | if (match(V: ICI->getOperand(i_nocapture: 0), |
| 668 | P: m_Or(L: m_Value(V&: RHSVal), R: m_APInt(Res&: RHSC)))) { |
| 669 | APInt Mask = *RHSC; |
| 670 | if (Mask.isPowerOf2() && (C->getValue() | Mask) == C->getValue()) { |
| 671 | // If we already have a value for the switch, it has to match! |
| 672 | if (!setValueOnce(RHSVal)) |
| 673 | return false; |
| 674 | |
| 675 | Vals.push_back(Elt: C); |
| 676 | Vals.push_back(Elt: ConstantInt::get(Context&: C->getContext(), |
| 677 | V: C->getValue() & ~Mask)); |
| 678 | UsedICmps++; |
| 679 | return true; |
| 680 | } |
| 681 | } |
| 682 | |
| 683 | // If we already have a value for the switch, it has to match! |
| 684 | if (!setValueOnce(ICI->getOperand(i_nocapture: 0))) |
| 685 | return false; |
| 686 | |
| 687 | UsedICmps++; |
| 688 | Vals.push_back(Elt: C); |
| 689 | return ICI->getOperand(i_nocapture: 0); |
| 690 | } |
| 691 | |
| 692 | // If we have "x ult 3", for example, then we can add 0,1,2 to the set. |
| 693 | ConstantRange Span = |
| 694 | ConstantRange::makeExactICmpRegion(Pred: ICI->getPredicate(), Other: C->getValue()); |
| 695 | |
| 696 | // Shift the range if the compare is fed by an add. This is the range |
| 697 | // compare idiom as emitted by instcombine. |
| 698 | Value *CandidateVal = I->getOperand(i: 0); |
| 699 | if (match(V: I->getOperand(i: 0), P: m_Add(L: m_Value(V&: RHSVal), R: m_APInt(Res&: RHSC)))) { |
| 700 | Span = Span.subtract(CI: *RHSC); |
| 701 | CandidateVal = RHSVal; |
| 702 | } |
| 703 | |
| 704 | // If this is an and/!= check, then we are looking to build the set of |
| 705 | // value that *don't* pass the and chain. I.e. to turn "x ugt 2" into |
| 706 | // x != 0 && x != 1. |
| 707 | if (!isEQ) |
| 708 | Span = Span.inverse(); |
| 709 | |
| 710 | // If there are a ton of values, we don't want to make a ginormous switch. |
| 711 | if (Span.isSizeLargerThan(MaxSize: 8) || Span.isEmptySet()) { |
| 712 | return false; |
| 713 | } |
| 714 | |
| 715 | // If we already have a value for the switch, it has to match! |
| 716 | if (!setValueOnce(CandidateVal)) |
| 717 | return false; |
| 718 | |
| 719 | // Add all values from the range to the set |
| 720 | for (APInt Tmp = Span.getLower(); Tmp != Span.getUpper(); ++Tmp) |
| 721 | Vals.push_back(Elt: ConstantInt::get(Context&: I->getContext(), V: Tmp)); |
| 722 | |
| 723 | UsedICmps++; |
| 724 | return true; |
| 725 | } |
| 726 | |
| 727 | /// Given a potentially 'or'd or 'and'd together collection of icmp |
| 728 | /// eq/ne/lt/gt instructions that compare a value against a constant, extract |
| 729 | /// the value being compared, and stick the list constants into the Vals |
| 730 | /// vector. |
| 731 | /// One "Extra" case is allowed to differ from the other. |
| 732 | void gather(Value *V) { |
| 733 | bool isEQ = match(V, P: m_LogicalOr(L: m_Value(), R: m_Value())); |
| 734 | |
| 735 | // Keep a stack (SmallVector for efficiency) for depth-first traversal |
| 736 | SmallVector<Value *, 8> DFT; |
| 737 | SmallPtrSet<Value *, 8> Visited; |
| 738 | |
| 739 | // Initialize |
| 740 | Visited.insert(Ptr: V); |
| 741 | DFT.push_back(Elt: V); |
| 742 | |
| 743 | while (!DFT.empty()) { |
| 744 | V = DFT.pop_back_val(); |
| 745 | |
| 746 | if (Instruction *I = dyn_cast<Instruction>(Val: V)) { |
| 747 | // If it is a || (or && depending on isEQ), process the operands. |
| 748 | Value *Op0, *Op1; |
| 749 | if (isEQ ? match(V: I, P: m_LogicalOr(L: m_Value(V&: Op0), R: m_Value(V&: Op1))) |
| 750 | : match(V: I, P: m_LogicalAnd(L: m_Value(V&: Op0), R: m_Value(V&: Op1)))) { |
| 751 | if (Visited.insert(Ptr: Op1).second) |
| 752 | DFT.push_back(Elt: Op1); |
| 753 | if (Visited.insert(Ptr: Op0).second) |
| 754 | DFT.push_back(Elt: Op0); |
| 755 | |
| 756 | continue; |
| 757 | } |
| 758 | |
| 759 | // Try to match the current instruction |
| 760 | if (matchInstruction(I, isEQ)) |
| 761 | // Match succeed, continue the loop |
| 762 | continue; |
| 763 | } |
| 764 | |
| 765 | // One element of the sequence of || (or &&) could not be match as a |
| 766 | // comparison against the same value as the others. |
| 767 | // We allow only one "Extra" case to be checked before the switch |
| 768 | if (!Extra) { |
| 769 | Extra = V; |
| 770 | continue; |
| 771 | } |
| 772 | // Failed to parse a proper sequence, abort now |
| 773 | CompValue = nullptr; |
| 774 | break; |
| 775 | } |
| 776 | } |
| 777 | }; |
| 778 | |
| 779 | } // end anonymous namespace |
| 780 | |
| 781 | static void eraseTerminatorAndDCECond(Instruction *TI, |
| 782 | MemorySSAUpdater *MSSAU = nullptr) { |
| 783 | Instruction *Cond = nullptr; |
| 784 | if (SwitchInst *SI = dyn_cast<SwitchInst>(Val: TI)) { |
| 785 | Cond = dyn_cast<Instruction>(Val: SI->getCondition()); |
| 786 | } else if (BranchInst *BI = dyn_cast<BranchInst>(Val: TI)) { |
| 787 | if (BI->isConditional()) |
| 788 | Cond = dyn_cast<Instruction>(Val: BI->getCondition()); |
| 789 | } else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(Val: TI)) { |
| 790 | Cond = dyn_cast<Instruction>(Val: IBI->getAddress()); |
| 791 | } |
| 792 | |
| 793 | TI->eraseFromParent(); |
| 794 | if (Cond) |
| 795 | RecursivelyDeleteTriviallyDeadInstructions(V: Cond, TLI: nullptr, MSSAU); |
| 796 | } |
| 797 | |
| 798 | /// Return true if the specified terminator checks |
| 799 | /// to see if a value is equal to constant integer value. |
| 800 | Value *SimplifyCFGOpt::isValueEqualityComparison(Instruction *TI) { |
| 801 | Value *CV = nullptr; |
| 802 | if (SwitchInst *SI = dyn_cast<SwitchInst>(Val: TI)) { |
| 803 | // Do not permit merging of large switch instructions into their |
| 804 | // predecessors unless there is only one predecessor. |
| 805 | if (!SI->getParent()->hasNPredecessorsOrMore(N: 128 / SI->getNumSuccessors())) |
| 806 | CV = SI->getCondition(); |
| 807 | } else if (BranchInst *BI = dyn_cast<BranchInst>(Val: TI)) |
| 808 | if (BI->isConditional() && BI->getCondition()->hasOneUse()) |
| 809 | if (ICmpInst *ICI = dyn_cast<ICmpInst>(Val: BI->getCondition())) { |
| 810 | if (ICI->isEquality() && getConstantInt(V: ICI->getOperand(i_nocapture: 1), DL)) |
| 811 | CV = ICI->getOperand(i_nocapture: 0); |
| 812 | } |
| 813 | |
| 814 | // Unwrap any lossless ptrtoint cast. |
| 815 | if (CV) { |
| 816 | if (PtrToIntInst *PTII = dyn_cast<PtrToIntInst>(Val: CV)) { |
| 817 | Value *Ptr = PTII->getPointerOperand(); |
| 818 | if (PTII->getType() == DL.getIntPtrType(Ptr->getType())) |
| 819 | CV = Ptr; |
| 820 | } |
| 821 | } |
| 822 | return CV; |
| 823 | } |
| 824 | |
| 825 | /// Given a value comparison instruction, |
| 826 | /// decode all of the 'cases' that it represents and return the 'default' block. |
| 827 | BasicBlock *SimplifyCFGOpt::getValueEqualityComparisonCases( |
| 828 | Instruction *TI, std::vector<ValueEqualityComparisonCase> &Cases) { |
| 829 | if (SwitchInst *SI = dyn_cast<SwitchInst>(Val: TI)) { |
| 830 | Cases.reserve(n: SI->getNumCases()); |
| 831 | for (auto Case : SI->cases()) |
| 832 | Cases.push_back(x: ValueEqualityComparisonCase(Case.getCaseValue(), |
| 833 | Case.getCaseSuccessor())); |
| 834 | return SI->getDefaultDest(); |
| 835 | } |
| 836 | |
| 837 | BranchInst *BI = cast<BranchInst>(Val: TI); |
| 838 | ICmpInst *ICI = cast<ICmpInst>(Val: BI->getCondition()); |
| 839 | BasicBlock *Succ = BI->getSuccessor(i: ICI->getPredicate() == ICmpInst::ICMP_NE); |
| 840 | Cases.push_back(x: ValueEqualityComparisonCase( |
| 841 | getConstantInt(V: ICI->getOperand(i_nocapture: 1), DL), Succ)); |
| 842 | return BI->getSuccessor(i: ICI->getPredicate() == ICmpInst::ICMP_EQ); |
| 843 | } |
| 844 | |
/// Given a vector of bb/value pairs, remove any entries
/// in the list that match the specified block.
static void
eliminateBlockCases(BasicBlock *BB,
                    std::vector<ValueEqualityComparisonCase> &Cases) {
  // llvm::erase drops every element that compares equal to BB — presumably
  // via an equality comparison against the case's destination block declared
  // on ValueEqualityComparisonCase elsewhere in this file (TODO confirm).
  llvm::erase(C&: Cases, V: BB);
}
| 852 | |
| 853 | /// Return true if there are any keys in C1 that exist in C2 as well. |
| 854 | static bool valuesOverlap(std::vector<ValueEqualityComparisonCase> &C1, |
| 855 | std::vector<ValueEqualityComparisonCase> &C2) { |
| 856 | std::vector<ValueEqualityComparisonCase> *V1 = &C1, *V2 = &C2; |
| 857 | |
| 858 | // Make V1 be smaller than V2. |
| 859 | if (V1->size() > V2->size()) |
| 860 | std::swap(a&: V1, b&: V2); |
| 861 | |
| 862 | if (V1->empty()) |
| 863 | return false; |
| 864 | if (V1->size() == 1) { |
| 865 | // Just scan V2. |
| 866 | ConstantInt *TheVal = (*V1)[0].Value; |
| 867 | for (const ValueEqualityComparisonCase &VECC : *V2) |
| 868 | if (TheVal == VECC.Value) |
| 869 | return true; |
| 870 | } |
| 871 | |
| 872 | // Otherwise, just sort both lists and compare element by element. |
| 873 | array_pod_sort(Start: V1->begin(), End: V1->end()); |
| 874 | array_pod_sort(Start: V2->begin(), End: V2->end()); |
| 875 | unsigned i1 = 0, i2 = 0, e1 = V1->size(), e2 = V2->size(); |
| 876 | while (i1 != e1 && i2 != e2) { |
| 877 | if ((*V1)[i1].Value == (*V2)[i2].Value) |
| 878 | return true; |
| 879 | if ((*V1)[i1].Value < (*V2)[i2].Value) |
| 880 | ++i1; |
| 881 | else |
| 882 | ++i2; |
| 883 | } |
| 884 | return false; |
| 885 | } |
| 886 | |
| 887 | // Set branch weights on SwitchInst. This sets the metadata if there is at |
| 888 | // least one non-zero weight. |
| 889 | static void setBranchWeights(SwitchInst *SI, ArrayRef<uint32_t> Weights, |
| 890 | bool IsExpected) { |
| 891 | // Check that there is at least one non-zero weight. Otherwise, pass |
| 892 | // nullptr to setMetadata which will erase the existing metadata. |
| 893 | MDNode *N = nullptr; |
| 894 | if (llvm::any_of(Range&: Weights, P: [](uint32_t W) { return W != 0; })) |
| 895 | N = MDBuilder(SI->getParent()->getContext()) |
| 896 | .createBranchWeights(Weights, IsExpected); |
| 897 | SI->setMetadata(KindID: LLVMContext::MD_prof, Node: N); |
| 898 | } |
| 899 | |
| 900 | // Similar to the above, but for branch and select instructions that take |
| 901 | // exactly 2 weights. |
| 902 | static void setBranchWeights(Instruction *I, uint32_t TrueWeight, |
| 903 | uint32_t FalseWeight, bool IsExpected) { |
| 904 | assert(isa<BranchInst>(I) || isa<SelectInst>(I)); |
| 905 | // Check that there is at least one non-zero weight. Otherwise, pass |
| 906 | // nullptr to setMetadata which will erase the existing metadata. |
| 907 | MDNode *N = nullptr; |
| 908 | if (TrueWeight || FalseWeight) |
| 909 | N = MDBuilder(I->getParent()->getContext()) |
| 910 | .createBranchWeights(TrueWeight, FalseWeight, IsExpected); |
| 911 | I->setMetadata(KindID: LLVMContext::MD_prof, Node: N); |
| 912 | } |
| 913 | |
/// If TI is known to be a terminator instruction and its block is known to
/// only have a single predecessor block, check to see if that predecessor is
/// also a value comparison with the same value, and if that comparison
/// determines the outcome of this comparison. If so, simplify TI. This does a
/// very limited form of jump threading.
///
/// Returns true if TI was simplified (and the CFG/DomTree updated).
bool SimplifyCFGOpt::simplifyEqualityComparisonWithOnlyPredecessor(
    Instruction *TI, BasicBlock *Pred, IRBuilder<> &Builder) {
  Value *PredVal = isValueEqualityComparison(TI: Pred->getTerminator());
  if (!PredVal)
    return false; // Not a value comparison in predecessor.

  Value *ThisVal = isValueEqualityComparison(TI);
  assert(ThisVal && "This isn't a value comparison!!");
  if (ThisVal != PredVal)
    return false; // Different predicates.

  // TODO: Preserve branch weight metadata, similarly to how
  // foldValueComparisonIntoPredecessors preserves it.

  // Find out information about when control will move from Pred to TI's block.
  std::vector<ValueEqualityComparisonCase> PredCases;
  BasicBlock *PredDef =
      getValueEqualityComparisonCases(TI: Pred->getTerminator(), Cases&: PredCases);
  eliminateBlockCases(BB: PredDef, Cases&: PredCases); // Remove default from cases.

  // Find information about how control leaves this block.
  std::vector<ValueEqualityComparisonCase> ThisCases;
  BasicBlock *ThisDef = getValueEqualityComparisonCases(TI, Cases&: ThisCases);
  eliminateBlockCases(BB: ThisDef, Cases&: ThisCases); // Remove default from cases.

  // If TI's block is the default block from Pred's comparison, potentially
  // simplify TI based on this knowledge.
  if (PredDef == TI->getParent()) {
    // If we are here, we know that the value is none of those cases listed in
    // PredCases. If there are any cases in ThisCases that are in PredCases, we
    // can simplify TI.
    if (!valuesOverlap(C1&: PredCases, C2&: ThisCases))
      return false;

    if (isa<BranchInst>(Val: TI)) {
      // Okay, one of the successors of this condbr is dead. Convert it to a
      // uncond br.
      assert(ThisCases.size() == 1 && "Branch can only have one case!");
      // Insert the new branch.
      Instruction *NI = Builder.CreateBr(Dest: ThisDef);
      (void)NI;

      // Remove PHI node entries for the dead edge.
      ThisCases[0].Dest->removePredecessor(Pred: PredDef);

      LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
                        << "Through successor TI: " << *TI << "Leaving: " << *NI
                        << "\n");

      eraseTerminatorAndDCECond(TI);

      if (DTU)
        DTU->applyUpdates(
            Updates: {{DominatorTree::Delete, PredDef, ThisCases[0].Dest}});

      return true;
    }

    SwitchInstProfUpdateWrapper SI = *cast<SwitchInst>(Val: TI);
    // Okay, TI has cases that are statically dead, prune them away.
    SmallPtrSet<Constant *, 16> DeadCases;
    for (const ValueEqualityComparisonCase &Case : PredCases)
      DeadCases.insert(Ptr: Case.Value);

    LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
                      << "Through successor TI: " << *TI);

    // Track, per successor, how many cases still target it so we can tell
    // the DomTreeUpdater which edges disappeared entirely.
    SmallDenseMap<BasicBlock *, int, 8> NumPerSuccessorCases;
    // Iterate the cases in reverse so that removing a case does not disturb
    // the not-yet-visited ones.
    for (SwitchInst::CaseIt i = SI->case_end(), e = SI->case_begin(); i != e;) {
      --i;
      auto *Successor = i->getCaseSuccessor();
      if (DTU)
        ++NumPerSuccessorCases[Successor];
      if (DeadCases.count(Ptr: i->getCaseValue())) {
        Successor->removePredecessor(Pred: PredDef);
        SI.removeCase(I: i);
        if (DTU)
          --NumPerSuccessorCases[Successor];
      }
    }

    if (DTU) {
      // Only edges whose case count dropped to zero are truly deleted.
      std::vector<DominatorTree::UpdateType> Updates;
      for (const std::pair<BasicBlock *, int> &I : NumPerSuccessorCases)
        if (I.second == 0)
          Updates.push_back(x: {DominatorTree::Delete, PredDef, I.first});
      DTU->applyUpdates(Updates);
    }

    LLVM_DEBUG(dbgs() << "Leaving: " << *TI << "\n");
    return true;
  }

  // Otherwise, TI's block must correspond to some matched value. Find out
  // which value (or set of values) this is.
  ConstantInt *TIV = nullptr;
  BasicBlock *TIBB = TI->getParent();
  for (const auto &[Value, Dest] : PredCases)
    if (Dest == TIBB) {
      if (TIV)
        return false; // Cannot handle multiple values coming to this block.
      TIV = Value;
    }
  assert(TIV && "No edge from pred to succ?");

  // Okay, we found the one constant that our value can be if we get into TI's
  // BB. Find out which successor will unconditionally be branched to.
  BasicBlock *TheRealDest = nullptr;
  for (const auto &[Value, Dest] : ThisCases)
    if (Value == TIV) {
      TheRealDest = Dest;
      break;
    }

  // If not handled by any explicit cases, it is handled by the default case.
  if (!TheRealDest)
    TheRealDest = ThisDef;

  SmallPtrSet<BasicBlock *, 2> RemovedSuccs;

  // Remove PHI node entries for dead edges. CheckEdge lets exactly one edge
  // to TheRealDest survive: once the first such edge is seen, CheckEdge is
  // nulled so any duplicate edges to TheRealDest are also pruned.
  BasicBlock *CheckEdge = TheRealDest;
  for (BasicBlock *Succ : successors(BB: TIBB))
    if (Succ != CheckEdge) {
      if (Succ != TheRealDest)
        RemovedSuccs.insert(Ptr: Succ);
      Succ->removePredecessor(Pred: TIBB);
    } else
      CheckEdge = nullptr;

  // Insert the new branch.
  Instruction *NI = Builder.CreateBr(Dest: TheRealDest);
  (void)NI;

  LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
                    << "Through successor TI: " << *TI << "Leaving: " << *NI
                    << "\n");

  eraseTerminatorAndDCECond(TI);
  if (DTU) {
    SmallVector<DominatorTree::UpdateType, 2> Updates;
    Updates.reserve(N: RemovedSuccs.size());
    for (auto *RemovedSucc : RemovedSuccs)
      Updates.push_back(Elt: {DominatorTree::Delete, TIBB, RemovedSucc});
    DTU->applyUpdates(Updates);
  }
  return true;
}
| 1067 | |
| 1068 | namespace { |
| 1069 | |
| 1070 | /// This class implements a stable ordering of constant |
| 1071 | /// integers that does not depend on their address. This is important for |
| 1072 | /// applications that sort ConstantInt's to ensure uniqueness. |
| 1073 | struct ConstantIntOrdering { |
| 1074 | bool operator()(const ConstantInt *LHS, const ConstantInt *RHS) const { |
| 1075 | return LHS->getValue().ult(RHS: RHS->getValue()); |
| 1076 | } |
| 1077 | }; |
| 1078 | |
| 1079 | } // end anonymous namespace |
| 1080 | |
| 1081 | static int constantIntSortPredicate(ConstantInt *const *P1, |
| 1082 | ConstantInt *const *P2) { |
| 1083 | const ConstantInt *LHS = *P1; |
| 1084 | const ConstantInt *RHS = *P2; |
| 1085 | if (LHS == RHS) |
| 1086 | return 0; |
| 1087 | return LHS->getValue().ult(RHS: RHS->getValue()) ? 1 : -1; |
| 1088 | } |
| 1089 | |
| 1090 | /// Get Weights of a given terminator, the default weight is at the front |
| 1091 | /// of the vector. If TI is a conditional eq, we need to swap the branch-weight |
| 1092 | /// metadata. |
| 1093 | static void getBranchWeights(Instruction *TI, |
| 1094 | SmallVectorImpl<uint64_t> &Weights) { |
| 1095 | MDNode *MD = TI->getMetadata(KindID: LLVMContext::MD_prof); |
| 1096 | assert(MD && "Invalid branch-weight metadata" ); |
| 1097 | extractFromBranchWeightMD64(ProfileData: MD, Weights); |
| 1098 | |
| 1099 | // If TI is a conditional eq, the default case is the false case, |
| 1100 | // and the corresponding branch-weight data is at index 2. We swap the |
| 1101 | // default weight to be the first entry. |
| 1102 | if (BranchInst *BI = dyn_cast<BranchInst>(Val: TI)) { |
| 1103 | assert(Weights.size() == 2); |
| 1104 | ICmpInst *ICI = cast<ICmpInst>(Val: BI->getCondition()); |
| 1105 | if (ICI->getPredicate() == ICmpInst::ICMP_EQ) |
| 1106 | std::swap(a&: Weights.front(), b&: Weights.back()); |
| 1107 | } |
| 1108 | } |
| 1109 | |
| 1110 | /// Keep halving the weights until all can fit in uint32_t. |
| 1111 | static void fitWeights(MutableArrayRef<uint64_t> Weights) { |
| 1112 | uint64_t Max = *llvm::max_element(Range&: Weights); |
| 1113 | if (Max > UINT_MAX) { |
| 1114 | unsigned Offset = 32 - llvm::countl_zero(Val: Max); |
| 1115 | for (uint64_t &I : Weights) |
| 1116 | I >>= Offset; |
| 1117 | } |
| 1118 | } |
| 1119 | |
/// Clone every non-terminator ("bonus") instruction of \p BB into \p
/// PredBlock just before its terminator, record the old->new mapping in
/// \p VMap, and rewrite uses so the IR stays in block-closed SSA form.
static void cloneInstructionsIntoPredecessorBlockAndUpdateSSAUses(
    BasicBlock *BB, BasicBlock *PredBlock, ValueToValueMapTy &VMap) {
  Instruction *PTI = PredBlock->getTerminator();

  // If we have bonus instructions, clone them into the predecessor block.
  // Note that there may be multiple predecessor blocks, so we cannot move
  // bonus instructions to a predecessor block.
  for (Instruction &BonusInst : *BB) {
    if (BonusInst.isTerminator())
      continue;

    Instruction *NewBonusInst = BonusInst.clone();

    if (!NewBonusInst->getDebugLoc().isSameSourceLocation(Other: PTI->getDebugLoc())) {
      // Unless the instruction has the same !dbg location as the original
      // branch, drop it. When we fold the bonus instructions we want to make
      // sure we reset their debug locations in order to avoid stepping on
      // dead code caused by folding dead branches.
      NewBonusInst->setDebugLoc(DebugLoc::getDropped());
    } else if (const DebugLoc &DL = NewBonusInst->getDebugLoc()) {
      // Same source location as the branch: keep it, but give the clone its
      // own atom instance (Key Instructions bookkeeping).
      mapAtomInstance(DL, VMap);
    }

    // Rewrite the clone's operands through VMap so it refers to previously
    // cloned values rather than the originals in BB.
    RemapInstruction(I: NewBonusInst, VM&: VMap,
                     Flags: RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);

    // If we speculated an instruction, we need to drop any metadata that may
    // result in undefined behavior, as the metadata might have been valid
    // only given the branch precondition.
    // Similarly strip attributes on call parameters that may cause UB in
    // location the call is moved to.
    NewBonusInst->dropUBImplyingAttrsAndMetadata();

    NewBonusInst->insertInto(ParentBB: PredBlock, It: PTI->getIterator());
    // Clone the attached debug records as well, remapping them like the
    // instruction itself.
    auto Range = NewBonusInst->cloneDebugInfoFrom(From: &BonusInst);
    RemapDbgRecordRange(M: NewBonusInst->getModule(), Range, VM&: VMap,
                        Flags: RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);

    // Transfer the name to the clone and tag the original, purely for IR
    // readability.
    NewBonusInst->takeName(V: &BonusInst);
    BonusInst.setName(NewBonusInst->getName() + ".old");
    VMap[&BonusInst] = NewBonusInst;

    // Update (liveout) uses of bonus instructions,
    // now that the bonus instruction has been cloned into predecessor.
    // Note that we expect to be in a block-closed SSA form for this to work!
    for (Use &U : make_early_inc_range(Range: BonusInst.uses())) {
      auto *UI = cast<Instruction>(Val: U.getUser());
      auto *PN = dyn_cast<PHINode>(Val: UI);
      if (!PN) {
        assert(UI->getParent() == BB && BonusInst.comesBefore(UI) &&
               "If the user is not a PHI node, then it should be in the same "
               "block as, and come after, the original bonus instruction.");
        continue; // Keep using the original bonus instruction.
      }
      // Is this the block-closed SSA form PHI node?
      if (PN->getIncomingBlock(U) == BB)
        continue; // Great, keep using the original bonus instruction.
      // The only other alternative is an "use" when coming from
      // the predecessor block - here we should refer to the cloned bonus instr.
      assert(PN->getIncomingBlock(U) == PredBlock &&
             "Not in block-closed SSA form?");
      U.set(NewBonusInst);
    }
  }

  // Key Instructions: We may have propagated atom info into the pred. If the
  // pred's terminator already has atom info do nothing as merging would drop
  // one atom group anyway. If it doesn't, propagate the remapped atom group
  // from BB's terminator.
  if (auto &PredDL = PTI->getDebugLoc()) {
    auto &DL = BB->getTerminator()->getDebugLoc();
    if (!PredDL->getAtomGroup() && DL && DL->getAtomGroup() &&
        PredDL.isSameSourceLocation(Other: DL)) {
      PTI->setDebugLoc(DL);
      RemapSourceAtom(I: PTI, VM&: VMap);
    }
  }
}
| 1198 | |
/// Fold the value-equality terminator \p TI (in block BB) into its
/// predecessor's value-equality terminator \p PTI over the same compared
/// value \p CV, replacing PTI with a merged switch and rescaling branch
/// weights. Returns true on success (it always succeeds once called).
bool SimplifyCFGOpt::performValueComparisonIntoPredecessorFolding(
    Instruction *TI, Value *&CV, Instruction *PTI, IRBuilder<> &Builder) {
  BasicBlock *BB = TI->getParent();
  BasicBlock *Pred = PTI->getParent();

  SmallVector<DominatorTree::UpdateType, 32> Updates;

  // Figure out which 'cases' to copy from SI to PSI.
  std::vector<ValueEqualityComparisonCase> BBCases;
  BasicBlock *BBDefault = getValueEqualityComparisonCases(TI, Cases&: BBCases);

  std::vector<ValueEqualityComparisonCase> PredCases;
  BasicBlock *PredDefault = getValueEqualityComparisonCases(TI: PTI, Cases&: PredCases);

  // Based on whether the default edge from PTI goes to BB or not, fill in
  // PredCases and PredDefault with the new switch cases we would like to
  // build.
  // Maps each successor of the merged switch to how many new edges from Pred
  // now target it (used for PHI and DomTree updates below).
  SmallMapVector<BasicBlock *, int, 8> NewSuccessors;

  // Update the branch weight metadata along the way
  SmallVector<uint64_t, 8> Weights;
  bool PredHasWeights = hasBranchWeightMD(I: *PTI);
  bool SuccHasWeights = hasBranchWeightMD(I: *TI);

  if (PredHasWeights) {
    getBranchWeights(TI: PTI, Weights);
    // branch-weight metadata is inconsistent here.
    if (Weights.size() != 1 + PredCases.size())
      PredHasWeights = SuccHasWeights = false;
  } else if (SuccHasWeights)
    // If there are no predecessor weights but there are successor weights,
    // populate Weights with 1, which will later be scaled to the sum of
    // successor's weights
    Weights.assign(NumElts: 1 + PredCases.size(), Elt: 1);

  SmallVector<uint64_t, 8> SuccWeights;
  if (SuccHasWeights) {
    getBranchWeights(TI, Weights&: SuccWeights);
    // branch-weight metadata is inconsistent here.
    if (SuccWeights.size() != 1 + BBCases.size())
      PredHasWeights = SuccHasWeights = false;
  } else if (PredHasWeights)
    SuccWeights.assign(NumElts: 1 + BBCases.size(), Elt: 1);

  if (PredDefault == BB) {
    // If this is the default destination from PTI, only the edges in TI
    // that don't occur in PTI, or that branch to BB will be activated.
    std::set<ConstantInt *, ConstantIntOrdering> PTIHandled;
    for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
      if (PredCases[i].Dest != BB)
        PTIHandled.insert(x: PredCases[i].Value);
      else {
        // The default destination is BB, we don't need explicit targets.
        // swap-and-pop removes case i; rescan index i on the next iteration.
        std::swap(a&: PredCases[i], b&: PredCases.back());

        if (PredHasWeights || SuccHasWeights) {
          // Increase weight for the default case.
          Weights[0] += Weights[i + 1];
          std::swap(a&: Weights[i + 1], b&: Weights.back());
          Weights.pop_back();
        }

        PredCases.pop_back();
        --i;
        --e;
      }

    // Reconstruct the new switch statement we will be building.
    if (PredDefault != BBDefault) {
      PredDefault->removePredecessor(Pred);
      if (DTU && PredDefault != BB)
        Updates.push_back(Elt: {DominatorTree::Delete, Pred, PredDefault});
      PredDefault = BBDefault;
      ++NewSuccessors[BBDefault];
    }

    unsigned CasesFromPred = Weights.size();
    uint64_t ValidTotalSuccWeight = 0;
    for (unsigned i = 0, e = BBCases.size(); i != e; ++i)
      if (!PTIHandled.count(x: BBCases[i].Value) && BBCases[i].Dest != BBDefault) {
        PredCases.push_back(x: BBCases[i]);
        ++NewSuccessors[BBCases[i].Dest];
        if (SuccHasWeights || PredHasWeights) {
          // The default weight is at index 0, so weight for the ith case
          // should be at index i+1. Scale the cases from successor by
          // PredDefaultWeight (Weights[0]).
          Weights.push_back(Elt: Weights[0] * SuccWeights[i + 1]);
          ValidTotalSuccWeight += SuccWeights[i + 1];
        }
      }

    if (SuccHasWeights || PredHasWeights) {
      ValidTotalSuccWeight += SuccWeights[0];
      // Scale the cases from predecessor by ValidTotalSuccWeight.
      for (unsigned i = 1; i < CasesFromPred; ++i)
        Weights[i] *= ValidTotalSuccWeight;
      // Scale the default weight by SuccDefaultWeight (SuccWeights[0]).
      Weights[0] *= SuccWeights[0];
    }
  } else {
    // If this is not the default destination from PSI, only the edges
    // in SI that occur in PSI with a destination of BB will be
    // activated.
    std::set<ConstantInt *, ConstantIntOrdering> PTIHandled;
    std::map<ConstantInt *, uint64_t> WeightsForHandled;
    for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
      if (PredCases[i].Dest == BB) {
        PTIHandled.insert(x: PredCases[i].Value);

        if (PredHasWeights || SuccHasWeights) {
          // Remember the weight so it can follow the case to its new
          // destination, then swap-and-pop the stale entry.
          WeightsForHandled[PredCases[i].Value] = Weights[i + 1];
          std::swap(a&: Weights[i + 1], b&: Weights.back());
          Weights.pop_back();
        }

        std::swap(a&: PredCases[i], b&: PredCases.back());
        PredCases.pop_back();
        --i;
        --e;
      }

    // Okay, now we know which constants were sent to BB from the
    // predecessor. Figure out where they will all go now.
    for (const ValueEqualityComparisonCase &Case : BBCases)
      if (PTIHandled.count(x: Case.Value)) {
        // If this is one we are capable of getting...
        if (PredHasWeights || SuccHasWeights)
          Weights.push_back(Elt: WeightsForHandled[Case.Value]);
        PredCases.push_back(x: Case);
        ++NewSuccessors[Case.Dest];
        PTIHandled.erase(x: Case.Value); // This constant is taken care of
      }

    // If there are any constants vectored to BB that TI doesn't handle,
    // they must go to the default destination of TI.
    for (ConstantInt *I : PTIHandled) {
      if (PredHasWeights || SuccHasWeights)
        Weights.push_back(Elt: WeightsForHandled[I]);
      PredCases.push_back(x: ValueEqualityComparisonCase(I, BBDefault));
      ++NewSuccessors[BBDefault];
    }
  }

  // Okay, at this point, we know which new successor Pred will get. Make
  // sure we update the number of entries in the PHI nodes for these
  // successors.
  SmallPtrSet<BasicBlock *, 2> SuccsOfPred;
  if (DTU) {
    SuccsOfPred = {llvm::from_range, successors(BB: Pred)};
    Updates.reserve(N: Updates.size() + NewSuccessors.size());
  }
  for (const std::pair<BasicBlock *, int /*Num*/> &NewSuccessor :
       NewSuccessors) {
    // One PHI entry per new edge from Pred to this successor.
    for (auto I : seq(Size: NewSuccessor.second)) {
      (void)I;
      addPredecessorToBlock(Succ: NewSuccessor.first, NewPred: Pred, ExistPred: BB);
    }
    if (DTU && !SuccsOfPred.contains(Ptr: NewSuccessor.first))
      Updates.push_back(Elt: {DominatorTree::Insert, Pred, NewSuccessor.first});
  }

  Builder.SetInsertPoint(PTI);
  // Convert pointer to int before we switch.
  if (CV->getType()->isPointerTy()) {
    CV =
        Builder.CreatePtrToInt(V: CV, DestTy: DL.getIntPtrType(CV->getType()), Name: "magicptr");
  }

  // Now that the successors are updated, create the new Switch instruction.
  SwitchInst *NewSI = Builder.CreateSwitch(V: CV, Dest: PredDefault, NumCases: PredCases.size());
  NewSI->setDebugLoc(PTI->getDebugLoc());
  for (ValueEqualityComparisonCase &V : PredCases)
    NewSI->addCase(OnVal: V.Value, Dest: V.Dest);

  if (PredHasWeights || SuccHasWeights) {
    // Halve the weights if any of them cannot fit in an uint32_t
    fitWeights(Weights);

    SmallVector<uint32_t, 8> MDWeights(Weights.begin(), Weights.end());

    setBranchWeights(SI: NewSI, Weights: MDWeights, /*IsExpected=*/false);
  }

  eraseTerminatorAndDCECond(TI: PTI);

  // Okay, last check. If BB is still a successor of PSI, then we must
  // have an infinite loop case. If so, add an infinitely looping block
  // to handle the case to preserve the behavior of the code.
  BasicBlock *InfLoopBlock = nullptr;
  for (unsigned i = 0, e = NewSI->getNumSuccessors(); i != e; ++i)
    if (NewSI->getSuccessor(idx: i) == BB) {
      if (!InfLoopBlock) {
        // Insert it at the end of the function, because it's either code,
        // or it won't matter if it's hot. :)
        InfLoopBlock =
            BasicBlock::Create(Context&: BB->getContext(), Name: "infloop", Parent: BB->getParent());
        BranchInst::Create(IfTrue: InfLoopBlock, InsertBefore: InfLoopBlock);
        if (DTU)
          Updates.push_back(
              Elt: {DominatorTree::Insert, InfLoopBlock, InfLoopBlock});
      }
      NewSI->setSuccessor(idx: i, NewSucc: InfLoopBlock);
    }

  if (DTU) {
    if (InfLoopBlock)
      Updates.push_back(Elt: {DominatorTree::Insert, Pred, InfLoopBlock});

    Updates.push_back(Elt: {DominatorTree::Delete, Pred, BB});

    DTU->applyUpdates(Updates);
  }

  ++NumFoldValueComparisonIntoPredecessors;
  return true;
}
| 1415 | |
| 1416 | /// The specified terminator is a value equality comparison instruction |
| 1417 | /// (either a switch or a branch on "X == c"). |
| 1418 | /// See if any of the predecessors of the terminator block are value comparisons |
| 1419 | /// on the same value. If so, and if safe to do so, fold them together. |
| 1420 | bool SimplifyCFGOpt::foldValueComparisonIntoPredecessors(Instruction *TI, |
| 1421 | IRBuilder<> &Builder) { |
| 1422 | BasicBlock *BB = TI->getParent(); |
| 1423 | Value *CV = isValueEqualityComparison(TI); // CondVal |
| 1424 | assert(CV && "Not a comparison?" ); |
| 1425 | |
| 1426 | bool Changed = false; |
| 1427 | |
| 1428 | SmallSetVector<BasicBlock *, 16> Preds(pred_begin(BB), pred_end(BB)); |
| 1429 | while (!Preds.empty()) { |
| 1430 | BasicBlock *Pred = Preds.pop_back_val(); |
| 1431 | Instruction *PTI = Pred->getTerminator(); |
| 1432 | |
| 1433 | // Don't try to fold into itself. |
| 1434 | if (Pred == BB) |
| 1435 | continue; |
| 1436 | |
| 1437 | // See if the predecessor is a comparison with the same value. |
| 1438 | Value *PCV = isValueEqualityComparison(TI: PTI); // PredCondVal |
| 1439 | if (PCV != CV) |
| 1440 | continue; |
| 1441 | |
| 1442 | SmallSetVector<BasicBlock *, 4> FailBlocks; |
| 1443 | if (!safeToMergeTerminators(SI1: TI, SI2: PTI, FailBlocks: &FailBlocks)) { |
| 1444 | for (auto *Succ : FailBlocks) { |
| 1445 | if (!SplitBlockPredecessors(BB: Succ, Preds: TI->getParent(), Suffix: ".fold.split" , DTU)) |
| 1446 | return false; |
| 1447 | } |
| 1448 | } |
| 1449 | |
| 1450 | performValueComparisonIntoPredecessorFolding(TI, CV, PTI, Builder); |
| 1451 | Changed = true; |
| 1452 | } |
| 1453 | return Changed; |
| 1454 | } |
| 1455 | |
| 1456 | // If we would need to insert a select that uses the value of this invoke |
| 1457 | // (comments in hoistSuccIdenticalTerminatorToSwitchOrIf explain why we would |
| 1458 | // need to do this), we can't hoist the invoke, as there is nowhere to put the |
| 1459 | // select in this case. |
| 1460 | static bool isSafeToHoistInvoke(BasicBlock *BB1, BasicBlock *BB2, |
| 1461 | Instruction *I1, Instruction *I2) { |
| 1462 | for (BasicBlock *Succ : successors(BB: BB1)) { |
| 1463 | for (const PHINode &PN : Succ->phis()) { |
| 1464 | Value *BB1V = PN.getIncomingValueForBlock(BB: BB1); |
| 1465 | Value *BB2V = PN.getIncomingValueForBlock(BB: BB2); |
| 1466 | if (BB1V != BB2V && (BB1V == I1 || BB2V == I2)) { |
| 1467 | return false; |
| 1468 | } |
| 1469 | } |
| 1470 | } |
| 1471 | return true; |
| 1472 | } |
| 1473 | |
| 1474 | // Get interesting characteristics of instructions that |
| 1475 | // `hoistCommonCodeFromSuccessors` didn't hoist. They restrict what kind of |
| 1476 | // instructions can be reordered across. |
enum SkipFlags {
  // A skipped instruction may read memory; writes must not be reordered
  // across it.
  SkipReadMem = 1,
  // A skipped instruction may write memory or otherwise have side effects
  // (allocas count too); reads and other side effects must not cross it.
  SkipSideEffect = 2,
  // A skipped instruction may not transfer control to its successor (e.g. it
  // may throw or diverge); moving instructions across it is speculation.
  SkipImplicitControlFlow = 4
};
| 1482 | |
| 1483 | static unsigned skippedInstrFlags(Instruction *I) { |
| 1484 | unsigned Flags = 0; |
| 1485 | if (I->mayReadFromMemory()) |
| 1486 | Flags |= SkipReadMem; |
| 1487 | // We can't arbitrarily move around allocas, e.g. moving allocas (especially |
| 1488 | // inalloca) across stacksave/stackrestore boundaries. |
| 1489 | if (I->mayHaveSideEffects() || isa<AllocaInst>(Val: I)) |
| 1490 | Flags |= SkipSideEffect; |
| 1491 | if (!isGuaranteedToTransferExecutionToSuccessor(I)) |
| 1492 | Flags |= SkipImplicitControlFlow; |
| 1493 | return Flags; |
| 1494 | } |
| 1495 | |
| 1496 | // Returns true if it is safe to reorder an instruction across preceding |
| 1497 | // instructions in a basic block. |
| 1498 | static bool isSafeToHoistInstr(Instruction *I, unsigned Flags) { |
| 1499 | // Don't reorder a store over a load. |
| 1500 | if ((Flags & SkipReadMem) && I->mayWriteToMemory()) |
| 1501 | return false; |
| 1502 | |
| 1503 | // If we have seen an instruction with side effects, it's unsafe to reorder an |
| 1504 | // instruction which reads memory or itself has side effects. |
| 1505 | if ((Flags & SkipSideEffect) && |
| 1506 | (I->mayReadFromMemory() || I->mayHaveSideEffects() || isa<AllocaInst>(Val: I))) |
| 1507 | return false; |
| 1508 | |
| 1509 | // Reordering across an instruction which does not necessarily transfer |
| 1510 | // control to the next instruction is speculation. |
| 1511 | if ((Flags & SkipImplicitControlFlow) && !isSafeToSpeculativelyExecute(I)) |
| 1512 | return false; |
| 1513 | |
| 1514 | // Hoisting of llvm.deoptimize is only legal together with the next return |
| 1515 | // instruction, which this pass is not always able to do. |
| 1516 | if (auto *CB = dyn_cast<CallBase>(Val: I)) |
| 1517 | if (CB->getIntrinsicID() == Intrinsic::experimental_deoptimize) |
| 1518 | return false; |
| 1519 | |
| 1520 | // It's also unsafe/illegal to hoist an instruction above its instruction |
| 1521 | // operands |
| 1522 | BasicBlock *BB = I->getParent(); |
| 1523 | for (Value *Op : I->operands()) { |
| 1524 | if (auto *J = dyn_cast<Instruction>(Val: Op)) |
| 1525 | if (J->getParent() == BB) |
| 1526 | return false; |
| 1527 | } |
| 1528 | |
| 1529 | return true; |
| 1530 | } |
| 1531 | |
| 1532 | static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified = false); |
| 1533 | |
| 1534 | /// Helper function for hoistCommonCodeFromSuccessors. Return true if identical |
| 1535 | /// instructions \p I1 and \p I2 can and should be hoisted. |
| 1536 | static bool shouldHoistCommonInstructions(Instruction *I1, Instruction *I2, |
| 1537 | const TargetTransformInfo &TTI) { |
| 1538 | // If we're going to hoist a call, make sure that the two instructions |
| 1539 | // we're commoning/hoisting are both marked with musttail, or neither of |
| 1540 | // them is marked as such. Otherwise, we might end up in a situation where |
| 1541 | // we hoist from a block where the terminator is a `ret` to a block where |
| 1542 | // the terminator is a `br`, and `musttail` calls expect to be followed by |
| 1543 | // a return. |
| 1544 | auto *C1 = dyn_cast<CallInst>(Val: I1); |
| 1545 | auto *C2 = dyn_cast<CallInst>(Val: I2); |
| 1546 | if (C1 && C2) |
| 1547 | if (C1->isMustTailCall() != C2->isMustTailCall()) |
| 1548 | return false; |
| 1549 | |
| 1550 | if (!TTI.isProfitableToHoist(I: I1) || !TTI.isProfitableToHoist(I: I2)) |
| 1551 | return false; |
| 1552 | |
| 1553 | // If any of the two call sites has nomerge or convergent attribute, stop |
| 1554 | // hoisting. |
| 1555 | if (const auto *CB1 = dyn_cast<CallBase>(Val: I1)) |
| 1556 | if (CB1->cannotMerge() || CB1->isConvergent()) |
| 1557 | return false; |
| 1558 | if (const auto *CB2 = dyn_cast<CallBase>(Val: I2)) |
| 1559 | if (CB2->cannotMerge() || CB2->isConvergent()) |
| 1560 | return false; |
| 1561 | |
| 1562 | return true; |
| 1563 | } |
| 1564 | |
| 1565 | /// Hoists DbgVariableRecords from \p I1 and \p OtherInstrs that are identical |
| 1566 | /// in lock-step to \p TI. This matches how dbg.* intrinsics are hoisting in |
| 1567 | /// hoistCommonCodeFromSuccessors. e.g. The input: |
| 1568 | /// I1 DVRs: { x, z }, |
| 1569 | /// OtherInsts: { I2 DVRs: { x, y, z } } |
| 1570 | /// would result in hoisting only DbgVariableRecord x. |
static void hoistLockstepIdenticalDbgVariableRecords(
    Instruction *TI, Instruction *I1,
    SmallVectorImpl<Instruction *> &OtherInsts) {
  // Nothing to hoist if the reference instruction carries no records.
  if (!I1->hasDbgRecords())
    return;
  using CurrentAndEndIt =
      std::pair<DbgRecord::self_iterator, DbgRecord::self_iterator>;
  // Vector of {Current, End} iterators, one pair per instruction.
  SmallVector<CurrentAndEndIt> Itrs;
  Itrs.reserve(N: OtherInsts.size() + 1);
  // Helper lambdas for lock-step checks:
  // Return true if this Current == End.
  auto atEnd = [](const CurrentAndEndIt &Pair) {
    return Pair.first == Pair.second;
  };
  // Return true if all Current are identical (each compared against the
  // first list's Current).
  auto allIdentical = [](const SmallVector<CurrentAndEndIt> &Itrs) {
    return all_of(Range: make_first_range(c: ArrayRef(Itrs).drop_front()),
                  P: [&](DbgRecord::self_iterator I) {
                    return Itrs[0].first->isIdenticalToWhenDefined(R: *I);
                  });
  };

  // Collect the iterators. If any instruction has no DbgRecords at all, no
  // lock-step prefix can exist, so bail out early.
  Itrs.push_back(
      Elt: {I1->getDbgRecordRange().begin(), I1->getDbgRecordRange().end()});
  for (Instruction *Other : OtherInsts) {
    if (!Other->hasDbgRecords())
      return;
    Itrs.push_back(
        Elt: {Other->getDbgRecordRange().begin(), Other->getDbgRecordRange().end()});
  }

  // Iterate in lock-step until any of the DbgRecord lists are exhausted. If
  // the lock-step DbgRecords are identical, hoist all of them to TI.
  // This replicates the dbg.* intrinsic behaviour in
  // hoistCommonCodeFromSuccessors.
  while (none_of(Range&: Itrs, P: atEnd)) {
    bool HoistDVRs = allIdentical(Itrs);
    for (CurrentAndEndIt &Pair : Itrs) {
      // Increment Current iterator now as we may be about to move the
      // DbgRecord.
      DbgRecord &DR = *Pair.first++;
      if (HoistDVRs) {
        DR.removeFromParent();
        TI->getParent()->insertDbgRecordBefore(DR: &DR, Here: TI->getIterator());
      }
    }
  }
}
| 1621 | |
| 1622 | static bool areIdenticalUpToCommutativity(const Instruction *I1, |
| 1623 | const Instruction *I2) { |
| 1624 | if (I1->isIdenticalToWhenDefined(I: I2, /*IntersectAttrs=*/true)) |
| 1625 | return true; |
| 1626 | |
| 1627 | if (auto *Cmp1 = dyn_cast<CmpInst>(Val: I1)) |
| 1628 | if (auto *Cmp2 = dyn_cast<CmpInst>(Val: I2)) |
| 1629 | return Cmp1->getPredicate() == Cmp2->getSwappedPredicate() && |
| 1630 | Cmp1->getOperand(i_nocapture: 0) == Cmp2->getOperand(i_nocapture: 1) && |
| 1631 | Cmp1->getOperand(i_nocapture: 1) == Cmp2->getOperand(i_nocapture: 0); |
| 1632 | |
| 1633 | if (I1->isCommutative() && I1->isSameOperationAs(I: I2)) { |
| 1634 | return I1->getOperand(i: 0) == I2->getOperand(i: 1) && |
| 1635 | I1->getOperand(i: 1) == I2->getOperand(i: 0) && |
| 1636 | equal(LRange: drop_begin(RangeOrContainer: I1->operands(), N: 2), RRange: drop_begin(RangeOrContainer: I2->operands(), N: 2)); |
| 1637 | } |
| 1638 | |
| 1639 | return false; |
| 1640 | } |
| 1641 | |
| 1642 | /// If the target supports conditional faulting, |
| 1643 | /// we look for the following pattern: |
| 1644 | /// \code |
| 1645 | /// BB: |
| 1646 | /// ... |
| 1647 | /// %cond = icmp ult %x, %y |
| 1648 | /// br i1 %cond, label %TrueBB, label %FalseBB |
| 1649 | /// FalseBB: |
| 1650 | /// store i32 1, ptr %q, align 4 |
| 1651 | /// ... |
| 1652 | /// TrueBB: |
| 1653 | /// %maskedloadstore = load i32, ptr %b, align 4 |
| 1654 | /// store i32 %maskedloadstore, ptr %p, align 4 |
| 1655 | /// ... |
| 1656 | /// \endcode |
| 1657 | /// |
| 1658 | /// and transform it into: |
| 1659 | /// |
| 1660 | /// \code |
| 1661 | /// BB: |
| 1662 | /// ... |
| 1663 | /// %cond = icmp ult %x, %y |
| 1664 | /// %maskedloadstore = cload i32, ptr %b, %cond |
| 1665 | /// cstore i32 %maskedloadstore, ptr %p, %cond |
| 1666 | /// cstore i32 1, ptr %q, ~%cond |
| 1667 | /// br i1 %cond, label %TrueBB, label %FalseBB |
| 1668 | /// FalseBB: |
| 1669 | /// ... |
| 1670 | /// TrueBB: |
| 1671 | /// ... |
| 1672 | /// \endcode |
| 1673 | /// |
| 1674 | /// where cload/cstore are represented by llvm.masked.load/store intrinsics, |
| 1675 | /// e.g. |
| 1676 | /// |
| 1677 | /// \code |
| 1678 | /// %vcond = bitcast i1 %cond to <1 x i1> |
| 1679 | /// %v0 = call <1 x i32> @llvm.masked.load.v1i32.p0 |
| 1680 | /// (ptr %b, i32 4, <1 x i1> %vcond, <1 x i32> poison) |
| 1681 | /// %maskedloadstore = bitcast <1 x i32> %v0 to i32 |
| 1682 | /// call void @llvm.masked.store.v1i32.p0 |
| 1683 | /// (<1 x i32> %v0, ptr %p, i32 4, <1 x i1> %vcond) |
| 1684 | /// %cond.not = xor i1 %cond, true |
| 1685 | /// %vcond.not = bitcast i1 %cond.not to <1 x i> |
| 1686 | /// call void @llvm.masked.store.v1i32.p0 |
| 1687 | /// (<1 x i32> <i32 1>, ptr %q, i32 4, <1x i1> %vcond.not) |
| 1688 | /// \endcode |
| 1689 | /// |
| 1690 | /// So we need to turn hoisted load/store into cload/cstore. |
| 1691 | /// |
| 1692 | /// \param BI The branch instruction. |
| 1693 | /// \param SpeculatedConditionalLoadsStores The load/store instructions that |
| 1694 | /// will be speculated. |
| 1695 | /// \param Invert indicates if speculates FalseBB. Only used in triangle CFG. |
static void hoistConditionalLoadsStores(
    BranchInst *BI,
    SmallVectorImpl<Instruction *> &SpeculatedConditionalLoadsStores,
    std::optional<bool> Invert, Instruction *Sel) {
  auto &Context = BI->getParent()->getContext();
  // The masked intrinsics take a vector mask; the scalar i1 branch condition
  // is reinterpreted as a <1 x i1>.
  auto *VCondTy = FixedVectorType::get(ElementType: Type::getInt1Ty(C&: Context), NumElts: 1);
  auto *Cond = BI->getOperand(i_nocapture: 0);
  // Construct the condition if needed.
  BasicBlock *BB = BI->getParent();
  Value *Mask = nullptr;
  Value *MaskFalse = nullptr;
  Value *MaskTrue = nullptr;
  if (Invert.has_value()) {
    // Triangle CFG: a single mask (possibly the negated condition) covers all
    // speculated accesses. Insert it before Sel if present, otherwise before
    // the last speculated access.
    IRBuilder<> Builder(Sel ? Sel : SpeculatedConditionalLoadsStores.back());
    Mask = Builder.CreateBitCast(
        V: *Invert ? Builder.CreateXor(LHS: Cond, RHS: ConstantInt::getTrue(Context)) : Cond,
        DestTy: VCondTy);
  } else {
    // Diamond CFG: build both polarities once; each access later picks the
    // mask matching the successor it came from.
    IRBuilder<> Builder(BI);
    MaskFalse = Builder.CreateBitCast(
        V: Builder.CreateXor(LHS: Cond, RHS: ConstantInt::getTrue(Context)), DestTy: VCondTy);
    MaskTrue = Builder.CreateBitCast(V: Cond, DestTy: VCondTy);
  }
  // Strip any chain of bitcasts to reach the underlying value.
  auto PeekThroughBitcasts = [](Value *V) {
    while (auto *BitCast = dyn_cast<BitCastInst>(Val: V))
      V = BitCast->getOperand(i_nocapture: 0);
    return V;
  };
  for (auto *I : SpeculatedConditionalLoadsStores) {
    IRBuilder<> Builder(Invert.has_value() ? I : BI);
    if (!Invert.has_value())
      Mask = I->getParent() == BI->getSuccessor(i: 0) ? MaskTrue : MaskFalse;
    // We currently assume conditional faulting load/store is supported for
    // scalar types only when creating new instructions. This can be easily
    // extended for vector types in the future.
    assert(!getLoadStoreType(I)->isVectorTy() && "not implemented");
    auto *Op0 = I->getOperand(i: 0);
    CallInst *MaskedLoadStore = nullptr;
    if (auto *LI = dyn_cast<LoadInst>(Val: I)) {
      // Handle Load.
      auto *Ty = I->getType();
      PHINode *PN = nullptr;
      Value *PassThru = nullptr;
      // In the triangle case the load's old value may flow around the
      // speculated block via a PHI; use that incoming value as pass-through.
      if (Invert.has_value())
        for (User *U : I->users()) {
          if ((PN = dyn_cast<PHINode>(Val: U))) {
            PassThru = Builder.CreateBitCast(
                V: PeekThroughBitcasts(PN->getIncomingValueForBlock(BB)),
                DestTy: FixedVectorType::get(ElementType: Ty, NumElts: 1));
          } else if (auto *Ins = cast<Instruction>(Val: U);
                     Sel && Ins->getParent() == BB) {
            // This happens when store or/and a speculative instruction between
            // load and store were hoisted to the BB. Make sure the masked load
            // inserted before its use.
            // We assume there's one of such use.
            Builder.SetInsertPoint(Ins);
          }
        }
      MaskedLoadStore = Builder.CreateMaskedLoad(
          Ty: FixedVectorType::get(ElementType: Ty, NumElts: 1), Ptr: Op0, Alignment: LI->getAlign(), Mask, PassThru);
      // Bitcast the <1 x Ty> result back to scalar Ty and rewire all users.
      Value *NewLoadStore = Builder.CreateBitCast(V: MaskedLoadStore, DestTy: Ty);
      if (PN)
        PN->setIncomingValue(i: PN->getBasicBlockIndex(BB), V: NewLoadStore);
      I->replaceAllUsesWith(V: NewLoadStore);
    } else {
      // Handle Store.
      auto *StoredVal = Builder.CreateBitCast(
          V: PeekThroughBitcasts(Op0), DestTy: FixedVectorType::get(ElementType: Op0->getType(), NumElts: 1));
      MaskedLoadStore = Builder.CreateMaskedStore(
          Val: StoredVal, Ptr: I->getOperand(i: 1), Alignment: cast<StoreInst>(Val: I)->getAlign(), Mask);
    }
    // For non-debug metadata, only !annotation, !range, !nonnull and !align are
    // kept when hoisting (see Instruction::dropUBImplyingAttrsAndMetadata).
    //
    // !nonnull, !align : Not support pointer type, no need to keep.
    // !range: Load type is changed from scalar to vector, but the metadata on
    //         vector specifies a per-element range, so the semantics stay the
    //         same. Keep it.
    // !annotation: Not impact semantics. Keep it.
    if (const MDNode *Ranges = I->getMetadata(KindID: LLVMContext::MD_range))
      MaskedLoadStore->addRangeRetAttr(CR: getConstantRangeFromMetadata(RangeMD: *Ranges));
    I->dropUBImplyingAttrsAndUnknownMetadata(KnownIDs: {LLVMContext::MD_annotation});
    // FIXME: DIAssignID is not supported for masked store yet.
    // (Verifier::visitDIAssignIDMetadata)
    at::deleteAssignmentMarkers(Inst: I);
    I->eraseMetadataIf(Pred: [](unsigned MDKind, MDNode *Node) {
      return Node->getMetadataID() == Metadata::DIAssignIDKind;
    });
    MaskedLoadStore->copyMetadata(SrcInst: *I);
    I->eraseFromParent();
  }
}
| 1788 | |
| 1789 | static bool isSafeCheapLoadStore(const Instruction *I, |
| 1790 | const TargetTransformInfo &TTI) { |
| 1791 | // Not handle volatile or atomic. |
| 1792 | bool IsStore = false; |
| 1793 | if (auto *L = dyn_cast<LoadInst>(Val: I)) { |
| 1794 | if (!L->isSimple() || !HoistLoadsWithCondFaulting) |
| 1795 | return false; |
| 1796 | } else if (auto *S = dyn_cast<StoreInst>(Val: I)) { |
| 1797 | if (!S->isSimple() || !HoistStoresWithCondFaulting) |
| 1798 | return false; |
| 1799 | IsStore = true; |
| 1800 | } else |
| 1801 | return false; |
| 1802 | |
| 1803 | // llvm.masked.load/store use i32 for alignment while load/store use i64. |
| 1804 | // That's why we have the alignment limitation. |
| 1805 | // FIXME: Update the prototype of the intrinsics? |
| 1806 | return TTI.hasConditionalLoadStoreForType(Ty: getLoadStoreType(I), IsStore) && |
| 1807 | getLoadStoreAlignment(I) < Value::MaximumAlignment; |
| 1808 | } |
| 1809 | |
| 1810 | /// Hoist any common code in the successor blocks up into the block. This |
| 1811 | /// function guarantees that BB dominates all successors. If AllInstsEqOnly is |
| 1812 | /// given, only perform hoisting in case all successors blocks contain matching |
| 1813 | /// instructions only. In that case, all instructions can be hoisted and the |
| 1814 | /// original branch will be replaced and selects for PHIs are added. |
bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(Instruction *TI,
                                                   bool AllInstsEqOnly) {
  // This does very trivial matching, with limited scanning, to find identical
  // instructions in the two blocks. In particular, we don't want to get into
  // O(N1*N2*...) situations here where Ni are the sizes of these successors. As
  // such, we currently just scan for obviously identical instructions in an
  // identical order, possibly separated by the same number of non-identical
  // instructions.
  BasicBlock *BB = TI->getParent();
  unsigned int SuccSize = succ_size(BB);
  if (SuccSize < 2)
    return false;

  // If either of the blocks has its address taken, then we can't do this fold,
  // because the code we'd hoist would no longer run when we jump into the block
  // by its address. Likewise, every successor must be reached only via BB so
  // that hoisting into BB preserves behavior.
  for (auto *Succ : successors(BB))
    if (Succ->hasAddressTaken() || !Succ->getSinglePredecessor())
      return false;

  // Per-successor cursor into the block, advanced in lock-step below.
  // The second of pair is a SkipFlags bitmask describing skipped instructions.
  using SuccIterPair = std::pair<BasicBlock::iterator, unsigned>;
  SmallVector<SuccIterPair, 8> SuccIterPairs;
  for (auto *Succ : successors(BB)) {
    BasicBlock::iterator SuccItr = Succ->begin();
    // PHIs at the top of a successor would need select insertion; bail.
    if (isa<PHINode>(Val: *SuccItr))
      return false;
    SuccIterPairs.push_back(Elt: SuccIterPair(SuccItr, 0));
  }

  if (AllInstsEqOnly) {
    // Check if all instructions in the successor blocks match. This allows
    // hoisting all instructions and removing the blocks we are hoisting from,
    // so does not add any new instructions.
    SmallVector<BasicBlock *> Succs = to_vector(Range: successors(BB));
    // Check if sizes and terminators of all successors match.
    bool AllSame = none_of(Range&: Succs, P: [&Succs](BasicBlock *Succ) {
      Instruction *Term0 = Succs[0]->getTerminator();
      Instruction *Term = Succ->getTerminator();
      return !Term->isSameOperationAs(I: Term0) ||
             !equal(LRange: Term->operands(), RRange: Term0->operands()) ||
             Succs[0]->size() != Succ->size();
    });
    if (!AllSame)
      return false;
    if (AllSame) {
      // Walk all successors backwards in lock-step and verify every row of
      // instructions matches (up to commutativity).
      LockstepReverseIterator<true> LRI(Succs);
      while (LRI.isValid()) {
        Instruction *I0 = (*LRI)[0];
        if (any_of(Range: *LRI, P: [I0](Instruction *I) {
              return !areIdenticalUpToCommutativity(I1: I0, I2: I);
            })) {
          return false;
        }
        --LRI;
      }
    }
    // Now we know that all instructions in all successors can be hoisted. Let
    // the loop below handle the hoisting.
  }

  // Count how many instructions were not hoisted so far. There's a limit on how
  // many instructions we skip, serving as a compilation time control as well as
  // preventing excessive increase of life ranges.
  unsigned NumSkipped = 0;
  // If we find an unreachable instruction at the beginning of a basic block, we
  // can still hoist instructions from the rest of the basic blocks.
  if (SuccIterPairs.size() > 2) {
    erase_if(C&: SuccIterPairs,
             P: [](const auto &Pair) { return isa<UnreachableInst>(Pair.first); });
    if (SuccIterPairs.size() < 2)
      return false;
  }

  bool Changed = false;

  // Main lock-step loop: each iteration looks at one "row" of instructions,
  // one from each remaining successor, and either hoists the row or skips it.
  for (;;) {
    auto *SuccIterPairBegin = SuccIterPairs.begin();
    auto &BB1ItrPair = *SuccIterPairBegin++;
    auto OtherSuccIterPairRange =
        iterator_range(SuccIterPairBegin, SuccIterPairs.end());
    auto OtherSuccIterRange = make_first_range(c&: OtherSuccIterPairRange);

    Instruction *I1 = &*BB1ItrPair.first;

    bool AllInstsAreIdentical = true;
    bool HasTerminator = I1->isTerminator();
    for (auto &SuccIter : OtherSuccIterRange) {
      Instruction *I2 = &*SuccIter;
      HasTerminator |= I2->isTerminator();
      if (AllInstsAreIdentical && (!areIdenticalUpToCommutativity(I1, I2) ||
                                   MMRAMetadata(*I1) != MMRAMetadata(*I2)))
        AllInstsAreIdentical = false;
    }

    SmallVector<Instruction *, 8> OtherInsts;
    for (auto &SuccIter : OtherSuccIterRange)
      OtherInsts.push_back(Elt: &*SuccIter);

    // If we are hoisting the terminator instruction, don't move one (making a
    // broken BB), instead clone it, and remove BI.
    if (HasTerminator) {
      // Even if BB, which contains only one unreachable instruction, is ignored
      // at the beginning of the loop, we can hoist the terminator instruction.
      // If any instructions remain in the block, we cannot hoist terminators.
      if (NumSkipped || !AllInstsAreIdentical) {
        hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
        return Changed;
      }

      return hoistSuccIdenticalTerminatorToSwitchOrIf(TI, I1, OtherSuccTIs&: OtherInsts) ||
             Changed;
    }

    if (AllInstsAreIdentical) {
      unsigned SkipFlagsBB1 = BB1ItrPair.second;
      AllInstsAreIdentical =
          isSafeToHoistInstr(I: I1, Flags: SkipFlagsBB1) &&
          all_of(Range&: OtherSuccIterPairRange, P: [=](const auto &Pair) {
            Instruction *I2 = &*Pair.first;
            unsigned SkipFlagsBB2 = Pair.second;
            // Even if the instructions are identical, it may not
            // be safe to hoist them if we have skipped over
            // instructions with side effects or their operands
            // weren't hoisted.
            return isSafeToHoistInstr(I: I2, Flags: SkipFlagsBB2) &&
                   shouldHoistCommonInstructions(I1, I2, TTI);
          });
    }

    if (AllInstsAreIdentical) {
      BB1ItrPair.first++;
      // For a normal instruction, we just move one to right before the
      // branch, then replace all uses of the other with the first. Finally,
      // we remove the now redundant second instruction.
      hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
      // We've just hoisted DbgVariableRecords; move I1 after them (before TI)
      // and leave any that were not hoisted behind (by calling moveBefore
      // rather than moveBeforePreserving).
      I1->moveBefore(InsertPos: TI->getIterator());
      for (auto &SuccIter : OtherSuccIterRange) {
        Instruction *I2 = &*SuccIter++;
        assert(I2 != I1);
        if (!I2->use_empty())
          I2->replaceAllUsesWith(V: I1);
        // Merge wrapping/exact/fast-math flags conservatively.
        I1->andIRFlags(V: I2);
        if (auto *CB = dyn_cast<CallBase>(Val: I1)) {
          bool Success = CB->tryIntersectAttributes(Other: cast<CallBase>(Val: I2));
          assert(Success && "We should not be trying to hoist callbases "
                            "with non-intersectable attributes");
          // For NDEBUG Compile.
          (void)Success;
        }

        combineMetadataForCSE(K: I1, J: I2, DoesKMove: true);
        // I1 and I2 are being combined into a single instruction. Its debug
        // location is the merged locations of the original instructions.
        I1->applyMergedLocation(LocA: I1->getDebugLoc(), LocB: I2->getDebugLoc());
        I2->eraseFromParent();
      }
      if (!Changed)
        NumHoistCommonCode += SuccIterPairs.size();
      Changed = true;
      NumHoistCommonInstrs += SuccIterPairs.size();
    } else {
      // Non-identical row: give up once the skip budget is exhausted.
      if (NumSkipped >= HoistCommonSkipLimit) {
        hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
        return Changed;
      }
      // We are about to skip over a pair of non-identical instructions. Record
      // if any have characteristics that would prevent reordering instructions
      // across them.
      for (auto &SuccIterPair : SuccIterPairs) {
        Instruction *I = &*SuccIterPair.first++;
        SuccIterPair.second |= skippedInstrFlags(I);
      }
      ++NumSkipped;
    }
  }
}
| 1995 | |
| 1996 | bool SimplifyCFGOpt::hoistSuccIdenticalTerminatorToSwitchOrIf( |
| 1997 | Instruction *TI, Instruction *I1, |
| 1998 | SmallVectorImpl<Instruction *> &OtherSuccTIs) { |
| 1999 | |
| 2000 | auto *BI = dyn_cast<BranchInst>(Val: TI); |
| 2001 | |
| 2002 | bool Changed = false; |
| 2003 | BasicBlock *TIParent = TI->getParent(); |
| 2004 | BasicBlock *BB1 = I1->getParent(); |
| 2005 | |
| 2006 | // Use only for an if statement. |
| 2007 | auto *I2 = *OtherSuccTIs.begin(); |
| 2008 | auto *BB2 = I2->getParent(); |
| 2009 | if (BI) { |
| 2010 | assert(OtherSuccTIs.size() == 1); |
| 2011 | assert(BI->getSuccessor(0) == I1->getParent()); |
| 2012 | assert(BI->getSuccessor(1) == I2->getParent()); |
| 2013 | } |
| 2014 | |
| 2015 | // In the case of an if statement, we try to hoist an invoke. |
| 2016 | // FIXME: Can we define a safety predicate for CallBr? |
| 2017 | // FIXME: Test case llvm/test/Transforms/SimplifyCFG/2009-06-15-InvokeCrash.ll |
| 2018 | // removed in 4c923b3b3fd0ac1edebf0603265ca3ba51724937 commit? |
| 2019 | if (isa<InvokeInst>(Val: I1) && (!BI || !isSafeToHoistInvoke(BB1, BB2, I1, I2))) |
| 2020 | return false; |
| 2021 | |
| 2022 | // TODO: callbr hoisting currently disabled pending further study. |
| 2023 | if (isa<CallBrInst>(Val: I1)) |
| 2024 | return false; |
| 2025 | |
| 2026 | for (BasicBlock *Succ : successors(BB: BB1)) { |
| 2027 | for (PHINode &PN : Succ->phis()) { |
| 2028 | Value *BB1V = PN.getIncomingValueForBlock(BB: BB1); |
| 2029 | for (Instruction *OtherSuccTI : OtherSuccTIs) { |
| 2030 | Value *BB2V = PN.getIncomingValueForBlock(BB: OtherSuccTI->getParent()); |
| 2031 | if (BB1V == BB2V) |
| 2032 | continue; |
| 2033 | |
| 2034 | // In the case of an if statement, check for |
| 2035 | // passingValueIsAlwaysUndefined here because we would rather eliminate |
| 2036 | // undefined control flow then converting it to a select. |
| 2037 | if (!BI || passingValueIsAlwaysUndefined(V: BB1V, I: &PN) || |
| 2038 | passingValueIsAlwaysUndefined(V: BB2V, I: &PN)) |
| 2039 | return false; |
| 2040 | } |
| 2041 | } |
| 2042 | } |
| 2043 | |
| 2044 | // Hoist DbgVariableRecords attached to the terminator to match dbg.* |
| 2045 | // intrinsic hoisting behaviour in hoistCommonCodeFromSuccessors. |
| 2046 | hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts&: OtherSuccTIs); |
| 2047 | // Clone the terminator and hoist it into the pred, without any debug info. |
| 2048 | Instruction *NT = I1->clone(); |
| 2049 | NT->insertInto(ParentBB: TIParent, It: TI->getIterator()); |
| 2050 | if (!NT->getType()->isVoidTy()) { |
| 2051 | I1->replaceAllUsesWith(V: NT); |
| 2052 | for (Instruction *OtherSuccTI : OtherSuccTIs) |
| 2053 | OtherSuccTI->replaceAllUsesWith(V: NT); |
| 2054 | NT->takeName(V: I1); |
| 2055 | } |
| 2056 | Changed = true; |
| 2057 | NumHoistCommonInstrs += OtherSuccTIs.size() + 1; |
| 2058 | |
| 2059 | // Ensure terminator gets a debug location, even an unknown one, in case |
| 2060 | // it involves inlinable calls. |
| 2061 | SmallVector<DebugLoc, 4> Locs; |
| 2062 | Locs.push_back(Elt: I1->getDebugLoc()); |
| 2063 | for (auto *OtherSuccTI : OtherSuccTIs) |
| 2064 | Locs.push_back(Elt: OtherSuccTI->getDebugLoc()); |
| 2065 | NT->setDebugLoc(DebugLoc::getMergedLocations(Locs)); |
| 2066 | |
| 2067 | // PHIs created below will adopt NT's merged DebugLoc. |
| 2068 | IRBuilder<NoFolder> Builder(NT); |
| 2069 | |
| 2070 | // In the case of an if statement, hoisting one of the terminators from our |
| 2071 | // successor is a great thing. Unfortunately, the successors of the if/else |
| 2072 | // blocks may have PHI nodes in them. If they do, all PHI entries for BB1/BB2 |
| 2073 | // must agree for all PHI nodes, so we insert select instruction to compute |
| 2074 | // the final result. |
| 2075 | if (BI) { |
| 2076 | std::map<std::pair<Value *, Value *>, SelectInst *> InsertedSelects; |
| 2077 | for (BasicBlock *Succ : successors(BB: BB1)) { |
| 2078 | for (PHINode &PN : Succ->phis()) { |
| 2079 | Value *BB1V = PN.getIncomingValueForBlock(BB: BB1); |
| 2080 | Value *BB2V = PN.getIncomingValueForBlock(BB: BB2); |
| 2081 | if (BB1V == BB2V) |
| 2082 | continue; |
| 2083 | |
| 2084 | // These values do not agree. Insert a select instruction before NT |
| 2085 | // that determines the right value. |
| 2086 | SelectInst *&SI = InsertedSelects[std::make_pair(x&: BB1V, y&: BB2V)]; |
| 2087 | if (!SI) { |
| 2088 | // Propagate fast-math-flags from phi node to its replacement select. |
| 2089 | SI = cast<SelectInst>(Val: Builder.CreateSelectFMF( |
| 2090 | C: BI->getCondition(), True: BB1V, False: BB2V, |
| 2091 | FMFSource: isa<FPMathOperator>(Val: PN) ? &PN : nullptr, |
| 2092 | Name: BB1V->getName() + "." + BB2V->getName(), MDFrom: BI)); |
| 2093 | } |
| 2094 | |
| 2095 | // Make the PHI node use the select for all incoming values for BB1/BB2 |
| 2096 | for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) |
| 2097 | if (PN.getIncomingBlock(i) == BB1 || PN.getIncomingBlock(i) == BB2) |
| 2098 | PN.setIncomingValue(i, V: SI); |
| 2099 | } |
| 2100 | } |
| 2101 | } |
| 2102 | |
| 2103 | SmallVector<DominatorTree::UpdateType, 4> Updates; |
| 2104 | |
| 2105 | // Update any PHI nodes in our new successors. |
| 2106 | for (BasicBlock *Succ : successors(BB: BB1)) { |
| 2107 | addPredecessorToBlock(Succ, NewPred: TIParent, ExistPred: BB1); |
| 2108 | if (DTU) |
| 2109 | Updates.push_back(Elt: {DominatorTree::Insert, TIParent, Succ}); |
| 2110 | } |
| 2111 | |
| 2112 | if (DTU) |
| 2113 | for (BasicBlock *Succ : successors(I: TI)) |
| 2114 | Updates.push_back(Elt: {DominatorTree::Delete, TIParent, Succ}); |
| 2115 | |
| 2116 | eraseTerminatorAndDCECond(TI); |
| 2117 | if (DTU) |
| 2118 | DTU->applyUpdates(Updates); |
| 2119 | return Changed; |
| 2120 | } |
| 2121 | |
| 2122 | // TODO: Refine this. This should avoid cases like turning constant memcpy sizes |
| 2123 | // into variables. |
| 2124 | static bool replacingOperandWithVariableIsCheap(const Instruction *I, |
| 2125 | int OpIdx) { |
| 2126 | // Divide/Remainder by constant is typically much cheaper than by variable. |
| 2127 | if (I->isIntDivRem()) |
| 2128 | return OpIdx != 1; |
| 2129 | return !isa<IntrinsicInst>(Val: I); |
| 2130 | } |
| 2131 | |
// All instructions in Insts belong to different blocks that all unconditionally
// branch to a common successor. Analyze each instruction and return true if it
// would be possible to sink them into their successor, creating one common
// instruction instead. For every value that would be required to be provided by
// a PHI node (because an operand varies in each input block), add to
// PHIOperands.
static bool canSinkInstructions(
    ArrayRef<Instruction *> Insts,
    DenseMap<const Use *, SmallVector<Value *, 4>> &PHIOperands) {
  // Prune out obviously bad instructions to move. Each instruction must have
  // the same number of uses, and we check later that the uses are consistent.
  std::optional<unsigned> NumUses;
  for (auto *I : Insts) {
    // These instructions may change or break semantics if moved.
    if (isa<PHINode>(Val: I) || I->isEHPad() || isa<AllocaInst>(Val: I) ||
        I->getType()->isTokenTy())
      return false;

    // Do not try to sink an instruction in an infinite loop - it can cause
    // this algorithm to infinite loop.
    if (I->getParent()->getSingleSuccessor() == I->getParent())
      return false;

    // Conservatively return false if I is an inline-asm instruction. Sinking
    // and merging inline-asm instructions can potentially create arguments
    // that cannot satisfy the inline-asm constraints.
    // If the instruction has nomerge or convergent attribute, return false.
    if (const auto *C = dyn_cast<CallBase>(Val: I))
      if (C->isInlineAsm() || C->cannotMerge() || C->isConvergent())
        return false;

    // All candidates must have the same number of uses so that their uses can
    // be matched pairwise against PHIOperands below.
    if (!NumUses)
      NumUses = I->getNumUses();
    else if (NumUses != I->getNumUses())
      return false;
  }

  const Instruction *I0 = Insts.front();
  // Snapshot the memory-model-relaxation-annotation metadata of the first
  // candidate; every other candidate must carry the exact same MMRAs.
  const auto I0MMRA = MMRAMetadata(*I0);
  for (auto *I : Insts) {
    if (!I->isSameOperationAs(I: I0, flags: Instruction::CompareUsingIntersectedAttrs))
      return false;

    // Treat MMRAs conservatively. This pass can be quite aggressive and
    // could drop a lot of MMRAs otherwise.
    if (MMRAMetadata(*I) != I0MMRA)
      return false;
  }

  // Uses must be consistent: If I0 is used in a phi node in the sink target,
  // then the other phi operands must match the instructions from Insts. This
  // also has to hold true for any phi nodes that would be created as a result
  // of sinking. Both of these cases are represented by PhiOperands.
  for (const Use &U : I0->uses()) {
    auto It = PHIOperands.find(Val: &U);
    if (It == PHIOperands.end())
      // There may be uses in other blocks when sinking into a loop header.
      return false;
    if (!equal(LRange&: Insts, RRange&: It->second))
      return false;
  }

  // For calls to be sinkable, they must all be indirect, or have same callee.
  // I.e. if we have two direct calls to different callees, we don't want to
  // turn that into an indirect call. Likewise, if we have an indirect call,
  // and a direct call, we don't actually want to have a single indirect call.
  if (isa<CallBase>(Val: I0)) {
    auto IsIndirectCall = [](const Instruction *I) {
      return cast<CallBase>(Val: I)->isIndirectCall();
    };
    bool HaveIndirectCalls = any_of(Range&: Insts, P: IsIndirectCall);
    bool AllCallsAreIndirect = all_of(Range&: Insts, P: IsIndirectCall);
    if (HaveIndirectCalls) {
      if (!AllCallsAreIndirect)
        return false;
    } else {
      // All callees must be identical.
      Value *Callee = nullptr;
      for (const Instruction *I : Insts) {
        Value *CurrCallee = cast<CallBase>(Val: I)->getCalledOperand();
        if (!Callee)
          Callee = CurrCallee;
        else if (Callee != CurrCallee)
          return false;
      }
    }
  }

  // Each operand position must either be identical across all candidates, or
  // be legal to merge through a newly created PHI node (recorded in
  // PHIOperands for the caller).
  for (unsigned OI = 0, OE = I0->getNumOperands(); OI != OE; ++OI) {
    Value *Op = I0->getOperand(i: OI);
    if (Op->getType()->isTokenTy())
      // Don't touch any operand of token type.
      return false;

    auto SameAsI0 = [&I0, OI](const Instruction *I) {
      assert(I->getNumOperands() == I0->getNumOperands());
      return I->getOperand(i: OI) == I0->getOperand(i: OI);
    };
    if (!all_of(Range&: Insts, P: SameAsI0)) {
      // SROA can't speculate lifetime markers of selects/phis, and the
      // backend may handle such lifetimes incorrectly as well (#104776).
      // Don't sink lifetimes if it would introduce a phi on the pointer
      // argument.
      if (isa<LifetimeIntrinsic>(Val: I0) && OI == 1 &&
          any_of(Range&: Insts, P: [](const Instruction *I) {
            return isa<AllocaInst>(Val: I->getOperand(i: 1)->stripPointerCasts());
          }))
        return false;

      if ((isa<Constant>(Val: Op) && !replacingOperandWithVariableIsCheap(I: I0, OpIdx: OI)) ||
          !canReplaceOperandWithVariable(I: I0, OpIdx: OI))
        // We can't create a PHI from this GEP.
        return false;
      // Record the per-block operand values; if sinking actually happens,
      // these become the incoming values of the new PHI for this position.
      auto &Ops = PHIOperands[&I0->getOperandUse(i: OI)];
      for (auto *I : Insts)
        Ops.push_back(Elt: I->getOperand(i: OI));
    }
  }
  return true;
}
| 2251 | |
// Assuming canSinkInstructions(Blocks) has returned true, sink the last
// instruction of every block in Blocks to their common successor, commoning
// into one instruction.
static void sinkLastInstruction(ArrayRef<BasicBlock*> Blocks) {
  // All blocks branch unconditionally to the same successor, so the first
  // successor of any block's terminator is the common sink target.
  auto *BBEnd = Blocks[0]->getTerminator()->getSuccessor(Idx: 0);

  // canSinkInstructions returning true guarantees that every block has at
  // least one non-terminator instruction.
  SmallVector<Instruction*,4> Insts;
  for (auto *BB : Blocks) {
    // Collect the instruction immediately before each terminator.
    Instruction *I = BB->getTerminator();
    I = I->getPrevNode();
    Insts.push_back(Elt: I);
  }

  // We don't need to do any more checking here; canSinkInstructions should
  // have done it all for us.
  SmallVector<Value*, 4> NewOperands;
  Instruction *I0 = Insts.front();
  for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) {
    // This check is different to that in canSinkInstructions. There, we
    // cared about the global view once simplifycfg (and instcombine) have
    // completed - it takes into account PHIs that become trivially
    // simplifiable. However here we need a more local view; if an operand
    // differs we create a PHI and rely on instcombine to clean up the very
    // small mess we may make.
    bool NeedPHI = any_of(Range&: Insts, P: [&I0, O](const Instruction *I) {
      return I->getOperand(i: O) != I0->getOperand(i: O);
    });
    if (!NeedPHI) {
      // The operand is identical in all blocks; reuse it as-is.
      NewOperands.push_back(Elt: I0->getOperand(i: O));
      continue;
    }

    // Create a new PHI in the successor block and populate it.
    auto *Op = I0->getOperand(i: O);
    assert(!Op->getType()->isTokenTy() && "Can't PHI tokens!");
    auto *PN =
        PHINode::Create(Ty: Op->getType(), NumReservedValues: Insts.size(), NameStr: Op->getName() + ".sink");
    PN->insertBefore(InsertPos: BBEnd->begin());
    for (auto *I : Insts)
      PN->addIncoming(V: I->getOperand(i: O), BB: I->getParent());
    NewOperands.push_back(Elt: PN);
  }

  // Arbitrarily use I0 as the new "common" instruction; remap its operands
  // and move it to the start of the successor block.
  for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O)
    I0->getOperandUse(i: O).set(NewOperands[O]);

  I0->moveBefore(BB&: *BBEnd, I: BBEnd->getFirstInsertionPt());

  // Update metadata and IR flags, and merge debug locations.
  for (auto *I : Insts)
    if (I != I0) {
      // The debug location for the "common" instruction is the merged locations
      // of all the commoned instructions. We start with the original location
      // of the "common" instruction and iteratively merge each location in the
      // loop below.
      // This is an N-way merge, which will be inefficient if I0 is a CallInst.
      // However, as N-way merge for CallInst is rare, so we use simplified API
      // instead of using complex API for N-way merge.
      I0->applyMergedLocation(LocA: I0->getDebugLoc(), LocB: I->getDebugLoc());
      combineMetadataForCSE(K: I0, J: I, DoesKMove: true);
      I0->andIRFlags(V: I);
      if (auto *CB = dyn_cast<CallBase>(Val: I0)) {
        // Intersect call-site attributes so the merged call claims no more
        // than any of the originals did.
        bool Success = CB->tryIntersectAttributes(Other: cast<CallBase>(Val: I));
        assert(Success && "We should not be trying to sink callbases "
                          "with non-intersectable attributes");
        // For NDEBUG Compile.
        (void)Success;
      }
    }

  for (User *U : make_early_inc_range(Range: I0->users())) {
    // canSinkLastInstruction checked that all instructions are only used by
    // phi nodes in a way that allows replacing the phi node with the common
    // instruction.
    auto *PN = cast<PHINode>(Val: U);
    PN->replaceAllUsesWith(V: I0);
    PN->eraseFromParent();
  }

  // Finally nuke all instructions apart from the common instruction.
  for (auto *I : Insts) {
    if (I == I0)
      continue;
    // The remaining uses are debug users, replace those with the common inst.
    // In most (all?) cases this just introduces a use-before-def.
    assert(I->user_empty() && "Inst unexpectedly still has non-dbg users");
    I->replaceAllUsesWith(V: I0);
    I->eraseFromParent();
  }
}
| 2346 | |
/// Check whether BB's predecessors end with unconditional branches. If it is
/// true, sink any common code from the predecessors to BB.
static bool sinkCommonCodeFromPredecessors(BasicBlock *BB,
                                           DomTreeUpdater *DTU) {
  // We support two situations:
  //   (1) all incoming arcs are unconditional
  //   (2) there are non-unconditional incoming arcs
  //
  // (2) is very common in switch defaults and
  // else-if patterns;
  //
  //   if (a) f(1);
  //   else if (b) f(2);
  //
  // produces:
  //
  //       [if]
  //      /    \
  //    [f(1)] [if]
  //      |     | \
  //      |     |  |
  //      |  [f(2)]|
  //       \    | /
  //        [ end ]
  //
  // [end] has two unconditional predecessor arcs and one conditional. The
  // conditional refers to the implicit empty 'else' arc. This conditional
  // arc can also be caused by an empty default block in a switch.
  //
  // In this case, we attempt to sink code from all *unconditional* arcs.
  // If we can sink instructions from these arcs (determined during the scan
  // phase below) we insert a common successor for all unconditional arcs and
  // connect that to [end], to enable sinking:
  //
  //        [if]
  //       /    \
  //     [x(1)] [if]
  //       |     | \
  //       |     |  \
  //       |  [x(2)] |
  //        \   /    |
  //    [sink.split] |
  //          \     /
  //          [ end ]
  //
  SmallVector<BasicBlock*,4> UnconditionalPreds;
  bool HaveNonUnconditionalPredecessors = false;
  for (auto *PredBB : predecessors(BB)) {
    auto *PredBr = dyn_cast<BranchInst>(Val: PredBB->getTerminator());
    if (PredBr && PredBr->isUnconditional())
      UnconditionalPreds.push_back(Elt: PredBB);
    else
      HaveNonUnconditionalPredecessors = true;
  }
  // Sinking only makes sense with at least two unconditional source arcs.
  if (UnconditionalPreds.size() < 2)
    return false;

  // We take a two-step approach to tail sinking. First we scan from the end of
  // each block upwards in lockstep. If the n'th instruction from the end of
  // each block can be sunk, those instructions are added to ValuesToSink and we
  // carry on. If we can sink an instruction but need to PHI-merge some operands
  // (because they're not identical in each instruction) we add these to
  // PHIOperands.
  // We prepopulate PHIOperands with the phis that already exist in BB.
  DenseMap<const Use *, SmallVector<Value *, 4>> PHIOperands;
  for (PHINode &PN : BB->phis()) {
    // Map each predecessor to its incoming use so the per-block values can be
    // listed in UnconditionalPreds order.
    SmallDenseMap<BasicBlock *, const Use *, 4> IncomingVals;
    for (const Use &U : PN.incoming_values())
      IncomingVals.insert(KV: {PN.getIncomingBlock(U), &U});
    auto &Ops = PHIOperands[IncomingVals[UnconditionalPreds[0]]];
    for (BasicBlock *Pred : UnconditionalPreds)
      Ops.push_back(Elt: *IncomingVals[Pred]);
  }

  // ScanIdx counts how many trailing instructions are legally sinkable.
  int ScanIdx = 0;
  SmallPtrSet<Value*,4> InstructionsToSink;
  LockstepReverseIterator<true> LRI(UnconditionalPreds);
  while (LRI.isValid() &&
         canSinkInstructions(Insts: *LRI, PHIOperands)) {
    LLVM_DEBUG(dbgs() << "SINK: instruction can be sunk: " << *(*LRI)[0]
                      << "\n");
    InstructionsToSink.insert_range(R: *LRI);
    ++ScanIdx;
    --LRI;
  }

  // If no instructions can be sunk, early-return.
  if (ScanIdx == 0)
    return false;

  // When the common successor leads to deopt/unreachable, profitability is not
  // a concern, so the filtering below is skipped.
  bool followedByDeoptOrUnreachable = IsBlockFollowedByDeoptOrUnreachable(BB);

  if (!followedByDeoptOrUnreachable) {
    // Check whether this is the pointer operand of a load/store.
    auto IsMemOperand = [](Use &U) {
      auto *I = cast<Instruction>(Val: U.getUser());
      if (isa<LoadInst>(Val: I))
        return U.getOperandNo() == LoadInst::getPointerOperandIndex();
      if (isa<StoreInst>(Val: I))
        return U.getOperandNo() == StoreInst::getPointerOperandIndex();
      return false;
    };

    // Okay, we *could* sink last ScanIdx instructions. But how many can we
    // actually sink before encountering instruction that is unprofitable to
    // sink?
    auto ProfitableToSinkInstruction = [&](LockstepReverseIterator<true> &LRI) {
      unsigned NumPHIInsts = 0;
      for (Use &U : (*LRI)[0]->operands()) {
        auto It = PHIOperands.find(Val: &U);
        if (It != PHIOperands.end() && !all_of(Range&: It->second, P: [&](Value *V) {
              return InstructionsToSink.contains(Ptr: V);
            })) {
          ++NumPHIInsts;
          // Do not separate a load/store from the gep producing the address.
          // The gep can likely be folded into the load/store as an addressing
          // mode. Additionally, a load of a gep is easier to analyze than a
          // load of a phi.
          if (IsMemOperand(U) &&
              any_of(Range&: It->second, P: [](Value *V) { return isa<GEPOperator>(Val: V); }))
            return false;
          // FIXME: this check is overly optimistic. We may end up not sinking
          // said instruction, due to the very same profitability check.
          // See @creating_too_many_phis in sink-common-code.ll.
        }
      }
      LLVM_DEBUG(dbgs() << "SINK: #phi insts: " << NumPHIInsts << "\n");
      return NumPHIInsts <= 1;
    };

    // We've determined that we are going to sink last ScanIdx instructions,
    // and recorded them in InstructionsToSink. Now, some instructions may be
    // unprofitable to sink. But that determination depends on the instructions
    // that we are going to sink.

    // First, forward scan: find the first instruction unprofitable to sink,
    // recording all the ones that are profitable to sink.
    // FIXME: would it be better, after we detect that not all are profitable.
    // to either record the profitable ones, or erase the unprofitable ones?
    // Maybe we need to choose (at runtime) the one that will touch least
    // instrs?
    LRI.reset();
    int Idx = 0;
    SmallPtrSet<Value *, 4> InstructionsProfitableToSink;
    while (Idx < ScanIdx) {
      if (!ProfitableToSinkInstruction(LRI)) {
        // Too many PHIs would be created.
        LLVM_DEBUG(
            dbgs() << "SINK: stopping here, too many PHIs would be created!\n");
        break;
      }
      InstructionsProfitableToSink.insert_range(R: *LRI);
      --LRI;
      ++Idx;
    }

    // If no instructions can be sunk, early-return.
    if (Idx == 0)
      return false;

    // Did we determine that (only) some instructions are unprofitable to sink?
    if (Idx < ScanIdx) {
      // Okay, some instructions are unprofitable.
      ScanIdx = Idx;
      InstructionsToSink = InstructionsProfitableToSink;

      // But, that may make other instructions unprofitable, too.
      // So, do a backward scan, do any earlier instructions become
      // unprofitable?
      assert(
          !ProfitableToSinkInstruction(LRI) &&
          "We already know that the last instruction is unprofitable to sink");
      ++LRI;
      --Idx;
      while (Idx >= 0) {
        // If we detect that an instruction becomes unprofitable to sink,
        // all earlier instructions won't be sunk either,
        // so preemptively keep InstructionsProfitableToSink in sync.
        // FIXME: is this the most performant approach?
        for (auto *I : *LRI)
          InstructionsProfitableToSink.erase(Ptr: I);
        if (!ProfitableToSinkInstruction(LRI)) {
          // Everything starting with this instruction won't be sunk.
          ScanIdx = Idx;
          InstructionsToSink = InstructionsProfitableToSink;
        }
        ++LRI;
        --Idx;
      }
    }

    // If no instructions can be sunk, early-return.
    if (ScanIdx == 0)
      return false;
  }

  bool Changed = false;

  if (HaveNonUnconditionalPredecessors) {
    if (!followedByDeoptOrUnreachable) {
      // It is always legal to sink common instructions from unconditional
      // predecessors. However, if not all predecessors are unconditional,
      // this transformation might be pessimizing. So as a rule of thumb,
      // don't do it unless we'd sink at least one non-speculatable instruction.
      // See https://bugs.llvm.org/show_bug.cgi?id=30244
      LRI.reset();
      int Idx = 0;
      bool Profitable = false;
      while (Idx < ScanIdx) {
        if (!isSafeToSpeculativelyExecute(I: (*LRI)[0])) {
          Profitable = true;
          break;
        }
        --LRI;
        ++Idx;
      }
      if (!Profitable)
        return false;
    }

    LLVM_DEBUG(dbgs() << "SINK: Splitting edge\n");
    // We have a conditional edge and we're going to sink some instructions.
    // Insert a new block postdominating all blocks we're going to sink from.
    if (!SplitBlockPredecessors(BB, Preds: UnconditionalPreds, Suffix: ".sink.split", DTU))
      // Edges couldn't be split.
      return false;
    Changed = true;
  }

  // Now that we've analyzed all potential sinking candidates, perform the
  // actual sink. We iteratively sink the last non-terminator of the source
  // blocks into their common successor unless doing so would require too
  // many PHI instructions to be generated (currently only one PHI is allowed
  // per sunk instruction).
  //
  // We can use InstructionsToSink to discount values needing PHI-merging that
  // will actually be sunk in a later iteration. This allows us to be more
  // aggressive in what we sink. This does allow a false positive where we
  // sink presuming a later value will also be sunk, but stop half way through
  // and never actually sink it which means we produce more PHIs than intended.
  // This is unlikely in practice though.
  int SinkIdx = 0;
  for (; SinkIdx != ScanIdx; ++SinkIdx) {
    LLVM_DEBUG(dbgs() << "SINK: Sink: "
                      << *UnconditionalPreds[0]->getTerminator()->getPrevNode()
                      << "\n");

    // Because we've sunk every instruction in turn, the current instruction to
    // sink is always at index 0.
    LRI.reset();

    sinkLastInstruction(Blocks: UnconditionalPreds);
    NumSinkCommonInstrs++;
    Changed = true;
  }
  if (SinkIdx != 0)
    ++NumSinkCommonCode;
  return Changed;
}
| 2606 | |
| 2607 | namespace { |
| 2608 | |
/// Partitions `invoke` instructions (all sharing one unwind destination) into
/// sets whose members are mutually mergeable into a single `invoke`, as
/// decided by shouldBelongToSameSet().
struct CompatibleSets {
  // A set of mutually-compatible invokes; two is the expected common size.
  using SetTy = SmallVector<InvokeInst *, 2>;

  SmallVector<SetTy, 1> Sets;

  // Returns true if the two given candidate invokes could be merged.
  static bool shouldBelongToSameSet(ArrayRef<InvokeInst *> Invokes);

  // Returns the existing set II is compatible with, or a new empty set.
  SetTy &getCompatibleSet(InvokeInst *II);

  // Places II into its compatible set (creating one if needed).
  void insert(InvokeInst *II);
};
| 2620 | |
| 2621 | CompatibleSets::SetTy &CompatibleSets::getCompatibleSet(InvokeInst *II) { |
| 2622 | // Perform a linear scan over all the existing sets, see if the new `invoke` |
| 2623 | // is compatible with any particular set. Since we know that all the `invokes` |
| 2624 | // within a set are compatible, only check the first `invoke` in each set. |
| 2625 | // WARNING: at worst, this has quadratic complexity. |
| 2626 | for (CompatibleSets::SetTy &Set : Sets) { |
| 2627 | if (CompatibleSets::shouldBelongToSameSet(Invokes: {Set.front(), II})) |
| 2628 | return Set; |
| 2629 | } |
| 2630 | |
| 2631 | // Otherwise, we either had no sets yet, or this invoke forms a new set. |
| 2632 | return Sets.emplace_back(); |
| 2633 | } |
| 2634 | |
| 2635 | void CompatibleSets::insert(InvokeInst *II) { |
| 2636 | getCompatibleSet(II).emplace_back(Args&: II); |
| 2637 | } |
| 2638 | |
bool CompatibleSets::shouldBelongToSameSet(ArrayRef<InvokeInst *> Invokes) {
  assert(Invokes.size() == 2 && "Always called with exactly two candidates.");

  // Can we theoretically merge these `invoke`s?
  auto IsIllegalToMerge = [](InvokeInst *II) {
    return II->cannotMerge() || II->isInlineAsm();
  };
  if (any_of(Range&: Invokes, P: IsIllegalToMerge))
    return false;

  // Either both `invoke`s must be direct,
  // or both `invoke`s must be indirect.
  auto IsIndirectCall = [](InvokeInst *II) { return II->isIndirectCall(); };
  bool HaveIndirectCalls = any_of(Range&: Invokes, P: IsIndirectCall);
  bool AllCallsAreIndirect = all_of(Range&: Invokes, P: IsIndirectCall);
  if (HaveIndirectCalls) {
    if (!AllCallsAreIndirect)
      return false;
  } else {
    // All callees must be identical.
    Value *Callee = nullptr;
    for (InvokeInst *II : Invokes) {
      Value *CurrCallee = II->getCalledOperand();
      assert(CurrCallee && "There is always a called operand.");
      if (!Callee)
        Callee = CurrCallee;
      else if (Callee != CurrCallee)
        return false;
    }
  }

  // Either both `invoke`s must not have a normal destination,
  // or both `invoke`s must have a normal destination.
  // (A normal destination that starts with `unreachable` counts as absent.)
  auto HasNormalDest = [](InvokeInst *II) {
    return !isa<UnreachableInst>(Val: II->getNormalDest()->getFirstNonPHIOrDbg());
  };
  if (any_of(Range&: Invokes, P: HasNormalDest)) {
    // Do not merge `invoke` that does not have a normal destination with one
    // that does have a normal destination, even though doing so would be legal.
    if (!all_of(Range&: Invokes, P: HasNormalDest))
      return false;

    // All normal destinations must be identical.
    BasicBlock *NormalBB = nullptr;
    for (InvokeInst *II : Invokes) {
      BasicBlock *CurrNormalBB = II->getNormalDest();
      assert(CurrNormalBB && "There is always a 'continue to' basic block.");
      if (!NormalBB)
        NormalBB = CurrNormalBB;
      else if (NormalBB != CurrNormalBB)
        return false;
    }

    // In the normal destination, the incoming values for these two `invoke`s
    // must be compatible. The invokes themselves are treated as equivalent,
    // since after merging there will only be one of them.
    SmallPtrSet<Value *, 16> EquivalenceSet(llvm::from_range, Invokes);
    if (!incomingValuesAreCompatible(
            BB: NormalBB, IncomingBlocks: {Invokes[0]->getParent(), Invokes[1]->getParent()},
            EquivalenceSet: &EquivalenceSet))
      return false;
  }

#ifndef NDEBUG
  // All unwind destinations must be identical.
  // We know that because we have started from said unwind destination.
  BasicBlock *UnwindBB = nullptr;
  for (InvokeInst *II : Invokes) {
    BasicBlock *CurrUnwindBB = II->getUnwindDest();
    assert(CurrUnwindBB && "There is always an 'unwind to' basic block.");
    if (!UnwindBB)
      UnwindBB = CurrUnwindBB;
    else
      assert(UnwindBB == CurrUnwindBB && "Unexpected unwind destination.");
  }
#endif

  // In the unwind destination, the incoming values for these two `invoke`s
  // must be compatible.
  if (!incomingValuesAreCompatible(
          BB: Invokes.front()->getUnwindDest(),
          IncomingBlocks: {Invokes[0]->getParent(), Invokes[1]->getParent()}))
    return false;

  // Ignoring arguments, these `invoke`s must be identical,
  // including operand bundles.
  const InvokeInst *II0 = Invokes.front();
  for (auto *II : Invokes.drop_front())
    if (!II->isSameOperationAs(I: II0, flags: Instruction::CompareUsingIntersectedAttrs))
      return false;

  // Can we theoretically form the data operands for the merged `invoke`?
  // A differing argument is fine only if it can be turned into a PHI-fed
  // variable operand; tokens can never be PHI'd.
  auto IsIllegalToMergeArguments = [](auto Ops) {
    Use &U0 = std::get<0>(Ops);
    Use &U1 = std::get<1>(Ops);
    if (U0 == U1)
      return false;
    return U0->getType()->isTokenTy() ||
           !canReplaceOperandWithVariable(I: cast<Instruction>(Val: U0.getUser()),
                                          OpIdx: U0.getOperandNo());
  };
  assert(Invokes.size() == 2 && "Always called with exactly two candidates.");
  if (any_of(Range: zip(t: Invokes[0]->data_ops(), u: Invokes[1]->data_ops()),
             P: IsIllegalToMergeArguments))
    return false;

  return true;
}
| 2746 | |
| 2747 | } // namespace |
| 2748 | |
// Merge all invokes in the provided set, all of which are compatible
// as per the `CompatibleSets::shouldBelongToSameSet()`.
//
// A new block is created holding a clone of one of the invokes; every
// original invoke block is rewritten to branch unconditionally to it, and any
// differing indirect callees / data operands are merged via PHI nodes.
// Updates \p DTU (if non-null) with the corresponding edge insert/delete set.
static void mergeCompatibleInvokesImpl(ArrayRef<InvokeInst *> Invokes,
                                       DomTreeUpdater *DTU) {
  assert(Invokes.size() >= 2 && "Must have at least two invokes to merge.");

  SmallVector<DominatorTree::UpdateType, 8> Updates;
  if (DTU)
    // Worst case: 2 successor edges for the merged invoke, plus for each
    // original invoke one inserted edge and two deleted ones.
    Updates.reserve(N: 2 + 3 * Invokes.size());

  // If the (shared) normal destination begins with `unreachable`, the merged
  // invoke will instead get a fresh, trivially-unreachable normal destination.
  bool HasNormalDest =
      !isa<UnreachableInst>(Val: Invokes[0]->getNormalDest()->getFirstNonPHIOrDbg());

  // Clone one of the invokes into a new basic block.
  // Since they are all compatible, it doesn't matter which invoke is cloned.
  InvokeInst *MergedInvoke = [&Invokes, HasNormalDest]() {
    InvokeInst *II0 = Invokes.front();
    BasicBlock *II0BB = II0->getParent();
    BasicBlock *InsertBeforeBlock =
        II0->getParent()->getIterator()->getNextNode();
    Function *Func = II0BB->getParent();
    LLVMContext &Ctx = II0->getContext();

    BasicBlock *MergedInvokeBB = BasicBlock::Create(
        Context&: Ctx, Name: II0BB->getName() + ".invoke", Parent: Func, InsertBefore: InsertBeforeBlock);

    auto *MergedInvoke = cast<InvokeInst>(Val: II0->clone());
    // NOTE: all invokes have the same attributes, so no handling needed.
    MergedInvoke->insertInto(ParentBB: MergedInvokeBB, It: MergedInvokeBB->end());

    if (!HasNormalDest) {
      // This set does not have a normal destination,
      // so just form a new block with unreachable terminator.
      BasicBlock *MergedNormalDest = BasicBlock::Create(
          Context&: Ctx, Name: II0BB->getName() + ".cont", Parent: Func, InsertBefore: InsertBeforeBlock);
      auto *UI = new UnreachableInst(Ctx, MergedNormalDest);
      UI->setDebugLoc(DebugLoc::getTemporary());
      MergedInvoke->setNormalDest(MergedNormalDest);
    }

    // The unwind destination, however, remains identical for all invokes here.

    return MergedInvoke;
  }();

  if (DTU) {
    // Predecessor blocks that contained these invokes will now branch to
    // the new block that contains the merged invoke, ...
    for (InvokeInst *II : Invokes)
      Updates.push_back(
          Elt: {DominatorTree::Insert, II->getParent(), MergedInvoke->getParent()});

    // ... which has the new `unreachable` block as normal destination,
    // or unwinds to the (same for all `invoke`s in this set) `landingpad`,
    for (BasicBlock *SuccBBOfMergedInvoke : successors(I: MergedInvoke))
      Updates.push_back(Elt: {DominatorTree::Insert, MergedInvoke->getParent(),
                         SuccBBOfMergedInvoke});

    // Since predecessor blocks now unconditionally branch to a new block,
    // they no longer branch to their original successors.
    for (InvokeInst *II : Invokes)
      for (BasicBlock *SuccOfPredBB : successors(BB: II->getParent()))
        Updates.push_back(
            Elt: {DominatorTree::Delete, II->getParent(), SuccOfPredBB});
  }

  bool IsIndirectCall = Invokes[0]->isIndirectCall();

  // Form the merged operands for the merged invoke.
  for (Use &U : MergedInvoke->operands()) {
    // Only PHI together the indirect callees and data operands.
    if (MergedInvoke->isCallee(U: &U)) {
      if (!IsIndirectCall)
        continue;
    } else if (!MergedInvoke->isDataOperand(U: &U))
      continue;

    // Don't create trivial PHI's with all-identical incoming values.
    bool NeedPHI = any_of(Range&: Invokes, P: [&U](InvokeInst *II) {
      return II->getOperand(i_nocapture: U.getOperandNo()) != U.get();
    });
    if (!NeedPHI)
      continue;

    // Form a PHI out of all the data ops under this index.
    PHINode *PN = PHINode::Create(
        Ty: U->getType(), /*NumReservedValues=*/Invokes.size(), NameStr: "", InsertBefore: MergedInvoke->getIterator());
    for (InvokeInst *II : Invokes)
      PN->addIncoming(V: II->getOperand(i_nocapture: U.getOperandNo()), BB: II->getParent());

    U.set(PN);
  }

  // We've ensured that each PHI node has compatible (identical) incoming values
  // when coming from each of the `invoke`s in the current merge set,
  // so update the PHI nodes accordingly.
  for (BasicBlock *Succ : successors(I: MergedInvoke))
    addPredecessorToBlock(Succ, /*NewPred=*/MergedInvoke->getParent(),
                          /*ExistPred=*/Invokes.front()->getParent());

  // And finally, replace the original `invoke`s with an unconditional branch
  // to the block with the merged `invoke`. Also, give that merged `invoke`
  // the merged debugloc of all the original `invoke`s.
  DILocation *MergedDebugLoc = nullptr;
  for (InvokeInst *II : Invokes) {
    // Compute the debug location common to all the original `invoke`s.
    if (!MergedDebugLoc)
      MergedDebugLoc = II->getDebugLoc();
    else
      MergedDebugLoc =
          DebugLoc::getMergedLocation(LocA: MergedDebugLoc, LocB: II->getDebugLoc());

    // And replace the old `invoke` with an unconditional branch
    // to the block with the merged `invoke`.
    for (BasicBlock *OrigSuccBB : successors(BB: II->getParent()))
      OrigSuccBB->removePredecessor(Pred: II->getParent());
    auto *BI = BranchInst::Create(IfTrue: MergedInvoke->getParent(), InsertBefore: II->getParent());
    // The unconditional branch is part of the replacement for the original
    // invoke, so should use its DebugLoc.
    BI->setDebugLoc(II->getDebugLoc());
    bool Success = MergedInvoke->tryIntersectAttributes(Other: II);
    assert(Success && "Merged invokes with incompatible attributes");
    // For NDEBUG Compile
    (void)Success;
    II->replaceAllUsesWith(V: MergedInvoke);
    II->eraseFromParent();
    ++NumInvokesMerged;
  }
  MergedInvoke->setDebugLoc(MergedDebugLoc);
  ++NumInvokeSetsFormed;

  if (DTU)
    DTU->applyUpdates(Updates);
}
| 2883 | |
| 2884 | /// If this block is a `landingpad` exception handling block, categorize all |
| 2885 | /// the predecessor `invoke`s into sets, with all `invoke`s in each set |
| 2886 | /// being "mergeable" together, and then merge invokes in each set together. |
| 2887 | /// |
| 2888 | /// This is a weird mix of hoisting and sinking. Visually, it goes from: |
| 2889 | /// [...] [...] |
| 2890 | /// | | |
| 2891 | /// [invoke0] [invoke1] |
| 2892 | /// / \ / \ |
| 2893 | /// [cont0] [landingpad] [cont1] |
| 2894 | /// to: |
| 2895 | /// [...] [...] |
| 2896 | /// \ / |
| 2897 | /// [invoke] |
| 2898 | /// / \ |
| 2899 | /// [cont] [landingpad] |
| 2900 | /// |
| 2901 | /// But of course we can only do that if the invokes share the `landingpad`, |
| 2902 | /// edges invoke0->cont0 and invoke1->cont1 are "compatible", |
| 2903 | /// and the invoked functions are "compatible". |
| 2904 | static bool mergeCompatibleInvokes(BasicBlock *BB, DomTreeUpdater *DTU) { |
| 2905 | if (!EnableMergeCompatibleInvokes) |
| 2906 | return false; |
| 2907 | |
| 2908 | bool Changed = false; |
| 2909 | |
| 2910 | // FIXME: generalize to all exception handling blocks? |
| 2911 | if (!BB->isLandingPad()) |
| 2912 | return Changed; |
| 2913 | |
| 2914 | CompatibleSets Grouper; |
| 2915 | |
| 2916 | // Record all the predecessors of this `landingpad`. As per verifier, |
| 2917 | // the only allowed predecessor is the unwind edge of an `invoke`. |
| 2918 | // We want to group "compatible" `invokes` into the same set to be merged. |
| 2919 | for (BasicBlock *PredBB : predecessors(BB)) |
| 2920 | Grouper.insert(II: cast<InvokeInst>(Val: PredBB->getTerminator())); |
| 2921 | |
| 2922 | // And now, merge `invoke`s that were grouped togeter. |
| 2923 | for (ArrayRef<InvokeInst *> Invokes : Grouper.Sets) { |
| 2924 | if (Invokes.size() < 2) |
| 2925 | continue; |
| 2926 | Changed = true; |
| 2927 | mergeCompatibleInvokesImpl(Invokes, DTU); |
| 2928 | } |
| 2929 | |
| 2930 | return Changed; |
| 2931 | } |
| 2932 | |
| 2933 | namespace { |
| 2934 | /// Track ephemeral values, which should be ignored for cost-modelling |
| 2935 | /// purposes. Requires walking instructions in reverse order. |
| 2936 | class EphemeralValueTracker { |
| 2937 | SmallPtrSet<const Instruction *, 32> EphValues; |
| 2938 | |
| 2939 | bool isEphemeral(const Instruction *I) { |
| 2940 | if (isa<AssumeInst>(Val: I)) |
| 2941 | return true; |
| 2942 | return !I->mayHaveSideEffects() && !I->isTerminator() && |
| 2943 | all_of(Range: I->users(), P: [&](const User *U) { |
| 2944 | return EphValues.count(Ptr: cast<Instruction>(Val: U)); |
| 2945 | }); |
| 2946 | } |
| 2947 | |
| 2948 | public: |
| 2949 | bool track(const Instruction *I) { |
| 2950 | if (isEphemeral(I)) { |
| 2951 | EphValues.insert(Ptr: I); |
| 2952 | return true; |
| 2953 | } |
| 2954 | return false; |
| 2955 | } |
| 2956 | |
| 2957 | bool contains(const Instruction *I) const { return EphValues.contains(Ptr: I); } |
| 2958 | }; |
| 2959 | } // namespace |
| 2960 | |
/// Determine whether the sole store in a conditional block (\p I, located in
/// \p StoreBB) can be speculated into its predecessor \p BrBB, by finding an
/// earlier simple store to (or load from) the same pointer in \p BrBB which
/// proves the location is unconditionally written/readable.
///
/// We are looking for code like the following:
///   BrBB:
///     store i32 %add, i32* %arrayidx2
///     ... // No other stores or function calls (we could be calling a memory
///     ... // function).
///     %cmp = icmp ult %x, %y
///     br i1 %cmp, label %EndBB, label %ThenBB
///   ThenBB:
///     store i32 %add5, i32* %arrayidx2
///     br label EndBB
///   EndBB:
///     ...
///   We are going to transform this into:
///   BrBB:
///     store i32 %add, i32* %arrayidx2
///     ... //
///     %cmp = icmp ult %x, %y
///     %add.add5 = select i1 %cmp, i32 %add, %add5
///     store i32 %add.add5, i32* %arrayidx2
///     ...
///
/// \return The pointer to the value of the previous store if the store can be
///         hoisted into the predecessor block, or the previous load itself if
///         a suitable load justifies the speculation. nullptr otherwise.
///
/// NOTE(review): \p StoreBB and \p EndBB are currently unused in the body.
static Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB,
                                     BasicBlock *StoreBB, BasicBlock *EndBB) {
  StoreInst *StoreToHoist = dyn_cast<StoreInst>(Val: I);
  if (!StoreToHoist)
    return nullptr;

  // Volatile or atomic.
  if (!StoreToHoist->isSimple())
    return nullptr;

  Value *StorePtr = StoreToHoist->getPointerOperand();
  Type *StoreTy = StoreToHoist->getValueOperand()->getType();

  // Look for a store to the same pointer in BrBB, scanning backwards from the
  // branch over a small fixed window of instructions.
  unsigned MaxNumInstToLookAt = 9;
  // Skip pseudo probe intrinsic calls which are not really killing any memory
  // accesses.
  for (Instruction &CurI : reverse(C: BrBB->instructionsWithoutDebug(SkipPseudoOp: true))) {
    if (!MaxNumInstToLookAt)
      break;
    --MaxNumInstToLookAt;

    // Could be calling an instruction that affects memory like free().
    if (CurI.mayWriteToMemory() && !isa<StoreInst>(Val: CurI))
      return nullptr;

    if (auto *SI = dyn_cast<StoreInst>(Val: &CurI)) {
      // Found the previous store to same location and type. Make sure it is
      // simple, to avoid introducing a spurious non-atomic write after an
      // atomic write. Also require at least the alignment of the hoisted
      // store, so no stricter alignment assumption is silently introduced.
      if (SI->getPointerOperand() == StorePtr &&
          SI->getValueOperand()->getType() == StoreTy && SI->isSimple() &&
          SI->getAlign() >= StoreToHoist->getAlign())
        // Found the previous store, return its value operand.
        return SI->getValueOperand();
      return nullptr; // Unknown store.
    }

    if (auto *LI = dyn_cast<LoadInst>(Val: &CurI)) {
      // A simple load from the same pointer can also justify speculation,
      // provided the underlying object is writable, its provenance is not
      // captured, and (if required) the pointer is dereferenceable.
      if (LI->getPointerOperand() == StorePtr && LI->getType() == StoreTy &&
          LI->isSimple() && LI->getAlign() >= StoreToHoist->getAlign()) {
        Value *Obj = getUnderlyingObject(V: StorePtr);
        bool ExplicitlyDereferenceableOnly;
        if (isWritableObject(Object: Obj, ExplicitlyDereferenceableOnly) &&
            capturesNothing(
                CC: PointerMayBeCaptured(V: Obj, /*ReturnCaptures=*/false,
                                      Mask: CaptureComponents::Provenance)) &&
            (!ExplicitlyDereferenceableOnly ||
             isDereferenceablePointer(V: StorePtr, Ty: StoreTy,
                                      DL: LI->getDataLayout()))) {
          // Found a previous load, return it.
          return LI;
        }
      }
      // The load didn't work out, but we may still find a store.
    }
  }

  return nullptr;
}
| 3047 | |
/// Estimate the cost of the insertion(s) and check that the PHI nodes can be
/// converted to selects.
///
/// For each non-trivial PHI in \p EndBB (one whose incoming values from \p BB
/// and \p ThenBB differ), the cost of the select that would replace it is
/// accumulated into \p Cost, and \p SpeculatedInstructions is bumped for each
/// PHI with a ConstantExpr incoming value, since such a constant may later be
/// expanded into real instructions.
///
/// \returns true if there is at least one PHI worth rewriting and no PHI
/// blocks the transform (e.g. one where removing undefined behavior is the
/// better option, or one whose constant-expression operands are too costly).
static bool validateAndCostRequiredSelects(BasicBlock *BB, BasicBlock *ThenBB,
                                           BasicBlock *EndBB,
                                           unsigned &SpeculatedInstructions,
                                           InstructionCost &Cost,
                                           const TargetTransformInfo &TTI) {
  // Optimize purely for size when the function is minsize; otherwise balance
  // size and latency.
  TargetTransformInfo::TargetCostKind CostKind =
      BB->getParent()->hasMinSize()
          ? TargetTransformInfo::TCK_CodeSize
          : TargetTransformInfo::TCK_SizeAndLatency;

  bool HaveRewritablePHIs = false;
  for (PHINode &PN : EndBB->phis()) {
    Value *OrigV = PN.getIncomingValueForBlock(BB);
    Value *ThenV = PN.getIncomingValueForBlock(BB: ThenBB);

    // FIXME: Try to remove some of the duplication with
    // hoistCommonCodeFromSuccessors. Skip PHIs which are trivial.
    if (ThenV == OrigV)
      continue;

    // Cost of the select this PHI would become.
    Cost += TTI.getCmpSelInstrCost(Opcode: Instruction::Select, ValTy: PN.getType(),
                                   CondTy: CmpInst::makeCmpResultType(opnd_type: PN.getType()),
                                   VecPred: CmpInst::BAD_ICMP_PREDICATE, CostKind);

    // Don't convert to selects if we could remove undefined behavior instead.
    if (passingValueIsAlwaysUndefined(V: OrigV, I: &PN) ||
        passingValueIsAlwaysUndefined(V: ThenV, I: &PN))
      return false;

    HaveRewritablePHIs = true;
    ConstantExpr *OrigCE = dyn_cast<ConstantExpr>(Val: OrigV);
    ConstantExpr *ThenCE = dyn_cast<ConstantExpr>(Val: ThenV);
    if (!OrigCE && !ThenCE)
      continue; // Known cheap (FIXME: Maybe not true for aggregates).

    InstructionCost OrigCost = OrigCE ? computeSpeculationCost(I: OrigCE, TTI) : 0;
    InstructionCost ThenCost = ThenCE ? computeSpeculationCost(I: ThenCE, TTI) : 0;
    InstructionCost MaxCost =
        2 * PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic;
    if (OrigCost + ThenCost > MaxCost)
      return false;

    // Account for the cost of an unfolded ConstantExpr which could end up
    // getting expanded into Instructions.
    // FIXME: This doesn't account for how many operations are combined in the
    // constant expression.
    ++SpeculatedInstructions;
    if (SpeculatedInstructions > 1)
      return false;
  }

  return HaveRewritablePHIs;
}
| 3103 | |
| 3104 | static bool isProfitableToSpeculate(const BranchInst *BI, |
| 3105 | std::optional<bool> Invert, |
| 3106 | const TargetTransformInfo &TTI) { |
| 3107 | // If the branch is non-unpredictable, and is predicted to *not* branch to |
| 3108 | // the `then` block, then avoid speculating it. |
| 3109 | if (BI->getMetadata(KindID: LLVMContext::MD_unpredictable)) |
| 3110 | return true; |
| 3111 | |
| 3112 | uint64_t TWeight, FWeight; |
| 3113 | if (!extractBranchWeights(I: *BI, TrueVal&: TWeight, FalseVal&: FWeight) || (TWeight + FWeight) == 0) |
| 3114 | return true; |
| 3115 | |
| 3116 | if (!Invert.has_value()) |
| 3117 | return false; |
| 3118 | |
| 3119 | uint64_t EndWeight = *Invert ? TWeight : FWeight; |
| 3120 | BranchProbability BIEndProb = |
| 3121 | BranchProbability::getBranchProbability(Numerator: EndWeight, Denominator: TWeight + FWeight); |
| 3122 | BranchProbability Likely = TTI.getPredictableBranchThreshold(); |
| 3123 | return BIEndProb < Likely; |
| 3124 | } |
| 3125 | |
/// Speculate a conditional basic block flattening the CFG.
///
/// Note that this is a very risky transform currently. Speculating
/// instructions like this is most often not desirable. Instead, there is an MI
/// pass which can do it with full awareness of the resource constraints.
/// However, some cases are "obvious" and we should do directly. An example of
/// this is speculating a single, reasonably cheap instruction.
///
/// There is only one distinct advantage to flattening the CFG at the IR level:
/// it makes very common but simplistic optimizations such as are common in
/// instcombine and the DAG combiner more powerful by removing CFG edges and
/// modeling their effects with easier to reason about SSA value graphs.
///
///
/// An illustration of this transform is turning this IR:
/// \code
///   BB:
///     %cmp = icmp ult %x, %y
///     br i1 %cmp, label %EndBB, label %ThenBB
///   ThenBB:
///     %sub = sub %x, %y
///     br label BB2
///   EndBB:
///     %phi = phi [ %sub, %ThenBB ], [ 0, %BB ]
///     ...
/// \endcode
///
/// Into this IR:
/// \code
///   BB:
///     %cmp = icmp ult %x, %y
///     %sub = sub %x, %y
///     %cond = select i1 %cmp, 0, %sub
///     ...
/// \endcode
///
/// \param BI     The conditional branch terminating the predecessor block.
/// \param ThenBB The conditionally-executed block to be flattened; its
///               terminator's first successor is the merge block (EndBB).
/// \returns true if the conditional block is removed.
bool SimplifyCFGOpt::speculativelyExecuteBB(BranchInst *BI,
                                            BasicBlock *ThenBB) {
  if (!Options.SpeculateBlocks)
    return false;

  // Be conservative for now. FP select instruction can often be expensive.
  Value *BrCond = BI->getCondition();
  if (isa<FCmpInst>(Val: BrCond))
    return false;

  BasicBlock *BB = BI->getParent();
  BasicBlock *EndBB = ThenBB->getTerminator()->getSuccessor(Idx: 0);
  InstructionCost Budget =
      PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic;

  // If ThenBB is actually on the false edge of the conditional branch, remember
  // to swap the select operands later.
  bool Invert = false;
  if (ThenBB != BI->getSuccessor(i: 0)) {
    assert(ThenBB == BI->getSuccessor(1) && "No edge from 'if' block?");
    Invert = true;
  }
  assert(EndBB == BI->getSuccessor(!Invert) && "No edge from to end block");

  if (!isProfitableToSpeculate(BI, Invert, TTI))
    return false;

  // Keep a count of how many times instructions are used within ThenBB when
  // they are candidates for sinking into ThenBB. Specifically:
  // - They are defined in BB, and
  // - They have no side effects, and
  // - All of their uses are in ThenBB.
  SmallDenseMap<Instruction *, unsigned, 4> SinkCandidateUseCounts;

  SmallVector<Instruction *, 4> SpeculatedPseudoProbes;

  unsigned SpeculatedInstructions = 0;
  bool HoistLoadsStores = Options.HoistLoadsStoresWithCondFaulting;
  SmallVector<Instruction *, 2> SpeculatedConditionalLoadsStores;
  Value *SpeculatedStoreValue = nullptr;
  StoreInst *SpeculatedStore = nullptr;
  EphemeralValueTracker EphTracker;
  // Scan ThenBB (minus its terminator) in reverse so ephemeral values are
  // recognized before their operands.
  for (Instruction &I : reverse(C: drop_end(RangeOrContainer&: *ThenBB))) {
    // Skip pseudo probes. The consequence is we lose track of the branch
    // probability for ThenBB, which is fine since the optimization here takes
    // place regardless of the branch probability.
    if (isa<PseudoProbeInst>(Val: I)) {
      // The probe should be deleted so that it will not be over-counted when
      // the samples collected on the non-conditional path are counted towards
      // the conditional path. We leave it for the counts inference algorithm to
      // figure out a proper count for an unknown probe.
      SpeculatedPseudoProbes.push_back(Elt: &I);
      continue;
    }

    // Ignore ephemeral values, they will be dropped by the transform.
    if (EphTracker.track(I: &I))
      continue;

    // Only speculatively execute a single instruction (not counting the
    // terminator) for now.
    bool IsSafeCheapLoadStore = HoistLoadsStores &&
                                isSafeCheapLoadStore(I: &I, TTI) &&
                                SpeculatedConditionalLoadsStores.size() <
                                    HoistLoadsStoresWithCondFaultingThreshold;
    // Not count load/store into cost if target supports conditional faulting
    // b/c it's cheap to speculate it.
    if (IsSafeCheapLoadStore)
      SpeculatedConditionalLoadsStores.push_back(Elt: &I);
    else
      ++SpeculatedInstructions;

    if (SpeculatedInstructions > 1)
      return false;

    // Don't hoist the instruction if it's unsafe or expensive.
    if (!IsSafeCheapLoadStore &&
        !isSafeToSpeculativelyExecute(I: &I, CtxI: BI, AC: Options.AC) &&
        !(HoistCondStores && !SpeculatedStoreValue &&
          (SpeculatedStoreValue =
               isSafeToSpeculateStore(I: &I, BrBB: BB, StoreBB: ThenBB, EndBB))))
      return false;
    if (!IsSafeCheapLoadStore && !SpeculatedStoreValue &&
        computeSpeculationCost(I: &I, TTI) >
            PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic)
      return false;

    // Store the store speculation candidate.
    if (!SpeculatedStore && SpeculatedStoreValue)
      SpeculatedStore = cast<StoreInst>(Val: &I);

    // Do not hoist the instruction if any of its operands are defined but not
    // used in BB. The transformation will prevent the operand from
    // being sunk into the use block.
    for (Use &Op : I.operands()) {
      Instruction *OpI = dyn_cast<Instruction>(Val&: Op);
      if (!OpI || OpI->getParent() != BB || OpI->mayHaveSideEffects())
        continue; // Not a candidate for sinking.

      ++SinkCandidateUseCounts[OpI];
    }
  }

  // Consider any sink candidates which are only used in ThenBB as costs for
  // speculation. Note, while we iterate over a DenseMap here, we are summing
  // and so iteration order isn't significant.
  for (const auto &[Inst, Count] : SinkCandidateUseCounts)
    if (Inst->hasNUses(N: Count)) {
      ++SpeculatedInstructions;
      if (SpeculatedInstructions > 1)
        return false;
    }

  // Check that we can insert the selects and that it's not too expensive to do
  // so.
  bool Convert =
      SpeculatedStore != nullptr || !SpeculatedConditionalLoadsStores.empty();
  InstructionCost Cost = 0;
  Convert |= validateAndCostRequiredSelects(BB, ThenBB, EndBB,
                                            SpeculatedInstructions, Cost, TTI);
  if (!Convert || Cost > Budget)
    return false;

  // If we get here, we can hoist the instruction and if-convert.
  LLVM_DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *ThenBB << "\n";);

  Instruction *Sel = nullptr;
  // Insert a select of the value of the speculated store.
  if (SpeculatedStoreValue) {
    IRBuilder<NoFolder> Builder(BI);
    Value *OrigV = SpeculatedStore->getValueOperand();
    Value *TrueV = SpeculatedStore->getValueOperand();
    Value *FalseV = SpeculatedStoreValue;
    if (Invert)
      std::swap(a&: TrueV, b&: FalseV);
    Value *S = Builder.CreateSelect(
        C: BrCond, True: TrueV, False: FalseV, Name: "spec.store.select", MDFrom: BI);
    Sel = cast<Instruction>(Val: S);
    SpeculatedStore->setOperand(i_nocapture: 0, Val_nocapture: S);
    SpeculatedStore->applyMergedLocation(LocA: BI->getDebugLoc(),
                                         LocB: SpeculatedStore->getDebugLoc());
    // The value stored is still conditional, but the store itself is now
    // unconditonally executed, so we must be sure that any linked dbg.assign
    // intrinsics are tracking the new stored value (the result of the
    // select). If we don't, and the store were to be removed by another pass
    // (e.g. DSE), then we'd eventually end up emitting a location describing
    // the conditional value, unconditionally.
    //
    // === Before this transformation ===
    // pred:
    //   store %one, %x.dest, !DIAssignID !1
    //   dbg.assign %one, "x", ..., !1, ...
    //   br %cond if.then
    //
    // if.then:
    //   store %two, %x.dest, !DIAssignID !2
    //   dbg.assign %two, "x", ..., !2, ...
    //
    // === After this transformation ===
    // pred:
    //   store %one, %x.dest, !DIAssignID !1
    //   dbg.assign %one, "x", ..., !1
    ///  ...
    //   %merge = select %cond, %two, %one
    //   store %merge, %x.dest, !DIAssignID !2
    //   dbg.assign %merge, "x", ..., !2
    auto replaceVariable = [OrigV, S](auto *DbgAssign) {
      if (llvm::is_contained(DbgAssign->location_ops(), OrigV))
        DbgAssign->replaceVariableLocationOp(OrigV, S);
    };
    for_each(Range: at::getAssignmentMarkers(Inst: SpeculatedStore), F: replaceVariable);
    for_each(Range: at::getDVRAssignmentMarkers(Inst: SpeculatedStore), F: replaceVariable);
  }

  // Metadata can be dependent on the condition we are hoisting above.
  // Strip all UB-implying metadata on the instruction. Drop the debug loc
  // to avoid making it appear as if the condition is a constant, which would
  // be misleading while debugging.
  // Similarly strip attributes that maybe dependent on condition we are
  // hoisting above.
  for (auto &I : make_early_inc_range(Range&: *ThenBB)) {
    if (!SpeculatedStoreValue || &I != SpeculatedStore) {
      I.setDebugLoc(DebugLoc::getDropped());
    }
    I.dropUBImplyingAttrsAndMetadata();

    // Drop ephemeral values.
    if (EphTracker.contains(I: &I)) {
      I.replaceAllUsesWith(V: PoisonValue::get(T: I.getType()));
      I.eraseFromParent();
    }
  }

  // Hoist the instructions.
  // Drop DbgVariableRecords attached to these instructions.
  for (auto &It : *ThenBB)
    for (DbgRecord &DR : make_early_inc_range(Range: It.getDbgRecordRange()))
      // Drop all records except assign-kind DbgVariableRecords (dbg.assign
      // equivalent).
      if (DbgVariableRecord *DVR = dyn_cast<DbgVariableRecord>(Val: &DR);
          !DVR || !DVR->isDbgAssign())
        It.dropOneDbgRecord(I: &DR);
  BB->splice(ToIt: BI->getIterator(), FromBB: ThenBB, FromBeginIt: ThenBB->begin(),
             FromEndIt: std::prev(x: ThenBB->end()));

  if (!SpeculatedConditionalLoadsStores.empty())
    hoistConditionalLoadsStores(BI, SpeculatedConditionalLoadsStores, Invert,
                                Sel);

  // Insert selects and rewrite the PHI operands.
  IRBuilder<NoFolder> Builder(BI);
  for (PHINode &PN : EndBB->phis()) {
    unsigned OrigI = PN.getBasicBlockIndex(BB);
    unsigned ThenI = PN.getBasicBlockIndex(BB: ThenBB);
    Value *OrigV = PN.getIncomingValue(i: OrigI);
    Value *ThenV = PN.getIncomingValue(i: ThenI);

    // Skip PHIs which are trivial.
    if (OrigV == ThenV)
      continue;

    // Create a select whose true value is the speculatively executed value and
    // false value is the pre-existing value. Swap them if the branch
    // destinations were inverted.
    Value *TrueV = ThenV, *FalseV = OrigV;
    if (Invert)
      std::swap(a&: TrueV, b&: FalseV);
    Value *V = Builder.CreateSelect(C: BrCond, True: TrueV, False: FalseV, Name: "spec.select", MDFrom: BI);
    PN.setIncomingValue(i: OrigI, V);
    PN.setIncomingValue(i: ThenI, V);
  }

  // Remove speculated pseudo probes.
  for (Instruction *I : SpeculatedPseudoProbes)
    I->eraseFromParent();

  ++NumSpeculations;
  return true;
}
| 3402 | |
| 3403 | /// Return true if we can thread a branch across this block. |
| 3404 | static bool blockIsSimpleEnoughToThreadThrough(BasicBlock *BB) { |
| 3405 | int Size = 0; |
| 3406 | EphemeralValueTracker EphTracker; |
| 3407 | |
| 3408 | // Walk the loop in reverse so that we can identify ephemeral values properly |
| 3409 | // (values only feeding assumes). |
| 3410 | for (Instruction &I : reverse(C: BB->instructionsWithoutDebug(SkipPseudoOp: false))) { |
| 3411 | // Can't fold blocks that contain noduplicate or convergent calls. |
| 3412 | if (CallInst *CI = dyn_cast<CallInst>(Val: &I)) |
| 3413 | if (CI->cannotDuplicate() || CI->isConvergent()) |
| 3414 | return false; |
| 3415 | |
| 3416 | // Ignore ephemeral values which are deleted during codegen. |
| 3417 | // We will delete Phis while threading, so Phis should not be accounted in |
| 3418 | // block's size. |
| 3419 | if (!EphTracker.track(I: &I) && !isa<PHINode>(Val: I)) { |
| 3420 | if (Size++ > MaxSmallBlockSize) |
| 3421 | return false; // Don't clone large BB's. |
| 3422 | } |
| 3423 | |
| 3424 | // We can only support instructions that do not define values that are |
| 3425 | // live outside of the current basic block. |
| 3426 | for (User *U : I.users()) { |
| 3427 | Instruction *UI = cast<Instruction>(Val: U); |
| 3428 | if (UI->getParent() != BB || isa<PHINode>(Val: UI)) |
| 3429 | return false; |
| 3430 | } |
| 3431 | |
| 3432 | // Looks ok, continue checking. |
| 3433 | } |
| 3434 | |
| 3435 | return true; |
| 3436 | } |
| 3437 | |
| 3438 | static ConstantInt *getKnownValueOnEdge(Value *V, BasicBlock *From, |
| 3439 | BasicBlock *To) { |
| 3440 | // Don't look past the block defining the value, we might get the value from |
| 3441 | // a previous loop iteration. |
| 3442 | auto *I = dyn_cast<Instruction>(Val: V); |
| 3443 | if (I && I->getParent() == To) |
| 3444 | return nullptr; |
| 3445 | |
| 3446 | // We know the value if the From block branches on it. |
| 3447 | auto *BI = dyn_cast<BranchInst>(Val: From->getTerminator()); |
| 3448 | if (BI && BI->isConditional() && BI->getCondition() == V && |
| 3449 | BI->getSuccessor(i: 0) != BI->getSuccessor(i: 1)) |
| 3450 | return BI->getSuccessor(i: 0) == To ? ConstantInt::getTrue(Context&: BI->getContext()) |
| 3451 | : ConstantInt::getFalse(Context&: BI->getContext()); |
| 3452 | |
| 3453 | return nullptr; |
| 3454 | } |
| 3455 | |
/// If we have a conditional branch on something for which we know the constant
/// value in predecessors (e.g. a phi node in the current block), thread edges
/// from the predecessor to their ultimate destination.
///
/// \returns false if nothing changed, true if the CFG changed, and
/// std::nullopt if the CFG changed but the caller should invoke this again to
/// thread any remaining known-constant edges.
static std::optional<bool>
foldCondBranchOnValueKnownInPredecessorImpl(BranchInst *BI, DomTreeUpdater *DTU,
                                            const DataLayout &DL,
                                            AssumptionCache *AC) {
  // Maps each constant the condition is known to take to the set of
  // predecessors on whose incoming edges it takes that value.
  SmallMapVector<ConstantInt *, SmallSetVector<BasicBlock *, 2>, 2> KnownValues;
  BasicBlock *BB = BI->getParent();
  Value *Cond = BI->getCondition();
  PHINode *PN = dyn_cast<PHINode>(Val: Cond);
  if (PN && PN->getParent() == BB) {
    // Degenerate case of a single entry PHI.
    if (PN->getNumIncomingValues() == 1) {
      FoldSingleEntryPHINodes(BB: PN->getParent());
      return true;
    }

    // Each constant incoming value of the phi pins the condition on that edge.
    for (Use &U : PN->incoming_values())
      if (auto *CB = dyn_cast<ConstantInt>(Val&: U))
        KnownValues[CB].insert(X: PN->getIncomingBlock(U));
  } else {
    // Not a phi in this block: the value may still be known on an edge if the
    // predecessor itself branches on Cond.
    for (BasicBlock *Pred : predecessors(BB)) {
      if (ConstantInt *CB = getKnownValueOnEdge(V: Cond, From: Pred, To: BB))
        KnownValues[CB].insert(X: Pred);
    }
  }

  if (KnownValues.empty())
    return false;

  // Now we know that this block has multiple preds and two succs.
  // Check that the block is small enough and values defined in the block are
  // not used outside of it.
  if (!blockIsSimpleEnoughToThreadThrough(BB))
    return false;

  for (const auto &Pair : KnownValues) {
    // Okay, we now know that all edges from PredBB should be revectored to
    // branch to RealDest.
    ConstantInt *CB = Pair.first;
    ArrayRef<BasicBlock *> PredBBs = Pair.second.getArrayRef();
    // With the condition known, the branch's taken successor is fixed.
    BasicBlock *RealDest = BI->getSuccessor(i: !CB->getZExtValue());

    if (RealDest == BB)
      continue; // Skip self loops.

    // Skip if the predecessor's terminator is an indirect branch.
    if (any_of(Range&: PredBBs, P: [](BasicBlock *PredBB) {
          return isa<IndirectBrInst>(Val: PredBB->getTerminator());
        }))
      continue;

    LLVM_DEBUG({
      dbgs() << "Condition " << *Cond << " in " << BB->getName()
             << " has value " << *Pair.first << " in predecessors:\n" ;
      for (const BasicBlock *PredBB : Pair.second)
        dbgs() << " " << PredBB->getName() << "\n" ;
      dbgs() << "Threading to destination " << RealDest->getName() << ".\n" ;
    });

    // Split the predecessors we are threading into a new edge block. We'll
    // clone the instructions into this block, and then redirect it to RealDest.
    BasicBlock *EdgeBB = SplitBlockPredecessors(BB, Preds: PredBBs, Suffix: ".critedge" , DTU);

    // TODO: These just exist to reduce test diff, we can drop them if we like.
    EdgeBB->setName(RealDest->getName() + ".critedge" );
    EdgeBB->moveBefore(MovePos: RealDest);

    // Update PHI nodes.
    addPredecessorToBlock(Succ: RealDest, NewPred: EdgeBB, ExistPred: BB);

    // BB may have instructions that are being threaded over. Clone these
    // instructions into EdgeBB. We know that there will be no uses of the
    // cloned instructions outside of EdgeBB.
    BasicBlock::iterator InsertPt = EdgeBB->getFirstInsertionPt();
    ValueToValueMapTy TranslateMap; // Track translated values.
    // On every edge we are threading, the condition is the constant CB.
    TranslateMap[Cond] = CB;

    // RemoveDIs: track instructions that we optimise away while folding, so
    // that we can copy DbgVariableRecords from them later.
    BasicBlock::iterator SrcDbgCursor = BB->begin();
    for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) {
      // PHIs are not cloned; the value they carry on the EdgeBB edge is
      // substituted directly through the translation map.
      if (PHINode *PN = dyn_cast<PHINode>(Val&: BBI)) {
        TranslateMap[PN] = PN->getIncomingValueForBlock(BB: EdgeBB);
        continue;
      }
      // Clone the instruction.
      Instruction *N = BBI->clone();
      // Insert the new instruction into its new home.
      N->insertInto(ParentBB: EdgeBB, It: InsertPt);

      if (BBI->hasName())
        N->setName(BBI->getName() + ".c" );

      // Update operands due to translation.
      // Key Instructions: Remap all the atom groups.
      if (const DebugLoc &DL = BBI->getDebugLoc())
        mapAtomInstance(DL, VMap&: TranslateMap);
      RemapInstruction(I: N, VM&: TranslateMap,
                       Flags: RF_IgnoreMissingLocals | RF_NoModuleLevelChanges);

      // Check for trivial simplification.
      if (Value *V = simplifyInstruction(I: N, Q: {DL, nullptr, nullptr, AC})) {
        if (!BBI->use_empty())
          TranslateMap[&*BBI] = V;
        if (!N->mayHaveSideEffects()) {
          N->eraseFromParent(); // Instruction folded away, don't need actual
                                // inst
          N = nullptr;
        }
      } else {
        if (!BBI->use_empty())
          TranslateMap[&*BBI] = N;
      }
      if (N) {
        // Copy all debug-info attached to instructions from the last we
        // successfully clone, up to this instruction (they might have been
        // folded away).
        for (; SrcDbgCursor != BBI; ++SrcDbgCursor)
          N->cloneDebugInfoFrom(From: &*SrcDbgCursor);
        SrcDbgCursor = std::next(x: BBI);
        // Clone debug-info on this instruction too.
        N->cloneDebugInfoFrom(From: &*BBI);

        // Register the new instruction with the assumption cache if necessary.
        if (auto *Assume = dyn_cast<AssumeInst>(Val: N))
          if (AC)
            AC->registerAssumption(CI: Assume);
      }
    }

    // Copy any remaining debug records (and those attached to the branch
    // itself) onto the instruction at the insertion point.
    for (; &*SrcDbgCursor != BI; ++SrcDbgCursor)
      InsertPt->cloneDebugInfoFrom(From: &*SrcDbgCursor);
    InsertPt->cloneDebugInfoFrom(From: BI);

    // Retarget EdgeBB's terminator to jump straight to RealDest instead of BB.
    BB->removePredecessor(Pred: EdgeBB);
    BranchInst *EdgeBI = cast<BranchInst>(Val: EdgeBB->getTerminator());
    EdgeBI->setSuccessor(idx: 0, NewSucc: RealDest);
    EdgeBI->setDebugLoc(BI->getDebugLoc());

    if (DTU) {
      SmallVector<DominatorTree::UpdateType, 2> Updates;
      Updates.push_back(Elt: {DominatorTree::Delete, EdgeBB, BB});
      Updates.push_back(Elt: {DominatorTree::Insert, EdgeBB, RealDest});
      DTU->applyUpdates(Updates);
    }

    // For simplicity, we created a separate basic block for the edge. Merge
    // it back into the predecessor if possible. This not only avoids
    // unnecessary SimplifyCFG iterations, but also makes sure that we don't
    // bypass the check for trivial cycles above.
    MergeBlockIntoPredecessor(BB: EdgeBB, DTU);

    // Signal repeat, simplifying any other constants.
    return std::nullopt;
  }

  return false;
}
| 3616 | |
| 3617 | static bool foldCondBranchOnValueKnownInPredecessor(BranchInst *BI, |
| 3618 | DomTreeUpdater *DTU, |
| 3619 | const DataLayout &DL, |
| 3620 | AssumptionCache *AC) { |
| 3621 | std::optional<bool> Result; |
| 3622 | bool EverChanged = false; |
| 3623 | do { |
| 3624 | // Note that None means "we changed things, but recurse further." |
| 3625 | Result = foldCondBranchOnValueKnownInPredecessorImpl(BI, DTU, DL, AC); |
| 3626 | EverChanged |= Result == std::nullopt || *Result; |
| 3627 | } while (Result == std::nullopt); |
| 3628 | return EverChanged; |
| 3629 | } |
| 3630 | |
/// Given a BB that starts with the specified two-entry PHI node,
/// see if we can eliminate it.
///
/// Attempts to flatten the "if" diamond/triangle dominating BB by hoisting
/// the speculatable instructions of the conditional blocks into the
/// dominating block and turning each two-entry PHI into a select.
/// \returns true if any change was made to the IR.
static bool foldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
                                DomTreeUpdater *DTU, AssumptionCache *AC,
                                const DataLayout &DL,
                                bool SpeculateUnpredictables) {
  // Ok, this is a two entry PHI node.  Check to see if this is a simple "if
  // statement", which has a very simple dominance structure.  Basically, we
  // are trying to find the condition that is being branched on, which
  // subsequently causes this merge to happen.  We really want control
  // dependence information for this check, but simplifycfg can't keep it up
  // to date, and this catches most of the cases we care about anyway.
  BasicBlock *BB = PN->getParent();

  BasicBlock *IfTrue, *IfFalse;
  BranchInst *DomBI = GetIfCondition(BB, IfTrue, IfFalse);
  if (!DomBI)
    return false;
  Value *IfCond = DomBI->getCondition();
  // Don't bother if the branch will be constant folded trivially.
  if (isa<ConstantInt>(Val: IfCond))
    return false;

  BasicBlock *DomBlock = DomBI->getParent();
  // Collect the "then"/"else" blocks that will need to be speculated: those
  // phi-incoming blocks that fall through unconditionally into BB.
  SmallVector<BasicBlock *, 2> IfBlocks;
  llvm::copy_if(
      Range: PN->blocks(), Out: std::back_inserter(x&: IfBlocks), P: [](BasicBlock *IfBlock) {
        return cast<BranchInst>(Val: IfBlock->getTerminator())->isUnconditional();
      });
  assert((IfBlocks.size() == 1 || IfBlocks.size() == 2) &&
         "Will have either one or two blocks to speculate." );

  // If the branch is non-unpredictable, see if we either predictably jump to
  // the merge bb (if we have only a single 'then' block), or if we predictably
  // jump to one specific 'then' block (if we have two of them).
  // It isn't beneficial to speculatively execute the code
  // from the block that we know is predictably not entered.
  bool IsUnpredictable = DomBI->getMetadata(KindID: LLVMContext::MD_unpredictable);
  if (!IsUnpredictable) {
    uint64_t TWeight, FWeight;
    if (extractBranchWeights(I: *DomBI, TrueVal&: TWeight, FalseVal&: FWeight) &&
        (TWeight + FWeight) != 0) {
      BranchProbability BITrueProb =
          BranchProbability::getBranchProbability(Numerator: TWeight, Denominator: TWeight + FWeight);
      BranchProbability Likely = TTI.getPredictableBranchThreshold();
      BranchProbability BIFalseProb = BITrueProb.getCompl();
      if (IfBlocks.size() == 1) {
        // Triangle: bail out if the edge straight to the merge block is the
        // likely one.
        BranchProbability BIBBProb =
            DomBI->getSuccessor(i: 0) == BB ? BITrueProb : BIFalseProb;
        if (BIBBProb >= Likely)
          return false;
      } else {
        // Diamond: bail out if either side is strongly predicted.
        if (BITrueProb >= Likely || BIFalseProb >= Likely)
          return false;
      }
    }
  }

  // Don't try to fold an unreachable block. For example, the phi node itself
  // can't be the candidate if-condition for a select that we want to form.
  if (auto *IfCondPhiInst = dyn_cast<PHINode>(Val: IfCond))
    if (IfCondPhiInst->getParent() == BB)
      return false;

  // Okay, we found that we can merge this two-entry phi node into a select.
  // Doing so would require us to fold *all* two entry phi nodes in this block.
  // At some point this becomes non-profitable (particularly if the target
  // doesn't support cmov's).  Only do this transformation if there are two or
  // fewer PHI nodes in this block.
  unsigned NumPhis = 0;
  for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(Val: I); ++NumPhis, ++I)
    if (NumPhis > 2)
      return false;

  // Loop over the PHI's seeing if we can promote them all to select
  // instructions.  While we are at it, keep track of the instructions
  // that need to be moved to the dominating block.
  SmallPtrSet<Instruction *, 4> AggressiveInsts;
  SmallPtrSet<Instruction *, 2> ZeroCostInstructions;
  InstructionCost Cost = 0;
  InstructionCost Budget =
      TwoEntryPHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic;
  // Speculating an unpredictable branch buys back a misprediction penalty, so
  // allow a correspondingly larger speculation budget.
  if (SpeculateUnpredictables && IsUnpredictable)
    Budget += TTI.getBranchMispredictPenalty();

  bool Changed = false;
  for (BasicBlock::iterator II = BB->begin(); isa<PHINode>(Val: II);) {
    PHINode *PN = cast<PHINode>(Val: II++);
    // Trivially redundant phis are simply replaced and removed.
    if (Value *V = simplifyInstruction(I: PN, Q: {DL, PN})) {
      PN->replaceAllUsesWith(V);
      PN->eraseFromParent();
      Changed = true;
      continue;
    }

    // Both incoming values must be computable (within budget) at the merge
    // point for the phi to be convertible into a select.
    if (!dominatesMergePoint(V: PN->getIncomingValue(i: 0), BB, InsertPt: DomBI,
                             AggressiveInsts, Cost, Budget, TTI, AC,
                             ZeroCostInstructions) ||
        !dominatesMergePoint(V: PN->getIncomingValue(i: 1), BB, InsertPt: DomBI,
                             AggressiveInsts, Cost, Budget, TTI, AC,
                             ZeroCostInstructions))
      return Changed;
  }

  // If we folded the first phi, PN dangles at this point.  Refresh it.  If
  // we ran out of PHIs then we simplified them all.
  PN = dyn_cast<PHINode>(Val: BB->begin());
  if (!PN)
    return true;

  // Return true if at least one of these is a 'not', and another is either
  // a 'not' too, or a constant.
  auto CanHoistNotFromBothValues = [](Value *V0, Value *V1) {
    if (!match(V: V0, P: m_Not(V: m_Value())))
      std::swap(a&: V0, b&: V1);
    auto Invertible = m_CombineOr(L: m_Not(V: m_Value()), R: m_AnyIntegralConstant());
    return match(V: V0, P: m_Not(V: m_Value())) && match(V: V1, P: Invertible);
  };

  // Don't fold i1 branches on PHIs which contain binary operators or
  // (possibly inverted) select form of or/ands,  unless one of
  // the incoming values is an 'not' and another one is freely invertible.
  // These can often be turned into switches and other things.
  auto IsBinOpOrAnd = [](Value *V) {
    return match(
        V, P: m_CombineOr(L: m_BinOp(), R: m_c_Select(L: m_ImmConstant(), R: m_Value())));
  };
  if (PN->getType()->isIntegerTy(Bitwidth: 1) &&
      (IsBinOpOrAnd(PN->getIncomingValue(i: 0)) ||
       IsBinOpOrAnd(PN->getIncomingValue(i: 1)) || IsBinOpOrAnd(IfCond)) &&
      !CanHoistNotFromBothValues(PN->getIncomingValue(i: 0),
                                 PN->getIncomingValue(i: 1)))
    return Changed;

  // If all PHI nodes are promotable, check to make sure that all instructions
  // in the predecessor blocks can be promoted as well. If not, we won't be able
  // to get rid of the control flow, so it's not worth promoting to select
  // instructions.
  for (BasicBlock *IfBlock : IfBlocks)
    for (BasicBlock::iterator I = IfBlock->begin(); !I->isTerminator(); ++I)
      if (!AggressiveInsts.count(Ptr: &*I) && !I->isDebugOrPseudoInst()) {
        // This is not an aggressive instruction that we can promote.
        // Because of this, we won't be able to get rid of the control flow, so
        // the xform is not worth it.
        return Changed;
      }

  // If either of the blocks has it's address taken, we can't do this fold.
  if (any_of(Range&: IfBlocks,
             P: [](BasicBlock *IfBlock) { return IfBlock->hasAddressTaken(); }))
    return Changed;

  LLVM_DEBUG(dbgs() << "FOUND IF CONDITION! " << *IfCond;
             if (IsUnpredictable) dbgs() << " (unpredictable)" ;
             dbgs() << " T: " << IfTrue->getName()
                    << " F: " << IfFalse->getName() << "\n" );

  // If we can still promote the PHI nodes after this gauntlet of tests,
  // do all of the PHI's now.

  // Move all 'aggressive' instructions, which are defined in the
  // conditional parts of the if's up to the dominating block.
  for (BasicBlock *IfBlock : IfBlocks)
    hoistAllInstructionsInto(DomBlock, InsertPt: DomBI, BB: IfBlock);

  IRBuilder<NoFolder> Builder(DomBI);
  // Propagate fast-math-flags from phi nodes to replacement selects.
  while (PHINode *PN = dyn_cast<PHINode>(Val: BB->begin())) {
    // Change the PHI node into a select instruction.
    Value *TrueVal = PN->getIncomingValueForBlock(BB: IfTrue);
    Value *FalseVal = PN->getIncomingValueForBlock(BB: IfFalse);

    Value *Sel = Builder.CreateSelectFMF(C: IfCond, True: TrueVal, False: FalseVal,
                                         FMFSource: isa<FPMathOperator>(Val: PN) ? PN : nullptr,
                                         Name: "" , MDFrom: DomBI);
    PN->replaceAllUsesWith(V: Sel);
    Sel->takeName(V: PN);
    PN->eraseFromParent();
  }

  // At this point, all IfBlocks are empty, so our if statement
  // has been flattened.  Change DomBlock to jump directly to our new block to
  // avoid other simplifycfg's kicking in on the diamond.
  Builder.CreateBr(Dest: BB);

  SmallVector<DominatorTree::UpdateType, 3> Updates;
  if (DTU) {
    Updates.push_back(Elt: {DominatorTree::Insert, DomBlock, BB});
    for (auto *Successor : successors(BB: DomBlock))
      Updates.push_back(Elt: {DominatorTree::Delete, DomBlock, Successor});
  }

  DomBI->eraseFromParent();
  if (DTU)
    DTU->applyUpdates(Updates);

  return true;
}
| 3829 | |
| 3830 | static Value *createLogicalOp(IRBuilderBase &Builder, |
| 3831 | Instruction::BinaryOps Opc, Value *LHS, |
| 3832 | Value *RHS, const Twine &Name = "" ) { |
| 3833 | // Try to relax logical op to binary op. |
| 3834 | if (impliesPoison(ValAssumedPoison: RHS, V: LHS)) |
| 3835 | return Builder.CreateBinOp(Opc, LHS, RHS, Name); |
| 3836 | if (Opc == Instruction::And) |
| 3837 | return Builder.CreateLogicalAnd(Cond1: LHS, Cond2: RHS, Name); |
| 3838 | if (Opc == Instruction::Or) |
| 3839 | return Builder.CreateLogicalOr(Cond1: LHS, Cond2: RHS, Name); |
| 3840 | llvm_unreachable("Invalid logical opcode" ); |
| 3841 | } |
| 3842 | |
| 3843 | /// Return true if either PBI or BI has branch weight available, and store |
| 3844 | /// the weights in {Pred|Succ}{True|False}Weight. If one of PBI and BI does |
| 3845 | /// not have branch weight, use 1:1 as its weight. |
| 3846 | static bool (BranchInst *PBI, BranchInst *BI, |
| 3847 | uint64_t &PredTrueWeight, |
| 3848 | uint64_t &PredFalseWeight, |
| 3849 | uint64_t &SuccTrueWeight, |
| 3850 | uint64_t &SuccFalseWeight) { |
| 3851 | bool PredHasWeights = |
| 3852 | extractBranchWeights(I: *PBI, TrueVal&: PredTrueWeight, FalseVal&: PredFalseWeight); |
| 3853 | bool SuccHasWeights = |
| 3854 | extractBranchWeights(I: *BI, TrueVal&: SuccTrueWeight, FalseVal&: SuccFalseWeight); |
| 3855 | if (PredHasWeights || SuccHasWeights) { |
| 3856 | if (!PredHasWeights) |
| 3857 | PredTrueWeight = PredFalseWeight = 1; |
| 3858 | if (!SuccHasWeights) |
| 3859 | SuccTrueWeight = SuccFalseWeight = 1; |
| 3860 | return true; |
| 3861 | } else { |
| 3862 | return false; |
| 3863 | } |
| 3864 | } |
| 3865 | |
/// Determine if the two branches share a common destination and deduce a glue
/// that joins the branches' conditions to arrive at the common destination if
/// that would be profitable.
///
/// On success the returned tuple is (common successor, binary opcode with
/// which to combine the two conditions, whether PBI's condition must be
/// inverted first); std::nullopt means no profitable fold exists.
static std::optional<std::tuple<BasicBlock *, Instruction::BinaryOps, bool>>
shouldFoldCondBranchesToCommonDestination(BranchInst *BI, BranchInst *PBI,
                                          const TargetTransformInfo *TTI) {
  assert(BI && PBI && BI->isConditional() && PBI->isConditional() &&
         "Both blocks must end with a conditional branches." );
  assert(is_contained(predecessors(BI->getParent()), PBI->getParent()) &&
         "PredBB must be a predecessor of BB." );

  // We have the potential to fold the conditions together, but if the
  // predecessor branch is predictable, we may not want to merge them.
  // PBITrueProb/Likely stay default-constructed (unknown) when no usable
  // profile data is present, which makes every profitability check pass.
  uint64_t PTWeight, PFWeight;
  BranchProbability PBITrueProb, Likely;
  if (TTI && !PBI->getMetadata(KindID: LLVMContext::MD_unpredictable) &&
      extractBranchWeights(I: *PBI, TrueVal&: PTWeight, FalseVal&: PFWeight) &&
      (PTWeight + PFWeight) != 0) {
    PBITrueProb =
        BranchProbability::getBranchProbability(Numerator: PTWeight, Denominator: PTWeight + PFWeight);
    Likely = TTI->getPredictableBranchThreshold();
  }

  // Each case matches one way the two branches' successors can coincide; the
  // opcode encodes whether the common block is reached when either condition
  // holds (Or) or only when both hold (And).
  if (PBI->getSuccessor(i: 0) == BI->getSuccessor(i: 0)) {
    // Speculate the 2nd condition unless the 1st is probably true.
    if (PBITrueProb.isUnknown() || PBITrueProb < Likely)
      return {{BI->getSuccessor(i: 0), Instruction::Or, false}};
  } else if (PBI->getSuccessor(i: 1) == BI->getSuccessor(i: 1)) {
    // Speculate the 2nd condition unless the 1st is probably false.
    if (PBITrueProb.isUnknown() || PBITrueProb.getCompl() < Likely)
      return {{BI->getSuccessor(i: 1), Instruction::And, false}};
  } else if (PBI->getSuccessor(i: 0) == BI->getSuccessor(i: 1)) {
    // Speculate the 2nd condition unless the 1st is probably true.
    if (PBITrueProb.isUnknown() || PBITrueProb < Likely)
      return {{BI->getSuccessor(i: 1), Instruction::And, true}};
  } else if (PBI->getSuccessor(i: 1) == BI->getSuccessor(i: 0)) {
    // Speculate the 2nd condition unless the 1st is probably false.
    if (PBITrueProb.isUnknown() || PBITrueProb.getCompl() < Likely)
      return {{BI->getSuccessor(i: 0), Instruction::Or, true}};
  }
  return std::nullopt;
}
| 3908 | |
/// Fold the conditional branch BI into its predecessor's conditional branch
/// PBI, given that shouldFoldCondBranchesToCommonDestination already decided
/// the fold is profitable.  The branch in PredBlock is rewritten to branch on
/// the logical combination of both conditions, BB's non-phi instructions are
/// cloned into PredBlock, and branch-weight metadata is recombined.
/// \returns true (the fold is always performed).
static bool performBranchToCommonDestFolding(BranchInst *BI, BranchInst *PBI,
                                             DomTreeUpdater *DTU,
                                             MemorySSAUpdater *MSSAU,
                                             const TargetTransformInfo *TTI) {
  BasicBlock *BB = BI->getParent();
  BasicBlock *PredBlock = PBI->getParent();

  // Determine if the two branches share a common destination.
  BasicBlock *CommonSucc;
  Instruction::BinaryOps Opc;
  bool InvertPredCond;
  std::tie(args&: CommonSucc, args&: Opc, args&: InvertPredCond) =
      *shouldFoldCondBranchesToCommonDestination(BI, PBI, TTI);

  LLVM_DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB);

  IRBuilder<> Builder(PBI);
  // The builder is used to create instructions to eliminate the branch in BB.
  // If BB's terminator has !annotation metadata, add it to the new
  // instructions.
  Builder.CollectMetadataToCopy(Src: BB->getTerminator(),
                                MetadataKinds: {LLVMContext::MD_annotation});

  // If we need to invert the condition in the pred block to match, do so now.
  if (InvertPredCond) {
    InvertBranch(PBI, Builder);
  }

  // After a possible inversion, both branches agree on a common successor;
  // the other successor of BI becomes the unique alternative destination.
  BasicBlock *UniqueSucc =
      PBI->getSuccessor(i: 0) == BB ? BI->getSuccessor(i: 0) : BI->getSuccessor(i: 1);

  // Before cloning instructions, notify the successor basic block that it
  // is about to have a new predecessor. This will update PHI nodes,
  // which will allow us to update live-out uses of bonus instructions.
  addPredecessorToBlock(Succ: UniqueSucc, NewPred: PredBlock, ExistPred: BB, MSSAU);

  // Try to update branch weights.
  uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
  if (extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight,
                             SuccTrueWeight, SuccFalseWeight)) {
    SmallVector<uint64_t, 8> NewWeights;

    if (PBI->getSuccessor(i: 0) == BB) {
      // PBI: br i1 %x, BB, FalseDest
      // BI:  br i1 %y, UniqueSucc, FalseDest
      // TrueWeight is TrueWeight for PBI * TrueWeight for BI.
      NewWeights.push_back(Elt: PredTrueWeight * SuccTrueWeight);
      // FalseWeight is FalseWeight for PBI * TotalWeight for BI +
      //               TrueWeight for PBI * FalseWeight for BI.
      // We assume that total weights of a BranchInst can fit into 32 bits.
      // Therefore, we will not have overflow using 64-bit arithmetic.
      NewWeights.push_back(Elt: PredFalseWeight *
                               (SuccFalseWeight + SuccTrueWeight) +
                           PredTrueWeight * SuccFalseWeight);
    } else {
      // PBI: br i1 %x, TrueDest, BB
      // BI:  br i1 %y, TrueDest, UniqueSucc
      // TrueWeight is TrueWeight for PBI * TotalWeight for BI +
      //              FalseWeight for PBI * TrueWeight for BI.
      NewWeights.push_back(Elt: PredTrueWeight * (SuccFalseWeight + SuccTrueWeight) +
                           PredFalseWeight * SuccTrueWeight);
      // FalseWeight is FalseWeight for PBI * FalseWeight for BI.
      NewWeights.push_back(Elt: PredFalseWeight * SuccFalseWeight);
    }

    // Halve the weights if any of them cannot fit in an uint32_t
    fitWeights(Weights: NewWeights);

    SmallVector<uint32_t, 8> MDWeights(NewWeights.begin(), NewWeights.end());
    setBranchWeights(I: PBI, TrueWeight: MDWeights[0], FalseWeight: MDWeights[1], /*IsExpected=*/false);

    // TODO: If BB is reachable from all paths through PredBlock, then we
    // could replace PBI's branch probabilities with BI's.
  } else
    // The combined condition invalidates any stale profile data on PBI.
    PBI->setMetadata(KindID: LLVMContext::MD_prof, Node: nullptr);

  // Now, update the CFG.
  PBI->setSuccessor(idx: PBI->getSuccessor(i: 0) != BB, NewSucc: UniqueSucc);

  if (DTU)
    DTU->applyUpdates(Updates: {{DominatorTree::Insert, PredBlock, UniqueSucc},
                       {DominatorTree::Delete, PredBlock, BB}});

  // If BI was a loop latch, it may have had associated loop metadata.
  // We need to copy it to the new latch, that is, PBI.
  if (MDNode *LoopMD = BI->getMetadata(KindID: LLVMContext::MD_loop))
    PBI->setMetadata(KindID: LLVMContext::MD_loop, Node: LoopMD);

  ValueToValueMapTy VMap; // maps original values to cloned values
  cloneInstructionsIntoPredecessorBlockAndUpdateSSAUses(BB, PredBlock, VMap);

  Module *M = BB->getModule();

  // Carry over debug records from BB's terminator and remap them through the
  // clone map so they refer to the cloned values.
  PredBlock->getTerminator()->cloneDebugInfoFrom(From: BB->getTerminator());
  for (DbgVariableRecord &DVR :
       filterDbgVars(R: PredBlock->getTerminator()->getDbgRecordRange())) {
    RemapDbgRecord(M, DR: &DVR, VM&: VMap,
                   Flags: RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
  }

  // Now that the Cond was cloned into the predecessor basic block,
  // or/and the two conditions together.
  Value *BICond = VMap[BI->getCondition()];
  PBI->setCondition(
      createLogicalOp(Builder, Opc, LHS: PBI->getCondition(), RHS: BICond, Name: "or.cond" ));

  ++NumFoldBranchToCommonDest;
  return true;
}
| 4018 | |
| 4019 | /// Return if an instruction's type or any of its operands' types are a vector |
| 4020 | /// type. |
| 4021 | static bool isVectorOp(Instruction &I) { |
| 4022 | return I.getType()->isVectorTy() || any_of(Range: I.operands(), P: [](Use &U) { |
| 4023 | return U->getType()->isVectorTy(); |
| 4024 | }); |
| 4025 | } |
| 4026 | |
| 4027 | /// If this basic block is simple enough, and if a predecessor branches to us |
| 4028 | /// and one of our successors, fold the block into the predecessor and use |
| 4029 | /// logical operations to pick the right destination. |
| 4030 | bool llvm::foldBranchToCommonDest(BranchInst *BI, DomTreeUpdater *DTU, |
| 4031 | MemorySSAUpdater *MSSAU, |
| 4032 | const TargetTransformInfo *TTI, |
| 4033 | unsigned BonusInstThreshold) { |
| 4034 | // If this block ends with an unconditional branch, |
| 4035 | // let speculativelyExecuteBB() deal with it. |
| 4036 | if (!BI->isConditional()) |
| 4037 | return false; |
| 4038 | |
| 4039 | BasicBlock *BB = BI->getParent(); |
| 4040 | TargetTransformInfo::TargetCostKind CostKind = |
| 4041 | BB->getParent()->hasMinSize() ? TargetTransformInfo::TCK_CodeSize |
| 4042 | : TargetTransformInfo::TCK_SizeAndLatency; |
| 4043 | |
| 4044 | Instruction *Cond = dyn_cast<Instruction>(Val: BI->getCondition()); |
| 4045 | |
| 4046 | if (!Cond || !isa<CmpInst, BinaryOperator, SelectInst, TruncInst>(Val: Cond) || |
| 4047 | Cond->getParent() != BB || !Cond->hasOneUse()) |
| 4048 | return false; |
| 4049 | |
| 4050 | // Finally, don't infinitely unroll conditional loops. |
| 4051 | if (is_contained(Range: successors(BB), Element: BB)) |
| 4052 | return false; |
| 4053 | |
| 4054 | // With which predecessors will we want to deal with? |
| 4055 | SmallVector<BasicBlock *, 8> Preds; |
| 4056 | for (BasicBlock *PredBlock : predecessors(BB)) { |
| 4057 | BranchInst *PBI = dyn_cast<BranchInst>(Val: PredBlock->getTerminator()); |
| 4058 | |
| 4059 | // Check that we have two conditional branches. If there is a PHI node in |
| 4060 | // the common successor, verify that the same value flows in from both |
| 4061 | // blocks. |
| 4062 | if (!PBI || PBI->isUnconditional() || !safeToMergeTerminators(SI1: BI, SI2: PBI)) |
| 4063 | continue; |
| 4064 | |
| 4065 | // Determine if the two branches share a common destination. |
| 4066 | BasicBlock *CommonSucc; |
| 4067 | Instruction::BinaryOps Opc; |
| 4068 | bool InvertPredCond; |
| 4069 | if (auto Recipe = shouldFoldCondBranchesToCommonDestination(BI, PBI, TTI)) |
| 4070 | std::tie(args&: CommonSucc, args&: Opc, args&: InvertPredCond) = *Recipe; |
| 4071 | else |
| 4072 | continue; |
| 4073 | |
| 4074 | // Check the cost of inserting the necessary logic before performing the |
| 4075 | // transformation. |
| 4076 | if (TTI) { |
| 4077 | Type *Ty = BI->getCondition()->getType(); |
| 4078 | InstructionCost Cost = TTI->getArithmeticInstrCost(Opcode: Opc, Ty, CostKind); |
| 4079 | if (InvertPredCond && (!PBI->getCondition()->hasOneUse() || |
| 4080 | !isa<CmpInst>(Val: PBI->getCondition()))) |
| 4081 | Cost += TTI->getArithmeticInstrCost(Opcode: Instruction::Xor, Ty, CostKind); |
| 4082 | |
| 4083 | if (Cost > BranchFoldThreshold) |
| 4084 | continue; |
| 4085 | } |
| 4086 | |
| 4087 | // Ok, we do want to deal with this predecessor. Record it. |
| 4088 | Preds.emplace_back(Args&: PredBlock); |
| 4089 | } |
| 4090 | |
| 4091 | // If there aren't any predecessors into which we can fold, |
| 4092 | // don't bother checking the cost. |
| 4093 | if (Preds.empty()) |
| 4094 | return false; |
| 4095 | |
| 4096 | // Only allow this transformation if computing the condition doesn't involve |
| 4097 | // too many instructions and these involved instructions can be executed |
| 4098 | // unconditionally. We denote all involved instructions except the condition |
| 4099 | // as "bonus instructions", and only allow this transformation when the |
| 4100 | // number of the bonus instructions we'll need to create when cloning into |
| 4101 | // each predecessor does not exceed a certain threshold. |
| 4102 | unsigned NumBonusInsts = 0; |
| 4103 | bool SawVectorOp = false; |
| 4104 | const unsigned PredCount = Preds.size(); |
| 4105 | for (Instruction &I : *BB) { |
| 4106 | // Don't check the branch condition comparison itself. |
| 4107 | if (&I == Cond) |
| 4108 | continue; |
| 4109 | // Ignore the terminator. |
| 4110 | if (isa<BranchInst>(Val: I)) |
| 4111 | continue; |
| 4112 | // I must be safe to execute unconditionally. |
| 4113 | if (!isSafeToSpeculativelyExecute(I: &I)) |
| 4114 | return false; |
| 4115 | SawVectorOp |= isVectorOp(I); |
| 4116 | |
| 4117 | // Account for the cost of duplicating this instruction into each |
| 4118 | // predecessor. Ignore free instructions. |
| 4119 | if (!TTI || TTI->getInstructionCost(U: &I, CostKind) != |
| 4120 | TargetTransformInfo::TCC_Free) { |
| 4121 | NumBonusInsts += PredCount; |
| 4122 | |
| 4123 | // Early exits once we reach the limit. |
| 4124 | if (NumBonusInsts > |
| 4125 | BonusInstThreshold * BranchFoldToCommonDestVectorMultiplier) |
| 4126 | return false; |
| 4127 | } |
| 4128 | |
| 4129 | auto IsBCSSAUse = [BB, &I](Use &U) { |
| 4130 | auto *UI = cast<Instruction>(Val: U.getUser()); |
| 4131 | if (auto *PN = dyn_cast<PHINode>(Val: UI)) |
| 4132 | return PN->getIncomingBlock(U) == BB; |
| 4133 | return UI->getParent() == BB && I.comesBefore(Other: UI); |
| 4134 | }; |
| 4135 | |
| 4136 | // Does this instruction require rewriting of uses? |
| 4137 | if (!all_of(Range: I.uses(), P: IsBCSSAUse)) |
| 4138 | return false; |
| 4139 | } |
| 4140 | if (NumBonusInsts > |
| 4141 | BonusInstThreshold * |
| 4142 | (SawVectorOp ? BranchFoldToCommonDestVectorMultiplier : 1)) |
| 4143 | return false; |
| 4144 | |
| 4145 | // Ok, we have the budget. Perform the transformation. |
| 4146 | for (BasicBlock *PredBlock : Preds) { |
| 4147 | auto *PBI = cast<BranchInst>(Val: PredBlock->getTerminator()); |
| 4148 | return performBranchToCommonDestFolding(BI, PBI, DTU, MSSAU, TTI); |
| 4149 | } |
| 4150 | return false; |
| 4151 | } |
| 4152 | |
| 4153 | // If there is only one store in BB1 and BB2, return it, otherwise return |
| 4154 | // nullptr. |
| 4155 | static StoreInst *findUniqueStoreInBlocks(BasicBlock *BB1, BasicBlock *BB2) { |
| 4156 | StoreInst *S = nullptr; |
| 4157 | for (auto *BB : {BB1, BB2}) { |
| 4158 | if (!BB) |
| 4159 | continue; |
| 4160 | for (auto &I : *BB) |
| 4161 | if (auto *SI = dyn_cast<StoreInst>(Val: &I)) { |
| 4162 | if (S) |
| 4163 | // Multiple stores seen. |
| 4164 | return nullptr; |
| 4165 | else |
| 4166 | S = SI; |
| 4167 | } |
| 4168 | } |
| 4169 | return S; |
| 4170 | } |
| 4171 | |
| 4172 | static Value *ensureValueAvailableInSuccessor(Value *V, BasicBlock *BB, |
| 4173 | Value *AlternativeV = nullptr) { |
| 4174 | // PHI is going to be a PHI node that allows the value V that is defined in |
| 4175 | // BB to be referenced in BB's only successor. |
| 4176 | // |
| 4177 | // If AlternativeV is nullptr, the only value we care about in PHI is V. It |
| 4178 | // doesn't matter to us what the other operand is (it'll never get used). We |
| 4179 | // could just create a new PHI with an undef incoming value, but that could |
| 4180 | // increase register pressure if EarlyCSE/InstCombine can't fold it with some |
| 4181 | // other PHI. So here we directly look for some PHI in BB's successor with V |
| 4182 | // as an incoming operand. If we find one, we use it, else we create a new |
| 4183 | // one. |
| 4184 | // |
| 4185 | // If AlternativeV is not nullptr, we care about both incoming values in PHI. |
| 4186 | // PHI must be exactly: phi <ty> [ %BB, %V ], [ %OtherBB, %AlternativeV] |
| 4187 | // where OtherBB is the single other predecessor of BB's only successor. |
| 4188 | PHINode *PHI = nullptr; |
| 4189 | BasicBlock *Succ = BB->getSingleSuccessor(); |
| 4190 | |
| 4191 | for (auto I = Succ->begin(); isa<PHINode>(Val: I); ++I) |
| 4192 | if (cast<PHINode>(Val&: I)->getIncomingValueForBlock(BB) == V) { |
| 4193 | PHI = cast<PHINode>(Val&: I); |
| 4194 | if (!AlternativeV) |
| 4195 | break; |
| 4196 | |
| 4197 | assert(Succ->hasNPredecessors(2)); |
| 4198 | auto PredI = pred_begin(BB: Succ); |
| 4199 | BasicBlock *OtherPredBB = *PredI == BB ? *++PredI : *PredI; |
| 4200 | if (PHI->getIncomingValueForBlock(BB: OtherPredBB) == AlternativeV) |
| 4201 | break; |
| 4202 | PHI = nullptr; |
| 4203 | } |
| 4204 | if (PHI) |
| 4205 | return PHI; |
| 4206 | |
| 4207 | // If V is not an instruction defined in BB, just return it. |
| 4208 | if (!AlternativeV && |
| 4209 | (!isa<Instruction>(Val: V) || cast<Instruction>(Val: V)->getParent() != BB)) |
| 4210 | return V; |
| 4211 | |
| 4212 | PHI = PHINode::Create(Ty: V->getType(), NumReservedValues: 2, NameStr: "simplifycfg.merge" ); |
| 4213 | PHI->insertBefore(InsertPos: Succ->begin()); |
| 4214 | PHI->addIncoming(V, BB); |
| 4215 | for (BasicBlock *PredBB : predecessors(BB: Succ)) |
| 4216 | if (PredBB != BB) |
| 4217 | PHI->addIncoming( |
| 4218 | V: AlternativeV ? AlternativeV : PoisonValue::get(T: V->getType()), BB: PredBB); |
| 4219 | return PHI; |
| 4220 | } |
| 4221 | |
| 4222 | static bool mergeConditionalStoreToAddress( |
| 4223 | BasicBlock *PTB, BasicBlock *PFB, BasicBlock *QTB, BasicBlock *QFB, |
| 4224 | BasicBlock *PostBB, Value *Address, bool InvertPCond, bool InvertQCond, |
| 4225 | DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI) { |
| 4226 | // For every pointer, there must be exactly two stores, one coming from |
| 4227 | // PTB or PFB, and the other from QTB or QFB. We don't support more than one |
| 4228 | // store (to any address) in PTB,PFB or QTB,QFB. |
| 4229 | // FIXME: We could relax this restriction with a bit more work and performance |
| 4230 | // testing. |
| 4231 | StoreInst *PStore = findUniqueStoreInBlocks(BB1: PTB, BB2: PFB); |
| 4232 | StoreInst *QStore = findUniqueStoreInBlocks(BB1: QTB, BB2: QFB); |
| 4233 | if (!PStore || !QStore) |
| 4234 | return false; |
| 4235 | |
| 4236 | // Now check the stores are compatible. |
| 4237 | if (!QStore->isUnordered() || !PStore->isUnordered() || |
| 4238 | PStore->getValueOperand()->getType() != |
| 4239 | QStore->getValueOperand()->getType()) |
| 4240 | return false; |
| 4241 | |
| 4242 | // Check that sinking the store won't cause program behavior changes. Sinking |
| 4243 | // the store out of the Q blocks won't change any behavior as we're sinking |
| 4244 | // from a block to its unconditional successor. But we're moving a store from |
| 4245 | // the P blocks down through the middle block (QBI) and past both QFB and QTB. |
| 4246 | // So we need to check that there are no aliasing loads or stores in |
| 4247 | // QBI, QTB and QFB. We also need to check there are no conflicting memory |
| 4248 | // operations between PStore and the end of its parent block. |
| 4249 | // |
| 4250 | // The ideal way to do this is to query AliasAnalysis, but we don't |
| 4251 | // preserve AA currently so that is dangerous. Be super safe and just |
| 4252 | // check there are no other memory operations at all. |
| 4253 | for (auto &I : *QFB->getSinglePredecessor()) |
| 4254 | if (I.mayReadOrWriteMemory()) |
| 4255 | return false; |
| 4256 | for (auto &I : *QFB) |
| 4257 | if (&I != QStore && I.mayReadOrWriteMemory()) |
| 4258 | return false; |
| 4259 | if (QTB) |
| 4260 | for (auto &I : *QTB) |
| 4261 | if (&I != QStore && I.mayReadOrWriteMemory()) |
| 4262 | return false; |
| 4263 | for (auto I = BasicBlock::iterator(PStore), E = PStore->getParent()->end(); |
| 4264 | I != E; ++I) |
| 4265 | if (&*I != PStore && I->mayReadOrWriteMemory()) |
| 4266 | return false; |
| 4267 | |
| 4268 | // If we're not in aggressive mode, we only optimize if we have some |
| 4269 | // confidence that by optimizing we'll allow P and/or Q to be if-converted. |
| 4270 | auto IsWorthwhile = [&](BasicBlock *BB, ArrayRef<StoreInst *> FreeStores) { |
| 4271 | if (!BB) |
| 4272 | return true; |
| 4273 | // Heuristic: if the block can be if-converted/phi-folded and the |
| 4274 | // instructions inside are all cheap (arithmetic/GEPs), it's worthwhile to |
| 4275 | // thread this store. |
| 4276 | InstructionCost Cost = 0; |
| 4277 | InstructionCost Budget = |
| 4278 | PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic; |
| 4279 | for (auto &I : BB->instructionsWithoutDebug(SkipPseudoOp: false)) { |
| 4280 | // Consider terminator instruction to be free. |
| 4281 | if (I.isTerminator()) |
| 4282 | continue; |
| 4283 | // If this is one the stores that we want to speculate out of this BB, |
| 4284 | // then don't count it's cost, consider it to be free. |
| 4285 | if (auto *S = dyn_cast<StoreInst>(Val: &I)) |
| 4286 | if (llvm::find(Range&: FreeStores, Val: S)) |
| 4287 | continue; |
| 4288 | // Else, we have a white-list of instructions that we are ak speculating. |
| 4289 | if (!isa<BinaryOperator>(Val: I) && !isa<GetElementPtrInst>(Val: I)) |
| 4290 | return false; // Not in white-list - not worthwhile folding. |
| 4291 | // And finally, if this is a non-free instruction that we are okay |
| 4292 | // speculating, ensure that we consider the speculation budget. |
| 4293 | Cost += |
| 4294 | TTI.getInstructionCost(U: &I, CostKind: TargetTransformInfo::TCK_SizeAndLatency); |
| 4295 | if (Cost > Budget) |
| 4296 | return false; // Eagerly refuse to fold as soon as we're out of budget. |
| 4297 | } |
| 4298 | assert(Cost <= Budget && |
| 4299 | "When we run out of budget we will eagerly return from within the " |
| 4300 | "per-instruction loop." ); |
| 4301 | return true; |
| 4302 | }; |
| 4303 | |
| 4304 | const std::array<StoreInst *, 2> FreeStores = {PStore, QStore}; |
| 4305 | if (!MergeCondStoresAggressively && |
| 4306 | (!IsWorthwhile(PTB, FreeStores) || !IsWorthwhile(PFB, FreeStores) || |
| 4307 | !IsWorthwhile(QTB, FreeStores) || !IsWorthwhile(QFB, FreeStores))) |
| 4308 | return false; |
| 4309 | |
| 4310 | // If PostBB has more than two predecessors, we need to split it so we can |
| 4311 | // sink the store. |
| 4312 | if (std::next(x: pred_begin(BB: PostBB), n: 2) != pred_end(BB: PostBB)) { |
| 4313 | // We know that QFB's only successor is PostBB. And QFB has a single |
| 4314 | // predecessor. If QTB exists, then its only successor is also PostBB. |
| 4315 | // If QTB does not exist, then QFB's only predecessor has a conditional |
| 4316 | // branch to QFB and PostBB. |
| 4317 | BasicBlock *TruePred = QTB ? QTB : QFB->getSinglePredecessor(); |
| 4318 | BasicBlock *NewBB = |
| 4319 | SplitBlockPredecessors(BB: PostBB, Preds: {QFB, TruePred}, Suffix: "condstore.split" , DTU); |
| 4320 | if (!NewBB) |
| 4321 | return false; |
| 4322 | PostBB = NewBB; |
| 4323 | } |
| 4324 | |
| 4325 | // OK, we're going to sink the stores to PostBB. The store has to be |
| 4326 | // conditional though, so first create the predicate. |
| 4327 | Value *PCond = cast<BranchInst>(Val: PFB->getSinglePredecessor()->getTerminator()) |
| 4328 | ->getCondition(); |
| 4329 | Value *QCond = cast<BranchInst>(Val: QFB->getSinglePredecessor()->getTerminator()) |
| 4330 | ->getCondition(); |
| 4331 | |
| 4332 | Value *PPHI = ensureValueAvailableInSuccessor(V: PStore->getValueOperand(), |
| 4333 | BB: PStore->getParent()); |
| 4334 | Value *QPHI = ensureValueAvailableInSuccessor(V: QStore->getValueOperand(), |
| 4335 | BB: QStore->getParent(), AlternativeV: PPHI); |
| 4336 | |
| 4337 | BasicBlock::iterator PostBBFirst = PostBB->getFirstInsertionPt(); |
| 4338 | IRBuilder<> QB(PostBB, PostBBFirst); |
| 4339 | QB.SetCurrentDebugLocation(PostBBFirst->getStableDebugLoc()); |
| 4340 | |
| 4341 | Value *PPred = PStore->getParent() == PTB ? PCond : QB.CreateNot(V: PCond); |
| 4342 | Value *QPred = QStore->getParent() == QTB ? QCond : QB.CreateNot(V: QCond); |
| 4343 | |
| 4344 | if (InvertPCond) |
| 4345 | PPred = QB.CreateNot(V: PPred); |
| 4346 | if (InvertQCond) |
| 4347 | QPred = QB.CreateNot(V: QPred); |
| 4348 | Value *CombinedPred = QB.CreateOr(LHS: PPred, RHS: QPred); |
| 4349 | |
| 4350 | BasicBlock::iterator InsertPt = QB.GetInsertPoint(); |
| 4351 | auto *T = SplitBlockAndInsertIfThen(Cond: CombinedPred, SplitBefore: InsertPt, |
| 4352 | /*Unreachable=*/false, |
| 4353 | /*BranchWeights=*/nullptr, DTU); |
| 4354 | |
| 4355 | QB.SetInsertPoint(T); |
| 4356 | StoreInst *SI = cast<StoreInst>(Val: QB.CreateStore(Val: QPHI, Ptr: Address)); |
| 4357 | SI->setAAMetadata(PStore->getAAMetadata().merge(Other: QStore->getAAMetadata())); |
| 4358 | // Choose the minimum alignment. If we could prove both stores execute, we |
| 4359 | // could use biggest one. In this case, though, we only know that one of the |
| 4360 | // stores executes. And we don't know it's safe to take the alignment from a |
| 4361 | // store that doesn't execute. |
| 4362 | SI->setAlignment(std::min(a: PStore->getAlign(), b: QStore->getAlign())); |
| 4363 | |
| 4364 | QStore->eraseFromParent(); |
| 4365 | PStore->eraseFromParent(); |
| 4366 | |
| 4367 | return true; |
| 4368 | } |
| 4369 | |
/// Given two conditional branches \p PBI and \p QBI heading two consecutive
/// diamonds/triangles, find stores to a common address in the conditional
/// blocks and try to merge each such pair into one predicated store in the
/// post-dominating block (see mergeConditionalStoreToAddress).
/// \returns true if any stores were merged.
static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI,
                                   DomTreeUpdater *DTU, const DataLayout &DL,
                                   const TargetTransformInfo &TTI) {
  // The intention here is to find diamonds or triangles (see below) where each
  // conditional block contains a store to the same address. Both of these
  // stores are conditional, so they can't be unconditionally sunk. But it may
  // be profitable to speculatively sink the stores into one merged store at the
  // end, and predicate the merged store on the union of the two conditions of
  // PBI and QBI.
  //
  // This can reduce the number of stores executed if both of the conditions are
  // true, and can allow the blocks to become small enough to be if-converted.
  // This optimization will also chain, so that ladders of test-and-set
  // sequences can be if-converted away.
  //
  // We only deal with simple diamonds or triangles:
  //
  //   PBI       or      PBI        or a combination of the two
  //  /   \               | \
  // PTB  PFB             |  PFB
  //  \   /               | /
  //   QBI                QBI
  //  /  \                | \
  // QTB  QFB             |  QFB
  //  \  /                | /
  //  PostBB            PostBB
  //
  // We model triangles as a type of diamond with a nullptr "true" block.
  // Triangles are canonicalized so that the fallthrough edge is represented by
  // a true condition, as in the diagram above.
  BasicBlock *PTB = PBI->getSuccessor(i: 0);
  BasicBlock *PFB = PBI->getSuccessor(i: 1);
  BasicBlock *QTB = QBI->getSuccessor(i: 0);
  BasicBlock *QFB = QBI->getSuccessor(i: 1);
  BasicBlock *PostBB = QFB->getSingleSuccessor();

  // Make sure we have a good guess for PostBB. If QTB's only successor is
  // QFB, then QFB is a better PostBB.
  if (QTB->getSingleSuccessor() == QFB)
    PostBB = QFB;

  // If we couldn't find a good PostBB, stop.
  if (!PostBB)
    return false;

  bool InvertPCond = false, InvertQCond = false;
  // Canonicalize fallthroughs to the true branches.
  if (PFB == QBI->getParent()) {
    std::swap(a&: PFB, b&: PTB);
    InvertPCond = true;
  }
  if (QFB == PostBB) {
    std::swap(a&: QFB, b&: QTB);
    InvertQCond = true;
  }

  // From this point on we can assume PTB or QTB may be fallthroughs but PFB
  // and QFB may not. Model fallthroughs as a nullptr block.
  if (PTB == QBI->getParent())
    PTB = nullptr;
  if (QTB == PostBB)
    QTB = nullptr;

  // Legality bailouts. We must have at least the non-fallthrough blocks and
  // the post-dominating block, and the non-fallthroughs must only have one
  // predecessor.
  auto HasOnePredAndOneSucc = [](BasicBlock *BB, BasicBlock *P, BasicBlock *S) {
    return BB->getSinglePredecessor() == P && BB->getSingleSuccessor() == S;
  };
  if (!HasOnePredAndOneSucc(PFB, PBI->getParent(), QBI->getParent()) ||
      !HasOnePredAndOneSucc(QFB, QBI->getParent(), PostBB))
    return false;
  if ((PTB && !HasOnePredAndOneSucc(PTB, PBI->getParent(), QBI->getParent())) ||
      (QTB && !HasOnePredAndOneSucc(QTB, QBI->getParent(), PostBB)))
    return false;
  // NOTE(review): QBI's block may have exactly two uses -- presumably the two
  // CFG edges that reach it; bail if it is referenced anywhere else.
  if (!QBI->getParent()->hasNUses(N: 2))
    return false;

  // OK, this is a sequence of two diamonds or triangles.
  // Check if there are stores in PTB or PFB that are repeated in QTB or QFB.
  SmallPtrSet<Value *, 4> PStoreAddresses, QStoreAddresses;
  for (auto *BB : {PTB, PFB}) {
    if (!BB)
      continue;
    for (auto &I : *BB)
      if (StoreInst *SI = dyn_cast<StoreInst>(Val: &I))
        PStoreAddresses.insert(Ptr: SI->getPointerOperand());
  }
  for (auto *BB : {QTB, QFB}) {
    if (!BB)
      continue;
    for (auto &I : *BB)
      if (StoreInst *SI = dyn_cast<StoreInst>(Val: &I))
        QStoreAddresses.insert(Ptr: SI->getPointerOperand());
  }

  set_intersect(S1&: PStoreAddresses, S2: QStoreAddresses);
  // set_intersect mutates PStoreAddresses in place. Rename it here to make it
  // clear what it contains.
  auto &CommonAddresses = PStoreAddresses;

  // Attempt the merge for every address stored to on both sides.
  bool Changed = false;
  for (auto *Address : CommonAddresses)
    Changed |=
        mergeConditionalStoreToAddress(PTB, PFB, QTB, QFB, PostBB, Address,
                                       InvertPCond, InvertQCond, DTU, DL, TTI);
  return Changed;
}
| 4478 | |
| 4479 | /// If the previous block ended with a widenable branch, determine if reusing |
| 4480 | /// the target block is profitable and legal. This will have the effect of |
| 4481 | /// "widening" PBI, but doesn't require us to reason about hosting safety. |
| 4482 | static bool tryWidenCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, |
| 4483 | DomTreeUpdater *DTU) { |
| 4484 | // TODO: This can be generalized in two important ways: |
| 4485 | // 1) We can allow phi nodes in IfFalseBB and simply reuse all the input |
| 4486 | // values from the PBI edge. |
| 4487 | // 2) We can sink side effecting instructions into BI's fallthrough |
| 4488 | // successor provided they doesn't contribute to computation of |
| 4489 | // BI's condition. |
| 4490 | BasicBlock *IfTrueBB = PBI->getSuccessor(i: 0); |
| 4491 | BasicBlock *IfFalseBB = PBI->getSuccessor(i: 1); |
| 4492 | if (!isWidenableBranch(U: PBI) || IfTrueBB != BI->getParent() || |
| 4493 | !BI->getParent()->getSinglePredecessor()) |
| 4494 | return false; |
| 4495 | if (!IfFalseBB->phis().empty()) |
| 4496 | return false; // TODO |
| 4497 | // This helps avoid infinite loop with SimplifyCondBranchToCondBranch which |
| 4498 | // may undo the transform done here. |
| 4499 | // TODO: There might be a more fine-grained solution to this. |
| 4500 | if (!llvm::succ_empty(BB: IfFalseBB)) |
| 4501 | return false; |
| 4502 | // Use lambda to lazily compute expensive condition after cheap ones. |
| 4503 | auto NoSideEffects = [](BasicBlock &BB) { |
| 4504 | return llvm::none_of(Range&: BB, P: [](const Instruction &I) { |
| 4505 | return I.mayWriteToMemory() || I.mayHaveSideEffects(); |
| 4506 | }); |
| 4507 | }; |
| 4508 | if (BI->getSuccessor(i: 1) != IfFalseBB && // no inf looping |
| 4509 | BI->getSuccessor(i: 1)->getTerminatingDeoptimizeCall() && // profitability |
| 4510 | NoSideEffects(*BI->getParent())) { |
| 4511 | auto *OldSuccessor = BI->getSuccessor(i: 1); |
| 4512 | OldSuccessor->removePredecessor(Pred: BI->getParent()); |
| 4513 | BI->setSuccessor(idx: 1, NewSucc: IfFalseBB); |
| 4514 | if (DTU) |
| 4515 | DTU->applyUpdates( |
| 4516 | Updates: {{DominatorTree::Insert, BI->getParent(), IfFalseBB}, |
| 4517 | {DominatorTree::Delete, BI->getParent(), OldSuccessor}}); |
| 4518 | return true; |
| 4519 | } |
| 4520 | if (BI->getSuccessor(i: 0) != IfFalseBB && // no inf looping |
| 4521 | BI->getSuccessor(i: 0)->getTerminatingDeoptimizeCall() && // profitability |
| 4522 | NoSideEffects(*BI->getParent())) { |
| 4523 | auto *OldSuccessor = BI->getSuccessor(i: 0); |
| 4524 | OldSuccessor->removePredecessor(Pred: BI->getParent()); |
| 4525 | BI->setSuccessor(idx: 0, NewSucc: IfFalseBB); |
| 4526 | if (DTU) |
| 4527 | DTU->applyUpdates( |
| 4528 | Updates: {{DominatorTree::Insert, BI->getParent(), IfFalseBB}, |
| 4529 | {DominatorTree::Delete, BI->getParent(), OldSuccessor}}); |
| 4530 | return true; |
| 4531 | } |
| 4532 | return false; |
| 4533 | } |
| 4534 | |
/// If we have a conditional branch as a predecessor of another block,
/// this function tries to simplify it. We know
/// that PBI and BI are both conditional branches, and BI is in one of the
/// successor blocks of PBI - PBI branches to BI.
/// \returns true if the IR was changed.
static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,
                                           DomTreeUpdater *DTU,
                                           const DataLayout &DL,
                                           const TargetTransformInfo &TTI) {
  assert(PBI->isConditional() && BI->isConditional());
  BasicBlock *BB = BI->getParent();

  // If this block ends with a branch instruction, and if there is a
  // predecessor that ends on a branch of the same condition, make
  // this conditional branch redundant.
  if (PBI->getCondition() == BI->getCondition() &&
      PBI->getSuccessor(i: 0) != PBI->getSuccessor(i: 1)) {
    // Okay, the outcome of this conditional branch is statically
    // knowable. If this block had a single pred, handle specially, otherwise
    // foldCondBranchOnValueKnownInPredecessor() will handle it.
    if (BB->getSinglePredecessor()) {
      // Turn this into a branch on constant.
      bool CondIsTrue = PBI->getSuccessor(i: 0) == BB;
      BI->setCondition(
          ConstantInt::get(Ty: Type::getInt1Ty(C&: BB->getContext()), V: CondIsTrue));
      return true; // Nuke the branch on constant.
    }
  }

  // If the previous block ended with a widenable branch, determine if reusing
  // the target block is profitable and legal. This will have the effect of
  // "widening" PBI, but doesn't require us to reason about hoisting safety.
  if (tryWidenCondBranchToCondBranch(PBI, BI, DTU))
    return true;

  // If both branches are conditional and both contain stores to the same
  // address, remove the stores from the conditionals and create a conditional
  // merged store at the end.
  if (MergeCondStores && mergeConditionalStores(PBI, QBI: BI, DTU, DL, TTI))
    return true;

  // If this is a conditional branch in an empty block, and if any
  // predecessors are a conditional branch to one of our destinations,
  // fold the conditions into logical ops and one cond br.

  // Ignore dbg intrinsics.
  if (&*BB->instructionsWithoutDebug(SkipPseudoOp: false).begin() != BI)
    return false;

  // Determine which successor index of PBI (PBIOp) and of BI (BIOp) points at
  // the destination the two branches share; give up if they share none.
  int PBIOp, BIOp;
  if (PBI->getSuccessor(i: 0) == BI->getSuccessor(i: 0)) {
    PBIOp = 0;
    BIOp = 0;
  } else if (PBI->getSuccessor(i: 0) == BI->getSuccessor(i: 1)) {
    PBIOp = 0;
    BIOp = 1;
  } else if (PBI->getSuccessor(i: 1) == BI->getSuccessor(i: 0)) {
    PBIOp = 1;
    BIOp = 0;
  } else if (PBI->getSuccessor(i: 1) == BI->getSuccessor(i: 1)) {
    PBIOp = 1;
    BIOp = 1;
  } else {
    return false;
  }

  // Check to make sure that the other destination of this branch
  // isn't BB itself. If so, this is an infinite loop that will
  // keep getting unwound.
  if (PBI->getSuccessor(i: PBIOp) == BB)
    return false;

  // If predecessor's branch probability to BB is too low don't merge branches.
  SmallVector<uint32_t, 2> PredWeights;
  if (!PBI->getMetadata(KindID: LLVMContext::MD_unpredictable) &&
      extractBranchWeights(I: *PBI, Weights&: PredWeights) &&
      (static_cast<uint64_t>(PredWeights[0]) + PredWeights[1]) != 0) {

    BranchProbability CommonDestProb = BranchProbability::getBranchProbability(
        Numerator: PredWeights[PBIOp],
        Denominator: static_cast<uint64_t>(PredWeights[0]) + PredWeights[1]);

    BranchProbability Likely = TTI.getPredictableBranchThreshold();
    if (CommonDestProb >= Likely)
      return false;
  }

  // Do not perform this transformation if it would require
  // insertion of a large number of select instructions. For targets
  // without predication/cmovs, this is a big pessimization.

  BasicBlock *CommonDest = PBI->getSuccessor(i: PBIOp);
  BasicBlock *RemovedDest = PBI->getSuccessor(i: PBIOp ^ 1);
  unsigned NumPhis = 0;
  for (BasicBlock::iterator II = CommonDest->begin(); isa<PHINode>(Val: II);
       ++II, ++NumPhis) {
    if (NumPhis > 2) // Disable this xform.
      return false;
  }

  // Finally, if everything is ok, fold the branches to logical ops.
  BasicBlock *OtherDest = BI->getSuccessor(i: BIOp ^ 1);

  LLVM_DEBUG(dbgs() << "FOLDING BRs:" << *PBI->getParent()
                    << "AND: " << *BI->getParent());

  SmallVector<DominatorTree::UpdateType, 5> Updates;

  // If OtherDest *is* BB, then BB is a basic block with a single conditional
  // branch in it, where one edge (OtherDest) goes back to itself but the other
  // exits. We don't *know* that the program avoids the infinite loop
  // (even though that seems likely). If we do this xform naively, we'll end up
  // recursively unpeeling the loop. Since we know that (after the xform is
  // done) that the block *is* infinite if reached, we just make it an obviously
  // infinite loop with no cond branch.
  if (OtherDest == BB) {
    // Insert it at the end of the function, because it's either code,
    // or it won't matter if it's hot. :)
    BasicBlock *InfLoopBlock =
        BasicBlock::Create(Context&: BB->getContext(), Name: "infloop", Parent: BB->getParent());
    BranchInst::Create(IfTrue: InfLoopBlock, InsertBefore: InfLoopBlock);
    if (DTU)
      Updates.push_back(Elt: {DominatorTree::Insert, InfLoopBlock, InfLoopBlock});
    OtherDest = InfLoopBlock;
  }

  LLVM_DEBUG(dbgs() << *PBI->getParent()->getParent());

  // BI may have other predecessors. Because of this, we leave
  // it alone, but modify PBI.

  // Make sure we get to CommonDest on True&True directions.
  Value *PBICond = PBI->getCondition();
  IRBuilder<NoFolder> Builder(PBI);
  if (PBIOp)
    PBICond = Builder.CreateNot(V: PBICond, Name: PBICond->getName() + ".not");

  Value *BICond = BI->getCondition();
  if (BIOp)
    BICond = Builder.CreateNot(V: BICond, Name: BICond->getName() + ".not");

  // Merge the conditions.
  Value *Cond =
      createLogicalOp(Builder, Opc: Instruction::Or, LHS: PBICond, RHS: BICond, Name: "brmerge");

  // Modify PBI to branch on the new condition to the new dests.
  PBI->setCondition(Cond);
  PBI->setSuccessor(idx: 0, NewSucc: CommonDest);
  PBI->setSuccessor(idx: 1, NewSucc: OtherDest);

  if (DTU) {
    Updates.push_back(Elt: {DominatorTree::Insert, PBI->getParent(), OtherDest});
    Updates.push_back(Elt: {DominatorTree::Delete, PBI->getParent(), RemovedDest});

    DTU->applyUpdates(Updates);
  }

  // Update branch weight for PBI.
  uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
  uint64_t PredCommon, PredOther, SuccCommon, SuccOther;
  bool HasWeights =
      extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight,
                             SuccTrueWeight, SuccFalseWeight);
  if (HasWeights) {
    PredCommon = PBIOp ? PredFalseWeight : PredTrueWeight;
    PredOther = PBIOp ? PredTrueWeight : PredFalseWeight;
    SuccCommon = BIOp ? SuccFalseWeight : SuccTrueWeight;
    SuccOther = BIOp ? SuccTrueWeight : SuccFalseWeight;
    // The weight to CommonDest should be PredCommon * SuccTotal +
    //                                    PredOther * SuccCommon.
    // The weight to OtherDest should be PredOther * SuccOther.
    uint64_t NewWeights[2] = {PredCommon * (SuccCommon + SuccOther) +
                                  PredOther * SuccCommon,
                              PredOther * SuccOther};
    // Halve the weights if any of them cannot fit in an uint32_t
    fitWeights(Weights: NewWeights);

    setBranchWeights(I: PBI, TrueWeight: NewWeights[0], FalseWeight: NewWeights[1], /*IsExpected=*/false);
  }

  // OtherDest may have phi nodes. If so, add an entry from PBI's
  // block that are identical to the entries for BI's block.
  addPredecessorToBlock(Succ: OtherDest, NewPred: PBI->getParent(), ExistPred: BB);

  // We know that the CommonDest already had an edge from PBI to
  // it. If it has PHIs though, the PHIs may have different
  // entries for BB and PBI's BB. If so, insert a select to make
  // them agree.
  for (PHINode &PN : CommonDest->phis()) {
    Value *BIV = PN.getIncomingValueForBlock(BB);
    unsigned PBBIdx = PN.getBasicBlockIndex(BB: PBI->getParent());
    Value *PBIV = PN.getIncomingValue(i: PBBIdx);
    if (BIV != PBIV) {
      // Insert a select in PBI to pick the right value.
      SelectInst *NV = cast<SelectInst>(
          Val: Builder.CreateSelect(C: PBICond, True: PBIV, False: BIV, Name: PBIV->getName() + ".mux"));
      PN.setIncomingValue(i: PBBIdx, V: NV);
      // Although the select has the same condition as PBI, the original branch
      // weights for PBI do not apply to the new select because the select's
      // 'logical' edges are incoming edges of the phi that is eliminated, not
      // the outgoing edges of PBI.
      if (HasWeights) {
        uint64_t PredCommon = PBIOp ? PredFalseWeight : PredTrueWeight;
        uint64_t PredOther = PBIOp ? PredTrueWeight : PredFalseWeight;
        uint64_t SuccCommon = BIOp ? SuccFalseWeight : SuccTrueWeight;
        uint64_t SuccOther = BIOp ? SuccTrueWeight : SuccFalseWeight;
        // The weight to PredCommonDest should be PredCommon * SuccTotal.
        // The weight to PredOtherDest should be PredOther * SuccCommon.
        uint64_t NewWeights[2] = {PredCommon * (SuccCommon + SuccOther),
                                  PredOther * SuccCommon};

        fitWeights(Weights: NewWeights);

        setBranchWeights(I: NV, TrueWeight: NewWeights[0], FalseWeight: NewWeights[1],
                         /*IsExpected=*/false);
      }
    }
  }

  LLVM_DEBUG(dbgs() << "INTO: " << *PBI->getParent());
  LLVM_DEBUG(dbgs() << *PBI->getParent()->getParent());

  // This basic block is probably dead. We know it has at least
  // one fewer predecessor.
  return true;
}
| 4760 | |
// Simplifies a terminator by replacing it with a branch to TrueBB if Cond is
// true or to FalseBB if Cond is false.
// Takes care of updating the successors and removing the old terminator.
// Also makes sure not to introduce new successors by assuming that edges to
// non-successor TrueBBs and FalseBBs aren't reachable.
bool SimplifyCFGOpt::simplifyTerminatorOnSelect(Instruction *OldTerm,
                                                Value *Cond, BasicBlock *TrueBB,
                                                BasicBlock *FalseBB,
                                                uint32_t TrueWeight,
                                                uint32_t FalseWeight) {
  auto *BB = OldTerm->getParent();
  // Remove any superfluous successor edges from the CFG.
  // First, figure out which successors to preserve.
  // If TrueBB and FalseBB are equal, only try to preserve one copy of that
  // successor.
  BasicBlock *KeepEdge1 = TrueBB;
  BasicBlock *KeepEdge2 = TrueBB != FalseBB ? FalseBB : nullptr;

  // Successors of OldTerm that lose their edge from BB entirely; used to
  // inform the DomTreeUpdater after the terminator is replaced.
  SmallSetVector<BasicBlock *, 2> RemovedSuccessors;

  // Then remove the rest.
  // Each KeepEdgeN is nulled out the first time the corresponding block is
  // seen as a successor, so a KeepEdgeN that is still non-null after this
  // loop means that block was never a successor of OldTerm at all.
  for (BasicBlock *Succ : successors(I: OldTerm)) {
    // Make sure only to keep exactly one copy of each edge.
    if (Succ == KeepEdge1)
      KeepEdge1 = nullptr;
    else if (Succ == KeepEdge2)
      KeepEdge2 = nullptr;
    else {
      Succ->removePredecessor(Pred: BB,
                              /*KeepOneInputPHIs=*/true);

      // Duplicate edges to TrueBB/FalseBB are dropped above but the CFG edge
      // itself survives, so don't report those to the DTU.
      if (Succ != TrueBB && Succ != FalseBB)
        RemovedSuccessors.insert(X: Succ);
    }
  }

  IRBuilder<> Builder(OldTerm);
  Builder.SetCurrentDebugLocation(OldTerm->getDebugLoc());

  // Insert an appropriate new terminator.
  if (!KeepEdge1 && !KeepEdge2) {
    if (TrueBB == FalseBB) {
      // We were only looking for one successor, and it was present.
      // Create an unconditional branch to it.
      Builder.CreateBr(Dest: TrueBB);
    } else {
      // We found both of the successors we were looking for.
      // Create a conditional branch sharing the condition of the select.
      BranchInst *NewBI = Builder.CreateCondBr(Cond, True: TrueBB, False: FalseBB);
      // Equal weights carry no information, so only attach metadata when the
      // weights actually differ.
      if (TrueWeight != FalseWeight)
        setBranchWeights(I: NewBI, TrueWeight, FalseWeight, /*IsExpected=*/false);
    }
  } else if (KeepEdge1 && (KeepEdge2 || TrueBB == FalseBB)) {
    // Neither of the selected blocks were successors, so this
    // terminator must be unreachable.
    new UnreachableInst(OldTerm->getContext(), OldTerm->getIterator());
  } else {
    // One of the selected values was a successor, but the other wasn't.
    // Insert an unconditional branch to the one that was found;
    // the edge to the one that wasn't must be unreachable.
    if (!KeepEdge1) {
      // Only TrueBB was found.
      Builder.CreateBr(Dest: TrueBB);
    } else {
      // Only FalseBB was found.
      Builder.CreateBr(Dest: FalseBB);
    }
  }

  eraseTerminatorAndDCECond(TI: OldTerm);

  // Tell the DomTreeUpdater about every edge that disappeared.
  if (DTU) {
    SmallVector<DominatorTree::UpdateType, 2> Updates;
    Updates.reserve(N: RemovedSuccessors.size());
    for (auto *RemovedSuccessor : RemovedSuccessors)
      Updates.push_back(Elt: {DominatorTree::Delete, BB, RemovedSuccessor});
    DTU->applyUpdates(Updates);
  }

  return true;
}
| 4842 | |
| 4843 | // Replaces |
| 4844 | // (switch (select cond, X, Y)) on constant X, Y |
| 4845 | // with a branch - conditional if X and Y lead to distinct BBs, |
| 4846 | // unconditional otherwise. |
| 4847 | bool SimplifyCFGOpt::simplifySwitchOnSelect(SwitchInst *SI, |
| 4848 | SelectInst *Select) { |
| 4849 | // Check for constant integer values in the select. |
| 4850 | ConstantInt *TrueVal = dyn_cast<ConstantInt>(Val: Select->getTrueValue()); |
| 4851 | ConstantInt *FalseVal = dyn_cast<ConstantInt>(Val: Select->getFalseValue()); |
| 4852 | if (!TrueVal || !FalseVal) |
| 4853 | return false; |
| 4854 | |
| 4855 | // Find the relevant condition and destinations. |
| 4856 | Value *Condition = Select->getCondition(); |
| 4857 | BasicBlock *TrueBB = SI->findCaseValue(C: TrueVal)->getCaseSuccessor(); |
| 4858 | BasicBlock *FalseBB = SI->findCaseValue(C: FalseVal)->getCaseSuccessor(); |
| 4859 | |
| 4860 | // Get weight for TrueBB and FalseBB. |
| 4861 | uint32_t TrueWeight = 0, FalseWeight = 0; |
| 4862 | SmallVector<uint64_t, 8> Weights; |
| 4863 | bool HasWeights = hasBranchWeightMD(I: *SI); |
| 4864 | if (HasWeights) { |
| 4865 | getBranchWeights(TI: SI, Weights); |
| 4866 | if (Weights.size() == 1 + SI->getNumCases()) { |
| 4867 | TrueWeight = |
| 4868 | (uint32_t)Weights[SI->findCaseValue(C: TrueVal)->getSuccessorIndex()]; |
| 4869 | FalseWeight = |
| 4870 | (uint32_t)Weights[SI->findCaseValue(C: FalseVal)->getSuccessorIndex()]; |
| 4871 | } |
| 4872 | } |
| 4873 | |
| 4874 | // Perform the actual simplification. |
| 4875 | return simplifyTerminatorOnSelect(OldTerm: SI, Cond: Condition, TrueBB, FalseBB, TrueWeight, |
| 4876 | FalseWeight); |
| 4877 | } |
| 4878 | |
| 4879 | // Replaces |
| 4880 | // (indirectbr (select cond, blockaddress(@fn, BlockA), |
| 4881 | // blockaddress(@fn, BlockB))) |
| 4882 | // with |
| 4883 | // (br cond, BlockA, BlockB). |
| 4884 | bool SimplifyCFGOpt::simplifyIndirectBrOnSelect(IndirectBrInst *IBI, |
| 4885 | SelectInst *SI) { |
| 4886 | // Check that both operands of the select are block addresses. |
| 4887 | BlockAddress *TBA = dyn_cast<BlockAddress>(Val: SI->getTrueValue()); |
| 4888 | BlockAddress *FBA = dyn_cast<BlockAddress>(Val: SI->getFalseValue()); |
| 4889 | if (!TBA || !FBA) |
| 4890 | return false; |
| 4891 | |
| 4892 | // Extract the actual blocks. |
| 4893 | BasicBlock *TrueBB = TBA->getBasicBlock(); |
| 4894 | BasicBlock *FalseBB = FBA->getBasicBlock(); |
| 4895 | |
| 4896 | // Perform the actual simplification. |
| 4897 | return simplifyTerminatorOnSelect(OldTerm: IBI, Cond: SI->getCondition(), TrueBB, FalseBB, TrueWeight: 0, |
| 4898 | FalseWeight: 0); |
| 4899 | } |
| 4900 | |
/// This is called when we find an icmp instruction
/// (a seteq/setne with a constant) as the only instruction in a
/// block that ends with an uncond branch.  We are looking for a very specific
/// pattern that occurs when "A == 1 || A == 2 || A == 3" gets simplified.  In
/// this case, we merge the first two "or's of icmp" into a switch, but then the
/// default value goes to an uncond block with a seteq in it, we get something
/// like:
///
///   switch i8 %A, label %DEFAULT [ i8 1, label %end    i8 2, label %end ]
/// DEFAULT:
///   %tmp = icmp eq i8 %A, 92
///   br label %end
/// end:
///   ... = phi i1 [ true, %entry ], [ %tmp, %DEFAULT ], [ true, %entry ]
///
/// We prefer to split the edge to 'end' so that there is a true/false entry to
/// the PHI, merging the third icmp into the switch.
bool SimplifyCFGOpt::tryToSimplifyUncondBranchWithICmpInIt(
    ICmpInst *ICI, IRBuilder<> &Builder) {
  BasicBlock *BB = ICI->getParent();

  // If the block has any PHIs in it or the icmp has multiple uses, it is too
  // complex.
  if (isa<PHINode>(Val: BB->begin()) || !ICI->hasOneUse())
    return false;

  Value *V = ICI->getOperand(i_nocapture: 0);
  ConstantInt *Cst = cast<ConstantInt>(Val: ICI->getOperand(i_nocapture: 1));

  // The pattern we're looking for is where our only predecessor is a switch on
  // 'V' and this block is the default case for the switch.  In this case we can
  // fold the compared value into the switch to simplify things.
  BasicBlock *Pred = BB->getSinglePredecessor();
  if (!Pred || !isa<SwitchInst>(Val: Pred->getTerminator()))
    return false;

  SwitchInst *SI = cast<SwitchInst>(Val: Pred->getTerminator());
  if (SI->getCondition() != V)
    return false;

  // If BB is reachable on a non-default case, then we simply know the value of
  // V in this block.  Substitute it and constant fold the icmp instruction
  // away.
  if (SI->getDefaultDest() != BB) {
    ConstantInt *VVal = SI->findCaseDest(BB);
    assert(VVal && "Should have a unique destination value" );
    ICI->setOperand(i_nocapture: 0, Val_nocapture: VVal);

    // With both operands now constant the icmp should fold; replace and
    // remove it if it does.
    if (Value *V = simplifyInstruction(I: ICI, Q: {DL, ICI})) {
      ICI->replaceAllUsesWith(V);
      ICI->eraseFromParent();
    }
    // BB is now empty, so it is likely to simplify away.
    return requestResimplify();
  }

  // Ok, the block is reachable from the default dest.  If the constant we're
  // comparing exists in one of the other edges, then we can constant fold ICI
  // and zap it.
  if (SI->findCaseValue(C: Cst) != SI->case_default()) {
    // Control only reaches the default destination when V matched no case,
    // so here V != Cst: an eq compare is false, a ne compare is true.
    Value *V;
    if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
      V = ConstantInt::getFalse(Context&: BB->getContext());
    else
      V = ConstantInt::getTrue(Context&: BB->getContext());

    ICI->replaceAllUsesWith(V);
    ICI->eraseFromParent();
    // BB is now empty, so it is likely to simplify away.
    return requestResimplify();
  }

  // The use of the icmp has to be in the 'end' block, by the only PHI node in
  // the block.
  BasicBlock *SuccBlock = BB->getTerminator()->getSuccessor(Idx: 0);
  PHINode *PHIUse = dyn_cast<PHINode>(Val: ICI->user_back());
  if (PHIUse == nullptr || PHIUse != &SuccBlock->front() ||
      isa<PHINode>(Val: ++BasicBlock::iterator(PHIUse)))
    return false;

  // If the icmp is a SETEQ, then the default dest gets false, the new edge gets
  // true in the PHI.
  Constant *DefaultCst = ConstantInt::getTrue(Context&: BB->getContext());
  Constant *NewCst = ConstantInt::getFalse(Context&: BB->getContext());

  if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
    std::swap(a&: DefaultCst, b&: NewCst);

  // Replace ICI (which is used by the PHI for the default value) with true or
  // false depending on if it is EQ or NE.
  ICI->replaceAllUsesWith(V: DefaultCst);
  ICI->eraseFromParent();

  SmallVector<DominatorTree::UpdateType, 2> Updates;

  // Okay, the switch goes to this block on a default value.  Add an edge from
  // the switch to the merge point on the compared value.
  BasicBlock *NewBB =
      BasicBlock::Create(Context&: BB->getContext(), Name: "switch.edge" , Parent: BB->getParent(), InsertBefore: BB);
  {
    // Split the default edge's weight between the remaining default edge and
    // the new case edge: halve it (rounding up) and give the same half to the
    // new case. The wrapper keeps the switch's prof metadata consistent.
    SwitchInstProfUpdateWrapper SIW(*SI);
    auto W0 = SIW.getSuccessorWeight(idx: 0);
    SwitchInstProfUpdateWrapper::CaseWeightOpt NewW;
    if (W0) {
      NewW = ((uint64_t(*W0) + 1) >> 1);
      SIW.setSuccessorWeight(idx: 0, W: *NewW);
    }
    SIW.addCase(OnVal: Cst, Dest: NewBB, W: NewW);
    if (DTU)
      Updates.push_back(Elt: {DominatorTree::Insert, Pred, NewBB});
  }

  // NewBB branches to the phi block, add the uncond branch and the phi entry.
  Builder.SetInsertPoint(NewBB);
  Builder.SetCurrentDebugLocation(SI->getDebugLoc());
  Builder.CreateBr(Dest: SuccBlock);
  PHIUse->addIncoming(V: NewCst, BB: NewBB);
  if (DTU) {
    Updates.push_back(Elt: {DominatorTree::Insert, NewBB, SuccBlock});
    DTU->applyUpdates(Updates);
  }
  return true;
}
| 5024 | |
| 5025 | /// The specified branch is a conditional branch. |
| 5026 | /// Check to see if it is branching on an or/and chain of icmp instructions, and |
| 5027 | /// fold it into a switch instruction if so. |
| 5028 | bool SimplifyCFGOpt::simplifyBranchOnICmpChain(BranchInst *BI, |
| 5029 | IRBuilder<> &Builder, |
| 5030 | const DataLayout &DL) { |
| 5031 | Instruction *Cond = dyn_cast<Instruction>(Val: BI->getCondition()); |
| 5032 | if (!Cond) |
| 5033 | return false; |
| 5034 | |
| 5035 | // Change br (X == 0 | X == 1), T, F into a switch instruction. |
| 5036 | // If this is a bunch of seteq's or'd together, or if it's a bunch of |
| 5037 | // 'setne's and'ed together, collect them. |
| 5038 | |
| 5039 | // Try to gather values from a chain of and/or to be turned into a switch |
| 5040 | ConstantComparesGatherer ConstantCompare(Cond, DL); |
| 5041 | // Unpack the result |
| 5042 | SmallVectorImpl<ConstantInt *> &Values = ConstantCompare.Vals; |
| 5043 | Value *CompVal = ConstantCompare.CompValue; |
| 5044 | unsigned UsedICmps = ConstantCompare.UsedICmps; |
| 5045 | Value * = ConstantCompare.Extra; |
| 5046 | |
| 5047 | // If we didn't have a multiply compared value, fail. |
| 5048 | if (!CompVal) |
| 5049 | return false; |
| 5050 | |
| 5051 | // Avoid turning single icmps into a switch. |
| 5052 | if (UsedICmps <= 1) |
| 5053 | return false; |
| 5054 | |
| 5055 | bool TrueWhenEqual = match(V: Cond, P: m_LogicalOr(L: m_Value(), R: m_Value())); |
| 5056 | |
| 5057 | // There might be duplicate constants in the list, which the switch |
| 5058 | // instruction can't handle, remove them now. |
| 5059 | array_pod_sort(Start: Values.begin(), End: Values.end(), Compare: constantIntSortPredicate); |
| 5060 | Values.erase(CS: llvm::unique(R&: Values), CE: Values.end()); |
| 5061 | |
| 5062 | // If Extra was used, we require at least two switch values to do the |
| 5063 | // transformation. A switch with one value is just a conditional branch. |
| 5064 | if (ExtraCase && Values.size() < 2) |
| 5065 | return false; |
| 5066 | |
| 5067 | // TODO: Preserve branch weight metadata, similarly to how |
| 5068 | // foldValueComparisonIntoPredecessors preserves it. |
| 5069 | |
| 5070 | // Figure out which block is which destination. |
| 5071 | BasicBlock *DefaultBB = BI->getSuccessor(i: 1); |
| 5072 | BasicBlock *EdgeBB = BI->getSuccessor(i: 0); |
| 5073 | if (!TrueWhenEqual) |
| 5074 | std::swap(a&: DefaultBB, b&: EdgeBB); |
| 5075 | |
| 5076 | BasicBlock *BB = BI->getParent(); |
| 5077 | |
| 5078 | LLVM_DEBUG(dbgs() << "Converting 'icmp' chain with " << Values.size() |
| 5079 | << " cases into SWITCH. BB is:\n" |
| 5080 | << *BB); |
| 5081 | |
| 5082 | SmallVector<DominatorTree::UpdateType, 2> Updates; |
| 5083 | |
| 5084 | // If there are any extra values that couldn't be folded into the switch |
| 5085 | // then we evaluate them with an explicit branch first. Split the block |
| 5086 | // right before the condbr to handle it. |
| 5087 | if (ExtraCase) { |
| 5088 | BasicBlock *NewBB = SplitBlock(Old: BB, SplitPt: BI, DTU, /*LI=*/nullptr, |
| 5089 | /*MSSAU=*/nullptr, BBName: "switch.early.test" ); |
| 5090 | |
| 5091 | // Remove the uncond branch added to the old block. |
| 5092 | Instruction *OldTI = BB->getTerminator(); |
| 5093 | Builder.SetInsertPoint(OldTI); |
| 5094 | |
| 5095 | // There can be an unintended UB if extra values are Poison. Before the |
| 5096 | // transformation, extra values may not be evaluated according to the |
| 5097 | // condition, and it will not raise UB. But after transformation, we are |
| 5098 | // evaluating extra values before checking the condition, and it will raise |
| 5099 | // UB. It can be solved by adding freeze instruction to extra values. |
| 5100 | AssumptionCache *AC = Options.AC; |
| 5101 | |
| 5102 | if (!isGuaranteedNotToBeUndefOrPoison(V: ExtraCase, AC, CtxI: BI, DT: nullptr)) |
| 5103 | ExtraCase = Builder.CreateFreeze(V: ExtraCase); |
| 5104 | |
| 5105 | if (TrueWhenEqual) |
| 5106 | Builder.CreateCondBr(Cond: ExtraCase, True: EdgeBB, False: NewBB); |
| 5107 | else |
| 5108 | Builder.CreateCondBr(Cond: ExtraCase, True: NewBB, False: EdgeBB); |
| 5109 | |
| 5110 | OldTI->eraseFromParent(); |
| 5111 | |
| 5112 | if (DTU) |
| 5113 | Updates.push_back(Elt: {DominatorTree::Insert, BB, EdgeBB}); |
| 5114 | |
| 5115 | // If there are PHI nodes in EdgeBB, then we need to add a new entry to them |
| 5116 | // for the edge we just added. |
| 5117 | addPredecessorToBlock(Succ: EdgeBB, NewPred: BB, ExistPred: NewBB); |
| 5118 | |
| 5119 | LLVM_DEBUG(dbgs() << " ** 'icmp' chain unhandled condition: " << *ExtraCase |
| 5120 | << "\nEXTRABB = " << *BB); |
| 5121 | BB = NewBB; |
| 5122 | } |
| 5123 | |
| 5124 | Builder.SetInsertPoint(BI); |
| 5125 | // Convert pointer to int before we switch. |
| 5126 | if (CompVal->getType()->isPointerTy()) { |
| 5127 | CompVal = Builder.CreatePtrToInt( |
| 5128 | V: CompVal, DestTy: DL.getIntPtrType(CompVal->getType()), Name: "magicptr" ); |
| 5129 | } |
| 5130 | |
| 5131 | // Create the new switch instruction now. |
| 5132 | SwitchInst *New = Builder.CreateSwitch(V: CompVal, Dest: DefaultBB, NumCases: Values.size()); |
| 5133 | |
| 5134 | // Add all of the 'cases' to the switch instruction. |
| 5135 | for (ConstantInt *Val : Values) |
| 5136 | New->addCase(OnVal: Val, Dest: EdgeBB); |
| 5137 | |
| 5138 | // We added edges from PI to the EdgeBB. As such, if there were any |
| 5139 | // PHI nodes in EdgeBB, they need entries to be added corresponding to |
| 5140 | // the number of edges added. |
| 5141 | for (BasicBlock::iterator BBI = EdgeBB->begin(); isa<PHINode>(Val: BBI); ++BBI) { |
| 5142 | PHINode *PN = cast<PHINode>(Val&: BBI); |
| 5143 | Value *InVal = PN->getIncomingValueForBlock(BB); |
| 5144 | for (unsigned i = 0, e = Values.size() - 1; i != e; ++i) |
| 5145 | PN->addIncoming(V: InVal, BB); |
| 5146 | } |
| 5147 | |
| 5148 | // Erase the old branch instruction. |
| 5149 | eraseTerminatorAndDCECond(TI: BI); |
| 5150 | if (DTU) |
| 5151 | DTU->applyUpdates(Updates); |
| 5152 | |
| 5153 | LLVM_DEBUG(dbgs() << " ** 'icmp' chain result is:\n" << *BB << '\n'); |
| 5154 | return true; |
| 5155 | } |
| 5156 | |
| 5157 | bool SimplifyCFGOpt::simplifyResume(ResumeInst *RI, IRBuilder<> &Builder) { |
| 5158 | if (isa<PHINode>(Val: RI->getValue())) |
| 5159 | return simplifyCommonResume(RI); |
| 5160 | else if (isa<LandingPadInst>(Val: RI->getParent()->getFirstNonPHIIt()) && |
| 5161 | RI->getValue() == &*RI->getParent()->getFirstNonPHIIt()) |
| 5162 | // The resume must unwind the exception that caused control to branch here. |
| 5163 | return simplifySingleResume(RI); |
| 5164 | |
| 5165 | return false; |
| 5166 | } |
| 5167 | |
| 5168 | // Check if cleanup block is empty |
| 5169 | static bool isCleanupBlockEmpty(iterator_range<BasicBlock::iterator> R) { |
| 5170 | for (Instruction &I : R) { |
| 5171 | auto *II = dyn_cast<IntrinsicInst>(Val: &I); |
| 5172 | if (!II) |
| 5173 | return false; |
| 5174 | |
| 5175 | Intrinsic::ID IntrinsicID = II->getIntrinsicID(); |
| 5176 | switch (IntrinsicID) { |
| 5177 | case Intrinsic::dbg_declare: |
| 5178 | case Intrinsic::dbg_value: |
| 5179 | case Intrinsic::dbg_label: |
| 5180 | case Intrinsic::lifetime_end: |
| 5181 | break; |
| 5182 | default: |
| 5183 | return false; |
| 5184 | } |
| 5185 | } |
| 5186 | return true; |
| 5187 | } |
| 5188 | |
// Simplify resume that is shared by several landing pads (phi of landing pad).
bool SimplifyCFGOpt::simplifyCommonResume(ResumeInst *RI) {
  BasicBlock *BB = RI->getParent();

  // Check that there are no other instructions except for debug and lifetime
  // intrinsics between the phi's and resume instruction.
  if (!isCleanupBlockEmpty(R: make_range(x: RI->getParent()->getFirstNonPHIIt(),
                                       y: BB->getTerminator()->getIterator())))
    return false;

  SmallSetVector<BasicBlock *, 4> TrivialUnwindBlocks;
  auto *PhiLPInst = cast<PHINode>(Val: RI->getValue());

  // Check incoming blocks to see if any of them are trivial.
  // A "trivial" incoming block is one whose sole successor is BB, whose
  // incoming value is its own landing pad, and whose body (past the pad)
  // is empty of real instructions.
  for (unsigned Idx = 0, End = PhiLPInst->getNumIncomingValues(); Idx != End;
       Idx++) {
    auto *IncomingBB = PhiLPInst->getIncomingBlock(i: Idx);
    auto *IncomingValue = PhiLPInst->getIncomingValue(i: Idx);

    // If the block has other successors, we can not delete it because
    // it has other dependents.
    if (IncomingBB->getUniqueSuccessor() != BB)
      continue;

    auto *LandingPad = dyn_cast<LandingPadInst>(Val: IncomingBB->getFirstNonPHIIt());
    // Not the landing pad that caused the control to branch here.
    if (IncomingValue != LandingPad)
      continue;

    if (isCleanupBlockEmpty(
            R: make_range(x: LandingPad->getNextNode(), y: IncomingBB->getTerminator())))
      TrivialUnwindBlocks.insert(X: IncomingBB);
  }

  // If no trivial unwind blocks, don't do any simplifications.
  if (TrivialUnwindBlocks.empty())
    return false;

  // Turn all invokes that unwind here into calls.
  for (auto *TrivialBB : TrivialUnwindBlocks) {
    // Blocks that will be simplified should be removed from the phi node.
    // Note there could be multiple edges to the resume block, and we need
    // to remove them all.
    while (PhiLPInst->getBasicBlockIndex(BB: TrivialBB) != -1)
      BB->removePredecessor(Pred: TrivialBB, KeepOneInputPHIs: true);

    // Convert each invoke that unwinds into TrivialBB into a plain call.
    for (BasicBlock *Pred :
         llvm::make_early_inc_range(Range: predecessors(BB: TrivialBB))) {
      removeUnwindEdge(BB: Pred, DTU);
      ++NumInvokes;
    }

    // In each SimplifyCFG run, only the current processed block can be erased.
    // Otherwise, it will break the iteration of SimplifyCFG pass. So instead
    // of erasing TrivialBB, we only remove the branch to the common resume
    // block so that we can later erase the resume block since it has no
    // predecessors.
    TrivialBB->getTerminator()->eraseFromParent();
    new UnreachableInst(RI->getContext(), TrivialBB);
    if (DTU)
      DTU->applyUpdates(Updates: {{DominatorTree::Delete, TrivialBB, BB}});
  }

  // Delete the resume block if all its predecessors have been removed.
  if (pred_empty(BB))
    DeleteDeadBlock(BB, DTU);

  // TrivialUnwindBlocks was non-empty (checked above), so we changed the IR.
  return !TrivialUnwindBlocks.empty();
}
| 5258 | |
| 5259 | // Simplify resume that is only used by a single (non-phi) landing pad. |
| 5260 | bool SimplifyCFGOpt::simplifySingleResume(ResumeInst *RI) { |
| 5261 | BasicBlock *BB = RI->getParent(); |
| 5262 | auto *LPInst = cast<LandingPadInst>(Val: BB->getFirstNonPHIIt()); |
| 5263 | assert(RI->getValue() == LPInst && |
| 5264 | "Resume must unwind the exception that caused control to here" ); |
| 5265 | |
| 5266 | // Check that there are no other instructions except for debug intrinsics. |
| 5267 | if (!isCleanupBlockEmpty( |
| 5268 | R: make_range<Instruction *>(x: LPInst->getNextNode(), y: RI))) |
| 5269 | return false; |
| 5270 | |
| 5271 | // Turn all invokes that unwind here into calls and delete the basic block. |
| 5272 | for (BasicBlock *Pred : llvm::make_early_inc_range(Range: predecessors(BB))) { |
| 5273 | removeUnwindEdge(BB: Pred, DTU); |
| 5274 | ++NumInvokes; |
| 5275 | } |
| 5276 | |
| 5277 | // The landingpad is now unreachable. Zap it. |
| 5278 | DeleteDeadBlock(BB, DTU); |
| 5279 | return true; |
| 5280 | } |
| 5281 | |
static bool removeEmptyCleanup(CleanupReturnInst *RI, DomTreeUpdater *DTU) {
  // If this is a trivial cleanup pad that executes no instructions, it can be
  // eliminated.  If the cleanup pad continues to the caller, any predecessor
  // that is an EH pad will be updated to continue to the caller and any
  // predecessor that terminates with an invoke instruction will have its
  // invoke instruction converted to a call instruction.  If the cleanup pad
  // being simplified does not continue to the caller, each predecessor will be
  // updated to continue to the unwind destination of the cleanup pad being
  // simplified.
  BasicBlock *BB = RI->getParent();
  CleanupPadInst *CPInst = RI->getCleanupPad();
  if (CPInst->getParent() != BB)
    // This isn't an empty cleanup.
    return false;

  // We cannot kill the pad if it has multiple uses.  This typically arises
  // from unreachable basic blocks.
  if (!CPInst->hasOneUse())
    return false;

  // Check that there are no other instructions except for benign intrinsics.
  if (!isCleanupBlockEmpty(
          R: make_range<Instruction *>(x: CPInst->getNextNode(), y: RI)))
    return false;

  // If the cleanup return we are simplifying unwinds to the caller, this will
  // set UnwindDest to nullptr.
  BasicBlock *UnwindDest = RI->getUnwindDest();

  // We're about to remove BB from the control flow.  Before we do, sink any
  // PHINodes into the unwind destination.  Doing this before changing the
  // control flow avoids some potentially slow checks, since we can currently
  // be certain that UnwindDest and BB have no common predecessors (since they
  // are both EH pads).
  if (UnwindDest) {
    // First, go through the PHI nodes in UnwindDest and update any nodes that
    // reference the block we are removing
    for (PHINode &DestPN : UnwindDest->phis()) {
      int Idx = DestPN.getBasicBlockIndex(BB);
      // Since BB unwinds to UnwindDest, it has to be in the PHI node.
      assert(Idx != -1);
      // This PHI node has an incoming value that corresponds to a control
      // path through the cleanup pad we are removing.  If the incoming
      // value is in the cleanup pad, it must be a PHINode (because we
      // verified above that the block is otherwise empty).  Otherwise, the
      // value is either a constant or a value that dominates the cleanup
      // pad being removed.
      //
      // Because BB and UnwindDest are both EH pads, all of their
      // predecessors must unwind to these blocks, and since no instruction
      // can have multiple unwind destinations, there will be no overlap in
      // incoming blocks between SrcPN and DestPN.
      Value *SrcVal = DestPN.getIncomingValue(i: Idx);
      PHINode *SrcPN = dyn_cast<PHINode>(Val: SrcVal);

      // Re-route each of BB's predecessors directly into DestPN, translating
      // through SrcPN when the incoming value was itself a phi in BB.
      bool NeedPHITranslation = SrcPN && SrcPN->getParent() == BB;
      for (auto *Pred : predecessors(BB)) {
        Value *Incoming =
            NeedPHITranslation ? SrcPN->getIncomingValueForBlock(BB: Pred) : SrcVal;
        DestPN.addIncoming(V: Incoming, BB: Pred);
      }
    }

    // Sink any remaining PHI nodes directly into UnwindDest.
    BasicBlock::iterator InsertPt = UnwindDest->getFirstNonPHIIt();
    for (PHINode &PN : make_early_inc_range(Range: BB->phis())) {
      if (PN.use_empty() || !PN.isUsedOutsideOfBlock(BB))
        // If the PHI node has no uses or all of its uses are in this basic
        // block (meaning they are debug or lifetime intrinsics), just leave
        // it.  It will be erased when we erase BB below.
        continue;

      // Otherwise, sink this PHI node into UnwindDest.
      // Any predecessors to UnwindDest which are not already represented
      // must be back edges which inherit the value from the path through
      // BB.  In this case, the PHI value must reference itself.
      for (auto *pred : predecessors(BB: UnwindDest))
        if (pred != BB)
          PN.addIncoming(V: &PN, BB: pred);
      PN.moveBefore(InsertPos: InsertPt);
      // Also, add a dummy incoming value for the original BB itself,
      // so that the PHI is well-formed until we drop said predecessor.
      PN.addIncoming(V: PoisonValue::get(T: PN.getType()), BB);
    }
  }

  std::vector<DominatorTree::UpdateType> Updates;

  // We use make_early_inc_range here because we will remove all predecessors.
  for (BasicBlock *PredBB : llvm::make_early_inc_range(Range: predecessors(BB))) {
    if (UnwindDest == nullptr) {
      if (DTU) {
        // Flush pending updates before removeUnwindEdge mutates the CFG, so
        // the updater never sees an inconsistent mix of edge changes.
        DTU->applyUpdates(Updates);
        Updates.clear();
      }
      removeUnwindEdge(BB: PredBB, DTU);
      ++NumInvokes;
    } else {
      // Redirect the predecessor's unwind edge past BB to UnwindDest.
      BB->removePredecessor(Pred: PredBB);
      Instruction *TI = PredBB->getTerminator();
      TI->replaceUsesOfWith(From: BB, To: UnwindDest);
      if (DTU) {
        Updates.push_back(x: {DominatorTree::Insert, PredBB, UnwindDest});
        Updates.push_back(x: {DominatorTree::Delete, PredBB, BB});
      }
    }
  }

  if (DTU)
    DTU->applyUpdates(Updates);

  // BB now has no predecessors; remove it.
  DeleteDeadBlock(BB, DTU);

  return true;
}
| 5397 | |
| 5398 | // Try to merge two cleanuppads together. |
| 5399 | static bool mergeCleanupPad(CleanupReturnInst *RI) { |
| 5400 | // Skip any cleanuprets which unwind to caller, there is nothing to merge |
| 5401 | // with. |
| 5402 | BasicBlock *UnwindDest = RI->getUnwindDest(); |
| 5403 | if (!UnwindDest) |
| 5404 | return false; |
| 5405 | |
| 5406 | // This cleanupret isn't the only predecessor of this cleanuppad, it wouldn't |
| 5407 | // be safe to merge without code duplication. |
| 5408 | if (UnwindDest->getSinglePredecessor() != RI->getParent()) |
| 5409 | return false; |
| 5410 | |
| 5411 | // Verify that our cleanuppad's unwind destination is another cleanuppad. |
| 5412 | auto *SuccessorCleanupPad = dyn_cast<CleanupPadInst>(Val: &UnwindDest->front()); |
| 5413 | if (!SuccessorCleanupPad) |
| 5414 | return false; |
| 5415 | |
| 5416 | CleanupPadInst *PredecessorCleanupPad = RI->getCleanupPad(); |
| 5417 | // Replace any uses of the successor cleanupad with the predecessor pad |
| 5418 | // The only cleanuppad uses should be this cleanupret, it's cleanupret and |
| 5419 | // funclet bundle operands. |
| 5420 | SuccessorCleanupPad->replaceAllUsesWith(V: PredecessorCleanupPad); |
| 5421 | // Remove the old cleanuppad. |
| 5422 | SuccessorCleanupPad->eraseFromParent(); |
| 5423 | // Now, we simply replace the cleanupret with a branch to the unwind |
| 5424 | // destination. |
| 5425 | BranchInst::Create(IfTrue: UnwindDest, InsertBefore: RI->getParent()); |
| 5426 | RI->eraseFromParent(); |
| 5427 | |
| 5428 | return true; |
| 5429 | } |
| 5430 | |
/// Simplify a cleanupret: either merge its cleanuppad into the successor
/// cleanuppad, or remove the cleanup entirely when it does no useful work.
bool SimplifyCFGOpt::simplifyCleanupReturn(CleanupReturnInst *RI) {
  // It is possible to transiently have an undef cleanuppad operand because we
  // have deleted some, but not all, dead blocks.
  // Eventually, this block will be deleted.
  if (isa<UndefValue>(Val: RI->getOperand(i_nocapture: 0)))
    return false;

  // Prefer merging adjacent cleanuppads first...
  if (mergeCleanupPad(RI))
    return true;

  // ...then fall back to deleting an empty cleanup altogether.
  if (removeEmptyCleanup(RI, DTU))
    return true;

  return false;
}
| 5446 | |
// WARNING: keep in sync with InstCombinerImpl::visitUnreachableInst()!
bool SimplifyCFGOpt::simplifyUnreachable(UnreachableInst *UI) {
  BasicBlock *BB = UI->getParent();

  bool Changed = false;

  // Ensure that any debug-info records that used to occur after the Unreachable
  // are moved to in front of it -- otherwise they'll "dangle" at the end of
  // the block.
  BB->flushTerminatorDbgRecords();

  // Debug-info records on the unreachable inst itself should be deleted, as
  // below we delete everything past the final executable instruction.
  UI->dropDbgRecords();

  // If there are any instructions immediately before the unreachable that can
  // be removed, do so.
  while (UI->getIterator() != BB->begin()) {
    BasicBlock::iterator BBI = UI->getIterator();
    --BBI;

    if (!isGuaranteedToTransferExecutionToSuccessor(I: &*BBI))
      break; // Can not drop any more instructions. We're done here.
    // Otherwise, this instruction can be freely erased,
    // even if it is not side-effect free.

    // Note that deleting EH's here is in fact okay, although it involves a bit
    // of subtle reasoning. If this inst is an EH, all the predecessors of this
    // block will be the unwind edges of Invoke/CatchSwitch/CleanupReturn,
    // and we can therefore guarantee this block will be erased.

    // If we're deleting this, we're deleting any subsequent debug info, so
    // delete DbgRecords.
    BBI->dropDbgRecords();

    // Delete this instruction (any uses are guaranteed to be dead)
    BBI->replaceAllUsesWith(V: PoisonValue::get(T: BBI->getType()));
    BBI->eraseFromParent();
    Changed = true;
  }

  // If the unreachable instruction is the first in the block, take a gander
  // at all of the predecessors of this instruction, and simplify them.
  if (&BB->front() != UI)
    return Changed;

  std::vector<DominatorTree::UpdateType> Updates;

  // Use a SetVector so each predecessor is processed once, even when it
  // reaches BB through multiple edges.
  SmallSetVector<BasicBlock *, 8> Preds(pred_begin(BB), pred_end(BB));
  for (BasicBlock *Predecessor : Preds) {
    Instruction *TI = Predecessor->getTerminator();
    IRBuilder<> Builder(TI);
    if (auto *BI = dyn_cast<BranchInst>(Val: TI)) {
      // We could either have a proper unconditional branch,
      // or a degenerate conditional branch with matching destinations.
      if (all_of(Range: BI->successors(),
                 P: [BB](auto *Successor) { return Successor == BB; })) {
        // Every successor is BB: the predecessor itself is unreachable.
        new UnreachableInst(TI->getContext(), TI->getIterator());
        TI->eraseFromParent();
        Changed = true;
      } else {
        assert(BI->isConditional() && "Can't get here with an uncond branch.");
        Value* Cond = BI->getCondition();
        assert(BI->getSuccessor(0) != BI->getSuccessor(1) &&
               "The destinations are guaranteed to be different here.");
        // One arm leads to unreachable: record the condition's value as an
        // assumption and branch unconditionally to the live arm.
        CallInst *Assumption;
        if (BI->getSuccessor(i: 0) == BB) {
          Assumption = Builder.CreateAssumption(Cond: Builder.CreateNot(V: Cond));
          Builder.CreateBr(Dest: BI->getSuccessor(i: 1));
        } else {
          assert(BI->getSuccessor(1) == BB && "Incorrect CFG");
          Assumption = Builder.CreateAssumption(Cond);
          Builder.CreateBr(Dest: BI->getSuccessor(i: 0));
        }
        if (Options.AC)
          Options.AC->registerAssumption(CI: cast<AssumeInst>(Val: Assumption));

        eraseTerminatorAndDCECond(TI: BI);
        Changed = true;
      }
      if (DTU)
        Updates.push_back(x: {DominatorTree::Delete, Predecessor, BB});
    } else if (auto *SI = dyn_cast<SwitchInst>(Val: TI)) {
      // Drop every case edge that targets BB, keeping profile data in sync.
      SwitchInstProfUpdateWrapper SU(*SI);
      for (auto i = SU->case_begin(), e = SU->case_end(); i != e;) {
        if (i->getCaseSuccessor() != BB) {
          ++i;
          continue;
        }
        BB->removePredecessor(Pred: SU->getParent());
        i = SU.removeCase(I: i);
        e = SU->case_end();
        Changed = true;
      }
      // Note that the default destination can't be removed!
      if (DTU && SI->getDefaultDest() != BB)
        Updates.push_back(x: {DominatorTree::Delete, Predecessor, BB});
    } else if (auto *II = dyn_cast<InvokeInst>(Val: TI)) {
      if (II->getUnwindDest() == BB) {
        // removeUnwindEdge manipulates the DT directly, so flush pending
        // updates first to keep the updater consistent.
        if (DTU) {
          DTU->applyUpdates(Updates);
          Updates.clear();
        }
        auto *CI = cast<CallInst>(Val: removeUnwindEdge(BB: TI->getParent(), DTU));
        if (!CI->doesNotThrow())
          CI->setDoesNotThrow();
        Changed = true;
      }
    } else if (auto *CSI = dyn_cast<CatchSwitchInst>(Val: TI)) {
      if (CSI->getUnwindDest() == BB) {
        // Same flush-before-removeUnwindEdge dance as for invokes above.
        if (DTU) {
          DTU->applyUpdates(Updates);
          Updates.clear();
        }
        removeUnwindEdge(BB: TI->getParent(), DTU);
        Changed = true;
        continue;
      }

      // BB appears as a handler rather than the unwind destination: drop it.
      for (CatchSwitchInst::handler_iterator I = CSI->handler_begin(),
                                             E = CSI->handler_end();
           I != E; ++I) {
        if (*I == BB) {
          CSI->removeHandler(HI: I);
          --I;
          --E;
          Changed = true;
        }
      }
      if (DTU)
        Updates.push_back(x: {DominatorTree::Delete, Predecessor, BB});
      if (CSI->getNumHandlers() == 0) {
        if (CSI->hasUnwindDest()) {
          // Redirect all predecessors of the block containing CatchSwitchInst
          // to instead branch to the CatchSwitchInst's unwind destination.
          if (DTU) {
            for (auto *PredecessorOfPredecessor : predecessors(BB: Predecessor)) {
              Updates.push_back(x: {DominatorTree::Insert,
                                  PredecessorOfPredecessor,
                                  CSI->getUnwindDest()});
              Updates.push_back(x: {DominatorTree::Delete,
                                  PredecessorOfPredecessor, Predecessor});
            }
          }
          Predecessor->replaceAllUsesWith(V: CSI->getUnwindDest());
        } else {
          // Rewrite all preds to unwind to caller (or from invoke to call).
          if (DTU) {
            DTU->applyUpdates(Updates);
            Updates.clear();
          }
          SmallVector<BasicBlock *, 8> EHPreds(predecessors(BB: Predecessor));
          for (BasicBlock *EHPred : EHPreds)
            removeUnwindEdge(BB: EHPred, DTU);
        }
        // The catchswitch is no longer reachable.
        new UnreachableInst(CSI->getContext(), CSI->getIterator());
        CSI->eraseFromParent();
        Changed = true;
      }
    } else if (auto *CRI = dyn_cast<CleanupReturnInst>(Val: TI)) {
      (void)CRI;
      assert(CRI->hasUnwindDest() && CRI->getUnwindDest() == BB &&
             "Expected to always have an unwind to BB.");
      if (DTU)
        Updates.push_back(x: {DominatorTree::Delete, Predecessor, BB});
      new UnreachableInst(TI->getContext(), TI->getIterator());
      TI->eraseFromParent();
      Changed = true;
    }
  }

  if (DTU)
    DTU->applyUpdates(Updates);

  // If this block is now dead, remove it.
  if (pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) {
    DeleteDeadBlock(BB, DTU);
    return true;
  }

  return Changed;
}
| 5630 | |
| 5631 | static bool casesAreContiguous(SmallVectorImpl<ConstantInt *> &Cases) { |
| 5632 | assert(Cases.size() >= 1); |
| 5633 | |
| 5634 | array_pod_sort(Start: Cases.begin(), End: Cases.end(), Compare: constantIntSortPredicate); |
| 5635 | for (size_t I = 1, E = Cases.size(); I != E; ++I) { |
| 5636 | if (Cases[I - 1]->getValue() != Cases[I]->getValue() + 1) |
| 5637 | return false; |
| 5638 | } |
| 5639 | return true; |
| 5640 | } |
| 5641 | |
/// Replace the switch's default destination with a freshly created block
/// containing only an 'unreachable'. When RemoveOrigDefaultBlock is true the
/// edge to the old default block is removed (including its PHI entries).
static void createUnreachableSwitchDefault(SwitchInst *Switch,
                                           DomTreeUpdater *DTU,
                                           bool RemoveOrigDefaultBlock = true) {
  LLVM_DEBUG(dbgs() << "SimplifyCFG: switch default is dead.\n");
  auto *BB = Switch->getParent();
  auto *OrigDefaultBlock = Switch->getDefaultDest();
  if (RemoveOrigDefaultBlock)
    OrigDefaultBlock->removePredecessor(Pred: BB);
  // Place the new block right before the old default so the layout stays
  // stable.
  BasicBlock *NewDefaultBlock = BasicBlock::Create(
      Context&: BB->getContext(), Name: BB->getName() + ".unreachabledefault", Parent: BB->getParent(),
      InsertBefore: OrigDefaultBlock);
  auto *UI = new UnreachableInst(Switch->getContext(), NewDefaultBlock);
  UI->setDebugLoc(DebugLoc::getTemporary());
  Switch->setDefaultDest(&*NewDefaultBlock);
  if (DTU) {
    SmallVector<DominatorTree::UpdateType, 2> Updates;
    Updates.push_back(Elt: {DominatorTree::Insert, BB, &*NewDefaultBlock});
    // Only delete the old edge if no remaining case still targets the old
    // default block.
    if (RemoveOrigDefaultBlock &&
        !is_contained(Range: successors(BB), Element: OrigDefaultBlock))
      Updates.push_back(Elt: {DominatorTree::Delete, BB, &*OrigDefaultBlock});
    DTU->applyUpdates(Updates);
  }
}
| 5665 | |
/// Turn a switch into an integer range comparison and branch.
/// Switches with more than 2 destinations are ignored.
/// Switches with 1 destination are also ignored.
bool SimplifyCFGOpt::turnSwitchRangeIntoICmp(SwitchInst *SI,
                                             IRBuilder<> &Builder) {
  assert(SI->getNumCases() > 1 && "Degenerate switch?");

  bool HasDefault = !SI->defaultDestUnreachable();

  auto *BB = SI->getParent();

  // Partition the cases into two sets with different destinations.
  BasicBlock *DestA = HasDefault ? SI->getDefaultDest() : nullptr;
  BasicBlock *DestB = nullptr;
  SmallVector<ConstantInt *, 16> CasesA;
  SmallVector<ConstantInt *, 16> CasesB;

  for (auto Case : SI->cases()) {
    BasicBlock *Dest = Case.getCaseSuccessor();
    if (!DestA)
      DestA = Dest;
    if (Dest == DestA) {
      CasesA.push_back(Elt: Case.getCaseValue());
      continue;
    }
    if (!DestB)
      DestB = Dest;
    if (Dest == DestB) {
      CasesB.push_back(Elt: Case.getCaseValue());
      continue;
    }
    return false; // More than two destinations.
  }
  if (!DestB)
    return false; // All destinations are the same and the default is unreachable

  assert(DestA && DestB &&
         "Single-destination switch should have been folded.");
  assert(DestA != DestB);
  assert(DestB != SI->getDefaultDest());
  assert(!CasesB.empty() && "There must be non-default cases.");
  assert(!CasesA.empty() || HasDefault);

  // Figure out if one of the sets of cases form a contiguous range.
  SmallVectorImpl<ConstantInt *> *ContiguousCases = nullptr;
  BasicBlock *ContiguousDest = nullptr;
  BasicBlock *OtherDest = nullptr;
  if (!CasesA.empty() && casesAreContiguous(Cases&: CasesA)) {
    ContiguousCases = &CasesA;
    ContiguousDest = DestA;
    OtherDest = DestB;
  } else if (casesAreContiguous(Cases&: CasesB)) {
    ContiguousCases = &CasesB;
    ContiguousDest = DestB;
    OtherDest = DestA;
  } else
    return false;

  // Start building the compare and branch.
  // The range test is "(Cond - Low) u< NumCases"; Offset is -Low, where the
  // sort done in casesAreContiguous left the range's low end at back().

  Constant *Offset = ConstantExpr::getNeg(C: ContiguousCases->back());
  Constant *NumCases =
      ConstantInt::get(Ty: Offset->getType(), V: ContiguousCases->size());

  Value *Sub = SI->getCondition();
  if (!Offset->isNullValue())
    Sub = Builder.CreateAdd(LHS: Sub, RHS: Offset, Name: Sub->getName() + ".off");

  Value *Cmp;
  // If NumCases overflowed, then all possible values jump to the successor.
  if (NumCases->isNullValue() && !ContiguousCases->empty())
    Cmp = ConstantInt::getTrue(Context&: SI->getContext());
  else
    Cmp = Builder.CreateICmpULT(LHS: Sub, RHS: NumCases, Name: "switch");
  BranchInst *NewBI = Builder.CreateCondBr(Cond: Cmp, True: ContiguousDest, False: OtherDest);

  // Update weight for the newly-created conditional branch.
  if (hasBranchWeightMD(I: *SI)) {
    SmallVector<uint64_t, 8> Weights;
    getBranchWeights(TI: SI, Weights);
    if (Weights.size() == 1 + SI->getNumCases()) {
      // Sum the per-case weights into true/false buckets, then scale down
      // until both fit in 32 bits.
      uint64_t TrueWeight = 0;
      uint64_t FalseWeight = 0;
      for (size_t I = 0, E = Weights.size(); I != E; ++I) {
        if (SI->getSuccessor(idx: I) == ContiguousDest)
          TrueWeight += Weights[I];
        else
          FalseWeight += Weights[I];
      }
      while (TrueWeight > UINT32_MAX || FalseWeight > UINT32_MAX) {
        TrueWeight /= 2;
        FalseWeight /= 2;
      }
      setBranchWeights(I: NewBI, TrueWeight, FalseWeight, /*IsExpected=*/false);
    }
  }

  // Prune obsolete incoming values off the successors' PHI nodes.
  // The switch contributed one edge per case (plus one for the default when
  // it shares the destination); the new branch contributes exactly one edge,
  // so each PHI must drop all but one of its entries for this block.
  for (auto BBI = ContiguousDest->begin(); isa<PHINode>(Val: BBI); ++BBI) {
    unsigned PreviousEdges = ContiguousCases->size();
    if (ContiguousDest == SI->getDefaultDest())
      ++PreviousEdges;
    for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I)
      cast<PHINode>(Val&: BBI)->removeIncomingValue(BB: SI->getParent());
  }
  for (auto BBI = OtherDest->begin(); isa<PHINode>(Val: BBI); ++BBI) {
    unsigned PreviousEdges = SI->getNumCases() - ContiguousCases->size();
    if (OtherDest == SI->getDefaultDest())
      ++PreviousEdges;
    for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I)
      cast<PHINode>(Val&: BBI)->removeIncomingValue(BB: SI->getParent());
  }

  // Clean up the default block - it may have phis or other instructions before
  // the unreachable terminator.
  if (!HasDefault)
    createUnreachableSwitchDefault(Switch: SI, DTU);

  auto *UnreachableDefault = SI->getDefaultDest();

  // Drop the switch.
  SI->eraseFromParent();

  if (!HasDefault && DTU)
    DTU->applyUpdates(Updates: {{DominatorTree::Delete, BB, UnreachableDefault}});

  return true;
}
| 5794 | |
/// Compute masked bits for the condition of a switch
/// and use it to remove dead cases.
static bool eliminateDeadSwitchCases(SwitchInst *SI, DomTreeUpdater *DTU,
                                     AssumptionCache *AC,
                                     const DataLayout &DL) {
  Value *Cond = SI->getCondition();
  KnownBits Known = computeKnownBits(V: Cond, DL, AC, CxtI: SI);

  // We can also eliminate cases by determining that their values are outside of
  // the limited range of the condition based on how many significant (non-sign)
  // bits are in the condition value.
  unsigned MaxSignificantBitsInCond =
      ComputeMaxSignificantBits(Op: Cond, DL, AC, CxtI: SI);

  // Gather dead cases.
  SmallVector<ConstantInt *, 8> DeadCases;
  // Per-successor live-case counts (only needed to compute DT edge deletions).
  SmallDenseMap<BasicBlock *, int, 8> NumPerSuccessorCases;
  SmallVector<BasicBlock *, 8> UniqueSuccessors;
  for (const auto &Case : SI->cases()) {
    auto *Successor = Case.getCaseSuccessor();
    if (DTU) {
      auto [It, Inserted] = NumPerSuccessorCases.try_emplace(Key: Successor);
      if (Inserted)
        UniqueSuccessors.push_back(Elt: Successor);
      ++It->second;
    }
    const APInt &CaseVal = Case.getCaseValue()->getValue();
    // A case is dead if it sets a known-zero bit, misses a known-one bit,
    // or requires more significant bits than the condition can carry.
    if (Known.Zero.intersects(RHS: CaseVal) || !Known.One.isSubsetOf(RHS: CaseVal) ||
        (CaseVal.getSignificantBits() > MaxSignificantBitsInCond)) {
      DeadCases.push_back(Elt: Case.getCaseValue());
      if (DTU)
        --NumPerSuccessorCases[Successor];
      LLVM_DEBUG(dbgs() << "SimplifyCFG: switch case " << CaseVal
                        << " is dead.\n");
    }
  }

  // If we can prove that the cases must cover all possible values, the
  // default destination becomes dead and we can remove it. If we know some
  // of the bits in the value, we can use that to more precisely compute the
  // number of possible unique case values.
  bool HasDefault = !SI->defaultDestUnreachable();
  const unsigned NumUnknownBits =
      Known.getBitWidth() - (Known.Zero | Known.One).popcount();
  assert(NumUnknownBits <= Known.getBitWidth());
  if (HasDefault && DeadCases.empty() &&
      NumUnknownBits < 64 /* avoid overflow */) {
    uint64_t AllNumCases = 1ULL << NumUnknownBits;
    if (SI->getNumCases() == AllNumCases) {
      createUnreachableSwitchDefault(Switch: SI, DTU);
      return true;
    }
    // When only one case value is missing, replace default with that case.
    // Eliminating the default branch will provide more opportunities for
    // optimization, such as lookup tables.
    if (SI->getNumCases() == AllNumCases - 1) {
      assert(NumUnknownBits > 1 && "Should be canonicalized to a branch");
      IntegerType *CondTy = cast<IntegerType>(Val: Cond->getType());
      if (CondTy->getIntegerBitWidth() > 64 ||
          !DL.fitsInLegalInteger(Width: CondTy->getIntegerBitWidth()))
        return false;

      // With NumUnknownBits > 1 the XOR of all possible case values is zero,
      // so XOR-ing the present case values yields exactly the missing one.
      uint64_t MissingCaseVal = 0;
      for (const auto &Case : SI->cases())
        MissingCaseVal ^= Case.getCaseValue()->getValue().getLimitedValue();
      auto *MissingCase =
          cast<ConstantInt>(Val: ConstantInt::get(Ty: Cond->getType(), V: MissingCaseVal));
      SwitchInstProfUpdateWrapper SIW(*SI);
      SIW.addCase(OnVal: MissingCase, Dest: SI->getDefaultDest(), W: SIW.getSuccessorWeight(idx: 0));
      createUnreachableSwitchDefault(Switch: SI, DTU, /*RemoveOrigDefaultBlock*/ false);
      // Successor 0 is the (now unreachable) default edge; zero its weight.
      SIW.setSuccessorWeight(idx: 0, W: 0);
      return true;
    }
  }

  if (DeadCases.empty())
    return false;

  SwitchInstProfUpdateWrapper SIW(*SI);
  for (ConstantInt *DeadCase : DeadCases) {
    SwitchInst::CaseIt CaseI = SI->findCaseValue(C: DeadCase);
    assert(CaseI != SI->case_default() &&
           "Case was not found. Probably mistake in DeadCases forming.");
    // Prune unused values from PHI nodes.
    CaseI->getCaseSuccessor()->removePredecessor(Pred: SI->getParent());
    SIW.removeCase(I: CaseI);
  }

  if (DTU) {
    // Only successors that lost their last case edge get a DT edge deletion.
    std::vector<DominatorTree::UpdateType> Updates;
    for (auto *Successor : UniqueSuccessors)
      if (NumPerSuccessorCases[Successor] == 0)
        Updates.push_back(x: {DominatorTree::Delete, SI->getParent(), Successor});
    DTU->applyUpdates(Updates);
  }

  return true;
}
| 5893 | |
| 5894 | /// If BB would be eligible for simplification by |
| 5895 | /// TryToSimplifyUncondBranchFromEmptyBlock (i.e. it is empty and terminated |
| 5896 | /// by an unconditional branch), look at the phi node for BB in the successor |
| 5897 | /// block and see if the incoming value is equal to CaseValue. If so, return |
| 5898 | /// the phi node, and set PhiIndex to BB's index in the phi node. |
| 5899 | static PHINode *findPHIForConditionForwarding(ConstantInt *CaseValue, |
| 5900 | BasicBlock *BB, int *PhiIndex) { |
| 5901 | if (&*BB->getFirstNonPHIIt() != BB->getTerminator()) |
| 5902 | return nullptr; // BB must be empty to be a candidate for simplification. |
| 5903 | if (!BB->getSinglePredecessor()) |
| 5904 | return nullptr; // BB must be dominated by the switch. |
| 5905 | |
| 5906 | BranchInst *Branch = dyn_cast<BranchInst>(Val: BB->getTerminator()); |
| 5907 | if (!Branch || !Branch->isUnconditional()) |
| 5908 | return nullptr; // Terminator must be unconditional branch. |
| 5909 | |
| 5910 | BasicBlock *Succ = Branch->getSuccessor(i: 0); |
| 5911 | |
| 5912 | for (PHINode &PHI : Succ->phis()) { |
| 5913 | int Idx = PHI.getBasicBlockIndex(BB); |
| 5914 | assert(Idx >= 0 && "PHI has no entry for predecessor?" ); |
| 5915 | |
| 5916 | Value *InValue = PHI.getIncomingValue(i: Idx); |
| 5917 | if (InValue != CaseValue) |
| 5918 | continue; |
| 5919 | |
| 5920 | *PhiIndex = Idx; |
| 5921 | return &PHI; |
| 5922 | } |
| 5923 | |
| 5924 | return nullptr; |
| 5925 | } |
| 5926 | |
/// Try to forward the condition of a switch instruction to a phi node
/// dominated by the switch, if that would mean that some of the destination
/// blocks of the switch can be folded away. Return true if a change is made.
static bool forwardSwitchConditionToPHI(SwitchInst *SI) {
  // Maps a phi to the list of its incoming-value slots that can be replaced
  // with the switch condition.
  using ForwardingNodesMap = DenseMap<PHINode *, SmallVector<int, 4>>;

  ForwardingNodesMap ForwardingNodes;
  BasicBlock *SwitchBlock = SI->getParent();
  bool Changed = false;
  for (const auto &Case : SI->cases()) {
    ConstantInt *CaseValue = Case.getCaseValue();
    BasicBlock *CaseDest = Case.getCaseSuccessor();

    // Replace phi operands in successor blocks that are using the constant case
    // value rather than the switch condition variable:
    // switchbb:
    // switch i32 %x, label %default [
    // i32 17, label %succ
    // ...
    // succ:
    // %r = phi i32 ... [ 17, %switchbb ] ...
    // -->
    // %r = phi i32 ... [ %x, %switchbb ] ...

    for (PHINode &Phi : CaseDest->phis()) {
      // This only works if there is exactly 1 incoming edge from the switch to
      // a phi. If there is >1, that means multiple cases of the switch map to 1
      // value in the phi, and that phi value is not the switch condition. Thus,
      // this transform would not make sense (the phi would be invalid because
      // a phi can't have different incoming values from the same block).
      int SwitchBBIdx = Phi.getBasicBlockIndex(BB: SwitchBlock);
      if (Phi.getIncomingValue(i: SwitchBBIdx) == CaseValue &&
          count(Range: Phi.blocks(), Element: SwitchBlock) == 1) {
        Phi.setIncomingValue(i: SwitchBBIdx, V: SI->getCondition());
        Changed = true;
      }
    }

    // Collect phi nodes that are indirectly using this switch's case constants.
    int PhiIdx;
    if (auto *Phi = findPHIForConditionForwarding(CaseValue, BB: CaseDest, PhiIndex: &PhiIdx))
      ForwardingNodes[Phi].push_back(Elt: PhiIdx);
  }

  for (auto &ForwardingNode : ForwardingNodes) {
    PHINode *Phi = ForwardingNode.first;
    SmallVectorImpl<int> &Indexes = ForwardingNode.second;
    // Check if it helps to fold PHI: either several slots become uniform, or
    // the condition already feeds the phi from another edge.
    if (Indexes.size() < 2 && !llvm::is_contained(Range: Phi->incoming_values(), Element: SI->getCondition()))
      continue;

    for (int Index : Indexes)
      Phi->setIncomingValue(i: Index, V: SI->getCondition());
    Changed = true;
  }

  return Changed;
}
| 5985 | |
| 5986 | /// Return true if the backend will be able to handle |
| 5987 | /// initializing an array of constants like C. |
| 5988 | static bool validLookupTableConstant(Constant *C, const TargetTransformInfo &TTI) { |
| 5989 | if (C->isThreadDependent()) |
| 5990 | return false; |
| 5991 | if (C->isDLLImportDependent()) |
| 5992 | return false; |
| 5993 | |
| 5994 | if (!isa<ConstantFP>(Val: C) && !isa<ConstantInt>(Val: C) && |
| 5995 | !isa<ConstantPointerNull>(Val: C) && !isa<GlobalValue>(Val: C) && |
| 5996 | !isa<UndefValue>(Val: C) && !isa<ConstantExpr>(Val: C)) |
| 5997 | return false; |
| 5998 | |
| 5999 | if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Val: C)) { |
| 6000 | // Pointer casts and in-bounds GEPs will not prohibit the backend from |
| 6001 | // materializing the array of constants. |
| 6002 | Constant *StrippedC = cast<Constant>(Val: CE->stripInBoundsConstantOffsets()); |
| 6003 | if (StrippedC == C || !validLookupTableConstant(C: StrippedC, TTI)) |
| 6004 | return false; |
| 6005 | } |
| 6006 | |
| 6007 | if (!TTI.shouldBuildLookupTablesForConstant(C)) |
| 6008 | return false; |
| 6009 | |
| 6010 | return true; |
| 6011 | } |
| 6012 | |
| 6013 | /// If V is a Constant, return it. Otherwise, try to look up |
| 6014 | /// its constant value in ConstantPool, returning 0 if it's not there. |
| 6015 | static Constant * |
| 6016 | lookupConstant(Value *V, |
| 6017 | const SmallDenseMap<Value *, Constant *> &ConstantPool) { |
| 6018 | if (Constant *C = dyn_cast<Constant>(Val: V)) |
| 6019 | return C; |
| 6020 | return ConstantPool.lookup(Val: V); |
| 6021 | } |
| 6022 | |
| 6023 | /// Try to fold instruction I into a constant. This works for |
| 6024 | /// simple instructions such as binary operations where both operands are |
| 6025 | /// constant or can be replaced by constants from the ConstantPool. Returns the |
| 6026 | /// resulting constant on success, 0 otherwise. |
| 6027 | static Constant * |
| 6028 | constantFold(Instruction *I, const DataLayout &DL, |
| 6029 | const SmallDenseMap<Value *, Constant *> &ConstantPool) { |
| 6030 | if (SelectInst *Select = dyn_cast<SelectInst>(Val: I)) { |
| 6031 | Constant *A = lookupConstant(V: Select->getCondition(), ConstantPool); |
| 6032 | if (!A) |
| 6033 | return nullptr; |
| 6034 | if (A->isAllOnesValue()) |
| 6035 | return lookupConstant(V: Select->getTrueValue(), ConstantPool); |
| 6036 | if (A->isNullValue()) |
| 6037 | return lookupConstant(V: Select->getFalseValue(), ConstantPool); |
| 6038 | return nullptr; |
| 6039 | } |
| 6040 | |
| 6041 | SmallVector<Constant *, 4> COps; |
| 6042 | for (unsigned N = 0, E = I->getNumOperands(); N != E; ++N) { |
| 6043 | if (Constant *A = lookupConstant(V: I->getOperand(i: N), ConstantPool)) |
| 6044 | COps.push_back(Elt: A); |
| 6045 | else |
| 6046 | return nullptr; |
| 6047 | } |
| 6048 | |
| 6049 | return ConstantFoldInstOperands(I, Ops: COps, DL); |
| 6050 | } |
| 6051 | |
/// Try to determine the resulting constant values in phi nodes
/// at the common destination basic block, *CommonDest, for one of the case
/// destinations CaseDest corresponding to value CaseVal (0 for the default
/// case), of a switch instruction SI.
static bool
getCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest,
               BasicBlock **CommonDest,
               SmallVectorImpl<std::pair<PHINode *, Constant *>> &Res,
               const DataLayout &DL, const TargetTransformInfo &TTI) {
  // The block from which we enter the common destination.
  BasicBlock *Pred = SI->getParent();

  // If CaseDest is empty except for some side-effect free instructions through
  // which we can constant-propagate the CaseVal, continue to its successor.
  SmallDenseMap<Value *, Constant *> ConstantPool;
  // Seed the pool: within this case, the switch condition equals CaseVal.
  ConstantPool.insert(KV: std::make_pair(x: SI->getCondition(), y&: CaseVal));
  for (Instruction &I : CaseDest->instructionsWithoutDebug(SkipPseudoOp: false)) {
    if (I.isTerminator()) {
      // If the terminator is a simple branch, continue to the next block.
      if (I.getNumSuccessors() != 1 || I.isSpecialTerminator())
        return false;
      Pred = CaseDest;
      CaseDest = I.getSuccessor(Idx: 0);
    } else if (Constant *C = constantFold(I: &I, DL, ConstantPool)) {
      // Instruction is side-effect free and constant.

      // If the instruction has uses outside this block or a phi node slot for
      // the block, it is not safe to bypass the instruction since it would then
      // no longer dominate all its uses.
      for (auto &Use : I.uses()) {
        User *User = Use.getUser();
        if (Instruction *I = dyn_cast<Instruction>(Val: User))
          if (I->getParent() == CaseDest)
            continue;
        if (PHINode *Phi = dyn_cast<PHINode>(Val: User))
          if (Phi->getIncomingBlock(U: Use) == CaseDest)
            continue;
        return false;
      }

      ConstantPool.insert(KV: std::make_pair(x: &I, y&: C));
    } else {
      // Not foldable; stop scanning but still check the destination below.
      break;
    }
  }

  // If we did not have a CommonDest before, use the current one.
  if (!*CommonDest)
    *CommonDest = CaseDest;
  // If the destination isn't the common one, abort.
  if (CaseDest != *CommonDest)
    return false;

  // Get the values for this case from phi nodes in the destination block.
  for (PHINode &PHI : (*CommonDest)->phis()) {
    int Idx = PHI.getBasicBlockIndex(BB: Pred);
    if (Idx == -1)
      continue;

    Constant *ConstVal =
        lookupConstant(V: PHI.getIncomingValue(i: Idx), ConstantPool);
    if (!ConstVal)
      return false;

    // Be conservative about which kinds of constants we support.
    if (!validLookupTableConstant(C: ConstVal, TTI))
      return false;

    Res.push_back(Elt: std::make_pair(x: &PHI, y&: ConstVal));
  }

  return Res.size() > 0;
}
| 6125 | |
| 6126 | // Helper function used to add CaseVal to the list of cases that generate |
| 6127 | // Result. Returns the updated number of cases that generate this result. |
| 6128 | static size_t mapCaseToResult(ConstantInt *CaseVal, |
| 6129 | SwitchCaseResultVectorTy &UniqueResults, |
| 6130 | Constant *Result) { |
| 6131 | for (auto &I : UniqueResults) { |
| 6132 | if (I.first == Result) { |
| 6133 | I.second.push_back(Elt: CaseVal); |
| 6134 | return I.second.size(); |
| 6135 | } |
| 6136 | } |
| 6137 | UniqueResults.push_back( |
| 6138 | Elt: std::make_pair(x&: Result, y: SmallVector<ConstantInt *, 4>(1, CaseVal))); |
| 6139 | return 1; |
| 6140 | } |
| 6141 | |
// Helper function that initializes a map containing
// results for the PHI node of the common destination block for a switch
// instruction. Returns false if multiple PHI nodes have been found or if
// there is not a common destination block for the switch.
static bool initializeUniqueCases(SwitchInst *SI, PHINode *&PHI,
                                  BasicBlock *&CommonDest,
                                  SwitchCaseResultVectorTy &UniqueResults,
                                  Constant *&DefaultResult,
                                  const DataLayout &DL,
                                  const TargetTransformInfo &TTI,
                                  uintptr_t MaxUniqueResults) {
  for (const auto &I : SI->cases()) {
    ConstantInt *CaseVal = I.getCaseValue();

    // Resulting value at phi nodes for this case value.
    SwitchCaseResultsTy Results;
    if (!getCaseResults(SI, CaseVal, CaseDest: I.getCaseSuccessor(), CommonDest: &CommonDest, Res&: Results,
                        DL, TTI))
      return false;

    // Only one value per case is permitted.
    if (Results.size() > 1)
      return false;

    // Add the case->result mapping to UniqueResults.
    const size_t NumCasesForResult =
        mapCaseToResult(CaseVal, UniqueResults, Result: Results.begin()->second);

    // Early out if there are too many cases for this result.
    if (NumCasesForResult > MaxSwitchCasesPerResult)
      return false;

    // Early out if there are too many unique results.
    if (UniqueResults.size() > MaxUniqueResults)
      return false;

    // Check the PHI consistency: every case must feed the same phi node.
    if (!PHI)
      PHI = Results[0].first;
    else if (PHI != Results[0].first)
      return false;
  }
  // Find the default result value.
  SmallVector<std::pair<PHINode *, Constant *>, 1> DefaultResults;
  getCaseResults(SI, CaseVal: nullptr, CaseDest: SI->getDefaultDest(), CommonDest: &CommonDest, Res&: DefaultResults,
                 DL, TTI);
  // If the default value is not found abort unless the default destination
  // is unreachable.
  DefaultResult =
      DefaultResults.size() == 1 ? DefaultResults.begin()->second : nullptr;

  return DefaultResult || SI->defaultDestUnreachable();
}
| 6195 | |
// Helper function that checks if it is possible to transform a switch with only
// two cases (or two cases + default) that produces a result into a select.
// Returns the value the phi should take (a select chain emitted through
// Builder), or null if no profitable transform applies.
// TODO: Handle switches with more than 2 cases that map to the same result.
static Value *foldSwitchToSelect(const SwitchCaseResultVectorTy &ResultVector,
                                 Constant *DefaultResult, Value *Condition,
                                 IRBuilder<> &Builder, const DataLayout &DL) {
  // If we are selecting between only two cases transform into a simple
  // select or a two-way select if default is possible.
  // Example:
  // switch (a) {                  %0 = icmp eq i32 %a, 10
  // case 10: return 42;           %1 = select i1 %0, i32 42, i32 4
  // case 20: return 2;    ---->   %2 = icmp eq i32 %a, 20
  // default: return 4;            %3 = select i1 %2, i32 2, i32 %1
  // }
  if (ResultVector.size() == 2 && ResultVector[0].second.size() == 1 &&
      ResultVector[1].second.size() == 1) {
    ConstantInt *FirstCase = ResultVector[0].second[0];
    ConstantInt *SecondCase = ResultVector[1].second[0];
    // Without a default, a miss on the first compare must yield the second
    // result directly; with a default, an inner select picks between them.
    Value *SelectValue = ResultVector[1].first;
    if (DefaultResult) {
      Value *ValueCompare =
          Builder.CreateICmpEQ(LHS: Condition, RHS: SecondCase, Name: "switch.selectcmp" );
      SelectValue = Builder.CreateSelect(C: ValueCompare, True: ResultVector[1].first,
                                         False: DefaultResult, Name: "switch.select" );
    }
    Value *ValueCompare =
        Builder.CreateICmpEQ(LHS: Condition, RHS: FirstCase, Name: "switch.selectcmp" );
    return Builder.CreateSelect(C: ValueCompare, True: ResultVector[0].first,
                                False: SelectValue, Name: "switch.select" );
  }

  // Handle the degenerate case where multiple cases map to one result value.
  if (ResultVector.size() == 1 && DefaultResult) {
    ArrayRef<ConstantInt *> CaseValues = ResultVector[0].second;
    unsigned CaseCount = CaseValues.size();
    // n bits group cases map to the same result:
    // case 0,4 -> Cond & 0b1..1011 == 0 ? result : default
    // case 0,2,4,6 -> Cond & 0b1..1001 == 0 ? result : default
    // case 0,2,8,10 -> Cond & 0b1..0101 == 0 ? result : default
    if (isPowerOf2_32(Value: CaseCount)) {
      ConstantInt *MinCaseVal = CaseValues[0];
      // If there are bits that are set exclusively by CaseValues, we
      // can transform the switch into a select if the conjunction of
      // all the values uniquely identify CaseValues.
      APInt AndMask = APInt::getAllOnes(numBits: MinCaseVal->getBitWidth());

      // Find the minimum value and compute the and of all the case values.
      for (auto *Case : CaseValues) {
        if (Case->getValue().slt(RHS: MinCaseVal->getValue()))
          MinCaseVal = Case;
        AndMask &= Case->getValue();
      }
      KnownBits Known = computeKnownBits(V: Condition, DL);

      if (!AndMask.isZero() && Known.getMaxValue().uge(RHS: AndMask)) {
        // Compute the number of bits that are free to vary.
        unsigned FreeBits = Known.countMaxActiveBits() - AndMask.popcount();

        // Check if the number of values covered by the mask is equal
        // to the number of cases. If so, (Cond & AndMask) == AndMask holds
        // exactly for the case values and for nothing else Condition can be.
        if (FreeBits == Log2_32(Value: CaseCount)) {
          Value *And = Builder.CreateAnd(LHS: Condition, RHS: AndMask);
          Value *Cmp = Builder.CreateICmpEQ(
              LHS: And, RHS: Constant::getIntegerValue(Ty: And->getType(), V: AndMask));
          return Builder.CreateSelect(C: Cmp, True: ResultVector[0].first,
                                      False: DefaultResult);
        }
      }

      // Collect the bits in which any case value differs from the minimum.
      APInt BitMask = APInt::getZero(numBits: MinCaseVal->getBitWidth());
      for (auto *Case : CaseValues)
        BitMask |= (Case->getValue() - MinCaseVal->getValue());

      // If the differing bits enumerate exactly CaseCount values, the cases
      // are MinCaseVal plus every subset of BitMask, so
      // ((Cond - MinCaseVal) & ~BitMask) == 0 identifies them.
      if (BitMask.popcount() == Log2_32(Value: CaseCount)) {
        if (!MinCaseVal->isNullValue())
          Condition = Builder.CreateSub(LHS: Condition, RHS: MinCaseVal);
        Value *And = Builder.CreateAnd(LHS: Condition, RHS: ~BitMask, Name: "switch.and" );
        Value *Cmp = Builder.CreateICmpEQ(
            LHS: And, RHS: Constant::getNullValue(Ty: And->getType()), Name: "switch.selectcmp" );
        return Builder.CreateSelect(C: Cmp, True: ResultVector[0].first, False: DefaultResult);
      }
    }

    // Handle the degenerate case where two cases have the same value:
    // fold to (Cond == C1 || Cond == C2) ? result : default.
    if (CaseValues.size() == 2) {
      Value *Cmp1 = Builder.CreateICmpEQ(LHS: Condition, RHS: CaseValues[0],
                                         Name: "switch.selectcmp.case1" );
      Value *Cmp2 = Builder.CreateICmpEQ(LHS: Condition, RHS: CaseValues[1],
                                         Name: "switch.selectcmp.case2" );
      Value *Cmp = Builder.CreateOr(LHS: Cmp1, RHS: Cmp2, Name: "switch.selectcmp" );
      return Builder.CreateSelect(C: Cmp, True: ResultVector[0].first, False: DefaultResult);
    }
  }

  return nullptr;
}
| 6295 | |
// Helper function to cleanup a switch instruction that has been converted into
// a select, fixing up PHI nodes and basic blocks.
//
// Replaces the switch with an unconditional branch to the phi's block,
// rewires PHI to take SelectValue from this block, and detaches every other
// successor. DTU, if non-null, receives the corresponding CFG edge updates.
static void removeSwitchAfterSelectFold(SwitchInst *SI, PHINode *PHI,
                                        Value *SelectValue,
                                        IRBuilder<> &Builder,
                                        DomTreeUpdater *DTU) {
  std::vector<DominatorTree::UpdateType> Updates;

  BasicBlock *SelectBB = SI->getParent();
  BasicBlock *DestBB = PHI->getParent();

  // Only record an Insert when the SelectBB->DestBB edge does not already
  // exist (the switch itself may already branch there).
  if (DTU && !is_contained(Range: predecessors(BB: DestBB), Element: SelectBB))
    Updates.push_back(x: {DominatorTree::Insert, SelectBB, DestBB});
  Builder.CreateBr(Dest: DestBB);

  // Remove the switch.

  // Drop every incoming value PHI had for SelectBB (one per case edge) and
  // replace them all with the single folded select value.
  PHI->removeIncomingValueIf(
      Predicate: [&](unsigned Idx) { return PHI->getIncomingBlock(i: Idx) == SelectBB; });
  PHI->addIncoming(V: SelectValue, BB: SelectBB);

  SmallPtrSet<BasicBlock *, 4> RemovedSuccessors;
  for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) {
    BasicBlock *Succ = SI->getSuccessor(idx: i);

    if (Succ == DestBB)
      continue;
    Succ->removePredecessor(Pred: SelectBB);
    // A successor may be listed more than once; emit one Delete per block.
    if (DTU && RemovedSuccessors.insert(Ptr: Succ).second)
      Updates.push_back(x: {DominatorTree::Delete, SelectBB, Succ});
  }
  SI->eraseFromParent();
  if (DTU)
    DTU->applyUpdates(Updates);
}
| 6331 | |
| 6332 | /// If a switch is only used to initialize one or more phi nodes in a common |
| 6333 | /// successor block with only two different constant values, try to replace the |
| 6334 | /// switch with a select. Returns true if the fold was made. |
| 6335 | static bool trySwitchToSelect(SwitchInst *SI, IRBuilder<> &Builder, |
| 6336 | DomTreeUpdater *DTU, const DataLayout &DL, |
| 6337 | const TargetTransformInfo &TTI) { |
| 6338 | Value *const Cond = SI->getCondition(); |
| 6339 | PHINode *PHI = nullptr; |
| 6340 | BasicBlock *CommonDest = nullptr; |
| 6341 | Constant *DefaultResult; |
| 6342 | SwitchCaseResultVectorTy UniqueResults; |
| 6343 | // Collect all the cases that will deliver the same value from the switch. |
| 6344 | if (!initializeUniqueCases(SI, PHI, CommonDest, UniqueResults, DefaultResult, |
| 6345 | DL, TTI, /*MaxUniqueResults*/ 2)) |
| 6346 | return false; |
| 6347 | |
| 6348 | assert(PHI != nullptr && "PHI for value select not found" ); |
| 6349 | Builder.SetInsertPoint(SI); |
| 6350 | Value *SelectValue = |
| 6351 | foldSwitchToSelect(ResultVector: UniqueResults, DefaultResult, Condition: Cond, Builder, DL); |
| 6352 | if (!SelectValue) |
| 6353 | return false; |
| 6354 | |
| 6355 | removeSwitchAfterSelectFold(SI, PHI, SelectValue, Builder, DTU); |
| 6356 | return true; |
| 6357 | } |
| 6358 | |
namespace {

/// This class represents a lookup table that can be used to replace a switch.
/// The representation is chosen once by the constructor and then consumed by
/// buildLookup().
class SwitchLookupTable {
public:
  /// Create a lookup table to use as a switch replacement with the contents
  /// of Values, using DefaultValue to fill any holes in the table.
  SwitchLookupTable(
      Module &M, uint64_t TableSize, ConstantInt *Offset,
      const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values,
      Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName);

  /// Build instructions with Builder to retrieve the value at
  /// the position given by Index in the lookup table.
  Value *buildLookup(Value *Index, IRBuilder<> &Builder, const DataLayout &DL);

  /// Return true if a table with TableSize elements of
  /// type ElementType would fit in a target-legal register.
  static bool wouldFitInRegister(const DataLayout &DL, uint64_t TableSize,
                                 Type *ElementType);

private:
  // Depending on the contents of the table, it can be represented in
  // different ways.
  enum {
    // For tables where each element contains the same value, we just have to
    // store that single value and return it for each lookup.
    SingleValueKind,

    // For tables where there is a linear relationship between table index
    // and values. We calculate the result with a simple multiplication
    // and addition instead of a table lookup.
    LinearMapKind,

    // For small tables with integer elements, we can pack them into a bitmap
    // that fits into a target-legal register. Values are retrieved by
    // shift and mask operations.
    BitMapKind,

    // The table is stored as an array of values. Values are retrieved by load
    // instructions from the table.
    ArrayKind
  } Kind;

  // For SingleValueKind, this is the single value.
  Constant *SingleValue = nullptr;

  // For BitMapKind, this is the bitmap.
  ConstantInt *BitMap = nullptr;
  IntegerType *BitMapElementTy = nullptr;

  // For LinearMapKind, these are the constants used to derive the value.
  ConstantInt *LinearOffset = nullptr;
  ConstantInt *LinearMultiplier = nullptr;
  // True when the generated mul/add may wrap, i.e. nsw must not be attached.
  bool LinearMapValWrapped = false;

  // For ArrayKind, this is the array.
  GlobalVariable *Array = nullptr;
};

} // end anonymous namespace
| 6420 | |
// Populate the table contents and pick the cheapest representation, tried in
// order: single value, linear map, bitmap, and finally a constant array.
SwitchLookupTable::SwitchLookupTable(
    Module &M, uint64_t TableSize, ConstantInt *Offset,
    const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values,
    Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName) {
  assert(Values.size() && "Can't build lookup table without values!" );
  assert(TableSize >= Values.size() && "Can't fit values in table!" );

  // If all values in the table are equal, this is that value.
  SingleValue = Values.begin()->second;

  Type *ValueType = Values.begin()->second->getType();

  // Build up the table contents.
  SmallVector<Constant *, 64> TableContents(TableSize);
  for (const auto &[CaseVal, CaseRes] : Values) {
    assert(CaseRes->getType() == ValueType);

    // Table slots are indexed relative to the minimum case value (Offset).
    uint64_t Idx = (CaseVal->getValue() - Offset->getValue()).getLimitedValue();
    TableContents[Idx] = CaseRes;

    // Track whether all entries are still one distinct value; poison entries
    // act as wildcards that match any value.
    if (SingleValue && !isa<PoisonValue>(Val: CaseRes) && CaseRes != SingleValue)
      SingleValue = isa<PoisonValue>(Val: SingleValue) ? CaseRes : nullptr;
  }

  // Fill in any holes in the table with the default result.
  if (Values.size() < TableSize) {
    assert(DefaultValue &&
           "Need a default value to fill the lookup table holes." );
    assert(DefaultValue->getType() == ValueType);
    for (uint64_t I = 0; I < TableSize; ++I) {
      if (!TableContents[I])
        TableContents[I] = DefaultValue;
    }

    // If the default value is poison, all the holes are poison.
    bool DefaultValueIsPoison = isa<PoisonValue>(Val: DefaultValue);

    if (DefaultValue != SingleValue && !DefaultValueIsPoison)
      SingleValue = nullptr;
  }

  // If each element in the table contains the same value, we only need to store
  // that single value.
  if (SingleValue) {
    Kind = SingleValueKind;
    return;
  }

  // Check if we can derive the value with a linear transformation from the
  // table index.
  if (isa<IntegerType>(Val: ValueType)) {
    bool LinearMappingPossible = true;
    APInt PrevVal;
    APInt DistToPrev;
    // When linear map is monotonic and signed overflow doesn't happen on
    // maximum index, we can attach nsw on Add and Mul.
    bool NonMonotonic = false;
    assert(TableSize >= 2 && "Should be a SingleValue table." );
    // Check if there is the same distance between two consecutive values.
    for (uint64_t I = 0; I < TableSize; ++I) {
      ConstantInt *ConstVal = dyn_cast<ConstantInt>(Val: TableContents[I]);

      if (!ConstVal && isa<PoisonValue>(Val: TableContents[I])) {
        // This is a poison, so it's (probably) a lookup table hole.
        // To prevent any regressions from before we switched to using poison as
        // the default value, holes will fall back to using the first value.
        // This can be removed once we add proper handling for poisons in lookup
        // tables.
        ConstVal = dyn_cast<ConstantInt>(Val: Values[0].second);
      }

      if (!ConstVal) {
        // This is an undef. We could deal with it, but undefs in lookup tables
        // are very seldom. It's probably not worth the additional complexity.
        LinearMappingPossible = false;
        break;
      }
      const APInt &Val = ConstVal->getValue();
      if (I != 0) {
        APInt Dist = Val - PrevVal;
        if (I == 1) {
          DistToPrev = Dist;
        } else if (Dist != DistToPrev) {
          LinearMappingPossible = false;
          break;
        }
        // Flag a signed wrap: with a positive stride the value must strictly
        // increase in the signed domain, and vice versa.
        NonMonotonic |=
            Dist.isStrictlyPositive() ? Val.sle(RHS: PrevVal) : Val.sgt(RHS: PrevVal);
      }
      PrevVal = Val;
    }
    if (LinearMappingPossible) {
      LinearOffset = cast<ConstantInt>(Val: TableContents[0]);
      LinearMultiplier = ConstantInt::get(Context&: M.getContext(), V: DistToPrev);
      // NOTE: this local M shadows the Module parameter from here on.
      APInt M = LinearMultiplier->getValue();
      bool MayWrap = true;
      // Probe multiplier * (TableSize - 1) for signed overflow to decide
      // whether nsw may be attached to the generated mul/add.
      if (isIntN(N: M.getBitWidth(), x: TableSize - 1))
        (void)M.smul_ov(RHS: APInt(M.getBitWidth(), TableSize - 1), Overflow&: MayWrap);
      LinearMapValWrapped = NonMonotonic || MayWrap;
      Kind = LinearMapKind;
      ++NumLinearMaps;
      return;
    }
  }

  // If the type is integer and the table fits in a register, build a bitmap.
  if (wouldFitInRegister(DL, TableSize, ElementType: ValueType)) {
    IntegerType *IT = cast<IntegerType>(Val: ValueType);
    APInt TableInt(TableSize * IT->getBitWidth(), 0);
    // Iterate backwards so entry 0 lands in the least significant bits.
    for (uint64_t I = TableSize; I > 0; --I) {
      TableInt <<= IT->getBitWidth();
      // Insert values into the bitmap. Undef values are set to zero.
      if (!isa<UndefValue>(Val: TableContents[I - 1])) {
        ConstantInt *Val = cast<ConstantInt>(Val: TableContents[I - 1]);
        TableInt |= Val->getValue().zext(width: TableInt.getBitWidth());
      }
    }
    BitMap = ConstantInt::get(Context&: M.getContext(), V: TableInt);
    BitMapElementTy = IT;
    Kind = BitMapKind;
    ++NumBitMaps;
    return;
  }

  // Store the table in an array.
  ArrayType *ArrayTy = ArrayType::get(ElementType: ValueType, NumElements: TableSize);
  Constant *Initializer = ConstantArray::get(T: ArrayTy, V: TableContents);

  Array = new GlobalVariable(M, ArrayTy, /*isConstant=*/true,
                             GlobalVariable::PrivateLinkage, Initializer,
                             "switch.table." + FuncName);
  Array->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
  // Set the alignment to that of an array item. We will be only loading one
  // value out of it.
  Array->setAlignment(DL.getPrefTypeAlign(Ty: ValueType));
  Kind = ArrayKind;
}
| 6558 | |
// Emit the instructions that read the table entry for Index, according to the
// representation chosen by the constructor.
Value *SwitchLookupTable::buildLookup(Value *Index, IRBuilder<> &Builder,
                                      const DataLayout &DL) {
  switch (Kind) {
  case SingleValueKind:
    // Every slot holds the same constant; no instructions needed.
    return SingleValue;
  case LinearMapKind: {
    // Derive the result value from the input value:
    // result = LinearOffset + Index * LinearMultiplier.
    Value *Result = Builder.CreateIntCast(V: Index, DestTy: LinearMultiplier->getType(),
                                          isSigned: false, Name: "switch.idx.cast" );
    if (!LinearMultiplier->isOne())
      Result = Builder.CreateMul(LHS: Result, RHS: LinearMultiplier, Name: "switch.idx.mult" ,
                                 /*HasNUW = */ false,
                                 /*HasNSW = */ !LinearMapValWrapped);

    if (!LinearOffset->isZero())
      Result = Builder.CreateAdd(LHS: Result, RHS: LinearOffset, Name: "switch.offset" ,
                                 /*HasNUW = */ false,
                                 /*HasNSW = */ !LinearMapValWrapped);
    return Result;
  }
  case BitMapKind: {
    // Type of the bitmap (e.g. i59).
    IntegerType *MapTy = BitMap->getIntegerType();

    // Cast Index to the same type as the bitmap.
    // Note: The Index is <= the number of elements in the table, so
    // truncating it to the width of the bitmask is safe.
    Value *ShiftAmt = Builder.CreateZExtOrTrunc(V: Index, DestTy: MapTy, Name: "switch.cast" );

    // Multiply the shift amount by the element width. NUW/NSW can always be
    // set, because wouldFitInRegister guarantees Index * ShiftAmt is in
    // BitMap's bit width.
    ShiftAmt = Builder.CreateMul(
        LHS: ShiftAmt, RHS: ConstantInt::get(Ty: MapTy, V: BitMapElementTy->getBitWidth()),
        Name: "switch.shiftamt" ,/*HasNUW =*/true,/*HasNSW =*/true);

    // Shift down.
    Value *DownShifted =
        Builder.CreateLShr(LHS: BitMap, RHS: ShiftAmt, Name: "switch.downshift" );
    // Mask off: the truncation keeps only the element's bits.
    return Builder.CreateTrunc(V: DownShifted, DestTy: BitMapElementTy, Name: "switch.masked" );
  }
  case ArrayKind: {
    Type *IndexTy = DL.getIndexType(PtrTy: Array->getType());

    if (Index->getType() != IndexTy)
      Index = Builder.CreateZExtOrTrunc(V: Index, DestTy: IndexTy);

    // GEP through the [N x T] global: first index steps over the pointer,
    // second selects the element.
    Value *GEPIndices[] = {ConstantInt::get(Ty: IndexTy, V: 0), Index};
    Value *GEP = Builder.CreateInBoundsGEP(Ty: Array->getValueType(), Ptr: Array,
                                           IdxList: GEPIndices, Name: "switch.gep" );
    return Builder.CreateLoad(
        Ty: cast<ArrayType>(Val: Array->getValueType())->getElementType(), Ptr: GEP,
        Name: "switch.load" );
  }
  }
  llvm_unreachable("Unknown lookup table kind!" );
}
| 6617 | |
| 6618 | bool SwitchLookupTable::wouldFitInRegister(const DataLayout &DL, |
| 6619 | uint64_t TableSize, |
| 6620 | Type *ElementType) { |
| 6621 | auto *IT = dyn_cast<IntegerType>(Val: ElementType); |
| 6622 | if (!IT) |
| 6623 | return false; |
| 6624 | // FIXME: If the type is wider than it needs to be, e.g. i8 but all values |
| 6625 | // are <= 15, we could try to narrow the type. |
| 6626 | |
| 6627 | // Avoid overflow, fitsInLegalInteger uses unsigned int for the width. |
| 6628 | if (TableSize >= UINT_MAX / IT->getBitWidth()) |
| 6629 | return false; |
| 6630 | return DL.fitsInLegalInteger(Width: TableSize * IT->getBitWidth()); |
| 6631 | } |
| 6632 | |
| 6633 | static bool isTypeLegalForLookupTable(Type *Ty, const TargetTransformInfo &TTI, |
| 6634 | const DataLayout &DL) { |
| 6635 | // Allow any legal type. |
| 6636 | if (TTI.isTypeLegal(Ty)) |
| 6637 | return true; |
| 6638 | |
| 6639 | auto *IT = dyn_cast<IntegerType>(Val: Ty); |
| 6640 | if (!IT) |
| 6641 | return false; |
| 6642 | |
| 6643 | // Also allow power of 2 integer types that have at least 8 bits and fit in |
| 6644 | // a register. These types are common in frontend languages and targets |
| 6645 | // usually support loads of these types. |
| 6646 | // TODO: We could relax this to any integer that fits in a register and rely |
| 6647 | // on ABI alignment and padding in the table to allow the load to be widened. |
| 6648 | // Or we could widen the constants and truncate the load. |
| 6649 | unsigned BitWidth = IT->getBitWidth(); |
| 6650 | return BitWidth >= 8 && isPowerOf2_32(Value: BitWidth) && |
| 6651 | DL.fitsInLegalInteger(Width: IT->getBitWidth()); |
| 6652 | } |
| 6653 | |
// A switch is considered "dense" when at least 40% of the span between the
// smallest and largest case value is covered by actual cases. 40% matches the
// optsize/minsize jump-table density threshold in
// TargetLoweringBase::isSuitableForJumpTable(), on which this was based.
static bool isSwitchDense(uint64_t NumCases, uint64_t CaseRange) {
  constexpr uint64_t MinDensity = 40; // percent

  // Bail out when CaseRange * 100 below could overflow.
  if (CaseRange >= UINT64_MAX / 100)
    return false;

  // Equivalent to NumCases / CaseRange >= MinDensity%, without division.
  return NumCases * 100 >= CaseRange * MinDensity;
}
| 6665 | |
| 6666 | static bool isSwitchDense(ArrayRef<int64_t> Values) { |
| 6667 | uint64_t Diff = (uint64_t)Values.back() - (uint64_t)Values.front(); |
| 6668 | uint64_t Range = Diff + 1; |
| 6669 | if (Range < Diff) |
| 6670 | return false; // Overflow. |
| 6671 | |
| 6672 | return isSwitchDense(NumCases: Values.size(), CaseRange: Range); |
| 6673 | } |
| 6674 | |
| 6675 | /// Determine whether a lookup table should be built for this switch, based on |
| 6676 | /// the number of cases, size of the table, and the types of the results. |
| 6677 | // TODO: We could support larger than legal types by limiting based on the |
| 6678 | // number of loads required and/or table size. If the constants are small we |
| 6679 | // could use smaller table entries and extend after the load. |
| 6680 | static bool |
| 6681 | shouldBuildLookupTable(SwitchInst *SI, uint64_t TableSize, |
| 6682 | const TargetTransformInfo &TTI, const DataLayout &DL, |
| 6683 | const SmallDenseMap<PHINode *, Type *> &ResultTypes) { |
| 6684 | if (SI->getNumCases() > TableSize) |
| 6685 | return false; // TableSize overflowed. |
| 6686 | |
| 6687 | bool AllTablesFitInRegister = true; |
| 6688 | bool HasIllegalType = false; |
| 6689 | for (const auto &I : ResultTypes) { |
| 6690 | Type *Ty = I.second; |
| 6691 | |
| 6692 | // Saturate this flag to true. |
| 6693 | HasIllegalType = HasIllegalType || !isTypeLegalForLookupTable(Ty, TTI, DL); |
| 6694 | |
| 6695 | // Saturate this flag to false. |
| 6696 | AllTablesFitInRegister = |
| 6697 | AllTablesFitInRegister && |
| 6698 | SwitchLookupTable::wouldFitInRegister(DL, TableSize, ElementType: Ty); |
| 6699 | |
| 6700 | // If both flags saturate, we're done. NOTE: This *only* works with |
| 6701 | // saturating flags, and all flags have to saturate first due to the |
| 6702 | // non-deterministic behavior of iterating over a dense map. |
| 6703 | if (HasIllegalType && !AllTablesFitInRegister) |
| 6704 | break; |
| 6705 | } |
| 6706 | |
| 6707 | // If each table would fit in a register, we should build it anyway. |
| 6708 | if (AllTablesFitInRegister) |
| 6709 | return true; |
| 6710 | |
| 6711 | // Don't build a table that doesn't fit in-register if it has illegal types. |
| 6712 | if (HasIllegalType) |
| 6713 | return false; |
| 6714 | |
| 6715 | return isSwitchDense(NumCases: SI->getNumCases(), CaseRange: TableSize); |
| 6716 | } |
| 6717 | |
| 6718 | static bool shouldUseSwitchConditionAsTableIndex( |
| 6719 | ConstantInt &MinCaseVal, const ConstantInt &MaxCaseVal, |
| 6720 | bool HasDefaultResults, const SmallDenseMap<PHINode *, Type *> &ResultTypes, |
| 6721 | const DataLayout &DL, const TargetTransformInfo &TTI) { |
| 6722 | if (MinCaseVal.isNullValue()) |
| 6723 | return true; |
| 6724 | if (MinCaseVal.isNegative() || |
| 6725 | MaxCaseVal.getLimitedValue() == std::numeric_limits<uint64_t>::max() || |
| 6726 | !HasDefaultResults) |
| 6727 | return false; |
| 6728 | return all_of(Range: ResultTypes, P: [&](const auto &KV) { |
| 6729 | return SwitchLookupTable::wouldFitInRegister( |
| 6730 | DL, TableSize: MaxCaseVal.getLimitedValue() + 1 /* TableSize */, |
| 6731 | ElementType: KV.second /* ResultType */); |
| 6732 | }); |
| 6733 | } |
| 6734 | |
/// Try to reuse the switch table index compare. Following pattern:
/// \code
///     if (idx < tablesize)
///        r = table[idx]; // table does not contain default_value
///     else
///        r = default_value;
///     if (r != default_value)
///        ...
/// \endcode
/// Is optimized to:
/// \code
///     cond = idx < tablesize;
///     if (cond)
///        r = table[idx];
///     else
///        r = default_value;
///     if (cond)
///        ...
/// \endcode
/// Jump threading will then eliminate the second if(cond).
static void reuseTableCompare(
    User *PhiUser, BasicBlock *PhiBlock, BranchInst *RangeCheckBranch,
    Constant *DefaultValue,
    const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values) {
  // Only an icmp user of the phi can be folded into the range check.
  ICmpInst *CmpInst = dyn_cast<ICmpInst>(Val: PhiUser);
  if (!CmpInst)
    return;

  // We require that the compare is in the same block as the phi so that jump
  // threading can do its work afterwards.
  if (CmpInst->getParent() != PhiBlock)
    return;

  // The compare must be against a constant for folding to be decidable.
  Constant *CmpOp1 = dyn_cast<Constant>(Val: CmpInst->getOperand(i_nocapture: 1));
  if (!CmpOp1)
    return;

  Value *RangeCmp = RangeCheckBranch->getCondition();
  Constant *TrueConst = ConstantInt::getTrue(Ty: RangeCmp->getType());
  Constant *FalseConst = ConstantInt::getFalse(Ty: RangeCmp->getType());

  // Check if the compare with the default value is constant true or false.
  const DataLayout &DL = PhiBlock->getDataLayout();
  Constant *DefaultConst = ConstantFoldCompareInstOperands(
      Predicate: CmpInst->getPredicate(), LHS: DefaultValue, RHS: CmpOp1, DL);
  if (DefaultConst != TrueConst && DefaultConst != FalseConst)
    return;

  // Check if the compare with the case values is distinct from the default
  // compare result. Every case must fold to the same constant, opposite to
  // the default's, for the compare to be equivalent to the range check.
  for (auto ValuePair : Values) {
    Constant *CaseConst = ConstantFoldCompareInstOperands(
        Predicate: CmpInst->getPredicate(), LHS: ValuePair.second, RHS: CmpOp1, DL);
    if (!CaseConst || CaseConst == DefaultConst ||
        (CaseConst != TrueConst && CaseConst != FalseConst))
      return;
  }

  // Check if the branch instruction dominates the phi node. It's a simple
  // dominance check, but sufficient for our needs.
  // Although this check is invariant in the calling loops, it's better to do it
  // at this late stage. Practically we do it at most once for a switch.
  BasicBlock *BranchBlock = RangeCheckBranch->getParent();
  for (BasicBlock *Pred : predecessors(BB: PhiBlock)) {
    if (Pred != BranchBlock && Pred->getUniquePredecessor() != BranchBlock)
      return;
  }

  if (DefaultConst == FalseConst) {
    // The compare yields the same result. We can replace it.
    CmpInst->replaceAllUsesWith(V: RangeCmp);
    ++NumTableCmpReuses;
  } else {
    // The compare yields the same result, just inverted. We can replace it.
    Value *InvertedTableCmp = BinaryOperator::CreateXor(
        V1: RangeCmp, V2: ConstantInt::get(Ty: RangeCmp->getType(), V: 1), Name: "inverted.cmp" ,
        InsertBefore: RangeCheckBranch->getIterator());
    CmpInst->replaceAllUsesWith(V: InvertedTableCmp);
    ++NumTableCmpReuses;
  }
}
| 6816 | |
/// If the switch is only used to initialize one or more phi nodes in a common
/// successor block with different constant values, replace the switch with
/// lookup tables.
static bool switchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
                                DomTreeUpdater *DTU, const DataLayout &DL,
                                const TargetTransformInfo &TTI) {
  assert(SI->getNumCases() > 1 && "Degenerate switch?");

  BasicBlock *BB = SI->getParent();
  Function *Fn = BB->getParent();
  // Only build lookup table when we have a target that supports it or the
  // attribute is not set.
  if (!TTI.shouldBuildLookupTables() ||
      (Fn->getFnAttribute(Kind: "no-jump-tables").getValueAsBool()))
    return false;

  // FIXME: If the switch is too sparse for a lookup table, perhaps we could
  // split off a dense part and build a lookup table for that.

  // FIXME: This creates arrays of GEPs to constant strings, which means each
  // GEP needs a runtime relocation in PIC code. We should just build one big
  // string and lookup indices into that.

  // Ignore switches with less than three cases. Lookup tables will not make
  // them faster, so we don't analyze them.
  if (SI->getNumCases() < 3)
    return false;

  // Figure out the corresponding result for each case value and phi node in the
  // common destination, as well as the min and max case values.
  assert(!SI->cases().empty());
  SwitchInst::CaseIt CI = SI->case_begin();
  ConstantInt *MinCaseVal = CI->getCaseValue();
  ConstantInt *MaxCaseVal = CI->getCaseValue();

  BasicBlock *CommonDest = nullptr;

  using ResultListTy = SmallVector<std::pair<ConstantInt *, Constant *>, 4>;
  SmallDenseMap<PHINode *, ResultListTy> ResultLists;

  SmallDenseMap<PHINode *, Constant *> DefaultResults;
  SmallDenseMap<PHINode *, Type *> ResultTypes;
  SmallVector<PHINode *, 4> PHIs;

  for (SwitchInst::CaseIt E = SI->case_end(); CI != E; ++CI) {
    ConstantInt *CaseVal = CI->getCaseValue();
    // Track signed min/max case values; they determine the table bounds.
    if (CaseVal->getValue().slt(RHS: MinCaseVal->getValue()))
      MinCaseVal = CaseVal;
    if (CaseVal->getValue().sgt(RHS: MaxCaseVal->getValue()))
      MaxCaseVal = CaseVal;

    // Resulting value at phi nodes for this case value.
    using ResultsTy = SmallVector<std::pair<PHINode *, Constant *>, 4>;
    ResultsTy Results;
    if (!getCaseResults(SI, CaseVal, CaseDest: CI->getCaseSuccessor(), CommonDest: &CommonDest,
                        Res&: Results, DL, TTI))
      return false;

    // Append the result from this case to the list for each phi.
    for (const auto &I : Results) {
      PHINode *PHI = I.first;
      Constant *Value = I.second;
      auto [It, Inserted] = ResultLists.try_emplace(Key: PHI);
      if (Inserted)
        PHIs.push_back(Elt: PHI);
      It->second.push_back(Elt: std::make_pair(x&: CaseVal, y&: Value));
    }
  }

  // Keep track of the result types.
  for (PHINode *PHI : PHIs) {
    ResultTypes[PHI] = ResultLists[PHI][0].second->getType();
  }

  uint64_t NumResults = ResultLists[PHIs[0]].size();

  // If the table has holes, we need a constant result for the default case
  // or a bitmask that fits in a register.
  SmallVector<std::pair<PHINode *, Constant *>, 4> DefaultResultsList;
  bool HasDefaultResults =
      getCaseResults(SI, CaseVal: nullptr, CaseDest: SI->getDefaultDest(), CommonDest: &CommonDest,
                     Res&: DefaultResultsList, DL, TTI);

  for (const auto &I : DefaultResultsList) {
    PHINode *PHI = I.first;
    Constant *Result = I.second;
    DefaultResults[PHI] = Result;
  }

  bool UseSwitchConditionAsTableIndex = shouldUseSwitchConditionAsTableIndex(
      MinCaseVal&: *MinCaseVal, MaxCaseVal: *MaxCaseVal, HasDefaultResults, ResultTypes, DL, TTI);
  uint64_t TableSize;
  if (UseSwitchConditionAsTableIndex)
    TableSize = MaxCaseVal->getLimitedValue() + 1;
  else
    TableSize =
        (MaxCaseVal->getValue() - MinCaseVal->getValue()).getLimitedValue() + 1;

  // If the default destination is unreachable, or if the lookup table covers
  // all values of the conditional variable, branch directly to the lookup table
  // BB. Otherwise, check that the condition is within the case range.
  bool DefaultIsReachable = !SI->defaultDestUnreachable();

  bool TableHasHoles = (NumResults < TableSize);

  // If the table has holes but the default destination doesn't produce any
  // constant results, the lookup table entries corresponding to the holes will
  // contain poison.
  bool AllHolesArePoison = TableHasHoles && !HasDefaultResults;

  // If the default destination doesn't produce a constant result but is still
  // reachable, and the lookup table has holes, we need to use a mask to
  // determine if the current index should load from the lookup table or jump
  // to the default case.
  // The mask is unnecessary if the table has holes but the default destination
  // is unreachable, as in that case the holes must also be unreachable.
  bool NeedMask = AllHolesArePoison && DefaultIsReachable;
  if (NeedMask) {
    // As an extra penalty for the validity test we require more cases.
    if (SI->getNumCases() < 4) // FIXME: Find best threshold value (benchmark).
      return false;
    // The bitmask must fit in a single legal register to be cheap to test.
    if (!DL.fitsInLegalInteger(Width: TableSize))
      return false;
  }

  if (!shouldBuildLookupTable(SI, TableSize, TTI, DL, ResultTypes))
    return false;

  std::vector<DominatorTree::UpdateType> Updates;

  // Compute the maximum table size representable by the integer type we are
  // switching upon.
  unsigned CaseSize = MinCaseVal->getType()->getPrimitiveSizeInBits();
  uint64_t MaxTableSize = CaseSize > 63 ? UINT64_MAX : 1ULL << CaseSize;
  assert(MaxTableSize >= TableSize &&
         "It is impossible for a switch to have more entries than the max "
         "representable value of its input integer type's size.");

  // Create the BB that does the lookups.
  Module &Mod = *CommonDest->getParent()->getParent();
  BasicBlock *LookupBB = BasicBlock::Create(
      Context&: Mod.getContext(), Name: "switch.lookup", Parent: CommonDest->getParent(), InsertBefore: CommonDest);

  // Compute the table index value.
  Builder.SetInsertPoint(SI);
  Value *TableIndex;
  ConstantInt *TableIndexOffset;
  if (UseSwitchConditionAsTableIndex) {
    TableIndexOffset = ConstantInt::get(Ty: MaxCaseVal->getIntegerType(), V: 0);
    TableIndex = SI->getCondition();
  } else {
    TableIndexOffset = MinCaseVal;
    // If the default is unreachable, all case values are s>= MinCaseVal. Then
    // we can try to attach nsw.
    bool MayWrap = true;
    if (!DefaultIsReachable) {
      APInt Res = MaxCaseVal->getValue().ssub_ov(RHS: MinCaseVal->getValue(), Overflow&: MayWrap);
      (void)Res;
    }

    TableIndex = Builder.CreateSub(LHS: SI->getCondition(), RHS: TableIndexOffset,
                                   Name: "switch.tableidx", /*HasNUW =*/false,
                                   /*HasNSW =*/!MayWrap);
  }

  BranchInst *RangeCheckBranch = nullptr;

  // Grow the table to cover all possible index values to avoid the range check.
  // It will use the default result to fill in the table holes later, so make
  // sure it exists.
  if (UseSwitchConditionAsTableIndex && HasDefaultResults) {
    ConstantRange CR = computeConstantRange(V: TableIndex, /* ForSigned */ false);
    // Growing the table shouldn't have any size impact because we check
    // wouldFitInRegister first.
    // TODO: Consider growing the table also when it doesn't fit in a register
    // if no optsize is specified.
    const uint64_t UpperBound = CR.getUpper().getLimitedValue();
    if (!CR.isUpperWrapped() && all_of(Range&: ResultTypes, P: [&](const auto &KV) {
          return SwitchLookupTable::wouldFitInRegister(
              DL, TableSize: UpperBound, ElementType: KV.second /* ResultType */);
        })) {
      // There may be some case index larger than the UpperBound (unreachable
      // case), so make sure the table size does not get smaller.
      TableSize = std::max(a: UpperBound, b: TableSize);
      // The default branch is unreachable after we enlarge the lookup table.
      // Adjust DefaultIsReachable to reuse code path.
      DefaultIsReachable = false;
    }
  }

  const bool GeneratingCoveredLookupTable = (MaxTableSize == TableSize);
  if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
    Builder.CreateBr(Dest: LookupBB);
    if (DTU)
      Updates.push_back(x: {DominatorTree::Insert, BB, LookupBB});
    // Note: We call removePredecessor later since we need to be able to get the
    // PHI value for the default case in case we're using a bit mask.
  } else {
    // Guard the lookup with an unsigned range check on the (offset) index.
    Value *Cmp = Builder.CreateICmpULT(
        LHS: TableIndex, RHS: ConstantInt::get(Ty: MinCaseVal->getType(), V: TableSize));
    RangeCheckBranch =
        Builder.CreateCondBr(Cond: Cmp, True: LookupBB, False: SI->getDefaultDest());
    if (DTU)
      Updates.push_back(x: {DominatorTree::Insert, BB, LookupBB});
  }

  // Populate the BB that does the lookups.
  Builder.SetInsertPoint(LookupBB);

  if (NeedMask) {
    // Before doing the lookup, we do the hole check. The LookupBB is therefore
    // re-purposed to do the hole check, and we create a new LookupBB.
    BasicBlock *MaskBB = LookupBB;
    MaskBB->setName("switch.hole_check");
    LookupBB = BasicBlock::Create(Context&: Mod.getContext(), Name: "switch.lookup",
                                  Parent: CommonDest->getParent(), InsertBefore: CommonDest);

    // Make the mask's bitwidth at least 8-bit and a power-of-2 to avoid
    // unnecessary illegal types.
    uint64_t TableSizePowOf2 = NextPowerOf2(A: std::max(a: 7ULL, b: TableSize - 1ULL));
    APInt MaskInt(TableSizePowOf2, 0);
    APInt One(TableSizePowOf2, 1);
    // Build bitmask; fill in a 1 bit for every case.
    const ResultListTy &ResultList = ResultLists[PHIs[0]];
    for (const auto &Result : ResultList) {
      uint64_t Idx = (Result.first->getValue() - TableIndexOffset->getValue())
                         .getLimitedValue();
      MaskInt |= One << Idx;
    }
    ConstantInt *TableMask = ConstantInt::get(Context&: Mod.getContext(), V: MaskInt);

    // Get the TableIndex'th bit of the bitmask.
    // If this bit is 0 (meaning hole) jump to the default destination,
    // else continue with table lookup.
    IntegerType *MapTy = TableMask->getIntegerType();
    Value *MaskIndex =
        Builder.CreateZExtOrTrunc(V: TableIndex, DestTy: MapTy, Name: "switch.maskindex");
    Value *Shifted = Builder.CreateLShr(LHS: TableMask, RHS: MaskIndex, Name: "switch.shifted");
    Value *LoBit = Builder.CreateTrunc(
        V: Shifted, DestTy: Type::getInt1Ty(C&: Mod.getContext()), Name: "switch.lobit");
    Builder.CreateCondBr(Cond: LoBit, True: LookupBB, False: SI->getDefaultDest());
    if (DTU) {
      Updates.push_back(x: {DominatorTree::Insert, MaskBB, LookupBB});
      Updates.push_back(x: {DominatorTree::Insert, MaskBB, SI->getDefaultDest()});
    }
    Builder.SetInsertPoint(LookupBB);
    addPredecessorToBlock(Succ: SI->getDefaultDest(), NewPred: MaskBB, ExistPred: BB);
  }

  if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
    // We cached PHINodes in PHIs. To avoid accessing deleted PHINodes later,
    // do not delete PHINodes here.
    SI->getDefaultDest()->removePredecessor(Pred: BB,
                                            /*KeepOneInputPHIs=*/true);
    if (DTU)
      Updates.push_back(x: {DominatorTree::Delete, BB, SI->getDefaultDest()});
  }

  for (PHINode *PHI : PHIs) {
    const ResultListTy &ResultList = ResultLists[PHI];

    Type *ResultType = ResultList.begin()->second->getType();

    // Use any value to fill the lookup table holes.
    Constant *DV =
        AllHolesArePoison ? PoisonValue::get(T: ResultType) : DefaultResults[PHI];
    StringRef FuncName = Fn->getName();
    SwitchLookupTable Table(Mod, TableSize, TableIndexOffset, ResultList, DV,
                            DL, FuncName);

    Value *Result = Table.buildLookup(Index: TableIndex, Builder, DL);

    // Do a small peephole optimization: re-use the switch table compare if
    // possible.
    if (!TableHasHoles && HasDefaultResults && RangeCheckBranch) {
      BasicBlock *PhiBlock = PHI->getParent();
      // Search for compare instructions which use the phi.
      for (auto *User : PHI->users()) {
        reuseTableCompare(PhiUser: User, PhiBlock, RangeCheckBranch, DefaultValue: DV, Values: ResultList);
      }
    }

    PHI->addIncoming(V: Result, BB: LookupBB);
  }

  Builder.CreateBr(Dest: CommonDest);
  if (DTU)
    Updates.push_back(x: {DominatorTree::Insert, LookupBB, CommonDest});

  // Remove the switch.
  SmallPtrSet<BasicBlock *, 8> RemovedSuccessors;
  for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) {
    BasicBlock *Succ = SI->getSuccessor(idx: i);

    if (Succ == SI->getDefaultDest())
      continue;
    Succ->removePredecessor(Pred: BB);
    // Deduplicate DTU edge deletions: several cases may share a successor.
    if (DTU && RemovedSuccessors.insert(Ptr: Succ).second)
      Updates.push_back(x: {DominatorTree::Delete, BB, Succ});
  }
  SI->eraseFromParent();

  if (DTU)
    DTU->applyUpdates(Updates);

  ++NumLookupTables;
  if (NeedMask)
    ++NumLookupTablesHoles;
  return true;
}
| 7127 | |
/// Try to transform a switch that has "holes" in it to a contiguous sequence
/// of cases.
///
/// A switch such as: switch(i) {case 5: case 9: case 13: case 17:} can be
/// range-reduced to: switch ((i-5) / 4) {case 0: case 1: case 2: case 3:}.
///
/// This converts a sparse switch into a dense switch which allows better
/// lowering and could also allow transforming into a lookup table.
static bool reduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder,
                              const DataLayout &DL,
                              const TargetTransformInfo &TTI) {
  auto *CondTy = cast<IntegerType>(Val: SI->getCondition()->getType());
  if (CondTy->getIntegerBitWidth() > 64 ||
      !DL.fitsInLegalInteger(Width: CondTy->getIntegerBitWidth()))
    return false;
  // Only bother with this optimization if there are more than 3 switch cases;
  // SDAG will only bother creating jump tables for 4 or more cases.
  if (SI->getNumCases() < 4)
    return false;

  // This transform is agnostic to the signedness of the input or case values. We
  // can treat the case values as signed or unsigned. We can optimize more common
  // cases such as a sequence crossing zero {-4,0,4,8} if we interpret case values
  // as signed.
  SmallVector<int64_t,4> Values;
  for (const auto &C : SI->cases())
    Values.push_back(Elt: C.getCaseValue()->getValue().getSExtValue());
  llvm::sort(C&: Values);

  // If the switch is already dense, there's nothing useful to do here.
  if (isSwitchDense(Values))
    return false;

  // First, transform the values such that they start at zero and ascend.
  // Subtract via the unsigned domain to avoid signed-overflow UB.
  int64_t Base = Values[0];
  for (auto &V : Values)
    V -= (uint64_t)(Base);

  // Now we have signed numbers that have been shifted so that, given enough
  // precision, there are no negative values. Since the rest of the transform
  // is bitwise only, we switch now to an unsigned representation.

  // This transform can be done speculatively because it is so cheap - it
  // results in a single rotate operation being inserted.

  // countTrailingZeros(0) returns 64. As Values is guaranteed to have more than
  // one element and LLVM disallows duplicate cases, Shift is guaranteed to be
  // less than 64.
  unsigned Shift = 64;
  for (auto &V : Values)
    Shift = std::min(a: Shift, b: (unsigned)llvm::countr_zero(Val: (uint64_t)V));
  assert(Shift < 64);
  if (Shift > 0)
    for (auto &V : Values)
      V = (int64_t)((uint64_t)V >> Shift);

  if (!isSwitchDense(Values))
    // Transform didn't create a dense switch.
    return false;

  // The obvious transform is to shift the switch condition right and emit a
  // check that the condition actually cleanly divided by GCD, i.e.
  // C & (1 << Shift - 1) == 0
  // inserting a new CFG edge to handle the case where it didn't divide cleanly.
  //
  // A cheaper way of doing this is a simple ROTR(C, Shift). This performs the
  // shift and puts the shifted-off bits in the uppermost bits. If any of these
  // are nonzero then the switch condition will be very large and will hit the
  // default case.

  auto *Ty = cast<IntegerType>(Val: SI->getCondition()->getType());
  Builder.SetInsertPoint(SI);
  Value *Sub =
      Builder.CreateSub(LHS: SI->getCondition(), RHS: ConstantInt::get(Ty, V: Base));
  // ROTR(x, Shift) is expressed as fshl(x, x, BitWidth - Shift).
  Value *Rot = Builder.CreateIntrinsic(
      RetTy: Ty, ID: Intrinsic::fshl,
      Args: {Sub, Sub, ConstantInt::get(Ty, V: Ty->getBitWidth() - Shift)});
  SI->replaceUsesOfWith(From: SI->getCondition(), To: Rot);

  // Rewrite each case value to its rebased, shifted form to match the new
  // condition.
  for (auto Case : SI->cases()) {
    auto *Orig = Case.getCaseValue();
    auto Sub = Orig->getValue() - APInt(Ty->getBitWidth(), Base, true);
    Case.setValue(cast<ConstantInt>(Val: ConstantInt::get(Ty, V: Sub.lshr(shiftAmt: Shift))));
  }
  return true;
}
| 7214 | |
/// Tries to transform switch of powers of two to reduce switch range.
/// For example, switch like:
/// switch (C) { case 1: case 2: case 64: case 128: }
/// will be transformed to:
/// switch (count_trailing_zeros(C)) { case 0: case 1: case 6: case 7: }
///
/// This transformation allows better lowering and may transform the switch
/// instruction into a sequence of bit manipulation and a smaller
/// log2(C)-indexed value table (instead of traditionally emitting a load of the
/// address of the jump target, and indirectly jump to it).
static bool simplifySwitchOfPowersOfTwo(SwitchInst *SI, IRBuilder<> &Builder,
                                        const DataLayout &DL,
                                        const TargetTransformInfo &TTI) {
  Value *Condition = SI->getCondition();
  LLVMContext &Context = SI->getContext();
  auto *CondTy = cast<IntegerType>(Val: Condition->getType());

  if (CondTy->getIntegerBitWidth() > 64 ||
      !DL.fitsInLegalInteger(Width: CondTy->getIntegerBitWidth()))
    return false;

  // Ensure trailing zeroes count intrinsic emission is not too expensive.
  IntrinsicCostAttributes Attrs(Intrinsic::cttz, CondTy,
                                {Condition, ConstantInt::getTrue(Context)});
  if (TTI.getIntrinsicInstrCost(ICA: Attrs, CostKind: TTI::TCK_SizeAndLatency) >
      TTI::TCC_Basic * 2)
    return false;

  // Only bother with this optimization if there are more than 3 switch cases.
  // SDAG will start emitting jump tables for 4 or more cases.
  if (SI->getNumCases() < 4)
    return false;

  // We perform this optimization only for switches with
  // unreachable default case.
  // This assumption will save us from checking if `Condition` is a power of
  // two.
  if (!SI->defaultDestUnreachable())
    return false;

  // Check that switch cases are powers of two.
  SmallVector<uint64_t, 4> Values;
  for (const auto &Case : SI->cases()) {
    uint64_t CaseValue = Case.getCaseValue()->getValue().getZExtValue();
    if (llvm::has_single_bit(Value: CaseValue))
      Values.push_back(Elt: CaseValue);
    else
      return false;
  }

  // isSwitchDense requires case values to be sorted.
  llvm::sort(C&: Values);
  if (!isSwitchDense(NumCases: Values.size(), CaseRange: llvm::countr_zero(Val: Values.back()) -
                                          llvm::countr_zero(Val: Values.front()) + 1))
    // Transform is unable to generate dense switch.
    return false;

  Builder.SetInsertPoint(SI);

  // Replace each case with its trailing zeros number.
  for (auto &Case : SI->cases()) {
    auto *OrigValue = Case.getCaseValue();
    Case.setValue(ConstantInt::get(Ty: OrigValue->getIntegerType(),
                                   V: OrigValue->getValue().countr_zero()));
  }

  // Replace condition with its trailing zeros number. The second argument
  // (is_zero_poison = true) is valid because the default is unreachable, so
  // the condition is always one of the (nonzero) case values.
  auto *ConditionTrailingZeros = Builder.CreateIntrinsic(
      ID: Intrinsic::cttz, Types: {CondTy}, Args: {Condition, ConstantInt::getTrue(Context)});

  SI->setCondition(ConditionTrailingZeros);

  return true;
}
| 7288 | |
/// Fold switch over ucmp/scmp intrinsic to br if two of the switch arms have
/// the same destination.
static bool simplifySwitchOfCmpIntrinsic(SwitchInst *SI, IRBuilderBase &Builder,
                                         DomTreeUpdater *DTU) {
  auto *Cmp = dyn_cast<CmpIntrinsic>(Val: SI->getCondition());
  if (!Cmp || !Cmp->hasOneUse())
    return false;

  SmallVector<uint32_t, 4> Weights;
  bool HasWeights = extractBranchWeights(ProfileData: getBranchWeightMDNode(I: *SI), Weights);
  if (!HasWeights)
    Weights.resize(N: 4); // Avoid checking HasWeights everywhere.

  // Normalize to [us]cmp == Res ? Succ : OtherSucc.
  int64_t Res;
  BasicBlock *Succ, *OtherSucc;
  uint32_t SuccWeight = 0, OtherSuccWeight = 0;
  BasicBlock *Unreachable = nullptr;

  if (SI->getNumCases() == 2) {
    // Find which of 1, 0 or -1 is missing (handled by default dest).
    SmallSet<int64_t, 3> Missing;
    Missing.insert(V: 1);
    Missing.insert(V: 0);
    Missing.insert(V: -1);

    // Weights[0] is the weight of the default destination.
    Succ = SI->getDefaultDest();
    SuccWeight = Weights[0];
    OtherSucc = nullptr;
    for (auto &Case : SI->cases()) {
      std::optional<int64_t> Val =
          Case.getCaseValue()->getValue().trySExtValue();
      if (!Val)
        return false;
      if (!Missing.erase(V: *Val))
        return false;
      // Both explicit cases must share one successor for this fold to apply.
      if (OtherSucc && OtherSucc != Case.getCaseSuccessor())
        return false;
      OtherSucc = Case.getCaseSuccessor();
      OtherSuccWeight += Weights[Case.getSuccessorIndex()];
    }

    assert(Missing.size() == 1 && "Should have one case left");
    Res = *Missing.begin();
  } else if (SI->getNumCases() == 3 && SI->defaultDestUnreachable()) {
    // Normalize so that Succ is taken once and OtherSucc twice.
    Unreachable = SI->getDefaultDest();
    Succ = OtherSucc = nullptr;
    for (auto &Case : SI->cases()) {
      BasicBlock *NewSucc = Case.getCaseSuccessor();
      uint32_t Weight = Weights[Case.getSuccessorIndex()];
      if (!OtherSucc || OtherSucc == NewSucc) {
        OtherSucc = NewSucc;
        OtherSuccWeight += Weight;
      } else if (!Succ) {
        Succ = NewSucc;
        SuccWeight = Weight;
      } else if (Succ == NewSucc) {
        // Succ appeared twice; swap roles so it becomes OtherSucc.
        std::swap(a&: Succ, b&: OtherSucc);
        std::swap(a&: SuccWeight, b&: OtherSuccWeight);
      } else
        return false;
    }
    // Find the single case value routed to Succ; it determines the predicate.
    for (auto &Case : SI->cases()) {
      std::optional<int64_t> Val =
          Case.getCaseValue()->getValue().trySExtValue();
      if (!Val || (Val != 1 && Val != 0 && Val != -1))
        return false;
      if (Case.getCaseSuccessor() == Succ) {
        Res = *Val;
        break;
      }
    }
  } else {
    return false;
  }

  // Determine predicate for the missing case. Res is always one of
  // {-1, 0, 1} at this point, so the switch below is exhaustive.
  ICmpInst::Predicate Pred;
  switch (Res) {
  case 1:
    Pred = ICmpInst::ICMP_UGT;
    break;
  case 0:
    Pred = ICmpInst::ICMP_EQ;
    break;
  case -1:
    Pred = ICmpInst::ICMP_ULT;
    break;
  }
  if (Cmp->isSigned())
    Pred = ICmpInst::getSignedPredicate(Pred);

  MDNode *NewWeights = nullptr;
  if (HasWeights)
    NewWeights = MDBuilder(SI->getContext())
                     .createBranchWeights(TrueWeight: SuccWeight, FalseWeight: OtherSuccWeight);

  // Replace the switch with an icmp + conditional branch, then clean up the
  // old edges and instructions.
  BasicBlock *BB = SI->getParent();
  Builder.SetInsertPoint(SI->getIterator());
  Value *ICmp = Builder.CreateICmp(P: Pred, LHS: Cmp->getLHS(), RHS: Cmp->getRHS());
  Builder.CreateCondBr(Cond: ICmp, True: Succ, False: OtherSucc, BranchWeights: NewWeights,
                       Unpredictable: SI->getMetadata(KindID: LLVMContext::MD_unpredictable));
  OtherSucc->removePredecessor(Pred: BB);
  if (Unreachable)
    Unreachable->removePredecessor(Pred: BB);
  SI->eraseFromParent();
  Cmp->eraseFromParent();
  if (DTU && Unreachable)
    DTU->applyUpdates(Updates: {{DominatorTree::Delete, BB, Unreachable}});
  return true;
}
| 7401 | |
/// Checking whether two cases of SI are equal depends on the contents of the
/// BasicBlock and the incoming values of their successor PHINodes.
/// PHINode::getIncomingValueForBlock is O(|Preds|), so we'd like to avoid
/// calling this function on each BasicBlock every time isEqual is called,
/// especially since the same BasicBlock may be passed as an argument multiple
/// times. To do this, we can precompute a map of PHINode -> Pred BasicBlock ->
/// IncomingValue and add it in the Wrapper so isEqual can do O(1) checking
/// of the incoming values.
struct SwitchSuccWrapper {
  // The switch successor block being considered for deduplication.
  BasicBlock *Dest;
  // Shared precomputed map: PHI -> (predecessor -> incoming value). Owned by
  // the caller; referenced by the DenseMapInfo hash/equality routines below.
  DenseMap<PHINode *, SmallDenseMap<BasicBlock *, Value *, 8>> *PhiPredIVs;
};
| 7414 | |
namespace llvm {
// DenseMapInfo specialization so SwitchSuccWrapper pointers can be used as
// DenseSet/DenseMap keys with value-based (not pointer-based) equality.
template <> struct DenseMapInfo<const SwitchSuccWrapper *> {
  static const SwitchSuccWrapper *getEmptyKey() {
    return static_cast<SwitchSuccWrapper *>(
        DenseMapInfo<void *>::getEmptyKey());
  }
  static const SwitchSuccWrapper *getTombstoneKey() {
    return static_cast<SwitchSuccWrapper *>(
        DenseMapInfo<void *>::getTombstoneKey());
  }
  static unsigned getHashValue(const SwitchSuccWrapper *SSW) {
    BasicBlock *Succ = SSW->Dest;
    BranchInst *BI = cast<BranchInst>(Val: Succ->getTerminator());
    assert(BI->isUnconditional() &&
           "Only supporting unconditional branches for now");
    assert(BI->getNumSuccessors() == 1 &&
           "Expected unconditional branches to have one successor");
    assert(Succ->size() == 1 && "Expected just a single branch in the BB");

    // Since we assume the BB is just a single BranchInst with a single
    // successor, we hash as the BB and the incoming Values of its successor
    // PHIs. Initially, we tried to just use the successor BB as the hash, but
    // including the incoming PHI values leads to better performance.
    // We also tried to build a map from BB -> Succs.IncomingValues ahead of
    // time and passing it in SwitchSuccWrapper, but this slowed down the
    // average compile time without having any impact on the worst case compile
    // time.
    BasicBlock *BB = BI->getSuccessor(i: 0);
    SmallVector<Value *> PhiValsForBB;
    for (PHINode &Phi : BB->phis())
      PhiValsForBB.emplace_back(Args&: (*SSW->PhiPredIVs)[&Phi][BB]);

    return hash_combine(args: BB, args: hash_combine_range(R&: PhiValsForBB));
  }
  static bool isEqual(const SwitchSuccWrapper *LHS,
                      const SwitchSuccWrapper *RHS) {
    // Sentinel (empty/tombstone) keys must compare by pointer identity; they
    // cannot be dereferenced.
    auto EKey = DenseMapInfo<SwitchSuccWrapper *>::getEmptyKey();
    auto TKey = DenseMapInfo<SwitchSuccWrapper *>::getTombstoneKey();
    if (LHS == EKey || RHS == EKey || LHS == TKey || RHS == TKey)
      return LHS == RHS;

    BasicBlock *A = LHS->Dest;
    BasicBlock *B = RHS->Dest;

    // FIXME: we checked that the size of A and B are both 1 in
    // simplifyDuplicateSwitchArms to make the Case list smaller to
    // improve performance. If we decide to support BasicBlocks with more
    // than just a single instruction, we need to check that A.size() ==
    // B.size() here, and we need to check more than just the BranchInsts
    // for equality.

    BranchInst *ABI = cast<BranchInst>(Val: A->getTerminator());
    BranchInst *BBI = cast<BranchInst>(Val: B->getTerminator());
    assert(ABI->isUnconditional() && BBI->isUnconditional() &&
           "Only supporting unconditional branches for now");
    if (ABI->getSuccessor(i: 0) != BBI->getSuccessor(i: 0))
      return false;

    // Need to check that PHIs in successor have matching values
    BasicBlock *Succ = ABI->getSuccessor(i: 0);
    for (PHINode &Phi : Succ->phis()) {
      auto &PredIVs = (*LHS->PhiPredIVs)[&Phi];
      if (PredIVs[A] != PredIVs[B])
        return false;
    }

    return true;
  }
};
} // namespace llvm
| 7485 | |
| 7486 | bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI, |
| 7487 | DomTreeUpdater *DTU) { |
| 7488 | // Build Cases. Skip BBs that are not candidates for simplification. Mark |
| 7489 | // PHINodes which need to be processed into PhiPredIVs. We decide to process |
| 7490 | // an entire PHI at once after the loop, opposed to calling |
| 7491 | // getIncomingValueForBlock inside this loop, since each call to |
| 7492 | // getIncomingValueForBlock is O(|Preds|). |
| 7493 | SmallPtrSet<PHINode *, 8> Phis; |
| 7494 | SmallPtrSet<BasicBlock *, 8> Seen; |
| 7495 | DenseMap<PHINode *, SmallDenseMap<BasicBlock *, Value *, 8>> PhiPredIVs; |
| 7496 | DenseMap<BasicBlock *, SmallVector<unsigned, 4>> BBToSuccessorIndexes; |
| 7497 | SmallVector<SwitchSuccWrapper> Cases; |
| 7498 | Cases.reserve(N: SI->getNumSuccessors()); |
| 7499 | |
| 7500 | for (unsigned I = 0; I < SI->getNumSuccessors(); ++I) { |
| 7501 | BasicBlock *BB = SI->getSuccessor(idx: I); |
| 7502 | |
| 7503 | // FIXME: Support more than just a single BranchInst. One way we could do |
| 7504 | // this is by taking a hashing approach of all insts in BB. |
| 7505 | if (BB->size() != 1) |
| 7506 | continue; |
| 7507 | |
| 7508 | // FIXME: This case needs some extra care because the terminators other than |
| 7509 | // SI need to be updated. For now, consider only backedges to the SI. |
| 7510 | if (BB->hasNPredecessorsOrMore(N: 4) || |
| 7511 | BB->getUniquePredecessor() != SI->getParent()) |
| 7512 | continue; |
| 7513 | |
| 7514 | // FIXME: Relax that the terminator is a BranchInst by checking for equality |
| 7515 | // on other kinds of terminators. We decide to only support unconditional |
| 7516 | // branches for now for compile time reasons. |
| 7517 | auto *BI = dyn_cast<BranchInst>(Val: BB->getTerminator()); |
| 7518 | if (!BI || BI->isConditional()) |
| 7519 | continue; |
| 7520 | |
| 7521 | if (Seen.insert(Ptr: BB).second) { |
| 7522 | // Keep track of which PHIs we need as keys in PhiPredIVs below. |
| 7523 | for (BasicBlock *Succ : BI->successors()) |
| 7524 | Phis.insert_range(R: llvm::make_pointer_range(Range: Succ->phis())); |
| 7525 | // Add the successor only if not previously visited. |
| 7526 | Cases.emplace_back(Args: SwitchSuccWrapper{.Dest: BB, .PhiPredIVs: &PhiPredIVs}); |
| 7527 | } |
| 7528 | |
| 7529 | BBToSuccessorIndexes[BB].emplace_back(Args&: I); |
| 7530 | } |
| 7531 | |
| 7532 | // Precompute a data structure to improve performance of isEqual for |
| 7533 | // SwitchSuccWrapper. |
| 7534 | PhiPredIVs.reserve(NumEntries: Phis.size()); |
| 7535 | for (PHINode *Phi : Phis) { |
| 7536 | auto &IVs = |
| 7537 | PhiPredIVs.try_emplace(Key: Phi, Args: Phi->getNumIncomingValues()).first->second; |
| 7538 | for (auto &IV : Phi->incoming_values()) |
| 7539 | IVs.insert(KV: {Phi->getIncomingBlock(U: IV), IV.get()}); |
| 7540 | } |
| 7541 | |
| 7542 | // Build a set such that if the SwitchSuccWrapper exists in the set and |
| 7543 | // another SwitchSuccWrapper isEqual, then the equivalent SwitchSuccWrapper |
| 7544 | // which is not in the set should be replaced with the one in the set. If the |
| 7545 | // SwitchSuccWrapper is not in the set, then it should be added to the set so |
| 7546 | // other SwitchSuccWrappers can check against it in the same manner. We use |
| 7547 | // SwitchSuccWrapper instead of just BasicBlock because we'd like to pass |
| 7548 | // around information to isEquality, getHashValue, and when doing the |
| 7549 | // replacement with better performance. |
| 7550 | DenseSet<const SwitchSuccWrapper *> ReplaceWith; |
| 7551 | ReplaceWith.reserve(Size: Cases.size()); |
| 7552 | |
| 7553 | SmallVector<DominatorTree::UpdateType> Updates; |
| 7554 | Updates.reserve(N: ReplaceWith.size()); |
| 7555 | bool MadeChange = false; |
| 7556 | for (auto &SSW : Cases) { |
| 7557 | // SSW is a candidate for simplification. If we find a duplicate BB, |
| 7558 | // replace it. |
| 7559 | const auto [It, Inserted] = ReplaceWith.insert(V: &SSW); |
| 7560 | if (!Inserted) { |
| 7561 | // We know that SI's parent BB no longer dominates the old case successor |
| 7562 | // since we are making it dead. |
| 7563 | Updates.push_back(Elt: {DominatorTree::Delete, SI->getParent(), SSW.Dest}); |
| 7564 | const auto &Successors = BBToSuccessorIndexes.at(Val: SSW.Dest); |
| 7565 | for (unsigned Idx : Successors) |
| 7566 | SI->setSuccessor(idx: Idx, NewSucc: (*It)->Dest); |
| 7567 | MadeChange = true; |
| 7568 | } |
| 7569 | } |
| 7570 | |
| 7571 | if (DTU) |
| 7572 | DTU->applyUpdates(Updates); |
| 7573 | |
| 7574 | return MadeChange; |
| 7575 | } |
| 7576 | |
/// Run a fixed sequence of simplifications on a switch instruction. Each
/// transform that fires requests a re-simplification of the block, since one
/// change frequently exposes another. The order of attempts is deliberate
/// (see the inline comments on pipeline-stage restrictions).
bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
  BasicBlock *BB = SI->getParent();

  if (isValueEqualityComparison(TI: SI)) {
    // If we only have one predecessor, and if it is a branch on this value,
    // see if that predecessor totally determines the outcome of this switch.
    if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
      if (simplifyEqualityComparisonWithOnlyPredecessor(TI: SI, Pred: OnlyPred, Builder))
        return requestResimplify();

    // A switch whose condition is a select can be split along the select.
    Value *Cond = SI->getCondition();
    if (SelectInst *Select = dyn_cast<SelectInst>(Val: Cond))
      if (simplifySwitchOnSelect(SI, Select))
        return requestResimplify();

    // If the block only contains the switch, see if we can fold the block
    // away into any preds.
    if (SI == &*BB->instructionsWithoutDebug(SkipPseudoOp: false).begin())
      if (foldValueComparisonIntoPredecessors(TI: SI, Builder))
        return requestResimplify();
  }

  // Try to transform the switch into an icmp and a branch.
  // The conversion from switch to comparison may lose information on
  // impossible switch values, so disable it early in the pipeline.
  if (Options.ConvertSwitchRangeToICmp && turnSwitchRangeIntoICmp(SI, Builder))
    return requestResimplify();

  // Remove unreachable cases.
  if (eliminateDeadSwitchCases(SI, DTU, AC: Options.AC, DL))
    return requestResimplify();

  // Simplify switches whose condition comes from a compare-style intrinsic.
  if (simplifySwitchOfCmpIntrinsic(SI, Builder, DTU))
    return requestResimplify();

  // A switch with few distinct outcomes may collapse into select(s).
  if (trySwitchToSelect(SI, Builder, DTU, DL, TTI))
    return requestResimplify();

  if (Options.ForwardSwitchCondToPhi && forwardSwitchConditionToPHI(SI))
    return requestResimplify();

  // The conversion from switch to lookup tables results in difficult-to-analyze
  // code and makes pruning branches much harder. This is a problem if the
  // switch expression itself can still be restricted as a result of inlining or
  // CVP. Therefore, only apply this transformation during late stages of the
  // optimisation pipeline.
  if (Options.ConvertSwitchToLookupTable &&
      switchToLookupTable(SI, Builder, DTU, DL, TTI))
    return requestResimplify();

  if (simplifySwitchOfPowersOfTwo(SI, Builder, DL, TTI))
    return requestResimplify();

  if (reduceSwitchRange(SI, Builder, DL, TTI))
    return requestResimplify();

  // Hoist code that is common to all case destinations into this block.
  if (HoistCommon &&
      hoistCommonCodeFromSuccessors(TI: SI, AllInstsEqOnly: !Options.HoistCommonInsts))
    return requestResimplify();

  // Merge case arms that branch to trivially identical blocks.
  if (simplifyDuplicateSwitchArms(SI, DTU))
    return requestResimplify();

  return false;
}
| 7642 | |
/// Simplify an indirectbr terminator: drop destinations that are duplicated
/// or whose address is never taken, then degenerate the instruction when
/// zero or one destination remains.
bool SimplifyCFGOpt::simplifyIndirectBr(IndirectBrInst *IBI) {
  BasicBlock *BB = IBI->getParent();
  bool Changed = false;

  // Eliminate redundant destinations.
  SmallPtrSet<Value *, 8> Succs;
  SmallSetVector<BasicBlock *, 8> RemovedSuccs;
  for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) {
    BasicBlock *Dest = IBI->getDestination(i);
    // Remove the entry if the block's address is never taken (so it cannot
    // be the runtime target of an indirect branch) or if it is listed twice.
    if (!Dest->hasAddressTaken() || !Succs.insert(Ptr: Dest).second) {
      // Only record a CFG edge deletion when the block is dropped outright;
      // removing a duplicate entry leaves the BB->Dest edge intact via the
      // first occurrence.
      if (!Dest->hasAddressTaken())
        RemovedSuccs.insert(X: Dest);
      Dest->removePredecessor(Pred: BB);
      IBI->removeDestination(i);
      // Compensate for the in-place removal so no destination is skipped.
      --i;
      --e;
      Changed = true;
    }
  }

  if (DTU) {
    std::vector<DominatorTree::UpdateType> Updates;
    Updates.reserve(n: RemovedSuccs.size());
    for (auto *RemovedSucc : RemovedSuccs)
      Updates.push_back(x: {DominatorTree::Delete, BB, RemovedSucc});
    DTU->applyUpdates(Updates);
  }

  if (IBI->getNumDestinations() == 0) {
    // If the indirectbr has no successors, change it to unreachable.
    new UnreachableInst(IBI->getContext(), IBI->getIterator());
    eraseTerminatorAndDCECond(TI: IBI);
    return true;
  }

  if (IBI->getNumDestinations() == 1) {
    // If the indirectbr has one successor, change it to a direct branch.
    BranchInst::Create(IfTrue: IBI->getDestination(i: 0), InsertBefore: IBI->getIterator());
    eraseTerminatorAndDCECond(TI: IBI);
    return true;
  }

  // An indirectbr on a select can be split along the select (see helper).
  if (SelectInst *SI = dyn_cast<SelectInst>(Val: IBI->getAddress())) {
    if (simplifyIndirectBrOnSelect(IBI, SI))
      return requestResimplify();
  }
  return Changed;
}
| 7691 | |
| 7692 | /// Given an block with only a single landing pad and a unconditional branch |
| 7693 | /// try to find another basic block which this one can be merged with. This |
| 7694 | /// handles cases where we have multiple invokes with unique landing pads, but |
| 7695 | /// a shared handler. |
| 7696 | /// |
| 7697 | /// We specifically choose to not worry about merging non-empty blocks |
| 7698 | /// here. That is a PRE/scheduling problem and is best solved elsewhere. In |
| 7699 | /// practice, the optimizer produces empty landing pad blocks quite frequently |
| 7700 | /// when dealing with exception dense code. (see: instcombine, gvn, if-else |
| 7701 | /// sinking in this file) |
| 7702 | /// |
| 7703 | /// This is primarily a code size optimization. We need to avoid performing |
| 7704 | /// any transform which might inhibit optimization (such as our ability to |
| 7705 | /// specialize a particular handler via tail commoning). We do this by not |
| 7706 | /// merging any blocks which require us to introduce a phi. Since the same |
| 7707 | /// values are flowing through both blocks, we don't lose any ability to |
| 7708 | /// specialize. If anything, we make such specialization more likely. |
| 7709 | /// |
| 7710 | /// TODO - This transformation could remove entries from a phi in the target |
| 7711 | /// block when the inputs in the phi are the same for the two blocks being |
| 7712 | /// merged. In some cases, this could result in removal of the PHI entirely. |
static bool tryToMergeLandingPad(LandingPadInst *LPad, BranchInst *BI,
                                 BasicBlock *BB, DomTreeUpdater *DTU) {
  auto Succ = BB->getUniqueSuccessor();
  assert(Succ);
  // If there's a phi in the successor block, we'd likely have to introduce
  // a phi into the merged landing pad block.
  if (isa<PHINode>(Val: *Succ->begin()))
    return false;

  // Scan the other predecessors of our successor for a block consisting of
  // an identical landing pad followed by an identical unconditional branch.
  for (BasicBlock *OtherPred : predecessors(BB: Succ)) {
    if (BB == OtherPred)
      continue;
    BasicBlock::iterator I = OtherPred->begin();
    LandingPadInst *LPad2 = dyn_cast<LandingPadInst>(Val&: I);
    if (!LPad2 || !LPad2->isIdenticalTo(I: LPad))
      continue;
    ++I;
    BranchInst *BI2 = dyn_cast<BranchInst>(Val&: I);
    if (!BI2 || !BI2->isIdenticalTo(I: BI))
      continue;

    std::vector<DominatorTree::UpdateType> Updates;

    // We've found an identical block. Update our predecessors to take that
    // path instead and make ourselves dead.
    SmallSetVector<BasicBlock *, 16> UniquePreds(pred_begin(BB), pred_end(BB));
    for (BasicBlock *Pred : UniquePreds) {
      // Every predecessor of a landing-pad block is an invoke unwinding to
      // it (asserted below); redirect its unwind edge to the twin block.
      InvokeInst *II = cast<InvokeInst>(Val: Pred->getTerminator());
      assert(II->getNormalDest() != BB && II->getUnwindDest() == BB &&
             "unexpected successor" );
      II->setUnwindDest(OtherPred);
      if (DTU) {
        Updates.push_back(x: {DominatorTree::Insert, Pred, OtherPred});
        Updates.push_back(x: {DominatorTree::Delete, Pred, BB});
      }
    }

    // BB is now unreachable; detach it from its successors' PHI lists and
    // record the dropped CFG edges.
    SmallSetVector<BasicBlock *, 16> UniqueSuccs(succ_begin(BB), succ_end(BB));
    for (BasicBlock *Succ : UniqueSuccs) {
      Succ->removePredecessor(Pred: BB);
      if (DTU)
        Updates.push_back(x: {DominatorTree::Delete, BB, Succ});
    }

    // Replace our branch with unreachable; later cleanup removes the dead
    // block itself.
    IRBuilder<> Builder(BI);
    Builder.CreateUnreachable();
    BI->eraseFromParent();
    if (DTU)
      DTU->applyUpdates(Updates);
    return true;
  }
  return false;
}
| 7766 | |
| 7767 | bool SimplifyCFGOpt::simplifyBranch(BranchInst *Branch, IRBuilder<> &Builder) { |
| 7768 | return Branch->isUnconditional() ? simplifyUncondBranch(BI: Branch, Builder) |
| 7769 | : simplifyCondBranch(BI: Branch, Builder); |
| 7770 | } |
| 7771 | |
/// Simplify a basic block that ends in an unconditional branch. Note that
/// the iterator I below is advanced in place by the individual checks, so
/// the order of the tests matters.
bool SimplifyCFGOpt::simplifyUncondBranch(BranchInst *BI,
                                          IRBuilder<> &Builder) {
  BasicBlock *BB = BI->getParent();
  BasicBlock *Succ = BI->getSuccessor(i: 0);

  // If the Terminator is the only non-phi instruction, simplify the block.
  // If LoopHeader is provided, check if the block or its successor is a loop
  // header. (This is for early invocations before loop simplify and
  // vectorization to keep canonical loop forms for nested loops. These blocks
  // can be eliminated when the pass is invoked later in the back-end.)
  // Note that if BB has only one predecessor then we do not introduce new
  // backedge, so we can eliminate BB.
  bool NeedCanonicalLoop =
      Options.NeedCanonicalLoop &&
      (!LoopHeaders.empty() && BB->hasNPredecessorsOrMore(N: 2) &&
       (is_contained(Range&: LoopHeaders, Element: BB) || is_contained(Range&: LoopHeaders, Element: Succ)));
  BasicBlock::iterator I = BB->getFirstNonPHIOrDbg();
  if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() &&
      !NeedCanonicalLoop && TryToSimplifyUncondBranchFromEmptyBlock(BB, DTU))
    return true;

  // If the only instruction in the block is a seteq/setne comparison against a
  // constant, try to simplify the block.
  if (ICmpInst *ICI = dyn_cast<ICmpInst>(Val&: I))
    if (ICI->isEquality() && isa<ConstantInt>(Val: ICI->getOperand(i_nocapture: 1))) {
      ++I;
      if (I->isTerminator() &&
          tryToSimplifyUncondBranchWithICmpInIt(ICI, Builder))
        return true;
    }

  // See if we can merge an empty landing pad block with another which is
  // equivalent.
  if (LandingPadInst *LPad = dyn_cast<LandingPadInst>(Val&: I)) {
    ++I;
    if (I->isTerminator() && tryToMergeLandingPad(LPad, BI, BB, DTU))
      return true;
  }

  // If this basic block is ONLY a compare and a branch, and if a predecessor
  // branches to us and our successor, fold the comparison into the
  // predecessor and use logical operations to update the incoming value
  // for PHI nodes in common successor.
  if (Options.SpeculateBlocks &&
      foldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, TTI: &TTI,
                             BonusInstThreshold: Options.BonusInstThreshold))
    return requestResimplify();
  return false;
}
| 7821 | |
| 7822 | static BasicBlock *allPredecessorsComeFromSameSource(BasicBlock *BB) { |
| 7823 | BasicBlock *PredPred = nullptr; |
| 7824 | for (auto *P : predecessors(BB)) { |
| 7825 | BasicBlock *PPred = P->getSinglePredecessor(); |
| 7826 | if (!PPred || (PredPred && PredPred != PPred)) |
| 7827 | return nullptr; |
| 7828 | PredPred = PPred; |
| 7829 | } |
| 7830 | return PredPred; |
| 7831 | } |
| 7832 | |
| 7833 | /// Fold the following pattern: |
| 7834 | /// bb0: |
| 7835 | /// br i1 %cond1, label %bb1, label %bb2 |
| 7836 | /// bb1: |
| 7837 | /// br i1 %cond2, label %bb3, label %bb4 |
| 7838 | /// bb2: |
| 7839 | /// br i1 %cond2, label %bb4, label %bb3 |
| 7840 | /// bb3: |
| 7841 | /// ... |
| 7842 | /// bb4: |
| 7843 | /// ... |
| 7844 | /// into |
| 7845 | /// bb0: |
| 7846 | /// %cond = xor i1 %cond1, %cond2 |
| 7847 | /// br i1 %cond, label %bb4, label %bb3 |
| 7848 | /// bb3: |
| 7849 | /// ... |
| 7850 | /// bb4: |
| 7851 | /// ... |
| 7852 | /// NOTE: %cond2 always dominates the terminator of bb0. |
static bool mergeNestedCondBranch(BranchInst *BI, DomTreeUpdater *DTU) {
  BasicBlock *BB = BI->getParent();
  BasicBlock *BB1 = BI->getSuccessor(i: 0);
  BasicBlock *BB2 = BI->getSuccessor(i: 1);
  // A successor qualifies only if it contains nothing but a conditional
  // branch whose targets are distinct from both itself and BB and have no
  // PHIs (so no incoming values need rewriting after the merge).
  auto IsSimpleSuccessor = [BB](BasicBlock *Succ, BranchInst *&SuccBI) {
    if (Succ == BB)
      return false;
    if (&Succ->front() != Succ->getTerminator())
      return false;
    SuccBI = dyn_cast<BranchInst>(Val: Succ->getTerminator());
    if (!SuccBI || !SuccBI->isConditional())
      return false;
    BasicBlock *Succ1 = SuccBI->getSuccessor(i: 0);
    BasicBlock *Succ2 = SuccBI->getSuccessor(i: 1);
    return Succ1 != Succ && Succ2 != Succ && Succ1 != BB && Succ2 != BB &&
           !isa<PHINode>(Val: Succ1->front()) && !isa<PHINode>(Val: Succ2->front());
  };
  BranchInst *BB1BI, *BB2BI;
  if (!IsSimpleSuccessor(BB1, BB1BI) || !IsSimpleSuccessor(BB2, BB2BI))
    return false;

  // Both inner branches must test the same condition with swapped targets,
  // matching the pattern in the function comment above.
  if (BB1BI->getCondition() != BB2BI->getCondition() ||
      BB1BI->getSuccessor(i: 0) != BB2BI->getSuccessor(i: 1) ||
      BB1BI->getSuccessor(i: 1) != BB2BI->getSuccessor(i: 0))
    return false;

  BasicBlock *BB3 = BB1BI->getSuccessor(i: 0);
  BasicBlock *BB4 = BB1BI->getSuccessor(i: 1);
  IRBuilder<> Builder(BI);
  // The merged branch takes the BB4 edge exactly when cond1 ^ cond2 is true
  // (true/true and false/false both lead to BB3 in the original CFG).
  BI->setCondition(
      Builder.CreateXor(LHS: BI->getCondition(), RHS: BB1BI->getCondition()));
  BB1->removePredecessor(Pred: BB);
  BI->setSuccessor(idx: 0, NewSucc: BB4);
  BB2->removePredecessor(Pred: BB);
  BI->setSuccessor(idx: 1, NewSucc: BB3);
  if (DTU) {
    SmallVector<DominatorTree::UpdateType, 4> Updates;
    Updates.push_back(Elt: {DominatorTree::Delete, BB, BB1});
    Updates.push_back(Elt: {DominatorTree::Insert, BB, BB4});
    Updates.push_back(Elt: {DominatorTree::Delete, BB, BB2});
    Updates.push_back(Elt: {DominatorTree::Insert, BB, BB3});

    DTU->applyUpdates(Updates);
  }
  // Combine branch weights from the three branches. Missing weights are
  // treated as 1:1 so that metadata on any single branch still produces
  // weighted metadata on the merged branch.
  bool HasWeight = false;
  uint64_t BBTWeight, BBFWeight;
  if (extractBranchWeights(I: *BI, TrueVal&: BBTWeight, FalseVal&: BBFWeight))
    HasWeight = true;
  else
    BBTWeight = BBFWeight = 1;
  uint64_t BB1TWeight, BB1FWeight;
  if (extractBranchWeights(I: *BB1BI, TrueVal&: BB1TWeight, FalseVal&: BB1FWeight))
    HasWeight = true;
  else
    BB1TWeight = BB1FWeight = 1;
  uint64_t BB2TWeight, BB2FWeight;
  if (extractBranchWeights(I: *BB2BI, TrueVal&: BB2TWeight, FalseVal&: BB2FWeight))
    HasWeight = true;
  else
    BB2TWeight = BB2FWeight = 1;
  if (HasWeight) {
    // New weights follow the path sums: reaching BB4 (now successor 0) means
    // BB-true then BB1-false, or BB-false then BB2-true; symmetrically for
    // BB3. fitWeights keeps the sums within the representable range.
    uint64_t Weights[2] = {BBTWeight * BB1FWeight + BBFWeight * BB2TWeight,
                           BBTWeight * BB1TWeight + BBFWeight * BB2FWeight};
    fitWeights(Weights);
    setBranchWeights(I: BI, TrueWeight: Weights[0], FalseWeight: Weights[1], /*IsExpected=*/false);
  }
  return true;
}
| 7921 | |
/// Run the sequence of simplifications for a conditional branch; each
/// successful transform requests a re-simplification of the block.
bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
  assert(
      !isa<ConstantInt>(BI->getCondition()) &&
      BI->getSuccessor(0) != BI->getSuccessor(1) &&
      "Tautological conditional branch should have been eliminated already." );

  BasicBlock *BB = BI->getParent();
  if (!Options.SimplifyCondBranch ||
      BI->getFunction()->hasFnAttribute(Kind: Attribute::OptForFuzzing))
    return false;

  // Conditional branch
  if (isValueEqualityComparison(TI: BI)) {
    // If we only have one predecessor, and if it is a branch on this value,
    // see if that predecessor totally determines the outcome of this
    // switch.
    if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
      if (simplifyEqualityComparisonWithOnlyPredecessor(TI: BI, Pred: OnlyPred, Builder))
        return requestResimplify();

    // This block must be empty, except for the setcond inst, if it exists.
    // Ignore dbg and pseudo intrinsics.
    auto I = BB->instructionsWithoutDebug(SkipPseudoOp: true).begin();
    if (&*I == BI) {
      if (foldValueComparisonIntoPredecessors(TI: BI, Builder))
        return requestResimplify();
    } else if (&*I == cast<Instruction>(Val: BI->getCondition())) {
      // The only other instruction is the branch's own condition; the
      // branch must immediately follow it.
      ++I;
      if (&*I == BI && foldValueComparisonIntoPredecessors(TI: BI, Builder))
        return requestResimplify();
    }
  }

  // Try to turn "br (X == 0 | X == 1), T, F" into a switch instruction.
  if (simplifyBranchOnICmpChain(BI, Builder, DL))
    return true;

  // If this basic block has dominating predecessor blocks and the dominating
  // blocks' conditions imply BI's condition, we know the direction of BI.
  std::optional<bool> Imp = isImpliedByDomCondition(Cond: BI->getCondition(), ContextI: BI, DL);
  if (Imp) {
    // Turn this into a branch on constant.
    auto *OldCond = BI->getCondition();
    ConstantInt *TorF = *Imp ? ConstantInt::getTrue(Context&: BB->getContext())
                             : ConstantInt::getFalse(Context&: BB->getContext());
    BI->setCondition(TorF);
    // The old condition may now be dead; clean it up before resimplifying.
    RecursivelyDeleteTriviallyDeadInstructions(V: OldCond);
    return requestResimplify();
  }

  // If this basic block is ONLY a compare and a branch, and if a predecessor
  // branches to us and one of our successors, fold the comparison into the
  // predecessor and use logical operations to pick the right destination.
  if (Options.SpeculateBlocks &&
      foldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, TTI: &TTI,
                             BonusInstThreshold: Options.BonusInstThreshold))
    return requestResimplify();

  // We have a conditional branch to two blocks that are only reachable
  // from BI. We know that the condbr dominates the two blocks, so see if
  // there is any identical code in the "then" and "else" blocks. If so, we
  // can hoist it up to the branching block.
  if (BI->getSuccessor(i: 0)->getSinglePredecessor()) {
    if (BI->getSuccessor(i: 1)->getSinglePredecessor()) {
      if (HoistCommon &&
          hoistCommonCodeFromSuccessors(TI: BI, AllInstsEqOnly: !Options.HoistCommonInsts))
        return requestResimplify();

      // Both successors are exclusively ours; on targets with conditionally
      // faulting loads/stores, try to speculate them (see
      // hoistConditionalLoadsStores).
      if (BI && Options.HoistLoadsStoresWithCondFaulting &&
          isProfitableToSpeculate(BI, Invert: std::nullopt, TTI)) {
        SmallVector<Instruction *, 2> SpeculatedConditionalLoadsStores;
        // Collect candidate loads/stores; give up if either successor holds
        // anything else, a multi-way terminator, or we exceed the threshold.
        auto CanSpeculateConditionalLoadsStores = [&]() {
          for (auto *Succ : successors(BB)) {
            for (Instruction &I : *Succ) {
              if (I.isTerminator()) {
                if (I.getNumSuccessors() > 1)
                  return false;
                continue;
              } else if (!isSafeCheapLoadStore(I: &I, TTI) ||
                         SpeculatedConditionalLoadsStores.size() ==
                             HoistLoadsStoresWithCondFaultingThreshold) {
                return false;
              }
              SpeculatedConditionalLoadsStores.push_back(Elt: &I);
            }
          }
          return !SpeculatedConditionalLoadsStores.empty();
        };

        if (CanSpeculateConditionalLoadsStores()) {
          hoistConditionalLoadsStores(BI, SpeculatedConditionalLoadsStores,
                                      Invert: std::nullopt, Sel: nullptr);
          return requestResimplify();
        }
      }
    } else {
      // If Successor #1 has multiple preds, we may be able to conditionally
      // execute Successor #0 if it branches to Successor #1.
      Instruction *Succ0TI = BI->getSuccessor(i: 0)->getTerminator();
      if (Succ0TI->getNumSuccessors() == 1 &&
          Succ0TI->getSuccessor(Idx: 0) == BI->getSuccessor(i: 1))
        if (speculativelyExecuteBB(BI, ThenBB: BI->getSuccessor(i: 0)))
          return requestResimplify();
    }
  } else if (BI->getSuccessor(i: 1)->getSinglePredecessor()) {
    // If Successor #0 has multiple preds, we may be able to conditionally
    // execute Successor #1 if it branches to Successor #0.
    Instruction *Succ1TI = BI->getSuccessor(i: 1)->getTerminator();
    if (Succ1TI->getNumSuccessors() == 1 &&
        Succ1TI->getSuccessor(Idx: 0) == BI->getSuccessor(i: 0))
      if (speculativelyExecuteBB(BI, ThenBB: BI->getSuccessor(i: 1)))
        return requestResimplify();
  }

  // If this is a branch on something for which we know the constant value in
  // predecessors (e.g. a phi node in the current block), thread control
  // through this block.
  if (foldCondBranchOnValueKnownInPredecessor(BI, DTU, DL, AC: Options.AC))
    return requestResimplify();

  // Scan predecessor blocks for conditional branches.
  for (BasicBlock *Pred : predecessors(BB))
    if (BranchInst *PBI = dyn_cast<BranchInst>(Val: Pred->getTerminator()))
      if (PBI != BI && PBI->isConditional())
        if (SimplifyCondBranchToCondBranch(PBI, BI, DTU, DL, TTI))
          return requestResimplify();

  // Look for diamond patterns.
  if (MergeCondStores)
    if (BasicBlock *PrevBB = allPredecessorsComeFromSameSource(BB))
      if (BranchInst *PBI = dyn_cast<BranchInst>(Val: PrevBB->getTerminator()))
        if (PBI != BI && PBI->isConditional())
          if (mergeConditionalStores(PBI, QBI: BI, DTU, DL, TTI))
            return requestResimplify();

  // Look for nested conditional branches.
  if (mergeNestedCondBranch(BI, DTU))
    return requestResimplify();

  return false;
}
| 8063 | |
| 8064 | /// Check if passing a value to an instruction will cause undefined behavior. |
// Returns true only when V is a null/undef constant and the first interesting
// use of I provably triggers immediate UB (load/store through null, call of
// null, div/rem by zero, noundef violations, ...). PtrValueMayBeModified
// tracks whether a GEP along the way may have moved the pointer off null,
// which weakens the nonnull-argument/return conclusions below.
static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I,
                                          bool PtrValueMayBeModified) {
  assert(V->getType() == I->getType() && "Mismatched types" );
  Constant *C = dyn_cast<Constant>(Val: V);
  if (!C)
    return false;

  if (I->use_empty())
    return false;

  if (C->isNullValue() || isa<UndefValue>(Val: C)) {
    // Only look at the first use we can handle, avoid hurting compile time with
    // long uselists
    auto FindUse = llvm::find_if(Range: I->uses(), P: [](auto &U) {
      auto *Use = cast<Instruction>(U.getUser());
      // Change this list when we want to add new instructions.
      switch (Use->getOpcode()) {
      default:
        return false;
      case Instruction::GetElementPtr:
      case Instruction::Ret:
      case Instruction::BitCast:
      case Instruction::Load:
      case Instruction::Store:
      case Instruction::Call:
      case Instruction::CallBr:
      case Instruction::Invoke:
      case Instruction::UDiv:
      case Instruction::URem:
        // Note: signed div/rem of INT_MIN / -1 is also immediate UB, not
        // implemented to avoid code complexity as it is unclear how useful such
        // logic is.
      case Instruction::SDiv:
      case Instruction::SRem:
        return true;
      }
    });
    if (FindUse == I->use_end())
      return false;
    auto &Use = *FindUse;
    auto *User = cast<Instruction>(Val: Use.getUser());
    // Bail out if User is not in the same BB as I or User == I or User comes
    // before I in the block. The latter two can be the case if User is a
    // PHI node.
    if (User->getParent() != I->getParent() || User == I ||
        User->comesBefore(Other: I))
      return false;

    // Now make sure that there are no instructions in between that can alter
    // control flow (eg. calls)
    auto InstrRange =
        make_range(x: std::next(x: I->getIterator()), y: User->getIterator());
    if (any_of(Range&: InstrRange, P: [](Instruction &I) {
          return !isGuaranteedToTransferExecutionToSuccessor(I: &I);
        }))
      return false;

    // Look through GEPs. A load from a GEP derived from NULL is still undefined
    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Val: User))
      if (GEP->getPointerOperand() == I) {
        // The type of GEP may differ from the type of base pointer.
        // Bail out on vector GEPs, as they are not handled by other checks.
        if (GEP->getType()->isVectorTy())
          return false;
        // The current base address is null, there are four cases to consider:
        // getelementptr (TY, null, 0) -> null
        // getelementptr (TY, null, not zero) -> may be modified
        // getelementptr inbounds (TY, null, 0) -> null
        // getelementptr inbounds (TY, null, not zero) -> poison iff null is
        // undefined?
        if (!GEP->hasAllZeroIndices() &&
            (!GEP->isInBounds() ||
             NullPointerIsDefined(F: GEP->getFunction(),
                                  AS: GEP->getPointerAddressSpace())))
          PtrValueMayBeModified = true;
        // Recurse on the GEP's result to inspect its first interesting use.
        return passingValueIsAlwaysUndefined(V, I: GEP, PtrValueMayBeModified);
      }

    // Look through return.
    if (ReturnInst *Ret = dyn_cast<ReturnInst>(Val: User)) {
      bool HasNoUndefAttr =
          Ret->getFunction()->hasRetAttribute(Kind: Attribute::NoUndef);
      // Return undefined to a noundef return value is undefined.
      if (isa<UndefValue>(Val: C) && HasNoUndefAttr)
        return true;
      // Return null to a nonnull+noundef return value is undefined.
      if (C->isNullValue() && HasNoUndefAttr &&
          Ret->getFunction()->hasRetAttribute(Kind: Attribute::NonNull)) {
        // A GEP may have moved the value off null, in which case returning
        // it is not necessarily UB.
        return !PtrValueMayBeModified;
      }
    }

    // Load from null is undefined.
    if (LoadInst *LI = dyn_cast<LoadInst>(Val: User))
      if (!LI->isVolatile())
        return !NullPointerIsDefined(F: LI->getFunction(),
                                     AS: LI->getPointerAddressSpace());

    // Store to null is undefined.
    if (StoreInst *SI = dyn_cast<StoreInst>(Val: User))
      if (!SI->isVolatile())
        // Only the pointer operand matters; storing a null *value* is fine.
        return (!NullPointerIsDefined(F: SI->getFunction(),
                                      AS: SI->getPointerAddressSpace())) &&
               SI->getPointerOperand() == I;

    // llvm.assume(false/undef) always triggers immediate UB.
    if (auto *Assume = dyn_cast<AssumeInst>(Val: User)) {
      // Ignore assume operand bundles.
      if (I == Assume->getArgOperand(i: 0))
        return true;
    }

    if (auto *CB = dyn_cast<CallBase>(Val: User)) {
      // In functions where null is a defined address, none of the call-based
      // conclusions below hold for a null constant.
      if (C->isNullValue() && NullPointerIsDefined(F: CB->getFunction()))
        return false;
      // A call to null is undefined.
      if (CB->getCalledOperand() == I)
        return true;

      if (CB->isArgOperand(U: &Use)) {
        unsigned ArgIdx = CB->getArgOperandNo(U: &Use);
        // Passing null to a nonnnull+noundef argument is undefined.
        if (isa<ConstantPointerNull>(Val: C) &&
            CB->paramHasNonNullAttr(ArgNo: ArgIdx, /*AllowUndefOrPoison=*/false))
          return !PtrValueMayBeModified;
        // Passing undef to a noundef argument is undefined.
        if (isa<UndefValue>(Val: C) && CB->isPassingUndefUB(ArgNo: ArgIdx))
          return true;
      }
    }
    // Div/Rem by zero is immediate UB
    if (match(V: User, P: m_BinOp(L: m_Value(), R: m_Specific(V: I))) && User->isIntDivRem())
      return true;
  }
  return false;
}
| 8200 | |
/// If BB has an incoming value that will always trigger undefined behavior
/// (eg. null pointer dereference), remove the branch leading here.
///
/// For each PHI in \p BB, if passing the incoming value into the PHI is
/// immediate UB (per passingValueIsAlwaysUndefined), the edge from that
/// predecessor can never be taken in a well-defined execution, so it is cut:
///  - an unconditional branch is replaced by `unreachable`;
///  - a conditional branch is folded to its other successor, and the now
///    known branch condition is preserved as an llvm.assume;
///  - switch edges to \p BB are redirected to a new `unreachable` block.
///
/// \param BB  Block whose PHI incoming values are inspected.
/// \param DTU If non-null, receives the corresponding CFG edge updates.
/// \param AC  If non-null, any newly created assume is registered here.
/// \returns true if an edge was removed (at most one terminator is rewritten
///          per call; the caller re-runs simplification afterwards).
static bool removeUndefIntroducingPredecessor(BasicBlock *BB,
                                              DomTreeUpdater *DTU,
                                              AssumptionCache *AC) {
  for (PHINode &PHI : BB->phis())
    for (unsigned i = 0, e = PHI.getNumIncomingValues(); i != e; ++i)
      if (passingValueIsAlwaysUndefined(V: PHI.getIncomingValue(i), I: &PHI)) {
        BasicBlock *Predecessor = PHI.getIncomingBlock(i);
        Instruction *T = Predecessor->getTerminator();
        IRBuilder<> Builder(T);
        if (BranchInst *BI = dyn_cast<BranchInst>(Val: T)) {
          // Drop PHI entries for the edge being removed before rewriting the
          // terminator.
          BB->removePredecessor(Pred: Predecessor);
          // Turn unconditional branches into unreachables and remove the dead
          // destination from conditional branches.
          if (BI->isUnconditional())
            Builder.CreateUnreachable();
          else {
            // Preserve guarding condition in assume, because it might not be
            // inferrable from any dominating condition.
            Value *Cond = BI->getCondition();
            CallInst *Assumption;
            // If the UB successor is taken on 'true', the surviving path
            // implies the condition is false (and vice versa).
            if (BI->getSuccessor(i: 0) == BB)
              Assumption = Builder.CreateAssumption(Cond: Builder.CreateNot(V: Cond));
            else
              Assumption = Builder.CreateAssumption(Cond);
            if (AC)
              AC->registerAssumption(CI: cast<AssumeInst>(Val: Assumption));
            Builder.CreateBr(Dest: BI->getSuccessor(i: 0) == BB ? BI->getSuccessor(i: 1)
                                                      : BI->getSuccessor(i: 0));
          }
          BI->eraseFromParent();
          if (DTU)
            DTU->applyUpdates(Updates: {{DominatorTree::Delete, Predecessor, BB}});
          return true;
        } else if (SwitchInst *SI = dyn_cast<SwitchInst>(Val: T)) {
          // Redirect all branches leading to UB into
          // a newly created unreachable block.
          BasicBlock *Unreachable = BasicBlock::Create(
              Context&: Predecessor->getContext(), Name: "unreachable", Parent: BB->getParent(), InsertBefore: BB);
          Builder.SetInsertPoint(Unreachable);
          // The new block contains only one instruction: Unreachable
          Builder.CreateUnreachable();
          // Each case edge is a distinct predecessor entry in BB's PHIs, so
          // removePredecessor is called once per redirected edge.
          for (const auto &Case : SI->cases())
            if (Case.getCaseSuccessor() == BB) {
              BB->removePredecessor(Pred: Predecessor);
              Case.setSuccessor(Unreachable);
            }
          if (SI->getDefaultDest() == BB) {
            BB->removePredecessor(Pred: Predecessor);
            SI->setDefaultDest(Unreachable);
          }

          if (DTU)
            DTU->applyUpdates(
                Updates: { { DominatorTree::Insert, Predecessor, Unreachable },
                    { DominatorTree::Delete, Predecessor, BB } });
          return true;
        }
      }

  return false;
}
| 8264 | |
/// Run one round of CFG peephole simplification on \p BB.
///
/// Transforms are applied in a fixed order — dead-block removal, terminator
/// constant folding, duplicate-PHI elimination, UB-introducing-predecessor
/// removal, merge-into-predecessor, common-code sinking / invoke merging,
/// two-entry PHI folding, then a terminator-opcode-specific simplification.
/// Several transforms return immediately (some via requestResimplify()) so
/// the caller's driver loop starts over from a clean state.
///
/// \returns true if any change was made to the IR.
bool SimplifyCFGOpt::simplifyOnce(BasicBlock *BB) {
  bool Changed = false;

  assert(BB && BB->getParent() && "Block not embedded in function!");
  assert(BB->getTerminator() && "Degenerate basic block encountered!");

  // Remove basic blocks that have no predecessors (except the entry block)...
  // or that just have themself as a predecessor. These are unreachable.
  if ((pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) ||
      BB->getSinglePredecessor() == BB) {
    LLVM_DEBUG(dbgs() << "Removing BB: \n" << *BB);
    DeleteDeadBlock(BB, DTU);
    return true;
  }

  // Check to see if we can constant propagate this terminator instruction
  // away...
  Changed |= ConstantFoldTerminator(BB, /*DeleteDeadConditions=*/true,
                                    /*TLI=*/nullptr, DTU);

  // Check for and eliminate duplicate PHI nodes in this block.
  Changed |= EliminateDuplicatePHINodes(BB);

  // Check for and remove branches that will always cause undefined behavior.
  if (removeUndefIntroducingPredecessor(BB, DTU, AC: Options.AC))
    return requestResimplify();

  // Merge basic blocks into their predecessor if there is only one distinct
  // pred, and if there is only one distinct successor of the predecessor, and
  // if there are no PHI nodes.
  if (MergeBlockIntoPredecessor(BB, DTU))
    return true;

  if (SinkCommon && Options.SinkCommonInsts)
    if (sinkCommonCodeFromPredecessors(BB, DTU) ||
        mergeCompatibleInvokes(BB, DTU)) {
      // sinkCommonCodeFromPredecessors() does not automatically CSE PHI's,
      // so we may now have duplicate PHI's.
      // Let's rerun EliminateDuplicatePHINodes() first,
      // before foldTwoEntryPHINode() potentially converts them into select's,
      // after which we'd need a whole EarlyCSE pass run to cleanup them.
      return true;
    }

  IRBuilder<> Builder(BB);

  if (Options.SpeculateBlocks &&
      !BB->getParent()->hasFnAttribute(Kind: Attribute::OptForFuzzing)) {
    // If there is a trivial two-entry PHI node in this basic block, and we can
    // eliminate it, do so now.
    if (auto *PN = dyn_cast<PHINode>(Val: BB->begin()))
      if (PN->getNumIncomingValues() == 2)
        if (foldTwoEntryPHINode(PN, TTI, DTU, AC: Options.AC, DL,
                                SpeculateUnpredictables: Options.SpeculateUnpredictables))
          return true;
  }

  // Dispatch on the terminator kind; opcodes without a case (e.g. ret,
  // invoke) have no dedicated simplification here.
  Instruction *Terminator = BB->getTerminator();
  Builder.SetInsertPoint(Terminator);
  switch (Terminator->getOpcode()) {
  case Instruction::Br:
    Changed |= simplifyBranch(Branch: cast<BranchInst>(Val: Terminator), Builder);
    break;
  case Instruction::Resume:
    Changed |= simplifyResume(RI: cast<ResumeInst>(Val: Terminator), Builder);
    break;
  case Instruction::CleanupRet:
    Changed |= simplifyCleanupReturn(RI: cast<CleanupReturnInst>(Val: Terminator));
    break;
  case Instruction::Switch:
    Changed |= simplifySwitch(SI: cast<SwitchInst>(Val: Terminator), Builder);
    break;
  case Instruction::Unreachable:
    Changed |= simplifyUnreachable(UI: cast<UnreachableInst>(Val: Terminator));
    break;
  case Instruction::IndirectBr:
    Changed |= simplifyIndirectBr(IBI: cast<IndirectBrInst>(Val: Terminator));
    break;
  }

  return Changed;
}
| 8347 | |
| 8348 | bool SimplifyCFGOpt::run(BasicBlock *BB) { |
| 8349 | bool Changed = false; |
| 8350 | |
| 8351 | // Repeated simplify BB as long as resimplification is requested. |
| 8352 | do { |
| 8353 | Resimplify = false; |
| 8354 | |
| 8355 | // Perform one round of simplifcation. Resimplify flag will be set if |
| 8356 | // another iteration is requested. |
| 8357 | Changed |= simplifyOnce(BB); |
| 8358 | } while (Resimplify); |
| 8359 | |
| 8360 | return Changed; |
| 8361 | } |
| 8362 | |
| 8363 | bool llvm::simplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI, |
| 8364 | DomTreeUpdater *DTU, const SimplifyCFGOptions &Options, |
| 8365 | ArrayRef<WeakVH> ) { |
| 8366 | return SimplifyCFGOpt(TTI, DTU, BB->getDataLayout(), LoopHeaders, |
| 8367 | Options) |
| 8368 | .run(BB); |
| 8369 | } |
| 8370 | |