SimplifyCFG.cpp source code [llvm_projects/llvm/lib/Transforms/Utils/SimplifyCFG.cpp]

1	//===- SimplifyCFG.cpp - Code to perform CFG simplification ---------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// Peephole optimize the CFG.
10	//
11	//===----------------------------------------------------------------------===//
12
13	#include "llvm/ADT/APInt.h"
14	#include "llvm/ADT/ArrayRef.h"
15	#include "llvm/ADT/DenseMap.h"
16	#include "llvm/ADT/MapVector.h"
17	#include "llvm/ADT/STLExtras.h"
18	#include "llvm/ADT/Sequence.h"
19	#include "llvm/ADT/SetOperations.h"
20	#include "llvm/ADT/SetVector.h"
21	#include "llvm/ADT/SmallPtrSet.h"
22	#include "llvm/ADT/SmallVector.h"
23	#include "llvm/ADT/Statistic.h"
24	#include "llvm/ADT/StringRef.h"
25	#include "llvm/Analysis/AssumptionCache.h"
26	#include "llvm/Analysis/CaptureTracking.h"
27	#include "llvm/Analysis/ConstantFolding.h"
28	#include "llvm/Analysis/DomTreeUpdater.h"
29	#include "llvm/Analysis/GuardUtils.h"
30	#include "llvm/Analysis/InstructionSimplify.h"
31	#include "llvm/Analysis/Loads.h"
32	#include "llvm/Analysis/MemorySSA.h"
33	#include "llvm/Analysis/MemorySSAUpdater.h"
34	#include "llvm/Analysis/TargetTransformInfo.h"
35	#include "llvm/Analysis/ValueTracking.h"
36	#include "llvm/IR/Attributes.h"
37	#include "llvm/IR/BasicBlock.h"
38	#include "llvm/IR/CFG.h"
39	#include "llvm/IR/Constant.h"
40	#include "llvm/IR/ConstantRange.h"
41	#include "llvm/IR/Constants.h"
42	#include "llvm/IR/DataLayout.h"
43	#include "llvm/IR/DebugInfo.h"
44	#include "llvm/IR/DerivedTypes.h"
45	#include "llvm/IR/Function.h"
46	#include "llvm/IR/GlobalValue.h"
47	#include "llvm/IR/GlobalVariable.h"
48	#include "llvm/IR/IRBuilder.h"
49	#include "llvm/IR/InstrTypes.h"
50	#include "llvm/IR/Instruction.h"
51	#include "llvm/IR/Instructions.h"
52	#include "llvm/IR/IntrinsicInst.h"
53	#include "llvm/IR/LLVMContext.h"
54	#include "llvm/IR/MDBuilder.h"
55	#include "llvm/IR/MemoryModelRelaxationAnnotations.h"
56	#include "llvm/IR/Metadata.h"
57	#include "llvm/IR/Module.h"
58	#include "llvm/IR/NoFolder.h"
59	#include "llvm/IR/Operator.h"
60	#include "llvm/IR/PatternMatch.h"
61	#include "llvm/IR/ProfDataUtils.h"
62	#include "llvm/IR/Type.h"
63	#include "llvm/IR/Use.h"
64	#include "llvm/IR/User.h"
65	#include "llvm/IR/Value.h"
66	#include "llvm/IR/ValueHandle.h"
67	#include "llvm/Support/BranchProbability.h"
68	#include "llvm/Support/Casting.h"
69	#include "llvm/Support/CommandLine.h"
70	#include "llvm/Support/Debug.h"
71	#include "llvm/Support/ErrorHandling.h"
72	#include "llvm/Support/KnownBits.h"
73	#include "llvm/Support/MathExtras.h"
74	#include "llvm/Support/raw_ostream.h"
75	#include "llvm/Transforms/Utils/BasicBlockUtils.h"
76	#include "llvm/Transforms/Utils/Cloning.h"
77	#include "llvm/Transforms/Utils/Local.h"
78	#include "llvm/Transforms/Utils/LockstepReverseIterator.h"
79	#include "llvm/Transforms/Utils/ValueMapper.h"
80	#include <algorithm>
81	#include <cassert>
82	#include <climits>
83	#include <cmath>
84	#include <cstddef>
85	#include <cstdint>
86	#include <iterator>
87	#include <map>
88	#include <optional>
89	#include <set>
90	#include <tuple>
91	#include <utility>
92	#include <vector>
93
94	using namespace llvm;
95	using namespace PatternMatch;
96
97	#define DEBUG_TYPE "simplifycfg"
98
99	namespace llvm {
100
101	cl::opt<bool> RequireAndPreserveDomTree(
102	"simplifycfg-require-and-preserve-domtree", cl::Hidden,
103
104	cl::desc (
105	"Temporary development switch used to gradually uplift SimplifyCFG "
106	"into preserving DomTree,"));
107
108	// Chosen as 2 so as to be cheap, but still to have enough power to fold
109	// a select, so the "clamp" idiom (of a min followed by a max) will be caught.
110	// To catch this, we need to fold a compare and a select, hence '2' being the
111	// minimum reasonable default.
112	static cl::opt<unsigned> PHINodeFoldingThreshold(
113	"phi-node-folding-threshold", cl::Hidden, cl::init(Val: `2`),
114	cl::desc (
115	"Control the amount of phi node folding to perform (default = 2)"));
116
117	static cl::opt<unsigned> TwoEntryPHINodeFoldingThreshold(
118	"two-entry-phi-node-folding-threshold", cl::Hidden, cl::init(Val: `4`),
119	cl::desc ("Control the maximal total instruction cost that we are willing "
120	"to speculatively execute to fold a 2-entry PHI node into a "
121	"select (default = 4)"));
122
123	static cl::opt<bool>
124	HoistCommon("simplifycfg-hoist-common", cl::Hidden, cl::init(Val: true),
125	cl::desc ("Hoist common instructions up to the parent block"));
126
127	static cl::opt<bool> HoistLoadsWithCondFaulting(
128	"simplifycfg-hoist-loads-with-cond-faulting", cl::Hidden, cl::init(Val: true),
129	cl::desc ("Hoist loads if the target supports conditional faulting"));
130
131	static cl::opt<bool> HoistStoresWithCondFaulting(
132	"simplifycfg-hoist-stores-with-cond-faulting", cl::Hidden, cl::init(Val: true),
133	cl::desc ("Hoist stores if the target supports conditional faulting"));
134
135	static cl::opt<unsigned> HoistLoadsStoresWithCondFaultingThreshold(
136	"hoist-loads-stores-with-cond-faulting-threshold", cl::Hidden, cl::init(Val: `6`),
137	cl::desc ("Control the maximal conditional load/store that we are willing "
138	"to speculatively execute to eliminate conditional branch "
139	"(default = 6)"));
140
141	static cl::opt<unsigned>
142	HoistCommonSkipLimit("simplifycfg-hoist-common-skip-limit", cl::Hidden,
143	cl::init(Val: `20`),
144	cl::desc ("Allow reordering across at most this many "
145	"instructions when hoisting"));
146
147	static cl::opt<bool>
148	SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(Val: true),
149	cl::desc ("Sink common instructions down to the end block"));
150
151	static cl::opt<bool> HoistCondStores(
152	"simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(Val: true),
153	cl::desc ("Hoist conditional stores if an unconditional store precedes"));
154
155	static cl::opt<bool> MergeCondStores(
156	"simplifycfg-merge-cond-stores", cl::Hidden, cl::init(Val: true),
157	cl::desc ("Hoist conditional stores even if an unconditional store does not "
158	"precede - hoist multiple conditional stores into a single "
159	"predicated store"));
160
161	static cl::opt<bool> MergeCondStoresAggressively(
162	"simplifycfg-merge-cond-stores-aggressively", cl::Hidden, cl::init(Val: false),
163	cl::desc ("When merging conditional stores, do so even if the resultant "
164	"basic blocks are unlikely to be if-converted as a result"));
165
166	static cl::opt<bool> SpeculateOneExpensiveInst(
167	"speculate-one-expensive-inst", cl::Hidden, cl::init(Val: true),
168	cl::desc ("Allow exactly one expensive instruction to be speculatively "
169	"executed"));
170
171	static cl::opt<unsigned> MaxSpeculationDepth(
172	"max-speculation-depth", cl::Hidden, cl::init(Val: `10`),
173	cl::desc ("Limit maximum recursion depth when calculating costs of "
174	"speculatively executed instructions"));
175
176	static cl::opt<int>
177	MaxSmallBlockSize("simplifycfg-max-small-block-size", cl::Hidden,
178	cl::init(Val: `10`),
179	cl::desc ("Max size of a block which is still considered "
180	"small enough to thread through"));
181
182	// Two is chosen to allow one negation and a logical combine.
183	static cl::opt<unsigned>
184	BranchFoldThreshold("simplifycfg-branch-fold-threshold", cl::Hidden,
185	cl::init(Val: `2`),
186	cl::desc ("Maximum cost of combining conditions when "
187	"folding branches"));
188
189	static cl::opt<unsigned> BranchFoldToCommonDestVectorMultiplier(
190	"simplifycfg-branch-fold-common-dest-vector-multiplier", cl::Hidden,
191	cl::init(Val: `2`),
192	cl::desc ("Multiplier to apply to threshold when determining whether or not "
193	"to fold branch to common destination when vector operations are "
194	"present"));
195
196	static cl::opt<bool> EnableMergeCompatibleInvokes(
197	"simplifycfg-merge-compatible-invokes", cl::Hidden, cl::init(Val: true),
198	cl::desc ("Allow SimplifyCFG to merge invokes together when appropriate"));
199
200	static cl::opt<unsigned> MaxSwitchCasesPerResult(
201	"max-switch-cases-per-result", cl::Hidden, cl::init(Val: `16`),
202	cl::desc ("Limit cases to analyze when converting a switch to select"));
203
204	static cl::opt<unsigned> MaxJumpThreadingLiveBlocks(
205	"max-jump-threading-live-blocks", cl::Hidden, cl::init(Val: `24`),
206	cl::desc ("Limit number of blocks a define in a threaded block is allowed "
207	"to be live in"));
208
209	extern cl::opt<bool> ProfcheckDisableMetadataFixes;
210
211	} // end namespace llvm
212
213	STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps");
214	STATISTIC(NumLinearMaps,
215	"Number of switch instructions turned into linear mapping");
216	STATISTIC(NumLookupTables,
217	"Number of switch instructions turned into lookup tables");
218	STATISTIC(
219	NumLookupTablesHoles,
220	"Number of switch instructions turned into lookup tables (holes checked)");
221	STATISTIC(NumTableCmpReuses, "Number of reused switch table lookup compares");
222	STATISTIC(NumFoldValueComparisonIntoPredecessors,
223	"Number of value comparisons folded into predecessor basic blocks");
224	STATISTIC(NumFoldBranchToCommonDest,
225	"Number of branches folded into predecessor basic block");
226	STATISTIC(
227	NumHoistCommonCode,
228	"Number of common instruction 'blocks' hoisted up to the begin block");
229	STATISTIC(NumHoistCommonInstrs,
230	"Number of common instructions hoisted up to the begin block");
231	STATISTIC(NumSinkCommonCode,
232	"Number of common instruction 'blocks' sunk down to the end block");
233	STATISTIC(NumSinkCommonInstrs,
234	"Number of common instructions sunk down to the end block");
235	STATISTIC(NumSpeculations, "Number of speculative executed instructions");
236	STATISTIC(NumInvokes,
237	"Number of invokes with empty resume blocks simplified into calls");
238	STATISTIC(NumInvokesMerged, "Number of invokes that were merged together");
239	STATISTIC(NumInvokeSetsFormed, "Number of invoke sets that were formed");
240
241	namespace {
242
243	// The first field contains the value that the switch produces when a certain
244	// case group is selected, and the second field is a vector containing the
245	// cases composing the case group.
246	using SwitchCaseResultVectorTy =
247	SmallVector<std::pair<Constant , SmallVector<ConstantInt , `4`>>, `2`>;
248
249	// The first field contains the phi node that generates a result of the switch
250	// and the second field contains the value generated for a certain case in the
251	// switch for that PHI.
252	using SwitchCaseResultsTy = SmallVector<std::pair<PHINode , Constant >, `4`>;
253
254	/// ValueEqualityComparisonCase - Represents a case of a switch.
255	struct ValueEqualityComparisonCase {
256	ConstantInt *Value;
257	BasicBlock *Dest;
258
259	ValueEqualityComparisonCase(ConstantInt Value, BasicBlock Dest)
260	: Value(Value), Dest(Dest) {}
261
262	bool operator<(ValueEqualityComparisonCase RHS) const {
263	// Comparing pointers is ok as we only rely on the order for uniquing.
264	return Value < RHS.Value;
265	}
266
267	bool operator==(BasicBlock RHSDest) const* { return Dest == RHSDest; }
268	};
269
270	class SimplifyCFGOpt {
271	const TargetTransformInfo &TTI;
272	DomTreeUpdater *DTU;
273	const DataLayout &DL;
274	ArrayRef<WeakVH> LoopHeaders;
275	const SimplifyCFGOptions &Options;
276	bool Resimplify;
277
278	Value isValueEqualityComparison(Instruction TI);
279	BasicBlock *getValueEqualityComparisonCases(
280	Instruction *TI, std::vector<ValueEqualityComparisonCase> &Cases);
281	bool simplifyEqualityComparisonWithOnlyPredecessor(Instruction *TI,
282	BasicBlock *Pred,
283	IRBuilder<> &Builder);
284	bool performValueComparisonIntoPredecessorFolding(Instruction TI, Value &CV,
285	Instruction *PTI,
286	IRBuilder<> &Builder);
287	bool foldValueComparisonIntoPredecessors(Instruction *TI,
288	IRBuilder<> &Builder);
289
290	bool simplifyResume(ResumeInst *RI, IRBuilder<> &Builder);
291	bool simplifySingleResume(ResumeInst *RI);
292	bool simplifyCommonResume(ResumeInst *RI);
293	bool simplifyCleanupReturn(CleanupReturnInst *RI);
294	bool simplifyUnreachable(UnreachableInst *UI);
295	bool simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder);
296	bool simplifyDuplicateSwitchArms(SwitchInst SI, DomTreeUpdater DTU);
297	bool simplifyIndirectBr(IndirectBrInst *IBI);
298	bool simplifyUncondBranch(UncondBrInst *BI, IRBuilder<> &Builder);
299	bool simplifyCondBranch(CondBrInst *BI, IRBuilder<> &Builder);
300	bool foldCondBranchOnValueKnownInPredecessor(CondBrInst *BI);
301
302	bool tryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI,
303	IRBuilder<> &Builder);
304	bool tryToSimplifyUncondBranchWithICmpSelectInIt(ICmpInst *ICI,
305	SelectInst *Select,
306	IRBuilder<> &Builder);
307	bool hoistCommonCodeFromSuccessors(Instruction TI, bool* AllInstsEqOnly);
308	bool hoistSuccIdenticalTerminatorToSwitchOrIf(
309	Instruction TI, Instruction I1,
310	SmallVectorImpl<Instruction *> &OtherSuccTIs,
311	ArrayRef<BasicBlock *> UniqueSuccessors);
312	bool speculativelyExecuteBB(CondBrInst BI, BasicBlock ThenBB);
313	bool simplifyTerminatorOnSelect(Instruction OldTerm, Value Cond,
314	BasicBlock TrueBB, BasicBlock FalseBB,
315	uint32_t TrueWeight, uint32_t FalseWeight);
316	bool simplifyBranchOnICmpChain(CondBrInst *BI, IRBuilder<> &Builder,
317	const DataLayout &DL);
318	bool simplifySwitchOnSelect(SwitchInst SI, SelectInst Select);
319	bool simplifyIndirectBrOnSelect(IndirectBrInst IBI, SelectInst SI);
320	bool turnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder);
321	bool simplifyDuplicatePredecessors(BasicBlock Succ, DomTreeUpdater DTU);
322
323	public:
324	SimplifyCFGOpt(const TargetTransformInfo &TTI, DomTreeUpdater *DTU,
325	const DataLayout &DL, ArrayRef<WeakVH> LoopHeaders,
326	const SimplifyCFGOptions &Opts)
327	: TTI(TTI), DTU(DTU), DL(DL), LoopHeaders (LoopHeaders), Options(Opts) {
328	assert((!DTU \|\| !DTU->hasPostDomTree()) &&
329	"SimplifyCFG is not yet capable of maintaining validity of a "
330	"PostDomTree, so don't ask for it.");
331	}
332
333	bool simplifyOnce(BasicBlock *BB);
334	bool run(BasicBlock *BB);
335
336	// Helper to set Resimplify and return change indication.
337	bool requestResimplify() {
338	Resimplify = true;
339	return true;
340	}
341	};
342
343	// we synthesize a \|\| b as select a, true, b
344	// we synthesize a && b as select a, b, false
345	// this function determines if SI is playing one of those roles.
346	[[maybe_unused]] bool
347	isSelectInRoleOfConjunctionOrDisjunction(const SelectInst *SI) {
348	return ((isa<ConstantInt>(Val: SI->getTrueValue()) &&
349	(dyn_cast<ConstantInt>(Val: SI->getTrueValue())->isOne())) \|\|
350	(isa<ConstantInt>(Val: SI->getFalseValue()) &&
351	(dyn_cast<ConstantInt>(Val: SI->getFalseValue())->isNullValue())));
352	}
353
354	} // end anonymous namespace
355
356	/// Return true if all the PHI nodes in the basic block \p BB
357	/// receive compatible (identical) incoming values when coming from
358	/// all of the predecessor blocks that are specified in \p IncomingBlocks.
359	///
360	/// Note that if the values aren't exactly identical, but \p EquivalenceSet
361	/// is provided, and both* of the values are present in the set,*
362	/// then they are considered equal.
363	static bool incomingValuesAreCompatible(
364	BasicBlock BB, ArrayRef<BasicBlock > IncomingBlocks,
365	SmallPtrSetImpl<Value > EquivalenceSet = nullptr) {
366	assert(IncomingBlocks.size() == `2` &&
367	"Only for a pair of incoming blocks at the time!");
368
369	// FIXME: it is okay if one of the incoming values is an `undef` value,
370	// iff the other incoming value is guaranteed to be a non-poison value.
371	// FIXME: it is okay if one of the incoming values is a `poison` value.
372	return all_of(Range: BB->phis(), P: [IncomingBlocks, EquivalenceSet](PHINode &PN) {
373	Value *IV0 = PN.getIncomingValueForBlock(BB: IncomingBlocks [`0`]);
374	Value *IV1 = PN.getIncomingValueForBlock(BB: IncomingBlocks [`1`]);
375	if (IV0 == IV1)
376	return true;
377	if (EquivalenceSet && EquivalenceSet->contains(Ptr: IV0) &&
378	EquivalenceSet->contains(Ptr: IV1))
379	return true;
380	return false;
381	});
382	}
383
384	/// Return true if it is safe to merge these two
385	/// terminator instructions together.
386	static bool
387	safeToMergeTerminators(Instruction SI1, Instruction SI2,
388	SmallSetVector<BasicBlock , `4`> FailBlocks = nullptr) {
389	if (SI1 == SI2)
390	return false; // Can't merge with self!
391
392	// It is not safe to merge these two switch instructions if they have a common
393	// successor, and if that successor has a PHI node, and if that* PHI node has*
394	// conflicting incoming values from the two switch blocks.
395	BasicBlock *SI1BB = SI1->getParent();
396	BasicBlock *SI2BB = SI2->getParent();
397
398	SmallPtrSet<BasicBlock *, `16`> SI1Succs(llvm::from_range, successors(BB: SI1BB));
399	bool Fail = false;
400	for (BasicBlock *Succ : successors(BB: SI2BB)) {
401	if (!SI1Succs.count(Ptr: Succ))
402	continue;
403	if (incomingValuesAreCompatible(BB: Succ, IncomingBlocks: {SI1BB, SI2BB}))
404	continue;
405	Fail = true;
406	if (FailBlocks)
407	FailBlocks->insert(X: Succ);
408	else
409	break;
410	}
411
412	return !Fail;
413	}
414
415	/// Update PHI nodes in Succ to indicate that there will now be entries in it
416	/// from the 'NewPred' block. The values that will be flowing into the PHI nodes
417	/// will be the same as those coming in from ExistPred, an existing predecessor
418	/// of Succ.
419	static void addPredecessorToBlock(BasicBlock Succ, BasicBlock NewPred,
420	BasicBlock *ExistPred,
421	MemorySSAUpdater MSSAU = nullptr*) {
422	for (PHINode &PN : Succ->phis())
423	PN.addIncoming(V: PN.getIncomingValueForBlock(BB: ExistPred), BB: NewPred);
424	if (MSSAU)
425	if (auto *MPhi = MSSAU->getMemorySSA()->getMemoryAccess(BB: Succ))
426	MPhi->addIncoming(V: MPhi->getIncomingValueForBlock(BB: ExistPred), BB: NewPred);
427	}
428
429	/// Compute an abstract "cost" of speculating the given instruction,
430	/// which is assumed to be safe to speculate. TCC_Free means cheap,
431	/// TCC_Basic means less cheap, and TCC_Expensive means prohibitively
432	/// expensive.
433	static InstructionCost computeSpeculationCost(const User *I,
434	const TargetTransformInfo &TTI) {
435	return TTI.getInstructionCost(U: I, CostKind: TargetTransformInfo::TCK_SizeAndLatency);
436	}
437
438	/// If we have a merge point of an "if condition" as accepted above,
439	/// return true if the specified value dominates the block. We don't handle
440	/// the true generality of domination here, just a special case which works
441	/// well enough for us.
442	///
443	/// If AggressiveInsts is non-null, and if V does not dominate BB, we check to
444	/// see if V (which must be an instruction) and its recursive operands
445	/// that do not dominate BB have a combined cost lower than Budget and
446	/// are non-trapping. If both are true, the instruction is inserted into the
447	/// set and true is returned.
448	///
449	/// The cost for most non-trapping instructions is defined as 1 except for
450	/// Select whose cost is 2.
451	///
452	/// After this function returns, Cost is increased by the cost of
453	/// V plus its non-dominating operands. If that cost is greater than
454	/// Budget, false is returned and Cost is undefined.
455	static bool dominatesMergePoint(
456	Value V, BasicBlock BB, Instruction *InsertPt,
457	SmallPtrSetImpl<Instruction *> &AggressiveInsts, InstructionCost &Cost,
458	InstructionCost Budget, const TargetTransformInfo &TTI, AssumptionCache *AC,
459	SmallPtrSetImpl<Instruction > &ZeroCostInstructions, unsigned* Depth = `0`) {
460	// It is possible to hit a zero-cost cycle (phi/gep instructions for example),
461	// so limit the recursion depth.
462	// TODO: While this recursion limit does prevent pathological behavior, it
463	// would be better to track visited instructions to avoid cycles.
464	if (Depth == MaxSpeculationDepth)
465	return false;
466
467	Instruction *I = dyn_cast<Instruction>(Val: V);
468	if (!I) {
469	// Non-instructions dominate all instructions and can be executed
470	// unconditionally.
471	return true;
472	}
473	BasicBlock *PBB = I->getParent();
474
475	// We don't want to allow weird loops that might have the "if condition" in
476	// the bottom of this block.
477	if (PBB == BB)
478	return false;
479
480	// If this instruction is defined in a block that contains an unconditional
481	// branch to BB, then it must be in the 'conditional' part of the "if
482	// statement". If not, it definitely dominates the region.
483	UncondBrInst *BI = dyn_cast<UncondBrInst>(Val: PBB->getTerminator());
484	if (!BI \|\| BI->getSuccessor() != BB)
485	return true;
486
487	// If we have seen this instruction before, don't count it again.
488	if (AggressiveInsts.count(Ptr: I))
489	return true;
490
491	// Okay, it looks like the instruction IS in the "condition". Check to
492	// see if it's a cheap instruction to unconditionally compute, and if it
493	// only uses stuff defined outside of the condition. If so, hoist it out.
494	if (!isSafeToSpeculativelyExecute(I, CtxI: InsertPt, AC))
495	return false;
496
497	// Overflow arithmetic instruction plus extract value are usually generated
498	// when a division is being replaced. But, in this case, the zero check may
499	// still be kept in the code. In that case it would be worth to hoist these
500	// two instruction out of the basic block. Let's treat this pattern as one
501	// single cheap instruction here!
502	WithOverflowInst *OverflowInst;
503	if (match(V: I, P: m_ExtractValue<`1`>(V: m_OneUse(SubPattern: m_WithOverflowInst(I&: OverflowInst))))) {
504	ZeroCostInstructions.insert(Ptr: OverflowInst);
505	Cost += `1`;
506	} else if (!ZeroCostInstructions.contains(Ptr: I))
507	Cost += computeSpeculationCost(I, TTI);
508
509	// Allow exactly one instruction to be speculated regardless of its cost
510	// (as long as it is safe to do so).
511	// This is intended to flatten the CFG even if the instruction is a division
512	// or other expensive operation. The speculation of an expensive instruction
513	// is expected to be undone in CodeGenPrepare if the speculation has not
514	// enabled further IR optimizations.
515	if (Cost > Budget &&
516	(!SpeculateOneExpensiveInst \|\| !AggressiveInsts.empty() \|\| Depth > `0` \|\|
517	!Cost.isValid()))
518	return false;
519
520	// Okay, we can only really hoist these out if their operands do
521	// not take us over the cost threshold.
522	for (Use &Op : I->operands())
523	if (!dominatesMergePoint(V: Op, BB, InsertPt, AggressiveInsts, Cost, Budget,
524	TTI, AC, ZeroCostInstructions, Depth: Depth + `1`))
525	return false;
526	// Okay, it's safe to do this! Remember this instruction.
527	AggressiveInsts.insert(Ptr: I);
528	return true;
529	}
530
531	/// Extract ConstantInt from value, looking through IntToPtr
532	/// and PointerNullValue. Return NULL if value is not a constant int.
533	static ConstantInt getConstantInt(Value V, const DataLayout &DL) {
534	// Normal constant int.
535	ConstantInt *CI = dyn_cast<ConstantInt>(Val: V);
536	if (CI \|\| !isa<Constant>(Val: V) \|\| !V->getType()->isPointerTy())
537	return CI;
538
539	// It is not safe to look through inttoptr or ptrtoint when using unstable
540	// pointer types.
541	if (DL.hasUnstableRepresentation(Ty: V->getType()))
542	return nullptr;
543
544	// This is some kind of pointer constant. Turn it into a pointer-sized
545	// ConstantInt if possible.
546	IntegerType *IntPtrTy = cast<IntegerType>(Val: DL.getIntPtrType(V->getType()));
547
548	// Null pointer means 0, see SelectionDAGBuilder::getValue(const Value).*
549	if (isa<ConstantPointerNull>(Val: V))
550	return ConstantInt::get(Ty: IntPtrTy, V: `0`);
551
552	// IntToPtr const int, we can look through this if the semantics of
553	// inttoptr for this address space are a simple (truncating) bitcast.
554	if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Val: V))
555	if (CE->getOpcode() == Instruction::IntToPtr)
556	if (ConstantInt *CI = dyn_cast<ConstantInt>(Val: CE->getOperand(i_nocapture: `0`))) {
557	// The constant is very likely to have the right type already.
558	if (CI->getType() == IntPtrTy)
559	return CI;
560	else
561	return cast<ConstantInt>(
562	Val: ConstantFoldIntegerCast(C: CI, DestTy: IntPtrTy, /isSigned=/IsSigned: false, DL));
563	}
564	return nullptr;
565	}
566
567	namespace {
568
569	/// Given a chain of or (\|\|) or and (&&) comparison of a value against a
570	/// constant, this will try to recover the information required for a switch
571	/// structure.
572	/// It will depth-first traverse the chain of comparison, seeking for patterns
573	/// like %a == 12 or %a < 4 and combine them to produce a set of integer
574	/// representing the different cases for the switch.
575	/// Note that if the chain is composed of '\|\|' it will build the set of elements
576	/// that matches the comparisons (i.e. any of this value validate the chain)
577	/// while for a chain of '&&' it will build the set elements that make the test
578	/// fail.
579	struct ConstantComparesGatherer {
580	const DataLayout &DL;
581
582	/// Value found for the switch comparison
583	Value CompValue = nullptr*;
584
585	/// Extra clause to be checked before the switch
586	Value Extra = nullptr*;
587
588	/// Set of integers to match in switch
589	SmallVector<ConstantInt *, `8`> Vals;
590
591	/// Number of comparisons matched in the and/or chain
592	unsigned UsedICmps = `0`;
593
594	/// If the elements in Vals matches the comparisons
595	bool IsEq = false;
596
597	// Used to check if the first matched CompValue shall be the Extra check.
598	bool IgnoreFirstMatch = false;
599	bool MultipleMatches = false;
600
601	/// Construct and compute the result for the comparison instruction Cond
602	ConstantComparesGatherer(Instruction Cond, const* DataLayout &DL) : DL(DL) {
603	gather(V: Cond);
604	if (CompValue \|\| !MultipleMatches)
605	return;
606	Extra = nullptr;
607	Vals.clear();
608	UsedICmps = `0`;
609	IgnoreFirstMatch = true;
610	gather(V: Cond);
611	}
612
613	ConstantComparesGatherer(const ConstantComparesGatherer &) = delete;
614	ConstantComparesGatherer &
615	operator=(const ConstantComparesGatherer &) = delete;
616
617	private:
618	/// Try to set the current value used for the comparison, it succeeds only if
619	/// it wasn't set before or if the new value is the same as the old one
620	bool setValueOnce(Value *NewVal) {
621	if (IgnoreFirstMatch) {
622	IgnoreFirstMatch = false;
623	return false;
624	}
625	if (CompValue && CompValue != NewVal) {
626	MultipleMatches = true;
627	return false;
628	}
629	CompValue = NewVal;
630	return true;
631	}
632
633	/// Try to match Instruction "I" as a comparison against a constant and
634	/// populates the array Vals with the set of values that match (or do not
635	/// match depending on isEQ).
636	/// Return false on failure. On success, the Value the comparison matched
637	/// against is placed in CompValue.
638	/// If CompValue is already set, the function is expected to fail if a match
639	/// is found but the value compared to is different.
640	bool matchInstruction(Instruction I, bool* isEQ) {
641	if (match(V: I, P: m_Not(V: m_Instruction(I))))
642	isEQ = !isEQ;
643
644	Value *Val;
645	if (match(V: I, P: m_NUWTrunc(Op: m_Value(V&: Val)))) {
646	// If we already have a value for the switch, it has to match!
647	if (!setValueOnce(Val))
648	return false;
649	UsedICmps++;
650	Vals.push_back(Elt: ConstantInt::get(Ty: cast<IntegerType>(Val: Val->getType()), V: isEQ));
651	return true;
652	}
653	// If this is an icmp against a constant, handle this as one of the cases.
654	ICmpInst *ICI;
655	ConstantInt *C;
656	if (!((ICI = dyn_cast<ICmpInst>(Val: I)) &&
657	(C = getConstantInt(V: I->getOperand(i: `1`), DL)))) {
658	return false;
659	}
660
661	Value *RHSVal;
662	const APInt *RHSC;
663
664	// Pattern match a special case
665	// (x & ~2^z) == y --> x == y \|\| x == y\|2^z
666	// This undoes a transformation done by instcombine to fuse 2 compares.
667	if (ICI->getPredicate() == (isEQ ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE)) {
668	// It's a little bit hard to see why the following transformations are
669	// correct. Here is a CVC3 program to verify them for 64-bit values:
670
671	/*
672	ONE : BITVECTOR(64) = BVZEROEXTEND(0bin1, 63);
673	x : BITVECTOR(64);
674	y : BITVECTOR(64);
675	z : BITVECTOR(64);
676	mask : BITVECTOR(64) = BVSHL(ONE, z);
677	QUERY( (y & ~mask = y) =>
678	((x & ~mask = y) <=> (x = y OR x = (y \| mask)))
679	);
680	QUERY( (y \| mask = y) =>
681	((x \| mask = y) <=> (x = y OR x = (y & ~mask)))
682	);
683	*/
684
685	// Please note that each pattern must be a dual implication (<--> or
686	// iff). One directional implication can create spurious matches. If the
687	// implication is only one-way, an unsatisfiable condition on the left
688	// side can imply a satisfiable condition on the right side. Dual
689	// implication ensures that satisfiable conditions are transformed to
690	// other satisfiable conditions and unsatisfiable conditions are
691	// transformed to other unsatisfiable conditions.
692
693	// Here is a concrete example of a unsatisfiable condition on the left
694	// implying a satisfiable condition on the right:
695	//
696	// mask = (1 << z)
697	// (x & ~mask) == y --> (x == y \|\| x == (y \| mask))
698	//
699	// Substituting y = 3, z = 0 yields:
700	// (x & -2) == 3 --> (x == 3 \|\| x == 2)
701
702	// Pattern match a special case:
703	/*
704	QUERY( (y & ~mask = y) =>
705	((x & ~mask = y) <=> (x = y OR x = (y \| mask)))
706	);
707	*/
708	if (match(V: ICI->getOperand(i_nocapture: `0`),
709	P: m_And(L: m_Value(V&: RHSVal), R: m_APInt(Res&: RHSC)))) {
710	APInt Mask = ~*RHSC;
711	if (Mask.isPowerOf2() && (C->getValue() & ~Mask) == C->getValue()) {
712	// If we already have a value for the switch, it has to match!
713	if (!setValueOnce(RHSVal))
714	return false;
715
716	Vals.push_back(Elt: C);
717	Vals.push_back(
718	Elt: ConstantInt::get(Context&: C->getContext(),
719	V: C->getValue() \| Mask));
720	UsedICmps++;
721	return true;
722	}
723	}
724
725	// Pattern match a special case:
726	/*
727	QUERY( (y \| mask = y) =>
728	((x \| mask = y) <=> (x = y OR x = (y & ~mask)))
729	);
730	*/
731	if (match(V: ICI->getOperand(i_nocapture: `0`),
732	P: m_Or(L: m_Value(V&: RHSVal), R: m_APInt(Res&: RHSC)))) {
733	APInt Mask = *RHSC;
734	if (Mask.isPowerOf2() && (C->getValue() \| Mask) == C->getValue()) {
735	// If we already have a value for the switch, it has to match!
736	if (!setValueOnce(RHSVal))
737	return false;
738
739	Vals.push_back(Elt: C);
740	Vals.push_back(Elt: ConstantInt::get(Context&: C->getContext(),
741	V: C->getValue() & ~Mask));
742	UsedICmps++;
743	return true;
744	}
745	}
746
747	// If we already have a value for the switch, it has to match!
748	if (!setValueOnce(ICI->getOperand(i_nocapture: `0`)))
749	return false;
750
751	UsedICmps++;
752	Vals.push_back(Elt: C);
753	return true;
754	}
755
756	// If we have "x ult 3", for example, then we can add 0,1,2 to the set.
757	ConstantRange Span =
758	ConstantRange::makeExactICmpRegion(Pred: ICI->getPredicate(), Other: C->getValue());
759
760	// Shift the range if the compare is fed by an add. This is the range
761	// compare idiom as emitted by instcombine.
762	Value *CandidateVal = I->getOperand(i: `0`);
763	if (match(V: I->getOperand(i: `0`), P: m_Add(L: m_Value(V&: RHSVal), R: m_APInt(Res&: RHSC)))) {
764	Span = Span.subtract(CI: *RHSC);
765	CandidateVal = RHSVal;
766	}
767
768	// If this is an and/!= check, then we are looking to build the set of
769	// value that don't* pass the and chain. I.e. to turn "x ugt 2" into*
770	// x != 0 && x != 1.
771	if (!isEQ)
772	Span = Span.inverse();
773
774	// If there are a ton of values, we don't want to make a ginormous switch.
775	if (Span.isSizeLargerThan(MaxSize: `8`) \|\| Span.isEmptySet()) {
776	return false;
777	}
778
779	// If we already have a value for the switch, it has to match!
780	if (!setValueOnce(CandidateVal))
781	return false;
782
783	// Add all values from the range to the set
784	APInt Tmp = Span.getLower();
785	do
786	Vals.push_back(Elt: ConstantInt::get(Context&: I->getContext(), V: Tmp));
787	while (++Tmp != Span.getUpper());
788
789	UsedICmps++;
790	return true;
791	}
792
793	/// Given a potentially 'or'd or 'and'd together collection of icmp
794	/// eq/ne/lt/gt instructions that compare a value against a constant, extract
795	/// the value being compared, and stick the list constants into the Vals
796	/// vector.
797	/// One "Extra" case is allowed to differ from the other.
798	void gather(Value *V) {
799	Value Op0, Op1;
800	if (match(V, P: m_LogicalOr(L: m_Value(V&: Op0), R: m_Value(V&: Op1))))
801	IsEq = true;
802	else if (match(V, P: m_LogicalAnd(L: m_Value(V&: Op0), R: m_Value(V&: Op1))))
803	IsEq = false;
804	else
805	return;
806	// Keep a stack (SmallVector for efficiency) for depth-first traversal
807	SmallVector<Value *, `8`> DFT{Op0, Op1};
808	SmallPtrSet<Value *, `8`> Visited{V, Op0, Op1};
809
810	while (!DFT.empty()) {
811	V = DFT.pop_back_val();
812
813	if (Instruction *I = dyn_cast<Instruction>(Val: V)) {
814	// If it is a \|\| (or && depending on isEQ), process the operands.
815	if (IsEq ? match(V: I, P: m_LogicalOr(L: m_Value(V&: Op0), R: m_Value(V&: Op1)))
816	: match(V: I, P: m_LogicalAnd(L: m_Value(V&: Op0), R: m_Value(V&: Op1)))) {
817	if (Visited.insert(Ptr: Op1).second)
818	DFT.push_back(Elt: Op1);
819	if (Visited.insert(Ptr: Op0).second)
820	DFT.push_back(Elt: Op0);
821
822	continue;
823	}
824
825	// Try to match the current instruction
826	if (matchInstruction(I, isEQ: IsEq))
827	// Match succeed, continue the loop
828	continue;
829	}
830
831	// One element of the sequence of \|\| (or &&) could not be match as a
832	// comparison against the same value as the others.
833	// We allow only one "Extra" case to be checked before the switch
834	if (!Extra) {
835	Extra = V;
836	continue;
837	}
838	// Failed to parse a proper sequence, abort now
839	CompValue = nullptr;
840	break;
841	}
842	}
843	};
844
845	} // end anonymous namespace
846
847	static void eraseTerminatorAndDCECond(Instruction *TI,
848	MemorySSAUpdater MSSAU = nullptr*) {
849	Instruction Cond = nullptr*;
850	if (SwitchInst *SI = dyn_cast<SwitchInst>(Val: TI)) {
851	Cond = dyn_cast<Instruction>(Val: SI->getCondition());
852	} else if (CondBrInst *BI = dyn_cast<CondBrInst>(Val: TI)) {
853	Cond = dyn_cast<Instruction>(Val: BI->getCondition());
854	} else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(Val: TI)) {
855	Cond = dyn_cast<Instruction>(Val: IBI->getAddress());
856	}
857
858	TI->eraseFromParent();
859	if (Cond)
860	RecursivelyDeleteTriviallyDeadInstructions(V: Cond, TLI: nullptr, MSSAU);
861	}
862
863	/// Return true if the specified terminator checks
864	/// to see if a value is equal to constant integer value.
865	Value SimplifyCFGOpt::isValueEqualityComparison(Instruction TI) {
866	Value CV = nullptr*;
867	if (SwitchInst *SI = dyn_cast<SwitchInst>(Val: TI)) {
868	// Do not permit merging of large switch instructions into their
869	// predecessors unless there is only one predecessor.
870	if (!SI->getParent()->hasNPredecessorsOrMore(N: `128` / SI->getNumSuccessors()))
871	CV = SI->getCondition();
872	} else if (CondBrInst *BI = dyn_cast<CondBrInst>(Val: TI))
873	if (BI->getCondition()->hasOneUse()) {
874	if (ICmpInst *ICI = dyn_cast<ICmpInst>(Val: BI->getCondition())) {
875	if (ICI->isEquality() && getConstantInt(V: ICI->getOperand(i_nocapture: `1`), DL))
876	CV = ICI->getOperand(i_nocapture: `0`);
877	} else if (auto *Trunc = dyn_cast<TruncInst>(Val: BI->getCondition())) {
878	if (Trunc->hasNoUnsignedWrap())
879	CV = Trunc->getOperand(i_nocapture: `0`);
880	}
881	}
882
883	// Unwrap any lossless ptrtoint cast (except for unstable pointers).
884	if (CV) {
885	if (PtrToIntInst *PTII = dyn_cast<PtrToIntInst>(Val: CV)) {
886	Value *Ptr = PTII->getPointerOperand();
887	if (DL.hasUnstableRepresentation(Ty: Ptr->getType()))
888	return CV;
889	if (PTII->getType() == DL.getIntPtrType(Ptr->getType()))
890	CV = Ptr;
891	}
892	}
893	return CV;
894	}
895
896	/// Given a value comparison instruction,
897	/// decode all of the 'cases' that it represents and return the 'default' block.
898	BasicBlock *SimplifyCFGOpt::getValueEqualityComparisonCases(
899	Instruction *TI, std::vector<ValueEqualityComparisonCase> &Cases) {
900	if (SwitchInst *SI = dyn_cast<SwitchInst>(Val: TI)) {
901	Cases.reserve(n: SI->getNumCases());
902	for (auto Case : SI->cases())
903	Cases.push_back(x: ValueEqualityComparisonCase (Case.getCaseValue(),
904	Case.getCaseSuccessor()));
905	return SI->getDefaultDest();
906	}
907
908	CondBrInst *BI = cast<CondBrInst>(Val: TI);
909	Value *Cond = BI->getCondition();
910	ICmpInst::Predicate Pred;
911	ConstantInt *C;
912	if (auto *ICI = dyn_cast<ICmpInst>(Val: Cond)) {
913	Pred = ICI->getPredicate();
914	C = getConstantInt(V: ICI->getOperand(i_nocapture: `1`), DL);
915	} else {
916	Pred = ICmpInst::ICMP_NE;
917	auto *Trunc = cast<TruncInst>(Val: Cond);
918	C = ConstantInt::get(Ty: cast<IntegerType>(Val: Trunc->getOperand(i_nocapture: `0`)->getType()), V: `0`);
919	}
920	BasicBlock *Succ = BI->getSuccessor(i: Pred == ICmpInst::ICMP_NE);
921	Cases.push_back(x: ValueEqualityComparisonCase (C, Succ));
922	return BI->getSuccessor(i: Pred == ICmpInst::ICMP_EQ);
923	}
924
925	/// Given a vector of bb/value pairs, remove any entries
926	/// in the list that match the specified block.
927	static void
928	eliminateBlockCases(BasicBlock *BB,
929	std::vector<ValueEqualityComparisonCase> &Cases) {
930	llvm::erase(C&: Cases, V: BB);
931	}
932
933	/// Return true if there are any keys in C1 that exist in C2 as well.
934	static bool valuesOverlap(std::vector<ValueEqualityComparisonCase> &C1,
935	std::vector<ValueEqualityComparisonCase> &C2) {
936	std::vector<ValueEqualityComparisonCase> V1 = &C1, V2 = &C2;
937
938	// Make V1 be smaller than V2.
939	if (V1->size() > V2->size())
940	std::swap(a&: V1, b&: V2);
941
942	if (V1->empty())
943	return false;
944	if (V1->size() == `1`) {
945	// Just scan V2.
946	ConstantInt TheVal = (V1)[`0`].Value;
947	for (const ValueEqualityComparisonCase &VECC : *V2)
948	if (TheVal == VECC.Value)
949	return true;
950	}
951
952	// Otherwise, just sort both lists and compare element by element.
953	array_pod_sort(Start: V1->begin(), End: V1->end());
954	array_pod_sort(Start: V2->begin(), End: V2->end());
955	unsigned i1 = `0`, i2 = `0`, e1 = V1->size(), e2 = V2->size();
956	while (i1 != e1 && i2 != e2) {
957	if ((V1)[i1].Value == (V2)[i2].Value)
958	return true;
959	if ((V1)[i1].Value < (V2)[i2].Value)
960	++i1;
961	else
962	++i2;
963	}
964	return false;
965	}
966
967	/// If TI is known to be a terminator instruction and its block is known to
968	/// only have a single predecessor block, check to see if that predecessor is
969	/// also a value comparison with the same value, and if that comparison
970	/// determines the outcome of this comparison. If so, simplify TI. This does a
971	/// very limited form of jump threading.
972	bool SimplifyCFGOpt::simplifyEqualityComparisonWithOnlyPredecessor(
973	Instruction TI, BasicBlock Pred, IRBuilder<> &Builder) {
974	Value *PredVal = isValueEqualityComparison(TI: Pred->getTerminator());
975	if (!PredVal)
976	return false; // Not a value comparison in predecessor.
977
978	Value *ThisVal = isValueEqualityComparison(TI);
979	assert(ThisVal && "This isn't a value comparison!!");
980	if (ThisVal != PredVal)
981	return false; // Different predicates.
982
983	// TODO: Preserve branch weight metadata, similarly to how
984	// foldValueComparisonIntoPredecessors preserves it.
985
986	// Find out information about when control will move from Pred to TI's block.
987	std::vector<ValueEqualityComparisonCase> PredCases;
988	BasicBlock *PredDef =
989	getValueEqualityComparisonCases(TI: Pred->getTerminator(), Cases&: PredCases);
990	eliminateBlockCases(BB: PredDef, Cases&: PredCases); // Remove default from cases.
991
992	// Find information about how control leaves this block.
993	std::vector<ValueEqualityComparisonCase> ThisCases;
994	BasicBlock *ThisDef = getValueEqualityComparisonCases(TI, Cases&: ThisCases);
995	eliminateBlockCases(BB: ThisDef, Cases&: ThisCases); // Remove default from cases.
996
997	// If TI's block is the default block from Pred's comparison, potentially
998	// simplify TI based on this knowledge.
999	if (PredDef == TI->getParent()) {
1000	// If we are here, we know that the value is none of those cases listed in
1001	// PredCases. If there are any cases in ThisCases that are in PredCases, we
1002	// can simplify TI.
1003	if (!valuesOverlap(C1&: PredCases, C2&: ThisCases))
1004	return false;
1005
1006	if (isa<CondBrInst>(Val: TI)) {
1007	// Okay, one of the successors of this condbr is dead. Convert it to a
1008	// uncond br.
1009	assert(ThisCases.size() == `1` && "Branch can only have one case!");
1010	// Insert the new branch.
1011	Instruction *NI = Builder.CreateBr(Dest: ThisDef);
1012	(void)NI;
1013
1014	// Remove PHI node entries for the dead edge.
1015	ThisCases [`0`].Dest->removePredecessor(Pred: PredDef);
1016
1017	LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
1018	<< "Through successor TI: " << TI << "Leaving: " << NI
1019	<< "\n");
1020
1021	eraseTerminatorAndDCECond(TI);
1022
1023	if (DTU)
1024	DTU->applyUpdates(
1025	Updates: {{DominatorTree::Delete, PredDef, ThisCases [`0`].Dest}});
1026
1027	return true;
1028	}
1029
1030	SwitchInstProfUpdateWrapper SI = *cast<SwitchInst>(Val: TI);
1031	// Okay, TI has cases that are statically dead, prune them away.
1032	SmallPtrSet<Constant *, `16`> DeadCases;
1033	for (const ValueEqualityComparisonCase &Case : PredCases)
1034	DeadCases.insert(Ptr: Case.Value);
1035
1036	LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
1037	<< "Through successor TI: " << *TI);
1038
1039	SmallDenseMap<BasicBlock , int*, `8`> NumPerSuccessorCases;
1040	for (SwitchInst::CaseIt i = SI ->case_end(), e = SI ->case_begin(); i != e;) {
1041	--i;
1042	auto *Successor = i ->getCaseSuccessor();
1043	if (DTU)
1044	++NumPerSuccessorCases [Successor];
1045	if (DeadCases.count(Ptr: i ->getCaseValue())) {
1046	Successor->removePredecessor(Pred: PredDef);
1047	SI.removeCase(I: i);
1048	if (DTU)
1049	--NumPerSuccessorCases [Successor];
1050	}
1051	}
1052
1053	if (DTU) {
1054	std::vector<DominatorTree::UpdateType> Updates;
1055	for (const std::pair<BasicBlock , int*> &I : NumPerSuccessorCases)
1056	if (I.second == `0`)
1057	Updates.push_back(x: {DominatorTree::Delete, PredDef, I.first});
1058	DTU->applyUpdates(Updates);
1059	}
1060
1061	LLVM_DEBUG(dbgs() << "Leaving: " << *TI << "\n");
1062	return true;
1063	}
1064
1065	// Otherwise, TI's block must correspond to some matched value. Find out
1066	// which value (or set of values) this is.
1067	ConstantInt TIV = nullptr*;
1068	BasicBlock *TIBB = TI->getParent();
1069	for (const auto &[Value, Dest] : PredCases)
1070	if (Dest == TIBB) {
1071	if (TIV)
1072	return false; // Cannot handle multiple values coming to this block.
1073	TIV = Value;
1074	}
1075	assert(TIV && "No edge from pred to succ?");
1076
1077	// Okay, we found the one constant that our value can be if we get into TI's
1078	// BB. Find out which successor will unconditionally be branched to.
1079	BasicBlock TheRealDest = nullptr*;
1080	for (const auto &[Value, Dest] : ThisCases)
1081	if (Value == TIV) {
1082	TheRealDest = Dest;
1083	break;
1084	}
1085
1086	// If not handled by any explicit cases, it is handled by the default case.
1087	if (!TheRealDest)
1088	TheRealDest = ThisDef;
1089
1090	SmallPtrSet<BasicBlock *, `2`> RemovedSuccs;
1091
1092	// Remove PHI node entries for dead edges.
1093	BasicBlock *CheckEdge = TheRealDest;
1094	for (BasicBlock *Succ : successors(BB: TIBB))
1095	if (Succ != CheckEdge) {
1096	if (Succ != TheRealDest)
1097	RemovedSuccs.insert(Ptr: Succ);
1098	Succ->removePredecessor(Pred: TIBB);
1099	} else
1100	CheckEdge = nullptr;
1101
1102	// Insert the new branch.
1103	Instruction *NI = Builder.CreateBr(Dest: TheRealDest);
1104	(void)NI;
1105
1106	LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
1107	<< "Through successor TI: " << TI << "Leaving: " << NI
1108	<< "\n");
1109
1110	eraseTerminatorAndDCECond(TI);
1111	if (DTU) {
1112	SmallVector<DominatorTree::UpdateType, `2`> Updates;
1113	Updates.reserve(N: RemovedSuccs.size());
1114	for (auto *RemovedSucc : RemovedSuccs)
1115	Updates.push_back(Elt: {DominatorTree::Delete, TIBB, RemovedSucc});
1116	DTU->applyUpdates(Updates);
1117	}
1118	return true;
1119	}
1120
1121	namespace {
1122
1123	/// This class implements a stable ordering of constant
1124	/// integers that does not depend on their address. This is important for
1125	/// applications that sort ConstantInt's to ensure uniqueness.
1126	struct ConstantIntOrdering {
1127	bool operator()(const ConstantInt LHS, const* ConstantInt RHS) const* {
1128	return LHS->getValue().ult(RHS: RHS->getValue());
1129	}
1130	};
1131
1132	} // end anonymous namespace
1133
1134	static int constantIntSortPredicate(ConstantInt *const *P1,
1135	ConstantInt *const *P2) {
1136	const ConstantInt LHS = P1;
1137	const ConstantInt RHS = P2;
1138	if (LHS == RHS)
1139	return `0`;
1140	return LHS->getValue().ult(RHS: RHS->getValue()) ? `1` : -`1`;
1141	}
1142
1143	/// Get Weights of a given terminator, the default weight is at the front
1144	/// of the vector. If TI is a conditional eq, we need to swap the branch-weight
1145	/// metadata.
1146	static void getBranchWeights(Instruction *TI,
1147	SmallVectorImpl<uint64_t> &Weights) {
1148	MDNode *MD = TI->getMetadata(KindID: LLVMContext::MD_prof);
1149	assert(MD && "Invalid branch-weight metadata");
1150	extractFromBranchWeightMD64(ProfileData: MD, Weights);
1151
1152	// If TI is a conditional eq, the default case is the false case,
1153	// and the corresponding branch-weight data is at index 2. We swap the
1154	// default weight to be the first entry.
1155	if (CondBrInst *BI = dyn_cast<CondBrInst>(Val: TI)) {
1156	assert(Weights.size() == `2`);
1157	auto *ICI = dyn_cast<ICmpInst>(Val: BI->getCondition());
1158	if (!ICI)
1159	return;
1160
1161	if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
1162	std::swap(a&: Weights.front(), b&: Weights.back());
1163	}
1164	}
1165
1166	static void cloneInstructionsIntoPredecessorBlockAndUpdateSSAUses(
1167	BasicBlock BB, BasicBlock PredBlock, ValueToValueMapTy &VMap) {
1168	Instruction *PTI = PredBlock->getTerminator();
1169
1170	// If we have bonus instructions, clone them into the predecessor block.
1171	// Note that there may be multiple predecessor blocks, so we cannot move
1172	// bonus instructions to a predecessor block.
1173	for (Instruction &BonusInst : *BB) {
1174	if (BonusInst.isTerminator())
1175	continue;
1176
1177	Instruction *NewBonusInst = BonusInst.clone();
1178
1179	if (!NewBonusInst->getDebugLoc().isSameSourceLocation(Other: PTI->getDebugLoc())) {
1180	// Unless the instruction has the same !dbg location as the original
1181	// branch, drop it. When we fold the bonus instructions we want to make
1182	// sure we reset their debug locations in order to avoid stepping on
1183	// dead code caused by folding dead branches.
1184	NewBonusInst->setDebugLoc(DebugLoc::getDropped());
1185	} else if (const DebugLoc &DL = NewBonusInst->getDebugLoc()) {
1186	mapAtomInstance(DL, VMap);
1187	}
1188
1189	RemapInstruction(I: NewBonusInst, VM&: VMap,
1190	Flags: RF_NoModuleLevelChanges \| RF_IgnoreMissingLocals);
1191
1192	// If we speculated an instruction, we need to drop any metadata that may
1193	// result in undefined behavior, as the metadata might have been valid
1194	// only given the branch precondition.
1195	// Similarly strip attributes on call parameters that may cause UB in
1196	// location the call is moved to.
1197	NewBonusInst->dropUBImplyingAttrsAndMetadata();
1198
1199	NewBonusInst->insertInto(ParentBB: PredBlock, It: PTI->getIterator());
1200	auto Range = NewBonusInst->cloneDebugInfoFrom(From: &BonusInst);
1201	RemapDbgRecordRange(M: NewBonusInst->getModule(), Range, VM&: VMap,
1202	Flags: RF_NoModuleLevelChanges \| RF_IgnoreMissingLocals);
1203
1204	NewBonusInst->takeName(V: &BonusInst);
1205	BonusInst.setName(NewBonusInst->getName() + ".old");
1206	VMap [&BonusInst] = NewBonusInst;
1207
1208	// Update (liveout) uses of bonus instructions,
1209	// now that the bonus instruction has been cloned into predecessor.
1210	// Note that we expect to be in a block-closed SSA form for this to work!
1211	for (Use &U : make_early_inc_range(Range: BonusInst.uses())) {
1212	auto *UI = cast<Instruction>(Val: U.getUser());
1213	auto *PN = dyn_cast<PHINode>(Val: UI);
1214	if (!PN) {
1215	assert(UI->getParent() == BB && BonusInst.comesBefore(UI) &&
1216	"If the user is not a PHI node, then it should be in the same "
1217	"block as, and come after, the original bonus instruction.");
1218	continue; // Keep using the original bonus instruction.
1219	}
1220	// Is this the block-closed SSA form PHI node?
1221	if (PN->getIncomingBlock(U) == BB)
1222	continue; // Great, keep using the original bonus instruction.
1223	// The only other alternative is an "use" when coming from
1224	// the predecessor block - here we should refer to the cloned bonus instr.
1225	assert(PN->getIncomingBlock(U) == PredBlock &&
1226	"Not in block-closed SSA form?");
1227	U.set(NewBonusInst);
1228	}
1229	}
1230
1231	// Key Instructions: We may have propagated atom info into the pred. If the
1232	// pred's terminator already has atom info do nothing as merging would drop
1233	// one atom group anyway. If it doesn't, propagte the remapped atom group
1234	// from BB's terminator.
1235	if (auto &PredDL = PTI->getDebugLoc()) {
1236	auto &DL = BB->getTerminator()->getDebugLoc();
1237	if (!PredDL ->getAtomGroup() && DL && DL ->getAtomGroup() &&
1238	PredDL.isSameSourceLocation(Other: DL)) {
1239	PTI->setDebugLoc(DL);
1240	RemapSourceAtom(I: PTI, VM&: VMap);
1241	}
1242	}
1243	}
1244
1245	bool SimplifyCFGOpt::performValueComparisonIntoPredecessorFolding(
1246	Instruction TI, Value &CV, Instruction *PTI, IRBuilder<> &Builder) {
1247	BasicBlock *BB = TI->getParent();
1248	BasicBlock *Pred = PTI->getParent();
1249
1250	SmallVector<DominatorTree::UpdateType, `32`> Updates;
1251
1252	// Figure out which 'cases' to copy from SI to PSI.
1253	std::vector<ValueEqualityComparisonCase> BBCases;
1254	BasicBlock *BBDefault = getValueEqualityComparisonCases(TI, Cases&: BBCases);
1255
1256	std::vector<ValueEqualityComparisonCase> PredCases;
1257	BasicBlock *PredDefault = getValueEqualityComparisonCases(TI: PTI, Cases&: PredCases);
1258
1259	// Based on whether the default edge from PTI goes to BB or not, fill in
1260	// PredCases and PredDefault with the new switch cases we would like to
1261	// build.
1262	SmallMapVector<BasicBlock , int*, `8`> NewSuccessors;
1263
1264	// Update the branch weight metadata along the way
1265	SmallVector<uint64_t, `8`> Weights;
1266	bool PredHasWeights = hasBranchWeightMD(I: *PTI);
1267	bool SuccHasWeights = hasBranchWeightMD(I: *TI);
1268
1269	if (PredHasWeights) {
1270	getBranchWeights(TI: PTI, Weights);
1271	// branch-weight metadata is inconsistent here.
1272	if (Weights.size() != `1` + PredCases.size())
1273	PredHasWeights = SuccHasWeights = false;
1274	} else if (SuccHasWeights)
1275	// If there are no predecessor weights but there are successor weights,
1276	// populate Weights with 1, which will later be scaled to the sum of
1277	// successor's weights
1278	Weights.assign(NumElts: `1` + PredCases.size(), Elt: `1`);
1279
1280	SmallVector<uint64_t, `8`> SuccWeights;
1281	if (SuccHasWeights) {
1282	getBranchWeights(TI, Weights&: SuccWeights);
1283	// branch-weight metadata is inconsistent here.
1284	if (SuccWeights.size() != `1` + BBCases.size())
1285	PredHasWeights = SuccHasWeights = false;
1286	} else if (PredHasWeights)
1287	SuccWeights.assign(NumElts: `1` + BBCases.size(), Elt: `1`);
1288
1289	if (PredDefault == BB) {
1290	// If this is the default destination from PTI, only the edges in TI
1291	// that don't occur in PTI, or that branch to BB will be activated.
1292	std::set<ConstantInt *, ConstantIntOrdering> PTIHandled;
1293	for (unsigned i = `0`, e = PredCases.size(); i != e; ++i)
1294	if (PredCases [i].Dest != BB)
1295	PTIHandled.insert(x: PredCases [i].Value);
1296	else {
1297	// The default destination is BB, we don't need explicit targets.
1298	std::swap(a&: PredCases [i], b&: PredCases.back());
1299
1300	if (PredHasWeights \|\| SuccHasWeights) {
1301	// Increase weight for the default case.
1302	Weights [`0`] += Weights [i + `1`];
1303	std::swap(a&: Weights [i + `1`], b&: Weights.back());
1304	Weights.pop_back();
1305	}
1306
1307	PredCases.pop_back();
1308	--i;
1309	--e;
1310	}
1311
1312	// Reconstruct the new switch statement we will be building.
1313	if (PredDefault != BBDefault) {
1314	PredDefault->removePredecessor(Pred);
1315	if (DTU && PredDefault != BB)
1316	Updates.push_back(Elt: {DominatorTree::Delete, Pred, PredDefault});
1317	PredDefault = BBDefault;
1318	++NewSuccessors [BBDefault];
1319	}
1320
1321	unsigned CasesFromPred = Weights.size();
1322	uint64_t ValidTotalSuccWeight = `0`;
1323	for (unsigned i = `0`, e = BBCases.size(); i != e; ++i)
1324	if (!PTIHandled.count(x: BBCases [i].Value) && BBCases [i].Dest != BBDefault) {
1325	PredCases.push_back(x: BBCases [i]);
1326	++NewSuccessors [BBCases [i].Dest];
1327	if (SuccHasWeights \|\| PredHasWeights) {
1328	// The default weight is at index 0, so weight for the ith case
1329	// should be at index i+1. Scale the cases from successor by
1330	// PredDefaultWeight (Weights[0]).
1331	Weights.push_back(Elt: Weights [`0`] * SuccWeights [i + `1`]);
1332	ValidTotalSuccWeight += SuccWeights [i + `1`];
1333	}
1334	}
1335
1336	if (SuccHasWeights \|\| PredHasWeights) {
1337	ValidTotalSuccWeight += SuccWeights [`0`];
1338	// Scale the cases from predecessor by ValidTotalSuccWeight.
1339	for (unsigned i = `1`; i < CasesFromPred; ++i)
1340	Weights [i] *= ValidTotalSuccWeight;
1341	// Scale the default weight by SuccDefaultWeight (SuccWeights[0]).
1342	Weights [`0`] *= SuccWeights [`0`];
1343	}
1344	} else {
1345	// If this is not the default destination from PSI, only the edges
1346	// in SI that occur in PSI with a destination of BB will be
1347	// activated.
1348	std::set<ConstantInt *, ConstantIntOrdering> PTIHandled;
1349	std::map<ConstantInt *, uint64_t> WeightsForHandled;
1350	for (unsigned i = `0`, e = PredCases.size(); i != e; ++i)
1351	if (PredCases [i].Dest == BB) {
1352	PTIHandled.insert(x: PredCases [i].Value);
1353
1354	if (PredHasWeights \|\| SuccHasWeights) {
1355	WeightsForHandled [PredCases [i].Value] = Weights [i + `1`];
1356	std::swap(a&: Weights [i + `1`], b&: Weights.back());
1357	Weights.pop_back();
1358	}
1359
1360	std::swap(a&: PredCases [i], b&: PredCases.back());
1361	PredCases.pop_back();
1362	--i;
1363	--e;
1364	}
1365
1366	// Okay, now we know which constants were sent to BB from the
1367	// predecessor. Figure out where they will all go now.
1368	for (const ValueEqualityComparisonCase &Case : BBCases)
1369	if (PTIHandled.count(x: Case.Value)) {
1370	// If this is one we are capable of getting...
1371	if (PredHasWeights \|\| SuccHasWeights)
1372	Weights.push_back(Elt: WeightsForHandled [Case.Value]);
1373	PredCases.push_back(x: Case);
1374	++NewSuccessors [Case.Dest];
1375	PTIHandled.erase(x: Case.Value); // This constant is taken care of
1376	}
1377
1378	// If there are any constants vectored to BB that TI doesn't handle,
1379	// they must go to the default destination of TI.
1380	for (ConstantInt *I : PTIHandled) {
1381	if (PredHasWeights \|\| SuccHasWeights)
1382	Weights.push_back(Elt: WeightsForHandled [I]);
1383	PredCases.push_back(x: ValueEqualityComparisonCase (I, BBDefault));
1384	++NewSuccessors [BBDefault];
1385	}
1386	}
1387
1388	// Okay, at this point, we know which new successor Pred will get. Make
1389	// sure we update the number of entries in the PHI nodes for these
1390	// successors.
1391	SmallPtrSet<BasicBlock *, `2`> SuccsOfPred;
1392	if (DTU) {
1393	SuccsOfPred = {llvm::from_range, successors(BB: Pred)};
1394	Updates.reserve(N: Updates.size() + NewSuccessors.size());
1395	}
1396	for (const std::pair<BasicBlock , int* /Num/> &NewSuccessor :
1397	NewSuccessors) {
1398	for (auto I : seq(Size: NewSuccessor.second)) {
1399	(void)I;
1400	addPredecessorToBlock(Succ: NewSuccessor.first, NewPred: Pred, ExistPred: BB);
1401	}
1402	if (DTU && !SuccsOfPred.contains(Ptr: NewSuccessor.first))
1403	Updates.push_back(Elt: {DominatorTree::Insert, Pred, NewSuccessor.first});
1404	}
1405
1406	Builder.SetInsertPoint(PTI);
1407	// Convert pointer to int before we switch.
1408	if (CV->getType()->isPointerTy()) {
1409	assert(!DL.hasUnstableRepresentation(CV->getType()) &&
1410	"Should not end up here with unstable pointers");
1411	CV =
1412	Builder.CreatePtrToInt(V: CV, DestTy: DL.getIntPtrType(CV->getType()), Name: "magicptr");
1413	}
1414
1415	// Now that the successors are updated, create the new Switch instruction.
1416	SwitchInst *NewSI = Builder.CreateSwitch(V: CV, Dest: PredDefault, NumCases: PredCases.size());
1417	NewSI->setDebugLoc(PTI->getDebugLoc());
1418	for (ValueEqualityComparisonCase &V : PredCases)
1419	NewSI->addCase(OnVal: V.Value, Dest: V.Dest);
1420
1421	if (PredHasWeights \|\| SuccHasWeights)
1422	setFittedBranchWeights(I&: NewSI, Weights, /IsExpected=/*false,
1423	/ElideAllZero=/true);
1424
1425	eraseTerminatorAndDCECond(TI: PTI);
1426
1427	// Okay, last check. If BB is still a successor of PSI, then we must
1428	// have an infinite loop case. If so, add an infinitely looping block
1429	// to handle the case to preserve the behavior of the code.
1430	BasicBlock InfLoopBlock = nullptr*;
1431	for (unsigned i = `0`, e = NewSI->getNumSuccessors(); i != e; ++i)
1432	if (NewSI->getSuccessor(idx: i) == BB) {
1433	if (!InfLoopBlock) {
1434	// Insert it at the end of the function, because it's either code,
1435	// or it won't matter if it's hot. :)
1436	InfLoopBlock =
1437	BasicBlock::Create(Context&: BB->getContext(), Name: "infloop", Parent: BB->getParent());
1438	UncondBrInst::Create(IfTrue: InfLoopBlock, InsertBefore: InfLoopBlock);
1439	if (DTU)
1440	Updates.push_back(
1441	Elt: {DominatorTree::Insert, InfLoopBlock, InfLoopBlock});
1442	}
1443	NewSI->setSuccessor(idx: i, NewSucc: InfLoopBlock);
1444	}
1445
1446	if (DTU) {
1447	if (InfLoopBlock)
1448	Updates.push_back(Elt: {DominatorTree::Insert, Pred, InfLoopBlock});
1449
1450	Updates.push_back(Elt: {DominatorTree::Delete, Pred, BB});
1451
1452	DTU->applyUpdates(Updates);
1453	}
1454
1455	++NumFoldValueComparisonIntoPredecessors;
1456	return true;
1457	}
1458
1459	/// The specified terminator is a value equality comparison instruction
1460	/// (either a switch or a branch on "X == c").
1461	/// See if any of the predecessors of the terminator block are value comparisons
1462	/// on the same value. If so, and if safe to do so, fold them together.
1463	bool SimplifyCFGOpt::foldValueComparisonIntoPredecessors(Instruction *TI,
1464	IRBuilder<> &Builder) {
1465	BasicBlock *BB = TI->getParent();
1466	Value CV = isValueEqualityComparison(TI); // CondVal*
1467	assert(CV && "Not a comparison?");
1468
1469	bool Changed = false;
1470
1471	SmallSetVector<BasicBlock *, `16`> Preds(pred_begin(BB), pred_end(BB));
1472	while (!Preds.empty()) {
1473	BasicBlock *Pred = Preds.pop_back_val();
1474	Instruction *PTI = Pred->getTerminator();
1475
1476	// Don't try to fold into itself.
1477	if (Pred == BB)
1478	continue;
1479
1480	// See if the predecessor is a comparison with the same value.
1481	Value PCV = isValueEqualityComparison(TI: PTI); // PredCondVal*
1482	if (PCV != CV)
1483	continue;
1484
1485	SmallSetVector<BasicBlock *, `4`> FailBlocks;
1486	if (!safeToMergeTerminators(SI1: TI, SI2: PTI, FailBlocks: &FailBlocks)) {
1487	for (auto *Succ : FailBlocks) {
1488	if (!SplitBlockPredecessors(BB: Succ, Preds: TI->getParent(), Suffix: ".fold.split", DTU))
1489	return false;
1490	}
1491	}
1492
1493	performValueComparisonIntoPredecessorFolding(TI, CV, PTI, Builder);
1494	Changed = true;
1495	}
1496	return Changed;
1497	}
1498
1499	// If we would need to insert a select that uses the value of this invoke
1500	// (comments in hoistSuccIdenticalTerminatorToSwitchOrIf explain why we would
1501	// need to do this), we can't hoist the invoke, as there is nowhere to put the
1502	// select in this case.
1503	static bool isSafeToHoistInvoke(BasicBlock BB1, BasicBlock BB2,
1504	Instruction I1, Instruction I2) {
1505	for (BasicBlock *Succ : successors(BB: BB1)) {
1506	for (const PHINode &PN : Succ->phis()) {
1507	Value *BB1V = PN.getIncomingValueForBlock(BB: BB1);
1508	Value *BB2V = PN.getIncomingValueForBlock(BB: BB2);
1509	if (BB1V != BB2V && (BB1V == I1 \|\| BB2V == I2)) {
1510	return false;
1511	}
1512	}
1513	}
1514	return true;
1515	}
1516
// Get interesting characteristics of instructions that
// `hoistCommonCodeFromSuccessors` didn't hoist. They restrict what kind of
// instructions can be reordered across.
// The values are distinct bits so that they can be OR'ed into one mask.
enum SkipFlags {
  // A skipped instruction may read from memory.
  SkipReadMem = 1,
  // A skipped instruction may have side effects (or is an alloca).
  SkipSideEffect = 2,
  // A skipped instruction is not guaranteed to transfer execution to its
  // successor.
  SkipImplicitControlFlow = 4
};
1525
1526	static unsigned skippedInstrFlags(Instruction *I) {
1527	unsigned Flags = `0`;
1528	if (I->mayReadFromMemory())
1529	Flags \|= SkipReadMem;
1530	// We can't arbitrarily move around allocas, e.g. moving allocas (especially
1531	// inalloca) across stacksave/stackrestore boundaries.
1532	if (I->mayHaveSideEffects() \|\| isa<AllocaInst>(Val: I))
1533	Flags \|= SkipSideEffect;
1534	if (!isGuaranteedToTransferExecutionToSuccessor(I))
1535	Flags \|= SkipImplicitControlFlow;
1536	return Flags;
1537	}
1538
1539	// Returns true if it is safe to reorder an instruction across preceding
1540	// instructions in a basic block.
1541	static bool isSafeToHoistInstr(Instruction I, unsigned* Flags) {
1542	// Don't reorder a store over a load.
1543	if ((Flags & SkipReadMem) && I->mayWriteToMemory())
1544	return false;
1545
1546	// If we have seen an instruction with side effects, it's unsafe to reorder an
1547	// instruction which reads memory or itself has side effects.
1548	if ((Flags & SkipSideEffect) &&
1549	(I->mayReadFromMemory() \|\| I->mayHaveSideEffects() \|\| isa<AllocaInst>(Val: I)))
1550	return false;
1551
1552	// Reordering across an instruction which does not necessarily transfer
1553	// control to the next instruction is speculation.
1554	if ((Flags & SkipImplicitControlFlow) && !isSafeToSpeculativelyExecute(I))
1555	return false;
1556
1557	// Hoisting of llvm.deoptimize is only legal together with the next return
1558	// instruction, which this pass is not always able to do.
1559	if (auto *CB = dyn_cast<CallBase>(Val: I))
1560	if (CB->getIntrinsicID() == Intrinsic::experimental_deoptimize)
1561	return false;
1562
1563	// It's also unsafe/illegal to hoist an instruction above its instruction
1564	// operands
1565	BasicBlock *BB = I->getParent();
1566	for (Value *Op : I->operands()) {
1567	if (auto *J = dyn_cast<Instruction>(Val: Op))
1568	if (J->getParent() == BB)
1569	return false;
1570	}
1571
1572	return true;
1573	}
1574
1575	static bool passingValueIsAlwaysUndefined(Value V, Instruction I, bool PtrValueMayBeModified = false);
1576
1577	/// Helper function for hoistCommonCodeFromSuccessors. Return true if identical
1578	/// instructions \p I1 and \p I2 can and should be hoisted.
1579	static bool shouldHoistCommonInstructions(Instruction I1, Instruction I2,
1580	const TargetTransformInfo &TTI) {
1581	// If we're going to hoist a call, make sure that the two instructions
1582	// we're commoning/hoisting are both marked with musttail, or neither of
1583	// them is marked as such. Otherwise, we might end up in a situation where
1584	// we hoist from a block where the terminator is a `ret` to a block where
1585	// the terminator is a `br`, and `musttail` calls expect to be followed by
1586	// a return.
1587	auto *C1 = dyn_cast<CallInst>(Val: I1);
1588	auto *C2 = dyn_cast<CallInst>(Val: I2);
1589	if (C1 && C2)
1590	if (C1->isMustTailCall() != C2->isMustTailCall())
1591	return false;
1592
1593	if (!TTI.isProfitableToHoist(I: I1) \|\| !TTI.isProfitableToHoist(I: I2))
1594	return false;
1595
1596	// If any of the two call sites has nomerge or convergent attribute, stop
1597	// hoisting.
1598	if (const auto *CB1 = dyn_cast<CallBase>(Val: I1))
1599	if (CB1->cannotMerge() \|\| CB1->isConvergent())
1600	return false;
1601	if (const auto *CB2 = dyn_cast<CallBase>(Val: I2))
1602	if (CB2->cannotMerge() \|\| CB2->isConvergent())
1603	return false;
1604
1605	return true;
1606	}
1607
1608	/// Hoists DbgVariableRecords from \p I1 and \p OtherInstrs that are identical
1609	/// in lock-step to \p TI. This matches how dbg. intrinsics are hoisting in*
1610	/// hoistCommonCodeFromSuccessors. e.g. The input:
1611	/// I1 DVRs: { x, z },
1612	/// OtherInsts: { I2 DVRs: { x, y, z } }
1613	/// would result in hoisting only DbgVariableRecord x.
1614	static void hoistLockstepIdenticalDbgVariableRecords(
1615	Instruction TI, Instruction I1,
1616	SmallVectorImpl<Instruction *> &OtherInsts) {
1617	if (!I1->hasDbgRecords())
1618	return;
1619	using CurrentAndEndIt =
1620	std::pair<DbgRecord::self_iterator, DbgRecord::self_iterator>;
1621	// Vector of {Current, End} iterators.
1622	SmallVector<CurrentAndEndIt> Itrs;
1623	Itrs.reserve(N: OtherInsts.size() + `1`);
1624	// Helper lambdas for lock-step checks:
1625	// Return true if this Current == End.
1626	auto atEnd = [](const CurrentAndEndIt &Pair) {
1627	return Pair.first == Pair.second;
1628	};
1629	// Return true if all Current are identical.
1630	auto allIdentical = [](const SmallVector<CurrentAndEndIt> &Itrs) {
1631	return all_of(Range: make_first_range(c: ArrayRef(Itrs).drop_front()),
1632	P: [&](DbgRecord::self_iterator I) {
1633	return Itrs [`0`].first ->isIdenticalToWhenDefined(R: *I);
1634	});
1635	};
1636
1637	// Collect the iterators.
1638	Itrs.push_back(
1639	Elt: {I1->getDbgRecordRange().begin(), I1->getDbgRecordRange().end()});
1640	for (Instruction *Other : OtherInsts) {
1641	if (!Other->hasDbgRecords())
1642	return;
1643	Itrs.push_back(
1644	Elt: {Other->getDbgRecordRange().begin(), Other->getDbgRecordRange().end()});
1645	}
1646
1647	// Iterate in lock-step until any of the DbgRecord lists are exausted. If
1648	// the lock-step DbgRecord are identical, hoist all of them to TI.
1649	// This replicates the dbg. intrinsic behaviour in*
1650	// hoistCommonCodeFromSuccessors.
1651	while (none_of(Range&: Itrs, P: atEnd)) {
1652	bool HoistDVRs = allIdentical (Itrs);
1653	for (CurrentAndEndIt &Pair : Itrs) {
1654	// Increment Current iterator now as we may be about to move the
1655	// DbgRecord.
1656	DbgRecord &DR = *Pair.first ++;
1657	if (HoistDVRs) {
1658	DR.removeFromParent();
1659	TI->getParent()->insertDbgRecordBefore(DR: &DR, Here: TI->getIterator());
1660	}
1661	}
1662	}
1663	}
1664
1665	static bool areIdenticalUpToCommutativity(const Instruction *I1,
1666	const Instruction *I2) {
1667	if (I1->isIdenticalToWhenDefined(I: I2, /IntersectAttrs=/true))
1668	return true;
1669
1670	if (auto *Cmp1 = dyn_cast<CmpInst>(Val: I1))
1671	if (auto *Cmp2 = dyn_cast<CmpInst>(Val: I2))
1672	return Cmp1->getPredicate() == Cmp2->getSwappedPredicate() &&
1673	Cmp1->getOperand(i_nocapture: `0`) == Cmp2->getOperand(i_nocapture: `1`) &&
1674	Cmp1->getOperand(i_nocapture: `1`) == Cmp2->getOperand(i_nocapture: `0`);
1675
1676	if (I1->isCommutative() && I1->isSameOperationAs(I: I2)) {
1677	return I1->getOperand(i: `0`) == I2->getOperand(i: `1`) &&
1678	I1->getOperand(i: `1`) == I2->getOperand(i: `0`) &&
1679	equal(LRange: drop_begin(RangeOrContainer: I1->operands(), N: `2`), RRange: drop_begin(RangeOrContainer: I2->operands(), N: `2`));
1680	}
1681
1682	return false;
1683	}
1684
1685	/// If the target supports conditional faulting,
1686	/// we look for the following pattern:
1687	/// \code
1688	/// BB:
1689	/// ...
1690	/// %cond = icmp ult %x, %y
1691	/// br i1 %cond, label %TrueBB, label %FalseBB
1692	/// FalseBB:
1693	/// store i32 1, ptr %q, align 4
1694	/// ...
1695	/// TrueBB:
1696	/// %maskedloadstore = load i32, ptr %b, align 4
1697	/// store i32 %maskedloadstore, ptr %p, align 4
1698	/// ...
1699	/// \endcode
1700	///
1701	/// and transform it into:
1702	///
1703	/// \code
1704	/// BB:
1705	/// ...
1706	/// %cond = icmp ult %x, %y
1707	/// %maskedloadstore = cload i32, ptr %b, %cond
1708	/// cstore i32 %maskedloadstore, ptr %p, %cond
1709	/// cstore i32 1, ptr %q, ~%cond
1710	/// br i1 %cond, label %TrueBB, label %FalseBB
1711	/// FalseBB:
1712	/// ...
1713	/// TrueBB:
1714	/// ...
1715	/// \endcode
1716	///
1717	/// where cload/cstore are represented by llvm.masked.load/store intrinsics,
1718	/// e.g.
1719	///
1720	/// \code
1721	/// %vcond = bitcast i1 %cond to <1 x i1>
1722	/// %v0 = call <1 x i32> @llvm.masked.load.v1i32.p0
1723	/// (ptr %b, i32 4, <1 x i1> %vcond, <1 x i32> poison)
1724	/// %maskedloadstore = bitcast <1 x i32> %v0 to i32
1725	/// call void @llvm.masked.store.v1i32.p0
1726	/// (<1 x i32> %v0, ptr %p, i32 4, <1 x i1> %vcond)
1727	/// %cond.not = xor i1 %cond, true
1728	/// %vcond.not = bitcast i1 %cond.not to <1 x i>
1729	/// call void @llvm.masked.store.v1i32.p0
1730	/// (<1 x i32> <i32 1>, ptr %q, i32 4, <1x i1> %vcond.not)
1731	/// \endcode
1732	///
1733	/// So we need to turn hoisted load/store into cload/cstore.
1734	///
1735	/// \param BI The branch instruction.
1736	/// \param SpeculatedConditionalLoadsStores The load/store instructions that
1737	/// will be speculated.
1738	/// \param Invert indicates if speculates FalseBB. Only used in triangle CFG.
1739	static void hoistConditionalLoadsStores(
1740	CondBrInst *BI,
1741	SmallVectorImpl<Instruction *> &SpeculatedConditionalLoadsStores,
1742	std::optional<bool> Invert, Instruction *Sel) {
1743	auto &Context = BI->getParent()->getContext();
1744	auto *VCondTy = FixedVectorType::get(ElementType: Type::getInt1Ty(C&: Context), NumElts: `1`);
1745	auto *Cond = BI->getCondition();
1746	// Construct the condition if needed.
1747	BasicBlock *BB = BI->getParent();
1748	Value Mask = nullptr*;
1749	Value MaskFalse = nullptr*;
1750	Value MaskTrue = nullptr*;
1751	if (Invert.has_value()) {
1752	IRBuilder<> Builder(Sel ? Sel : SpeculatedConditionalLoadsStores.back());
1753	Mask = Builder.CreateBitCast(
1754	V: *Invert ? Builder.CreateXor(LHS: Cond, RHS: ConstantInt::getTrue(Context)) : Cond,
1755	DestTy: VCondTy);
1756	} else {
1757	IRBuilder<> Builder(BI);
1758	MaskFalse = Builder.CreateBitCast(
1759	V: Builder.CreateXor(LHS: Cond, RHS: ConstantInt::getTrue(Context)), DestTy: VCondTy);
1760	MaskTrue = Builder.CreateBitCast(V: Cond, DestTy: VCondTy);
1761	}
1762	auto PeekThroughBitcasts = [](Value *V) {
1763	while (auto *BitCast = dyn_cast<BitCastInst>(Val: V))
1764	V = BitCast->getOperand(i_nocapture: `0`);
1765	return V;
1766	};
1767	for (auto *I : SpeculatedConditionalLoadsStores) {
1768	IRBuilder<> Builder(Invert.has_value() ? I : BI);
1769	if (!Invert.has_value())
1770	Mask = I->getParent() == BI->getSuccessor(i: `0`) ? MaskTrue : MaskFalse;
1771	// We currently assume conditional faulting load/store is supported for
1772	// scalar types only when creating new instructions. This can be easily
1773	// extended for vector types in the future.
1774	assert(!getLoadStoreType(I)->isVectorTy() && "not implemented");
1775	auto *Op0 = I->getOperand(i: `0`);
1776	CallInst MaskedLoadStore = nullptr*;
1777	if (auto *LI = dyn_cast<LoadInst>(Val: I)) {
1778	// Handle Load.
1779	auto *Ty = I->getType();
1780	PHINode PN = nullptr*;
1781	Value PassThru = nullptr*;
1782	if (Invert.has_value())
1783	for (User *U : I->users()) {
1784	if ((PN = dyn_cast<PHINode>(Val: U))) {
1785	PassThru = Builder.CreateBitCast(
1786	V: PeekThroughBitcasts (PN->getIncomingValueForBlock(BB)),
1787	DestTy: FixedVectorType::get(ElementType: Ty, NumElts: `1`));
1788	} else if (auto *Ins = cast<Instruction>(Val: U);
1789	Sel && Ins->getParent() == BB) {
1790	// This happens when store or/and a speculative instruction between
1791	// load and store were hoisted to the BB. Make sure the masked load
1792	// inserted before its use.
1793	// We assume there's one of such use.
1794	Builder.SetInsertPoint(Ins);
1795	}
1796	}
1797	MaskedLoadStore = Builder.CreateMaskedLoad(
1798	Ty: FixedVectorType::get(ElementType: Ty, NumElts: `1`), Ptr: Op0, Alignment: LI->getAlign(), Mask, PassThru);
1799	Value *NewLoadStore = Builder.CreateBitCast(V: MaskedLoadStore, DestTy: Ty);
1800	if (PN)
1801	PN->setIncomingValue(i: PN->getBasicBlockIndex(BB), V: NewLoadStore);
1802	I->replaceAllUsesWith(V: NewLoadStore);
1803	} else {
1804	// Handle Store.
1805	auto *StoredVal = Builder.CreateBitCast(
1806	V: PeekThroughBitcasts (Op0), DestTy: FixedVectorType::get(ElementType: Op0->getType(), NumElts: `1`));
1807	MaskedLoadStore = Builder.CreateMaskedStore(
1808	Val: StoredVal, Ptr: I->getOperand(i: `1`), Alignment: cast<StoreInst>(Val: I)->getAlign(), Mask);
1809	}
1810	// For non-debug metadata, only !annotation, !range, !nonnull and !align are
1811	// kept when hoisting (see Instruction::dropUBImplyingAttrsAndMetadata).
1812	//
1813	// !nonnull, !align : Not support pointer type, no need to keep.
1814	// !range: Load type is changed from scalar to vector, but the metadata on
1815	// vector specifies a per-element range, so the semantics stay the
1816	// same. Keep it.
1817	// !annotation: Not impact semantics. Keep it.
1818	if (const MDNode *Ranges = I->getMetadata(KindID: LLVMContext::MD_range))
1819	MaskedLoadStore->addRangeRetAttr(CR: getConstantRangeFromMetadata(RangeMD: *Ranges));
1820	I->dropUBImplyingAttrsAndUnknownMetadata(KnownIDs: {LLVMContext::MD_annotation});
1821	// FIXME: DIAssignID is not supported for masked store yet.
1822	// (Verifier::visitDIAssignIDMetadata)
1823	at::deleteAssignmentMarkers(Inst: I);
1824	I->eraseMetadataIf(Pred: [](unsigned MDKind, MDNode *Node) {
1825	return Node->getMetadataID() == Metadata::DIAssignIDKind;
1826	});
1827	MaskedLoadStore->copyMetadata(SrcInst: *I);
1828	I->eraseFromParent();
1829	}
1830	}
1831
1832	static bool isSafeCheapLoadStore(const Instruction *I,
1833	const TargetTransformInfo &TTI) {
1834	// Not handle volatile or atomic.
1835	bool IsStore = false;
1836	if (auto *L = dyn_cast<LoadInst>(Val: I)) {
1837	if (!L->isSimple() \|\| !HoistLoadsWithCondFaulting)
1838	return false;
1839	} else if (auto *S = dyn_cast<StoreInst>(Val: I)) {
1840	if (!S->isSimple() \|\| !HoistStoresWithCondFaulting)
1841	return false;
1842	IsStore = true;
1843	} else
1844	return false;
1845
1846	// llvm.masked.load/store use i32 for alignment while load/store use i64.
1847	// That's why we have the alignment limitation.
1848	// FIXME: Update the prototype of the intrinsics?
1849	return TTI.hasConditionalLoadStoreForType(Ty: getLoadStoreType(I), IsStore) &&
1850	getLoadStoreAlignment(I) < Value::MaximumAlignment;
1851	}
1852
1853	/// Hoist any common code in the successor blocks up into the block. This
1854	/// function guarantees that BB dominates all successors. If AllInstsEqOnly is
1855	/// given, only perform hoisting in case all successors blocks contain matching
1856	/// instructions only. In that case, all instructions can be hoisted and the
1857	/// original branch will be replaced and selects for PHIs are added.
1858	bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(Instruction *TI,
1859	bool AllInstsEqOnly) {
1860	// This does very trivial matching, with limited scanning, to find identical
1861	// instructions in the two blocks. In particular, we don't want to get into
1862	// O(N1N2...) situations here where Ni are the sizes of these successors. As
1863	// such, we currently just scan for obviously identical instructions in an
1864	// identical order, possibly separated by the same number of non-identical
1865	// instructions.
1866	BasicBlock *BB = TI->getParent();
1867	unsigned int SuccSize = succ_size(BB);
1868	if (SuccSize < `2`)
1869	return false;
1870
1871	// If either of the blocks has it's address taken, then we can't do this fold,
1872	// because the code we'd hoist would no longer run when we jump into the block
1873	// by it's address.
1874	SmallSetVector<BasicBlock *, `4`> UniqueSuccessors(from_range, successors(BB));
1875	for (auto *Succ : UniqueSuccessors) {
1876	if (Succ->hasAddressTaken())
1877	return false;
1878	// Use getUniquePredecessor instead of getSinglePredecessor to support
1879	// multi-cases successors in switch.
1880	if (Succ->getUniquePredecessor())
1881	continue;
1882	// If Succ has >1 predecessors, continue to check if the Succ contains only
1883	// one `unreachable` inst. Since executing `unreachable` inst is an UB, we
1884	// can relax the condition based on the assumptiom that the program would
1885	// never enter Succ and trigger such an UB.
1886	if (isa<UnreachableInst>(Val: *Succ->begin()))
1887	continue;
1888	return false;
1889	}
1890	// The second of pair is a SkipFlags bitmask.
1891	using SuccIterPair = std::pair<BasicBlock::iterator, unsigned>;
1892	SmallVector<SuccIterPair, `8`> SuccIterPairs;
1893	for (auto *Succ : UniqueSuccessors) {
1894	BasicBlock::iterator SuccItr = Succ->begin();
1895	if (isa<PHINode>(Val: *SuccItr))
1896	return false;
1897	SuccIterPairs.push_back(Elt: SuccIterPair (SuccItr, `0`));
1898	}
1899
1900	if (AllInstsEqOnly) {
1901	// Check if all instructions in the successor blocks match. This allows
1902	// hoisting all instructions and removing the blocks we are hoisting from,
1903	// so does not add any new instructions.
1904
1905	// Check if sizes and terminators of all successors match.
1906	unsigned Size0 = UniqueSuccessors [`0`]->size();
1907	Instruction *Term0 = UniqueSuccessors [`0`]->getTerminator();
1908	bool AllSame =
1909	all_of(Range: drop_begin(RangeOrContainer&: UniqueSuccessors), P: [Term0, Size0](BasicBlock *Succ) {
1910	return Succ->getTerminator()->isIdenticalTo(I: Term0) &&
1911	Succ->size() == Size0;
1912	});
1913	if (!AllSame)
1914	return false;
1915	LockstepReverseIterator<true> LRI(UniqueSuccessors.getArrayRef());
1916	while (LRI.isValid()) {
1917	Instruction I0 = (LRI)[`0`];
1918	if (any_of(Range: LRI, P: [I0](Instruction I) {
1919	return !areIdenticalUpToCommutativity(I1: I0, I2: I);
1920	})) {
1921	return false;
1922	}
1923	--LRI;
1924	}
1925	// Now we know that all instructions in all successors can be hoisted. Let
1926	// the loop below handle the hoisting.
1927	}
1928
1929	// Count how many instructions were not hoisted so far. There's a limit on how
1930	// many instructions we skip, serving as a compilation time control as well as
1931	// preventing excessive increase of life ranges.
1932	unsigned NumSkipped = `0`;
1933	// If we find an unreachable instruction at the beginning of a basic block, we
1934	// can still hoist instructions from the rest of the basic blocks.
1935	if (SuccIterPairs.size() > `2`) {
1936	erase_if(C&: SuccIterPairs,
1937	P: [](const auto &Pair) { return isa<UnreachableInst>(Pair.first); });
1938	if (SuccIterPairs.size() < `2`)
1939	return false;
1940	}
1941
1942	bool Changed = false;
1943
1944	for (;;) {
1945	auto *SuccIterPairBegin = SuccIterPairs.begin();
1946	auto &BB1ItrPair = *SuccIterPairBegin++;
1947	auto OtherSuccIterPairRange =
1948	iterator_range(SuccIterPairBegin, SuccIterPairs.end());
1949	auto OtherSuccIterRange = make_first_range(c&: OtherSuccIterPairRange);
1950
1951	Instruction I1 = &BB1ItrPair.first;
1952
1953	bool AllInstsAreIdentical = true;
1954	bool HasTerminator = I1->isTerminator();
1955	for (auto &SuccIter : OtherSuccIterRange) {
1956	Instruction I2 = &SuccIter;
1957	HasTerminator \|= I2->isTerminator();
1958	if (AllInstsAreIdentical && (!areIdenticalUpToCommutativity(I1, I2) \|\|
1959	MMRAMetadata (I1) != MMRAMetadata (I2)))
1960	AllInstsAreIdentical = false;
1961	}
1962
1963	SmallVector<Instruction *, `8`> OtherInsts;
1964	for (auto &SuccIter : OtherSuccIterRange)
1965	OtherInsts.push_back(Elt: &*SuccIter);
1966
1967	// If we are hoisting the terminator instruction, don't move one (making a
1968	// broken BB), instead clone it, and remove BI.
1969	if (HasTerminator) {
1970	// Even if BB, which contains only one unreachable instruction, is ignored
1971	// at the beginning of the loop, we can hoist the terminator instruction.
1972	// If any instructions remain in the block, we cannot hoist terminators.
1973	if (NumSkipped \|\| !AllInstsAreIdentical) {
1974	hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
1975	return Changed;
1976	}
1977
1978	return hoistSuccIdenticalTerminatorToSwitchOrIf(
1979	TI, I1, OtherSuccTIs&: OtherInsts, UniqueSuccessors: UniqueSuccessors.getArrayRef()) \|\|
1980	Changed;
1981	}
1982
1983	if (AllInstsAreIdentical) {
1984	unsigned SkipFlagsBB1 = BB1ItrPair.second;
1985	AllInstsAreIdentical =
1986	isSafeToHoistInstr(I: I1, Flags: SkipFlagsBB1) &&
1987	all_of(Range&: OtherSuccIterPairRange, P: [=](const auto &Pair) {
1988	Instruction I2 = &Pair.first;
1989	unsigned SkipFlagsBB2 = Pair.second;
1990	// Even if the instructions are identical, it may not
1991	// be safe to hoist them if we have skipped over
1992	// instructions with side effects or their operands
1993	// weren't hoisted.
1994	return isSafeToHoistInstr(I: I2, Flags: SkipFlagsBB2) &&
1995	shouldHoistCommonInstructions(I1, I2, TTI);
1996	});
1997	}
1998
1999	if (AllInstsAreIdentical) {
2000	BB1ItrPair.first ++;
2001	// For a normal instruction, we just move one to right before the
2002	// branch, then replace all uses of the other with the first. Finally,
2003	// we remove the now redundant second instruction.
2004	hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
2005	// We've just hoisted DbgVariableRecords; move I1 after them (before TI)
2006	// and leave any that were not hoisted behind (by calling moveBefore
2007	// rather than moveBeforePreserving).
2008	I1->moveBefore(InsertPos: TI->getIterator());
2009	for (auto &SuccIter : OtherSuccIterRange) {
2010	Instruction I2 = &SuccIter ++;
2011	assert(I2 != I1);
2012	if (!I2->use_empty())
2013	I2->replaceAllUsesWith(V: I1);
2014	I1->andIRFlags(V: I2);
2015	if (auto *CB = dyn_cast<CallBase>(Val: I1)) {
2016	bool Success = CB->tryIntersectAttributes(Other: cast<CallBase>(Val: I2));
2017	assert(Success && "We should not be trying to hoist callbases "
2018	"with non-intersectable attributes");
2019	// For NDEBUG Compile.
2020	(void)Success;
2021	}
2022
2023	combineMetadataForCSE(K: I1, J: I2, DoesKMove: true);
2024	// I1 and I2 are being combined into a single instruction. Its debug
2025	// location is the merged locations of the original instructions.
2026	I1->applyMergedLocation(LocA: I1->getDebugLoc(), LocB: I2->getDebugLoc());
2027	I2->eraseFromParent();
2028	}
2029	if (!Changed)
2030	NumHoistCommonCode += SuccIterPairs.size();
2031	Changed = true;
2032	NumHoistCommonInstrs += SuccIterPairs.size();
2033	} else {
2034	if (NumSkipped >= HoistCommonSkipLimit) {
2035	hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
2036	return Changed;
2037	}
2038	// We are about to skip over a pair of non-identical instructions. Record
2039	// if any have characteristics that would prevent reordering instructions
2040	// across them.
2041	for (auto &SuccIterPair : SuccIterPairs) {
2042	Instruction I = &SuccIterPair.first ++;
2043	SuccIterPair.second \|= skippedInstrFlags(I);
2044	}
2045	++NumSkipped;
2046	}
2047	}
2048	}
2049
2050	bool SimplifyCFGOpt::hoistSuccIdenticalTerminatorToSwitchOrIf(
2051	Instruction TI, Instruction I1,
2052	SmallVectorImpl<Instruction *> &OtherSuccTIs,
2053	ArrayRef<BasicBlock *> UniqueSuccessors) {
2054
2055	auto *BI = dyn_cast<CondBrInst>(Val: TI);
2056
2057	bool Changed = false;
2058	BasicBlock *TIParent = TI->getParent();
2059	BasicBlock *BB1 = I1->getParent();
2060
2061	// Use only for an if statement.
2062	auto I2 = OtherSuccTIs.begin();
2063	auto *BB2 = I2->getParent();
2064	if (BI) {
2065	assert(OtherSuccTIs.size() == `1`);
2066	assert(BI->getSuccessor(`0`) == I1->getParent());
2067	assert(BI->getSuccessor(`1`) == I2->getParent());
2068	}
2069
2070	// In the case of an if statement, we try to hoist an invoke.
2071	// FIXME: Can we define a safety predicate for CallBr?
2072	// FIXME: Test case llvm/test/Transforms/SimplifyCFG/2009-06-15-InvokeCrash.ll
2073	// removed in 4c923b3b3fd0ac1edebf0603265ca3ba51724937 commit?
2074	if (isa<InvokeInst>(Val: I1) && (!BI \|\| !isSafeToHoistInvoke(BB1, BB2, I1, I2)))
2075	return false;
2076
2077	// TODO: callbr hoisting currently disabled pending further study.
2078	if (isa<CallBrInst>(Val: I1))
2079	return false;
2080
2081	for (BasicBlock *Succ : successors(BB: BB1)) {
2082	for (PHINode &PN : Succ->phis()) {
2083	Value *BB1V = PN.getIncomingValueForBlock(BB: BB1);
2084	for (Instruction *OtherSuccTI : OtherSuccTIs) {
2085	Value *BB2V = PN.getIncomingValueForBlock(BB: OtherSuccTI->getParent());
2086	if (BB1V == BB2V)
2087	continue;
2088
2089	// In the case of an if statement, check for
2090	// passingValueIsAlwaysUndefined here because we would rather eliminate
2091	// undefined control flow then converting it to a select.
2092	if (!BI \|\| passingValueIsAlwaysUndefined(V: BB1V, I: &PN) \|\|
2093	passingValueIsAlwaysUndefined(V: BB2V, I: &PN))
2094	return false;
2095	}
2096	}
2097	}
2098
2099	// Hoist DbgVariableRecords attached to the terminator to match dbg.*
2100	// intrinsic hoisting behaviour in hoistCommonCodeFromSuccessors.
2101	hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts&: OtherSuccTIs);
2102	// Clone the terminator and hoist it into the pred, without any debug info.
2103	Instruction *NT = I1->clone();
2104	NT->insertInto(ParentBB: TIParent, It: TI->getIterator());
2105	if (!NT->getType()->isVoidTy()) {
2106	I1->replaceAllUsesWith(V: NT);
2107	for (Instruction *OtherSuccTI : OtherSuccTIs)
2108	OtherSuccTI->replaceAllUsesWith(V: NT);
2109	NT->takeName(V: I1);
2110	}
2111	Changed = true;
2112	NumHoistCommonInstrs += OtherSuccTIs.size() + `1`;
2113
2114	// Ensure terminator gets a debug location, even an unknown one, in case
2115	// it involves inlinable calls.
2116	SmallVector<DebugLoc, `4`> Locs;
2117	Locs.push_back(Elt: I1->getDebugLoc());
2118	for (auto *OtherSuccTI : OtherSuccTIs)
2119	Locs.push_back(Elt: OtherSuccTI->getDebugLoc());
2120	NT->setDebugLoc(DebugLoc::getMergedLocations(Locs));
2121
2122	// PHIs created below will adopt NT's merged DebugLoc.
2123	IRBuilder<NoFolder> Builder(NT);
2124
2125	// In the case of an if statement, hoisting one of the terminators from our
2126	// successor is a great thing. Unfortunately, the successors of the if/else
2127	// blocks may have PHI nodes in them. If they do, all PHI entries for BB1/BB2
2128	// must agree for all PHI nodes, so we insert select instruction to compute
2129	// the final result.
2130	if (BI) {
2131	std::map<std::pair<Value , Value >, SelectInst *> InsertedSelects;
2132	for (BasicBlock *Succ : successors(BB: BB1)) {
2133	for (PHINode &PN : Succ->phis()) {
2134	Value *BB1V = PN.getIncomingValueForBlock(BB: BB1);
2135	Value *BB2V = PN.getIncomingValueForBlock(BB: BB2);
2136	if (BB1V == BB2V)
2137	continue;
2138
2139	// These values do not agree. Insert a select instruction before NT
2140	// that determines the right value.
2141	SelectInst *&SI = InsertedSelects [std::make_pair(x&: BB1V, y&: BB2V)];
2142	if (!SI) {
2143	// Propagate fast-math-flags from phi node to its replacement select.
2144	SI = cast<SelectInst>(Val: Builder.CreateSelectFMF(
2145	C: BI->getCondition(), True: BB1V, False: BB2V,
2146	FMFSource: isa<FPMathOperator>(Val: PN) ? &PN : nullptr,
2147	Name: BB1V->getName() + "." + BB2V->getName(), MDFrom: BI));
2148	}
2149
2150	// Make the PHI node use the select for all incoming values for BB1/BB2
2151	for (unsigned i = `0`, e = PN.getNumIncomingValues(); i != e; ++i)
2152	if (PN.getIncomingBlock(i) == BB1 \|\| PN.getIncomingBlock(i) == BB2)
2153	PN.setIncomingValue(i, V: SI);
2154	}
2155	}
2156	}
2157
2158	SmallVector<DominatorTree::UpdateType, `4`> Updates;
2159
2160	// Update any PHI nodes in our new successors.
2161	for (BasicBlock *Succ : successors(BB: BB1)) {
2162	addPredecessorToBlock(Succ, NewPred: TIParent, ExistPred: BB1);
2163	if (DTU)
2164	Updates.push_back(Elt: {DominatorTree::Insert, TIParent, Succ});
2165	}
2166
2167	if (DTU) {
2168	// TI might be a switch with multi-cases destination, so we need to care for
2169	// the duplication of successors.
2170	for (BasicBlock *Succ : UniqueSuccessors)
2171	Updates.push_back(Elt: {DominatorTree::Delete, TIParent, Succ});
2172	}
2173
2174	eraseTerminatorAndDCECond(TI);
2175	if (DTU)
2176	DTU->applyUpdates(Updates);
2177	return Changed;
2178	}
2179
2180	// TODO: Refine this. This should avoid cases like turning constant memcpy sizes
2181	// into variables.
2182	static bool replacingOperandWithVariableIsCheap(const Instruction *I,
2183	int OpIdx) {
2184	// Divide/Remainder by constant is typically much cheaper than by variable.
2185	if (I->isIntDivRem())
2186	return OpIdx != `1`;
2187	return !isa<IntrinsicInst>(Val: I);
2188	}
2189
2190	// All instructions in Insts belong to different blocks that all unconditionally
2191	// branch to a common successor. Analyze each instruction and return true if it
2192	// would be possible to sink them into their successor, creating one common
2193	// instruction instead. For every value that would be required to be provided by
2194	// PHI node (because an operand varies in each input block), add to PHIOperands.
2195	static bool canSinkInstructions(
2196	ArrayRef<Instruction *> Insts,
2197	DenseMap<const Use , SmallVector<Value , `4`>> &PHIOperands) {
2198	// Prune out obviously bad instructions to move. Each instruction must have
2199	// the same number of uses, and we check later that the uses are consistent.
2200	std::optional<unsigned> NumUses;
2201	for (auto *I : Insts) {
2202	// These instructions may change or break semantics if moved.
2203	if (isa<PHINode>(Val: I) \|\| I->isEHPad() \|\| isa<AllocaInst>(Val: I) \|\|
2204	I->getType()->isTokenTy())
2205	return false;
2206
2207	// Do not try to sink an instruction in an infinite loop - it can cause
2208	// this algorithm to infinite loop.
2209	if (I->getParent()->getSingleSuccessor() == I->getParent())
2210	return false;
2211
2212	// Conservatively return false if I is an inline-asm instruction. Sinking
2213	// and merging inline-asm instructions can potentially create arguments
2214	// that cannot satisfy the inline-asm constraints.
2215	// If the instruction has nomerge or convergent attribute, return false.
2216	if (const auto *C = dyn_cast<CallBase>(Val: I))
2217	if (C->isInlineAsm() \|\| C->cannotMerge() \|\| C->isConvergent())
2218	return false;
2219
2220	if (!NumUses)
2221	NumUses = I->getNumUses();
2222	else if (NumUses != I->getNumUses())
2223	return false;
2224	}
2225
2226	const Instruction *I0 = Insts.front();
2227	const auto I0MMRA = MMRAMetadata (*I0);
2228	for (auto *I : Insts) {
2229	if (!I->isSameOperationAs(I: I0, flags: Instruction::CompareUsingIntersectedAttrs))
2230	return false;
2231
2232	// Treat MMRAs conservatively. This pass can be quite aggressive and
2233	// could drop a lot of MMRAs otherwise.
2234	if (MMRAMetadata (*I) != I0MMRA)
2235	return false;
2236	}
2237
2238	// Uses must be consistent: If I0 is used in a phi node in the sink target,
2239	// then the other phi operands must match the instructions from Insts. This
2240	// also has to hold true for any phi nodes that would be created as a result
2241	// of sinking. Both of these cases are represented by PhiOperands.
2242	for (const Use &U : I0->uses()) {
2243	auto It = PHIOperands.find(Val: &U);
2244	if (It == PHIOperands.end())
2245	// There may be uses in other blocks when sinking into a loop header.
2246	return false;
2247	if (!equal(LRange&: Insts, RRange&: It ->second))
2248	return false;
2249	}
2250
2251	// For calls to be sinkable, they must all be indirect, or have same callee.
2252	// I.e. if we have two direct calls to different callees, we don't want to
2253	// turn that into an indirect call. Likewise, if we have an indirect call,
2254	// and a direct call, we don't actually want to have a single indirect call.
2255	if (isa<CallBase>(Val: I0)) {
2256	auto IsIndirectCall = [](const Instruction *I) {
2257	return cast<CallBase>(Val: I)->isIndirectCall();
2258	};
2259	bool HaveIndirectCalls = any_of(Range&: Insts, P: IsIndirectCall);
2260	bool AllCallsAreIndirect = all_of(Range&: Insts, P: IsIndirectCall);
2261	if (HaveIndirectCalls) {
2262	if (!AllCallsAreIndirect)
2263	return false;
2264	} else {
2265	// All callees must be identical.
2266	Value Callee = nullptr*;
2267	for (const Instruction *I : Insts) {
2268	Value *CurrCallee = cast<CallBase>(Val: I)->getCalledOperand();
2269	if (!Callee)
2270	Callee = CurrCallee;
2271	else if (Callee != CurrCallee)
2272	return false;
2273	}
2274	}
2275	}
2276
2277	for (unsigned OI = `0`, OE = I0->getNumOperands(); OI != OE; ++OI) {
2278	Value *Op = I0->getOperand(i: OI);
2279	auto SameAsI0 = [&I0, OI](const Instruction *I) {
2280	assert(I->getNumOperands() == I0->getNumOperands());
2281	return I->getOperand(i: OI) == I0->getOperand(i: OI);
2282	};
2283	if (!all_of(Range&: Insts, P: SameAsI0)) {
2284	if ((isa<Constant>(Val: Op) && !replacingOperandWithVariableIsCheap(I: I0, OpIdx: OI)) \|\|
2285	!canReplaceOperandWithVariable(I: I0, OpIdx: OI))
2286	// We can't create a PHI from this GEP.
2287	return false;
2288	auto &Ops = PHIOperands [&I0->getOperandUse(i: OI)];
2289	for (auto *I : Insts)
2290	Ops.push_back(Elt: I->getOperand(i: OI));
2291	}
2292	}
2293	return true;
2294	}
2295
2296	// Assuming canSinkInstructions(Blocks) has returned true, sink the last
2297	// instruction of every block in Blocks to their common successor, commoning
2298	// into one instruction.
2299	static void sinkLastInstruction(ArrayRef<BasicBlock*> Blocks) {
2300	auto *BBEnd = Blocks [`0`]->getTerminator()->getSuccessor(Idx: `0`);
2301
2302	// canSinkInstructions returning true guarantees that every block has at
2303	// least one non-terminator instruction.
2304	SmallVector<Instruction*,`4`> Insts;
2305	for (auto *BB : Blocks) {
2306	Instruction *I = BB->getTerminator();
2307	I = I->getPrevNode();
2308	Insts.push_back(Elt: I);
2309	}
2310
2311	// We don't need to do any more checking here; canSinkInstructions should
2312	// have done it all for us.
2313	SmallVector<Value*, `4`> NewOperands;
2314	Instruction *I0 = Insts.front();
2315	for (unsigned O = `0`, E = I0->getNumOperands(); O != E; ++O) {
2316	// This check is different to that in canSinkInstructions. There, we
2317	// cared about the global view once simplifycfg (and instcombine) have
2318	// completed - it takes into account PHIs that become trivially
2319	// simplifiable. However here we need a more local view; if an operand
2320	// differs we create a PHI and rely on instcombine to clean up the very
2321	// small mess we may make.
2322	bool NeedPHI = any_of(Range&: Insts, P: [&I0, O](const Instruction *I) {
2323	return I->getOperand(i: O) != I0->getOperand(i: O);
2324	});
2325	if (!NeedPHI) {
2326	NewOperands.push_back(Elt: I0->getOperand(i: O));
2327	continue;
2328	}
2329
2330	// Create a new PHI in the successor block and populate it.
2331	auto *Op = I0->getOperand(i: O);
2332	assert(!Op->getType()->isTokenTy() && "Can't PHI tokens!");
2333	auto *PN =
2334	PHINode::Create(Ty: Op->getType(), NumReservedValues: Insts.size(), NameStr: Op->getName() + ".sink");
2335	PN->insertBefore(InsertPos: BBEnd->begin());
2336	for (auto *I : Insts)
2337	PN->addIncoming(V: I->getOperand(i: O), BB: I->getParent());
2338	NewOperands.push_back(Elt: PN);
2339	}
2340
2341	// Arbitrarily use I0 as the new "common" instruction; remap its operands
2342	// and move it to the start of the successor block.
2343	for (unsigned O = `0`, E = I0->getNumOperands(); O != E; ++O)
2344	I0->getOperandUse(i: O).set(NewOperands [O]);
2345
2346	I0->moveBefore(BB&: *BBEnd, I: BBEnd->getFirstInsertionPt());
2347
2348	// Update metadata and IR flags, and merge debug locations.
2349	for (auto *I : Insts)
2350	if (I != I0) {
2351	// The debug location for the "common" instruction is the merged locations
2352	// of all the commoned instructions. We start with the original location
2353	// of the "common" instruction and iteratively merge each location in the
2354	// loop below.
2355	// This is an N-way merge, which will be inefficient if I0 is a CallInst.
2356	// However, as N-way merge for CallInst is rare, so we use simplified API
2357	// instead of using complex API for N-way merge.
2358	I0->applyMergedLocation(LocA: I0->getDebugLoc(), LocB: I->getDebugLoc());
2359	combineMetadataForCSE(K: I0, J: I, DoesKMove: true);
2360	I0->andIRFlags(V: I);
2361	if (auto *CB = dyn_cast<CallBase>(Val: I0)) {
2362	bool Success = CB->tryIntersectAttributes(Other: cast<CallBase>(Val: I));
2363	assert(Success && "We should not be trying to sink callbases "
2364	"with non-intersectable attributes");
2365	// For NDEBUG Compile.
2366	(void)Success;
2367	}
2368	}
2369
2370	for (User *U : make_early_inc_range(Range: I0->users())) {
2371	// canSinkLastInstruction checked that all instructions are only used by
2372	// phi nodes in a way that allows replacing the phi node with the common
2373	// instruction.
2374	auto *PN = cast<PHINode>(Val: U);
2375	PN->replaceAllUsesWith(V: I0);
2376	PN->eraseFromParent();
2377	}
2378
2379	// Finally nuke all instructions apart from the common instruction.
2380	for (auto *I : Insts) {
2381	if (I == I0)
2382	continue;
2383	// The remaining uses are debug users, replace those with the common inst.
2384	// In most (all?) cases this just introduces a use-before-def.
2385	assert(I->user_empty() && "Inst unexpectedly still has non-dbg users");
2386	I->replaceAllUsesWith(V: I0);
2387	I->eraseFromParent();
2388	}
2389	}
2390
2391	/// Check whether BB's predecessors end with unconditional branches. If it is
2392	/// true, sink any common code from the predecessors to BB.
2393	static bool sinkCommonCodeFromPredecessors(BasicBlock *BB,
2394	DomTreeUpdater *DTU) {
2395	// We support two situations:
2396	// (1) all incoming arcs are unconditional
2397	// (2) there are non-unconditional incoming arcs
2398	//
2399	// (2) is very common in switch defaults and
2400	// else-if patterns;
2401	//
2402	// if (a) f(1);
2403	// else if (b) f(2);
2404	//
2405	// produces:
2406	//
2407	// [if]
2408	// / \
2409	// [f(1)] [if]
2410	// \| \| \
2411	// \| \| \|
2412	// \| [f(2)]\|
2413	// \ \| /
2414	// [ end ]
2415	//
2416	// [end] has two unconditional predecessor arcs and one conditional. The
2417	// conditional refers to the implicit empty 'else' arc. This conditional
2418	// arc can also be caused by an empty default block in a switch.
2419	//
2420	// In this case, we attempt to sink code from all unconditional* arcs.*
2421	// If we can sink instructions from these arcs (determined during the scan
2422	// phase below) we insert a common successor for all unconditional arcs and
2423	// connect that to [end], to enable sinking:
2424	//
2425	// [if]
2426	// / \
2427	// [x(1)] [if]
2428	// \| \| \
2429	// \| \| \
2430	// \| [x(2)] \|
2431	// \ / \|
2432	// [sink.split] \|
2433	// \ /
2434	// [ end ]
2435	//
2436	SmallVector<BasicBlock*,`4`> UnconditionalPreds;
2437	bool HaveNonUnconditionalPredecessors = false;
2438	for (auto *PredBB : predecessors(BB)) {
2439	auto *PredBr = dyn_cast<UncondBrInst>(Val: PredBB->getTerminator());
2440	if (PredBr)
2441	UnconditionalPreds.push_back(Elt: PredBB);
2442	else
2443	HaveNonUnconditionalPredecessors = true;
2444	}
2445	if (UnconditionalPreds.size() < `2`)
2446	return false;
2447
2448	// We take a two-step approach to tail sinking. First we scan from the end of
2449	// each block upwards in lockstep. If the n'th instruction from the end of each
2450	// block can be sunk, those instructions are added to ValuesToSink and we
2451	// carry on. If we can sink an instruction but need to PHI-merge some operands
2452	// (because they're not identical in each instruction) we add these to
2453	// PHIOperands.
2454	// We prepopulate PHIOperands with the phis that already exist in BB.
2455	DenseMap<const Use , SmallVector<Value , `4`>> PHIOperands;
2456	for (PHINode &PN : BB->phis()) {
2457	SmallDenseMap<BasicBlock , const* Use *, `4`> IncomingVals;
2458	for (const Use &U : PN.incoming_values())
2459	IncomingVals.insert(KV: {PN.getIncomingBlock(U), &U});
2460	auto &Ops = PHIOperands [IncomingVals [UnconditionalPreds [`0`]]];
2461	for (BasicBlock *Pred : UnconditionalPreds)
2462	Ops.push_back(Elt: *IncomingVals [Pred]);
2463	}
2464
2465	int ScanIdx = `0`;
2466	SmallPtrSet<Value*,`4`> InstructionsToSink;
2467	LockstepReverseIterator<true> LRI(UnconditionalPreds);
2468	while (LRI.isValid() &&
2469	canSinkInstructions(Insts: *LRI, PHIOperands)) {
2470	LLVM_DEBUG(dbgs() << "SINK: instruction can be sunk: " << (LRI)[`0`]
2471	<< "\n");
2472	InstructionsToSink.insert_range(R: *LRI);
2473	++ScanIdx;
2474	--LRI;
2475	}
2476
2477	// If no instructions can be sunk, early-return.
2478	if (ScanIdx == `0`)
2479	return false;
2480
2481	bool followedByDeoptOrUnreachable = IsBlockFollowedByDeoptOrUnreachable(BB);
2482
2483	if (!followedByDeoptOrUnreachable) {
2484	// Check whether this is the pointer operand of a load/store.
2485	auto IsMemOperand = [](Use &U) {
2486	auto *I = cast<Instruction>(Val: U.getUser());
2487	if (isa<LoadInst>(Val: I))
2488	return U.getOperandNo() == LoadInst::getPointerOperandIndex();
2489	if (isa<StoreInst>(Val: I))
2490	return U.getOperandNo() == StoreInst::getPointerOperandIndex();
2491	return false;
2492	};
2493
2494	// Okay, we could* sink last ScanIdx instructions. But how many can we*
2495	// actually sink before encountering instruction that is unprofitable to
2496	// sink?
2497	auto ProfitableToSinkInstruction = [&](LockstepReverseIterator<true> &LRI) {
2498	unsigned NumPHIInsts = `0`;
2499	for (Use &U : (*LRI)[`0`]->operands()) {
2500	auto It = PHIOperands.find(Val: &U);
2501	if (It != PHIOperands.end() && !all_of(Range&: It ->second, P: [&](Value *V) {
2502	return InstructionsToSink.contains(Ptr: V);
2503	})) {
2504	++NumPHIInsts;
2505	// Do not separate a load/store from the gep producing the address.
2506	// The gep can likely be folded into the load/store as an addressing
2507	// mode. Additionally, a load of a gep is easier to analyze than a
2508	// load of a phi.
2509	if (IsMemOperand (U) &&
2510	any_of(Range&: It ->second, P: [](Value V) { return* isa<GEPOperator>(Val: V); }))
2511	return false;
2512	// FIXME: this check is overly optimistic. We may end up not sinking
2513	// said instruction, due to the very same profitability check.
2514	// See @creating_too_many_phis in sink-common-code.ll.
2515	}
2516	}
2517	LLVM_DEBUG(dbgs() << "SINK: #phi insts: " << NumPHIInsts << "\n");
2518	return NumPHIInsts <= `1`;
2519	};
2520
2521	// We've determined that we are going to sink last ScanIdx instructions,
2522	// and recorded them in InstructionsToSink. Now, some instructions may be
2523	// unprofitable to sink. But that determination depends on the instructions
2524	// that we are going to sink.
2525
2526	// First, forward scan: find the first instruction unprofitable to sink,
2527	// recording all the ones that are profitable to sink.
2528	// FIXME: would it be better, after we detect that not all are profitable.
2529	// to either record the profitable ones, or erase the unprofitable ones?
2530	// Maybe we need to choose (at runtime) the one that will touch least
2531	// instrs?
2532	LRI.reset();
2533	int Idx = `0`;
2534	SmallPtrSet<Value *, `4`> InstructionsProfitableToSink;
2535	while (Idx < ScanIdx) {
2536	if (!ProfitableToSinkInstruction (LRI)) {
2537	// Too many PHIs would be created.
2538	LLVM_DEBUG(
2539	dbgs() << "SINK: stopping here, too many PHIs would be created!\n");
2540	break;
2541	}
2542	InstructionsProfitableToSink.insert_range(R: *LRI);
2543	--LRI;
2544	++Idx;
2545	}
2546
2547	// If no instructions can be sunk, early-return.
2548	if (Idx == `0`)
2549	return false;
2550
2551	// Did we determine that (only) some instructions are unprofitable to sink?
2552	if (Idx < ScanIdx) {
2553	// Okay, some instructions are unprofitable.
2554	ScanIdx = Idx;
2555	InstructionsToSink = InstructionsProfitableToSink;
2556
2557	// But, that may make other instructions unprofitable, too.
2558	// So, do a backward scan, do any earlier instructions become
2559	// unprofitable?
2560	assert(
2561	!ProfitableToSinkInstruction(LRI) &&
2562	"We already know that the last instruction is unprofitable to sink");
2563	++LRI;
2564	--Idx;
2565	while (Idx >= `0`) {
2566	// If we detect that an instruction becomes unprofitable to sink,
2567	// all earlier instructions won't be sunk either,
2568	// so preemptively keep InstructionsProfitableToSink in sync.
2569	// FIXME: is this the most performant approach?
2570	for (auto I : LRI)
2571	InstructionsProfitableToSink.erase(Ptr: I);
2572	if (!ProfitableToSinkInstruction (LRI)) {
2573	// Everything starting with this instruction won't be sunk.
2574	ScanIdx = Idx;
2575	InstructionsToSink = InstructionsProfitableToSink;
2576	}
2577	++LRI;
2578	--Idx;
2579	}
2580	}
2581
2582	// If no instructions can be sunk, early-return.
2583	if (ScanIdx == `0`)
2584	return false;
2585	}
2586
2587	bool Changed = false;
2588
2589	if (HaveNonUnconditionalPredecessors) {
2590	if (!followedByDeoptOrUnreachable) {
2591	// It is always legal to sink common instructions from unconditional
2592	// predecessors. However, if not all predecessors are unconditional,
2593	// this transformation might be pessimizing. So as a rule of thumb,
2594	// don't do it unless we'd sink at least one non-speculatable instruction.
2595	// See https://bugs.llvm.org/show_bug.cgi?id=30244
2596	LRI.reset();
2597	int Idx = `0`;
2598	bool Profitable = false;
2599	while (Idx < ScanIdx) {
2600	if (!isSafeToSpeculativelyExecute(I: (*LRI)[`0`])) {
2601	Profitable = true;
2602	break;
2603	}
2604	--LRI;
2605	++Idx;
2606	}
2607	if (!Profitable)
2608	return false;
2609	}
2610
2611	LLVM_DEBUG(dbgs() << "SINK: Splitting edge\n");
2612	// We have a conditional edge and we're going to sink some instructions.
2613	// Insert a new block postdominating all blocks we're going to sink from.
2614	if (!SplitBlockPredecessors(BB, Preds: UnconditionalPreds, Suffix: ".sink.split", DTU))
2615	// Edges couldn't be split.
2616	return false;
2617	Changed = true;
2618	}
2619
2620	// Now that we've analyzed all potential sinking candidates, perform the
2621	// actual sink. We iteratively sink the last non-terminator of the source
2622	// blocks into their common successor unless doing so would require too
2623	// many PHI instructions to be generated (currently only one PHI is allowed
2624	// per sunk instruction).
2625	//
2626	// We can use InstructionsToSink to discount values needing PHI-merging that will
2627	// actually be sunk in a later iteration. This allows us to be more
2628	// aggressive in what we sink. This does allow a false positive where we
2629	// sink presuming a later value will also be sunk, but stop half way through
2630	// and never actually sink it which means we produce more PHIs than intended.
2631	// This is unlikely in practice though.
2632	int SinkIdx = `0`;
2633	for (; SinkIdx != ScanIdx; ++SinkIdx) {
2634	LLVM_DEBUG(dbgs() << "SINK: Sink: "
2635	<< *UnconditionalPreds[`0`]->getTerminator()->getPrevNode()
2636	<< "\n");
2637
2638	// Because we've sunk every instruction in turn, the current instruction to
2639	// sink is always at index 0.
2640	LRI.reset();
2641
2642	sinkLastInstruction(Blocks: UnconditionalPreds);
2643	NumSinkCommonInstrs ++;
2644	Changed = true;
2645	}
2646	if (SinkIdx != `0`)
2647	++NumSinkCommonCode;
2648	return Changed;
2649	}
2650
2651	namespace {
2652
2653	struct CompatibleSets {
2654	using SetTy = SmallVector<InvokeInst *, `2`>;
2655
2656	SmallVector<SetTy, `1`> Sets;
2657
2658	static bool shouldBelongToSameSet(ArrayRef<InvokeInst *> Invokes);
2659
2660	SetTy &getCompatibleSet(InvokeInst *II);
2661
2662	void insert(InvokeInst *II);
2663	};
2664
2665	CompatibleSets::SetTy &CompatibleSets::getCompatibleSet(InvokeInst *II) {
2666	// Perform a linear scan over all the existing sets, see if the new `invoke`
2667	// is compatible with any particular set. Since we know that all the `invokes`
2668	// within a set are compatible, only check the first `invoke` in each set.
2669	// WARNING: at worst, this has quadratic complexity.
2670	for (CompatibleSets::SetTy &Set : Sets) {
2671	if (CompatibleSets::shouldBelongToSameSet(Invokes: {Set.front(), II}))
2672	return Set;
2673	}
2674
2675	// Otherwise, we either had no sets yet, or this invoke forms a new set.
2676	return Sets.emplace_back();
2677	}
2678
2679	void CompatibleSets::insert(InvokeInst *II) {
2680	getCompatibleSet(II).emplace_back(Args&: II);
2681	}
2682
2683	bool CompatibleSets::shouldBelongToSameSet(ArrayRef<InvokeInst *> Invokes) {
2684	assert(Invokes.size() == `2` && "Always called with exactly two candidates.");
2685
2686	// Can we theoretically merge these `invoke`s?
2687	auto IsIllegalToMerge = [](InvokeInst *II) {
2688	return II->cannotMerge() \|\| II->isInlineAsm();
2689	};
2690	if (any_of(Range&: Invokes, P: IsIllegalToMerge))
2691	return false;
2692
2693	// Either both `invoke`s must be direct,
2694	// or both `invoke`s must be indirect.
2695	auto IsIndirectCall = [](InvokeInst II) { return* II->isIndirectCall(); };
2696	bool HaveIndirectCalls = any_of(Range&: Invokes, P: IsIndirectCall);
2697	bool AllCallsAreIndirect = all_of(Range&: Invokes, P: IsIndirectCall);
2698	if (HaveIndirectCalls) {
2699	if (!AllCallsAreIndirect)
2700	return false;
2701	} else {
2702	// All callees must be identical.
2703	Value Callee = nullptr*;
2704	for (InvokeInst *II : Invokes) {
2705	Value *CurrCallee = II->getCalledOperand();
2706	assert(CurrCallee && "There is always a called operand.");
2707	if (!Callee)
2708	Callee = CurrCallee;
2709	else if (Callee != CurrCallee)
2710	return false;
2711	}
2712	}
2713
2714	// Either both `invoke`s must not have a normal destination,
2715	// or both `invoke`s must have a normal destination,
2716	auto HasNormalDest = [](InvokeInst *II) {
2717	return !isa<UnreachableInst>(Val: II->getNormalDest()->getFirstNonPHIOrDbg());
2718	};
2719	if (any_of(Range&: Invokes, P: HasNormalDest)) {
2720	// Do not merge `invoke` that does not have a normal destination with one
2721	// that does have a normal destination, even though doing so would be legal.
2722	if (!all_of(Range&: Invokes, P: HasNormalDest))
2723	return false;
2724
2725	// All normal destinations must be identical.
2726	BasicBlock NormalBB = nullptr*;
2727	for (InvokeInst *II : Invokes) {
2728	BasicBlock *CurrNormalBB = II->getNormalDest();
2729	assert(CurrNormalBB && "There is always a 'continue to' basic block.");
2730	if (!NormalBB)
2731	NormalBB = CurrNormalBB;
2732	else if (NormalBB != CurrNormalBB)
2733	return false;
2734	}
2735
2736	// In the normal destination, the incoming values for these two `invoke`s
2737	// must be compatible.
2738	SmallPtrSet<Value *, `16`> EquivalenceSet(llvm::from_range, Invokes);
2739	if (!incomingValuesAreCompatible(
2740	BB: NormalBB, IncomingBlocks: {Invokes [`0`]->getParent(), Invokes [`1`]->getParent()},
2741	EquivalenceSet: &EquivalenceSet))
2742	return false;
2743	}
2744
2745	#ifndef NDEBUG
2746	// All unwind destinations must be identical.
2747	// We know that because we have started from said unwind destination.
2748	BasicBlock UnwindBB = nullptr*;
2749	for (InvokeInst *II : Invokes) {
2750	BasicBlock *CurrUnwindBB = II->getUnwindDest();
2751	assert(CurrUnwindBB && "There is always an 'unwind to' basic block.");
2752	if (!UnwindBB)
2753	UnwindBB = CurrUnwindBB;
2754	else
2755	assert(UnwindBB == CurrUnwindBB && "Unexpected unwind destination.");
2756	}
2757	#endif
2758
2759	// In the unwind destination, the incoming values for these two `invoke`s
2760	// must be compatible.
2761	if (!incomingValuesAreCompatible(
2762	BB: Invokes.front()->getUnwindDest(),
2763	IncomingBlocks: {Invokes [`0`]->getParent(), Invokes [`1`]->getParent()}))
2764	return false;
2765
2766	// Ignoring arguments, these `invoke`s must be identical,
2767	// including operand bundles.
2768	const InvokeInst *II0 = Invokes.front();
2769	for (auto *II : Invokes.drop_front())
2770	if (!II->isSameOperationAs(I: II0, flags: Instruction::CompareUsingIntersectedAttrs))
2771	return false;
2772
2773	// Can we theoretically form the data operands for the merged `invoke`?
2774	auto IsIllegalToMergeArguments = [](auto Ops) {
2775	Use &U0 = std::get<`0`>(Ops);
2776	Use &U1 = std::get<`1`>(Ops);
2777	if (U0 == U1)
2778	return false;
2779	return !canReplaceOperandWithVariable(I: cast<Instruction>(Val: U0.getUser()),
2780	OpIdx: U0.getOperandNo());
2781	};
2782	assert(Invokes.size() == `2` && "Always called with exactly two candidates.");
2783	if (any_of(Range: zip(t: Invokes [`0`]->data_ops(), u: Invokes [`1`]->data_ops()),
2784	P: IsIllegalToMergeArguments))
2785	return false;
2786
2787	return true;
2788	}
2789
2790	} // namespace
2791
2792	// Merge all invokes in the provided set, all of which are compatible
2793	// as per the `CompatibleSets::shouldBelongToSameSet()`.
2794	static void mergeCompatibleInvokesImpl(ArrayRef<InvokeInst *> Invokes,
2795	DomTreeUpdater *DTU) {
2796	assert(Invokes.size() >= `2` && "Must have at least two invokes to merge.");
2797
2798	SmallVector<DominatorTree::UpdateType, `8`> Updates;
2799	if (DTU)
2800	Updates.reserve(N: `2` + `3` * Invokes.size());
2801
2802	bool HasNormalDest =
2803	!isa<UnreachableInst>(Val: Invokes [`0`]->getNormalDest()->getFirstNonPHIOrDbg());
2804
2805	// Clone one of the invokes into a new basic block.
2806	// Since they are all compatible, it doesn't matter which invoke is cloned.
2807	InvokeInst *MergedInvoke = [&Invokes, HasNormalDest]() {
2808	InvokeInst *II0 = Invokes.front();
2809	BasicBlock *II0BB = II0->getParent();
2810	BasicBlock *InsertBeforeBlock =
2811	II0->getParent()->getIterator()->getNextNode();
2812	Function *Func = II0BB->getParent();
2813	LLVMContext &Ctx = II0->getContext();
2814
2815	BasicBlock *MergedInvokeBB = BasicBlock::Create(
2816	Context&: Ctx, Name: II0BB->getName() + ".invoke", Parent: Func, InsertBefore: InsertBeforeBlock);
2817
2818	auto *MergedInvoke = cast<InvokeInst>(Val: II0->clone());
2819	// NOTE: all invokes have the same attributes, so no handling needed.
2820	MergedInvoke->insertInto(ParentBB: MergedInvokeBB, It: MergedInvokeBB->end());
2821
2822	if (!HasNormalDest) {
2823	// This set does not have a normal destination,
2824	// so just form a new block with unreachable terminator.
2825	BasicBlock *MergedNormalDest = BasicBlock::Create(
2826	Context&: Ctx, Name: II0BB->getName() + ".cont", Parent: Func, InsertBefore: InsertBeforeBlock);
2827	auto UI = new* UnreachableInst (Ctx, MergedNormalDest);
2828	UI->setDebugLoc(DebugLoc::getTemporary());
2829	MergedInvoke->setNormalDest(MergedNormalDest);
2830	}
2831
2832	// The unwind destination, however, remainds identical for all invokes here.
2833
2834	return MergedInvoke;
2835	}();
2836
2837	if (DTU) {
2838	// Predecessor blocks that contained these invokes will now branch to
2839	// the new block that contains the merged invoke, ...
2840	for (InvokeInst *II : Invokes)
2841	Updates.push_back(
2842	Elt: {DominatorTree::Insert, II->getParent(), MergedInvoke->getParent()});
2843
2844	// ... which has the new `unreachable` block as normal destination,
2845	// or unwinds to the (same for all `invoke`s in this set) `landingpad`,
2846	for (BasicBlock *SuccBBOfMergedInvoke : successors(I: MergedInvoke))
2847	Updates.push_back(Elt: {DominatorTree::Insert, MergedInvoke->getParent(),
2848	SuccBBOfMergedInvoke});
2849
2850	// Since predecessor blocks now unconditionally branch to a new block,
2851	// they no longer branch to their original successors.
2852	for (InvokeInst *II : Invokes)
2853	for (BasicBlock *SuccOfPredBB : successors(BB: II->getParent()))
2854	Updates.push_back(
2855	Elt: {DominatorTree::Delete, II->getParent(), SuccOfPredBB});
2856	}
2857
2858	bool IsIndirectCall = Invokes [`0`]->isIndirectCall();
2859
2860	// Form the merged operands for the merged invoke.
2861	for (Use &U : MergedInvoke->operands()) {
2862	// Only PHI together the indirect callees and data operands.
2863	if (MergedInvoke->isCallee(U: &U)) {
2864	if (!IsIndirectCall)
2865	continue;
2866	} else if (!MergedInvoke->isDataOperand(U: &U))
2867	continue;
2868
2869	// Don't create trivial PHI's with all-identical incoming values.
2870	bool NeedPHI = any_of(Range&: Invokes, P: [&U](InvokeInst *II) {
2871	return II->getOperand(i_nocapture: U.getOperandNo()) != U.get();
2872	});
2873	if (!NeedPHI)
2874	continue;
2875
2876	// Form a PHI out of all the data ops under this index.
2877	PHINode *PN = PHINode::Create(
2878	Ty: U ->getType(), /NumReservedValues=/Invokes.size(), NameStr: "", InsertBefore: MergedInvoke->getIterator());
2879	for (InvokeInst *II : Invokes)
2880	PN->addIncoming(V: II->getOperand(i_nocapture: U.getOperandNo()), BB: II->getParent());
2881
2882	U.set(PN);
2883	}
2884
2885	// We've ensured that each PHI node has compatible (identical) incoming values
2886	// when coming from each of the `invoke`s in the current merge set,
2887	// so update the PHI nodes accordingly.
2888	for (BasicBlock *Succ : successors(I: MergedInvoke))
2889	addPredecessorToBlock(Succ, /NewPred=/MergedInvoke->getParent(),
2890	/ExistPred=/Invokes.front()->getParent());
2891
2892	// And finally, replace the original `invoke`s with an unconditional branch
2893	// to the block with the merged `invoke`. Also, give that merged `invoke`
2894	// the merged debugloc of all the original `invoke`s.
2895	DILocation MergedDebugLoc = nullptr*;
2896	for (InvokeInst *II : Invokes) {
2897	// Compute the debug location common to all the original `invoke`s.
2898	if (!MergedDebugLoc)
2899	MergedDebugLoc = II->getDebugLoc();
2900	else
2901	MergedDebugLoc =
2902	DebugLoc::getMergedLocation(LocA: MergedDebugLoc, LocB: II->getDebugLoc());
2903
2904	// And replace the old `invoke` with an unconditionally branch
2905	// to the block with the merged `invoke`.
2906	for (BasicBlock *OrigSuccBB : successors(BB: II->getParent()))
2907	OrigSuccBB->removePredecessor(Pred: II->getParent());
2908	auto *BI = UncondBrInst::Create(IfTrue: MergedInvoke->getParent(), InsertBefore: II->getParent());
2909	// The unconditional branch is part of the replacement for the original
2910	// invoke, so should use its DebugLoc.
2911	BI->setDebugLoc(II->getDebugLoc());
2912	bool Success = MergedInvoke->tryIntersectAttributes(Other: II);
2913	assert(Success && "Merged invokes with incompatible attributes");
2914	// For NDEBUG Compile
2915	(void)Success;
2916	II->replaceAllUsesWith(V: MergedInvoke);
2917	II->eraseFromParent();
2918	++NumInvokesMerged;
2919	}
2920	MergedInvoke->setDebugLoc(MergedDebugLoc);
2921	++NumInvokeSetsFormed;
2922
2923	if (DTU)
2924	DTU->applyUpdates(Updates);
2925	}
2926
2927	/// If this block is a `landingpad` exception handling block, categorize all
2928	/// the predecessor `invoke`s into sets, with all `invoke`s in each set
2929	/// being "mergeable" together, and then merge invokes in each set together.
2930	///
2931	/// This is a weird mix of hoisting and sinking. Visually, it goes from:
2932	/// [...] [...]
2933	/// \| \|
2934	/// [invoke0] [invoke1]
2935	/// / \ / \
2936	/// [cont0] [landingpad] [cont1]
2937	/// to:
2938	/// [...] [...]
2939	/// \ /
2940	/// [invoke]
2941	/// / \
2942	/// [cont] [landingpad]
2943	///
2944	/// But of course we can only do that if the invokes share the `landingpad`,
2945	/// edges invoke0->cont0 and invoke1->cont1 are "compatible",
2946	/// and the invoked functions are "compatible".
2947	static bool mergeCompatibleInvokes(BasicBlock BB, DomTreeUpdater DTU) {
2948	if (!EnableMergeCompatibleInvokes)
2949	return false;
2950
2951	bool Changed = false;
2952
2953	// FIXME: generalize to all exception handling blocks?
2954	if (!BB->isLandingPad())
2955	return Changed;
2956
2957	CompatibleSets Grouper;
2958
2959	// Record all the predecessors of this `landingpad`. As per verifier,
2960	// the only allowed predecessor is the unwind edge of an `invoke`.
2961	// We want to group "compatible" `invokes` into the same set to be merged.
2962	for (BasicBlock *PredBB : predecessors(BB))
2963	Grouper.insert(II: cast<InvokeInst>(Val: PredBB->getTerminator()));
2964
2965	// And now, merge `invoke`s that were grouped togeter.
2966	for (ArrayRef<InvokeInst *> Invokes : Grouper.Sets) {
2967	if (Invokes.size() < `2`)
2968	continue;
2969	Changed = true;
2970	mergeCompatibleInvokesImpl(Invokes, DTU);
2971	}
2972
2973	return Changed;
2974	}
2975
2976	namespace {
2977	/// Track ephemeral values, which should be ignored for cost-modelling
2978	/// purposes. Requires walking instructions in reverse order.
2979	class EphemeralValueTracker {
2980	SmallPtrSet<const Instruction *, `32`> EphValues;
2981
2982	bool isEphemeral(const Instruction *I) {
2983	if (isa<AssumeInst>(Val: I))
2984	return true;
2985	return !I->mayHaveSideEffects() && !I->isTerminator() &&
2986	all_of(Range: I->users(), P: [&](const User *U) {
2987	return EphValues.count(Ptr: cast<Instruction>(Val: U));
2988	});
2989	}
2990
2991	public:
2992	bool track(const Instruction *I) {
2993	if (isEphemeral(I)) {
2994	EphValues.insert(Ptr: I);
2995	return true;
2996	}
2997	return false;
2998	}
2999
3000	bool contains(const Instruction I) const* { return EphValues.contains(Ptr: I); }
3001	};
3002	} // namespace
3003
3004	/// Determine if we can hoist sink a sole store instruction out of a
3005	/// conditional block.
3006	///
3007	/// We are looking for code like the following:
3008	/// BrBB:
3009	/// store i32 %add, i32 %arrayidx2*
3010	/// ... // No other stores or function calls (we could be calling a memory
3011	/// ... // function).
3012	/// %cmp = icmp ult %x, %y
3013	/// br i1 %cmp, label %EndBB, label %ThenBB
3014	/// ThenBB:
3015	/// store i32 %add5, i32 %arrayidx2*
3016	/// br label EndBB
3017	/// EndBB:
3018	/// ...
3019	/// We are going to transform this into:
3020	/// BrBB:
3021	/// store i32 %add, i32 %arrayidx2*
3022	/// ... //
3023	/// %cmp = icmp ult %x, %y
3024	/// %add.add5 = select i1 %cmp, i32 %add, %add5
3025	/// store i32 %add.add5, i32 %arrayidx2*
3026	/// ...
3027	///
3028	/// \return The pointer to the value of the previous store if the store can be
3029	/// hoisted into the predecessor block. 0 otherwise.
3030	static Value isSafeToSpeculateStore(Instruction I, BasicBlock *BrBB,
3031	BasicBlock StoreBB, BasicBlock EndBB) {
3032	StoreInst *StoreToHoist = dyn_cast<StoreInst>(Val: I);
3033	if (!StoreToHoist)
3034	return nullptr;
3035
3036	// Volatile or atomic.
3037	if (!StoreToHoist->isSimple())
3038	return nullptr;
3039
3040	Value *StorePtr = StoreToHoist->getPointerOperand();
3041	Type *StoreTy = StoreToHoist->getValueOperand()->getType();
3042
3043	// Look for a store to the same pointer in BrBB.
3044	unsigned MaxNumInstToLookAt = `9`;
3045	// Skip pseudo probe intrinsic calls which are not really killing any memory
3046	// accesses.
3047	for (Instruction &CurI : reverse(C: BrBB->instructionsWithoutDebug(SkipPseudoOp: true))) {
3048	if (!MaxNumInstToLookAt)
3049	break;
3050	--MaxNumInstToLookAt;
3051
3052	// Could be calling an instruction that affects memory like free().
3053	if (CurI.mayWriteToMemory() && !isa<StoreInst>(Val: CurI))
3054	return nullptr;
3055
3056	if (auto *SI = dyn_cast<StoreInst>(Val: &CurI)) {
3057	// Found the previous store to same location and type. Make sure it is
3058	// simple, to avoid introducing a spurious non-atomic write after an
3059	// atomic write.
3060	if (SI->getPointerOperand() == StorePtr &&
3061	SI->getValueOperand()->getType() == StoreTy && SI->isSimple() &&
3062	SI->getAlign() >= StoreToHoist->getAlign())
3063	// Found the previous store, return its value operand.
3064	return SI->getValueOperand();
3065	return nullptr; // Unknown store.
3066	}
3067
3068	if (auto *LI = dyn_cast<LoadInst>(Val: &CurI)) {
3069	if (LI->getPointerOperand() == StorePtr && LI->getType() == StoreTy &&
3070	LI->isSimple() && LI->getAlign() >= StoreToHoist->getAlign()) {
3071	Value *Obj = getUnderlyingObject(V: StorePtr);
3072	bool ExplicitlyDereferenceableOnly;
3073	if (isWritableObject(Object: Obj, ExplicitlyDereferenceableOnly) &&
3074	capturesNothing(
3075	CC: PointerMayBeCaptured(V: Obj, /ReturnCaptures=/false,
3076	Mask: CaptureComponents::Provenance)) &&
3077	(!ExplicitlyDereferenceableOnly \|\|
3078	isDereferenceablePointer(V: StorePtr, Ty: StoreTy,
3079	DL: LI->getDataLayout()))) {
3080	// Found a previous load, return it.
3081	return LI;
3082	}
3083	}
3084	// The load didn't work out, but we may still find a store.
3085	}
3086	}
3087
3088	return nullptr;
3089	}
3090
3091	/// Estimate the cost of the insertion(s) and check that the PHI nodes can be
3092	/// converted to selects.
3093	static bool validateAndCostRequiredSelects(BasicBlock BB, BasicBlock ThenBB,
3094	BasicBlock *EndBB,
3095	unsigned &SpeculatedInstructions,
3096	InstructionCost &Cost,
3097	const TargetTransformInfo &TTI) {
3098	TargetTransformInfo::TargetCostKind CostKind =
3099	BB->getParent()->hasMinSize()
3100	? TargetTransformInfo::TCK_CodeSize
3101	: TargetTransformInfo::TCK_SizeAndLatency;
3102
3103	bool HaveRewritablePHIs = false;
3104	for (PHINode &PN : EndBB->phis()) {
3105	Value *OrigV = PN.getIncomingValueForBlock(BB);
3106	Value *ThenV = PN.getIncomingValueForBlock(BB: ThenBB);
3107
3108	// FIXME: Try to remove some of the duplication with
3109	// hoistCommonCodeFromSuccessors. Skip PHIs which are trivial.
3110	if (ThenV == OrigV)
3111	continue;
3112
3113	Cost += TTI.getCmpSelInstrCost(Opcode: Instruction::Select, ValTy: PN.getType(),
3114	CondTy: CmpInst::makeCmpResultType(opnd_type: PN.getType()),
3115	VecPred: CmpInst::BAD_ICMP_PREDICATE, CostKind);
3116
3117	// Don't convert to selects if we could remove undefined behavior instead.
3118	if (passingValueIsAlwaysUndefined(V: OrigV, I: &PN) \|\|
3119	passingValueIsAlwaysUndefined(V: ThenV, I: &PN))
3120	return false;
3121
3122	HaveRewritablePHIs = true;
3123	ConstantExpr *OrigCE = dyn_cast<ConstantExpr>(Val: OrigV);
3124	ConstantExpr *ThenCE = dyn_cast<ConstantExpr>(Val: ThenV);
3125	if (!OrigCE && !ThenCE)
3126	continue; // Known cheap (FIXME: Maybe not true for aggregates).
3127
3128	InstructionCost OrigCost = OrigCE ? computeSpeculationCost(I: OrigCE, TTI) : `0`;
3129	InstructionCost ThenCost = ThenCE ? computeSpeculationCost(I: ThenCE, TTI) : `0`;
3130	InstructionCost MaxCost =
3131	`2` * PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic;
3132	if (OrigCost + ThenCost > MaxCost)
3133	return false;
3134
3135	// Account for the cost of an unfolded ConstantExpr which could end up
3136	// getting expanded into Instructions.
3137	// FIXME: This doesn't account for how many operations are combined in the
3138	// constant expression.
3139	++SpeculatedInstructions;
3140	if (SpeculatedInstructions > `1`)
3141	return false;
3142	}
3143
3144	return HaveRewritablePHIs;
3145	}
3146
3147	static bool isProfitableToSpeculate(const CondBrInst *BI,
3148	std::optional<bool> Invert,
3149	const TargetTransformInfo &TTI) {
3150	// If the branch is non-unpredictable, and is predicted to not* branch to*
3151	// the `then` block, then avoid speculating it.
3152	if (BI->getMetadata(KindID: LLVMContext::MD_unpredictable))
3153	return true;
3154
3155	uint64_t TWeight, FWeight;
3156	if (!extractBranchWeights(I: *BI, TrueVal&: TWeight, FalseVal&: FWeight) \|\| (TWeight + FWeight) == `0`)
3157	return true;
3158
3159	if (!Invert.has_value())
3160	return false;
3161
3162	uint64_t EndWeight = *Invert ? TWeight : FWeight;
3163	BranchProbability BIEndProb =
3164	BranchProbability::getBranchProbability(Numerator: EndWeight, Denominator: TWeight + FWeight);
3165	BranchProbability Likely = TTI.getPredictableBranchThreshold();
3166	return BIEndProb < Likely;
3167	}
3168
3169	/// Speculate a conditional basic block flattening the CFG.
3170	///
3171	/// Note that this is a very risky transform currently. Speculating
3172	/// instructions like this is most often not desirable. Instead, there is an MI
3173	/// pass which can do it with full awareness of the resource constraints.
3174	/// However, some cases are "obvious" and we should do directly. An example of
3175	/// this is speculating a single, reasonably cheap instruction.
3176	///
3177	/// There is only one distinct advantage to flattening the CFG at the IR level:
3178	/// it makes very common but simplistic optimizations such as are common in
3179	/// instcombine and the DAG combiner more powerful by removing CFG edges and
3180	/// modeling their effects with easier to reason about SSA value graphs.
3181	///
3182	///
3183	/// An illustration of this transform is turning this IR:
3184	/// \code
3185	/// BB:
3186	/// %cmp = icmp ult %x, %y
3187	/// br i1 %cmp, label %EndBB, label %ThenBB
3188	/// ThenBB:
3189	/// %sub = sub %x, %y
3190	/// br label BB2
3191	/// EndBB:
3192	/// %phi = phi [ %sub, %ThenBB ], [ 0, %BB ]
3193	/// ...
3194	/// \endcode
3195	///
3196	/// Into this IR:
3197	/// \code
3198	/// BB:
3199	/// %cmp = icmp ult %x, %y
3200	/// %sub = sub %x, %y
3201	/// %cond = select i1 %cmp, 0, %sub
3202	/// ...
3203	/// \endcode
3204	///
3205	/// \returns true if the conditional block is removed.
3206	bool SimplifyCFGOpt::speculativelyExecuteBB(CondBrInst *BI,
3207	BasicBlock *ThenBB) {
3208	if (!Options.SpeculateBlocks)
3209	return false;
3210
3211	// Be conservative for now. FP select instruction can often be expensive.
3212	Value *BrCond = BI->getCondition();
3213	if (isa<FCmpInst>(Val: BrCond))
3214	return false;
3215
3216	BasicBlock *BB = BI->getParent();
3217	BasicBlock *EndBB = ThenBB->getTerminator()->getSuccessor(Idx: `0`);
3218	InstructionCost Budget =
3219	PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic;
3220
3221	// If ThenBB is actually on the false edge of the conditional branch, remember
3222	// to swap the select operands later.
3223	bool Invert = false;
3224	if (ThenBB != BI->getSuccessor(i: `0`)) {
3225	assert(ThenBB == BI->getSuccessor(`1`) && "No edge from 'if' block?");
3226	Invert = true;
3227	}
3228	assert(EndBB == BI->getSuccessor(!Invert) && "No edge from to end block");
3229
3230	if (!isProfitableToSpeculate(BI, Invert, TTI))
3231	return false;
3232
3233	// Keep a count of how many times instructions are used within ThenBB when
3234	// they are candidates for sinking into ThenBB. Specifically:
3235	// - They are defined in BB, and
3236	// - They have no side effects, and
3237	// - All of their uses are in ThenBB.
3238	SmallDenseMap<Instruction , unsigned*, `4`> SinkCandidateUseCounts;
3239
3240	SmallVector<Instruction *, `4`> SpeculatedPseudoProbes;
3241
3242	unsigned SpeculatedInstructions = `0`;
3243	bool HoistLoadsStores = Options.HoistLoadsStoresWithCondFaulting;
3244	SmallVector<Instruction *, `2`> SpeculatedConditionalLoadsStores;
3245	Value SpeculatedStoreValue = nullptr*;
3246	StoreInst SpeculatedStore = nullptr*;
3247	EphemeralValueTracker EphTracker;
3248	for (Instruction &I : reverse(C: drop_end(RangeOrContainer&: *ThenBB))) {
3249	// Skip pseudo probes. The consequence is we lose track of the branch
3250	// probability for ThenBB, which is fine since the optimization here takes
3251	// place regardless of the branch probability.
3252	if (isa<PseudoProbeInst>(Val: I)) {
3253	// The probe should be deleted so that it will not be over-counted when
3254	// the samples collected on the non-conditional path are counted towards
3255	// the conditional path. We leave it for the counts inference algorithm to
3256	// figure out a proper count for an unknown probe.
3257	SpeculatedPseudoProbes.push_back(Elt: &I);
3258	continue;
3259	}
3260
3261	// Ignore ephemeral values, they will be dropped by the transform.
3262	if (EphTracker.track(I: &I))
3263	continue;
3264
3265	// Only speculatively execute a single instruction (not counting the
3266	// terminator) for now.
3267	bool IsSafeCheapLoadStore = HoistLoadsStores &&
3268	isSafeCheapLoadStore(I: &I, TTI) &&
3269	SpeculatedConditionalLoadsStores.size() <
3270	HoistLoadsStoresWithCondFaultingThreshold;
3271	// Not count load/store into cost if target supports conditional faulting
3272	// b/c it's cheap to speculate it.
3273	if (IsSafeCheapLoadStore)
3274	SpeculatedConditionalLoadsStores.push_back(Elt: &I);
3275	else
3276	++SpeculatedInstructions;
3277
3278	if (SpeculatedInstructions > `1`)
3279	return false;
3280
3281	// Don't hoist the instruction if it's unsafe or expensive.
3282	if (!IsSafeCheapLoadStore &&
3283	!isSafeToSpeculativelyExecute(I: &I, CtxI: BI, AC: Options.AC) &&
3284	!(HoistCondStores && !SpeculatedStoreValue &&
3285	(SpeculatedStoreValue =
3286	isSafeToSpeculateStore(I: &I, BrBB: BB, StoreBB: ThenBB, EndBB))))
3287	return false;
3288	if (!IsSafeCheapLoadStore && !SpeculatedStoreValue &&
3289	computeSpeculationCost(I: &I, TTI) >
3290	PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic)
3291	return false;
3292
3293	// Store the store speculation candidate.
3294	if (!SpeculatedStore && SpeculatedStoreValue)
3295	SpeculatedStore = cast<StoreInst>(Val: &I);
3296
3297	// Do not hoist the instruction if any of its operands are defined but not
3298	// used in BB. The transformation will prevent the operand from
3299	// being sunk into the use block.
3300	for (Use &Op : I.operands()) {
3301	Instruction *OpI = dyn_cast<Instruction>(Val&: Op);
3302	if (!OpI \|\| OpI->getParent() != BB \|\| OpI->mayHaveSideEffects())
3303	continue; // Not a candidate for sinking.
3304
3305	++SinkCandidateUseCounts [OpI];
3306	}
3307	}
3308
3309	// Consider any sink candidates which are only used in ThenBB as costs for
3310	// speculation. Note, while we iterate over a DenseMap here, we are summing
3311	// and so iteration order isn't significant.
3312	for (const auto &[Inst, Count] : SinkCandidateUseCounts)
3313	if (Inst->hasNUses(N: Count)) {
3314	++SpeculatedInstructions;
3315	if (SpeculatedInstructions > `1`)
3316	return false;
3317	}
3318
3319	// Check that we can insert the selects and that it's not too expensive to do
3320	// so.
3321	bool Convert =
3322	SpeculatedStore != nullptr \|\| !SpeculatedConditionalLoadsStores.empty();
3323	InstructionCost Cost = `0`;
3324	Convert \|= validateAndCostRequiredSelects(BB, ThenBB, EndBB,
3325	SpeculatedInstructions, Cost, TTI);
3326	if (!Convert \|\| Cost > Budget)
3327	return false;
3328
3329	// If we get here, we can hoist the instruction and if-convert.
3330	LLVM_DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *ThenBB << "\n";);
3331
3332	Instruction Sel = nullptr*;
3333	// Insert a select of the value of the speculated store.
3334	if (SpeculatedStoreValue) {
3335	IRBuilder<NoFolder> Builder(BI);
3336	Value *OrigV = SpeculatedStore->getValueOperand();
3337	Value *TrueV = SpeculatedStore->getValueOperand();
3338	Value *FalseV = SpeculatedStoreValue;
3339	if (Invert)
3340	std::swap(a&: TrueV, b&: FalseV);
3341	Value *S = Builder.CreateSelect(
3342	C: BrCond, True: TrueV, False: FalseV, Name: "spec.store.select", MDFrom: BI);
3343	Sel = cast<Instruction>(Val: S);
3344	SpeculatedStore->setOperand(i_nocapture: `0`, Val_nocapture: S);
3345	SpeculatedStore->applyMergedLocation(LocA: BI->getDebugLoc(),
3346	LocB: SpeculatedStore->getDebugLoc());
3347	// The value stored is still conditional, but the store itself is now
3348	// unconditonally executed, so we must be sure that any linked dbg.assign
3349	// intrinsics are tracking the new stored value (the result of the
3350	// select). If we don't, and the store were to be removed by another pass
3351	// (e.g. DSE), then we'd eventually end up emitting a location describing
3352	// the conditional value, unconditionally.
3353	//
3354	// === Before this transformation ===
3355	// pred:
3356	// store %one, %x.dest, !DIAssignID !1
3357	// dbg.assign %one, "x", ..., !1, ...
3358	// br %cond if.then
3359	//
3360	// if.then:
3361	// store %two, %x.dest, !DIAssignID !2
3362	// dbg.assign %two, "x", ..., !2, ...
3363	//
3364	// === After this transformation ===
3365	// pred:
3366	// store %one, %x.dest, !DIAssignID !1
3367	// dbg.assign %one, "x", ..., !1
3368	/// ...
3369	// %merge = select %cond, %two, %one
3370	// store %merge, %x.dest, !DIAssignID !2
3371	// dbg.assign %merge, "x", ..., !2
3372	for (DbgVariableRecord *DbgAssign :
3373	at::getDVRAssignmentMarkers(Inst: SpeculatedStore))
3374	if (llvm::is_contained(Range: DbgAssign->location_ops(), Element: OrigV))
3375	DbgAssign->replaceVariableLocationOp(OldValue: OrigV, NewValue: S);
3376	}
3377
3378	// Metadata can be dependent on the condition we are hoisting above.
3379	// Strip all UB-implying metadata on the instruction. Drop the debug loc
3380	// to avoid making it appear as if the condition is a constant, which would
3381	// be misleading while debugging.
3382	// Similarly strip attributes that maybe dependent on condition we are
3383	// hoisting above.
3384	for (auto &I : make_early_inc_range(Range&: *ThenBB)) {
3385	if (!SpeculatedStoreValue \|\| &I != SpeculatedStore) {
3386	I.dropLocation();
3387	}
3388	I.dropUBImplyingAttrsAndMetadata();
3389
3390	// Drop ephemeral values.
3391	if (EphTracker.contains(I: &I)) {
3392	I.replaceAllUsesWith(V: PoisonValue::get(T: I.getType()));
3393	I.eraseFromParent();
3394	}
3395	}
3396
3397	// Hoist the instructions.
3398	// Drop DbgVariableRecords attached to these instructions.
3399	for (auto &It : *ThenBB)
3400	for (DbgRecord &DR : make_early_inc_range(Range: It.getDbgRecordRange()))
3401	// Drop all records except assign-kind DbgVariableRecords (dbg.assign
3402	// equivalent).
3403	if (DbgVariableRecord *DVR = dyn_cast<DbgVariableRecord>(Val: &DR);
3404	!DVR \|\| !DVR->isDbgAssign())
3405	It.dropOneDbgRecord(I: &DR);
3406	BB->splice(ToIt: BI->getIterator(), FromBB: ThenBB, FromBeginIt: ThenBB->begin(),
3407	FromEndIt: std::prev(x: ThenBB->end()));
3408
3409	if (!SpeculatedConditionalLoadsStores.empty())
3410	hoistConditionalLoadsStores(BI, SpeculatedConditionalLoadsStores, Invert,
3411	Sel);
3412
3413	// Insert selects and rewrite the PHI operands.
3414	IRBuilder<NoFolder> Builder(BI);
3415	for (PHINode &PN : EndBB->phis()) {
3416	unsigned OrigI = PN.getBasicBlockIndex(BB);
3417	unsigned ThenI = PN.getBasicBlockIndex(BB: ThenBB);
3418	Value *OrigV = PN.getIncomingValue(i: OrigI);
3419	Value *ThenV = PN.getIncomingValue(i: ThenI);
3420
3421	// Skip PHIs which are trivial.
3422	if (OrigV == ThenV)
3423	continue;
3424
3425	// Create a select whose true value is the speculatively executed value and
3426	// false value is the pre-existing value. Swap them if the branch
3427	// destinations were inverted.
3428	Value TrueV = ThenV, FalseV = OrigV;
3429	if (Invert)
3430	std::swap(a&: TrueV, b&: FalseV);
3431	Value *V = Builder.CreateSelect(C: BrCond, True: TrueV, False: FalseV, Name: "spec.select", MDFrom: BI);
3432	PN.setIncomingValue(i: OrigI, V);
3433	PN.setIncomingValue(i: ThenI, V);
3434	}
3435
3436	// Remove speculated pseudo probes.
3437	for (Instruction *I : SpeculatedPseudoProbes)
3438	I->eraseFromParent();
3439
3440	++NumSpeculations;
3441	return true;
3442	}
3443
3444	using BlocksSet = SmallPtrSet<BasicBlock *, `8`>;
3445
3446	// Return false if number of blocks searched is too much.
3447	static bool findReaching(BasicBlock BB, BasicBlock DefBB,
3448	BlocksSet &ReachesNonLocalUses) {
3449	if (BB == DefBB)
3450	return true;
3451	if (!ReachesNonLocalUses.insert(Ptr: BB).second)
3452	return true;
3453
3454	if (ReachesNonLocalUses.size() > MaxJumpThreadingLiveBlocks)
3455	return false;
3456	for (BasicBlock *Pred : predecessors(BB))
3457	if (!findReaching(BB: Pred, DefBB, ReachesNonLocalUses))
3458	return false;
3459	return true;
3460	}
3461
3462	/// Return true if we can thread a branch across this block.
3463	static bool blockIsSimpleEnoughToThreadThrough(BasicBlock *BB,
3464	BlocksSet &NonLocalUseBlocks) {
3465	int Size = `0`;
3466	EphemeralValueTracker EphTracker;
3467
3468	// Walk the loop in reverse so that we can identify ephemeral values properly
3469	// (values only feeding assumes).
3470	for (Instruction &I : reverse(C: BB->instructionsWithoutDebug(SkipPseudoOp: false))) {
3471	// Can't fold blocks that contain noduplicate or convergent calls.
3472	if (CallInst *CI = dyn_cast<CallInst>(Val: &I))
3473	if (CI->cannotDuplicate() \|\| CI->isConvergent())
3474	return false;
3475
3476	// Ignore ephemeral values which are deleted during codegen.
3477	// We will delete Phis while threading, so Phis should not be accounted in
3478	// block's size.
3479	if (!EphTracker.track(I: &I) && !isa<PHINode>(Val: I)) {
3480	if (Size++ > MaxSmallBlockSize)
3481	return false; // Don't clone large BB's.
3482	}
3483
3484	// Record blocks with non-local uses of values defined in the current basic
3485	// block.
3486	for (User *U : I.users()) {
3487	Instruction *UI = cast<Instruction>(Val: U);
3488	BasicBlock *UsedInBB = UI->getParent();
3489	if (UsedInBB == BB) {
3490	if (isa<PHINode>(Val: UI))
3491	return false;
3492	} else
3493	NonLocalUseBlocks.insert(Ptr: UsedInBB);
3494	}
3495
3496	// Looks ok, continue checking.
3497	}
3498
3499	return true;
3500	}
3501
3502	static ConstantInt getKnownValueOnEdge(Value V, BasicBlock *From,
3503	BasicBlock *To) {
3504	// Don't look past the block defining the value, we might get the value from
3505	// a previous loop iteration.
3506	auto *I = dyn_cast<Instruction>(Val: V);
3507	if (I && I->getParent() == To)
3508	return nullptr;
3509
3510	// We know the value if the From block branches on it.
3511	auto *BI = dyn_cast<CondBrInst>(Val: From->getTerminator());
3512	if (BI && BI->getCondition() == V &&
3513	BI->getSuccessor(i: `0`) != BI->getSuccessor(i: `1`))
3514	return BI->getSuccessor(i: `0`) == To ? ConstantInt::getTrue(Context&: BI->getContext())
3515	: ConstantInt::getFalse(Context&: BI->getContext());
3516
3517	return nullptr;
3518	}
3519
3520	/// If we have a conditional branch on something for which we know the constant
3521	/// value in predecessors (e.g. a phi node in the current block), thread edges
3522	/// from the predecessor to their ultimate destination.
3523	static std::optional<bool>
3524	foldCondBranchOnValueKnownInPredecessorImpl(CondBrInst BI, DomTreeUpdater DTU,
3525	const DataLayout &DL,
3526	AssumptionCache *AC) {
3527	SmallMapVector<ConstantInt , SmallSetVector<BasicBlock , `2`>, `2`> KnownValues;
3528	BasicBlock *BB = BI->getParent();
3529	Value *Cond = BI->getCondition();
3530	PHINode *PN = dyn_cast<PHINode>(Val: Cond);
3531	if (PN && PN->getParent() == BB) {
3532	// Degenerate case of a single entry PHI.
3533	if (PN->getNumIncomingValues() == `1`) {
3534	FoldSingleEntryPHINodes(BB: PN->getParent());
3535	return true;
3536	}
3537
3538	for (Use &U : PN->incoming_values())
3539	if (auto *CB = dyn_cast<ConstantInt>(Val&: U))
3540	KnownValues [CB].insert(X: PN->getIncomingBlock(U));
3541	} else {
3542	for (BasicBlock *Pred : predecessors(BB)) {
3543	if (ConstantInt *CB = getKnownValueOnEdge(V: Cond, From: Pred, To: BB))
3544	KnownValues [CB].insert(X: Pred);
3545	}
3546	}
3547
3548	if (KnownValues.empty())
3549	return false;
3550
3551	// Now we know that this block has multiple preds and two succs.
3552	// Check that the block is small enough and record which non-local blocks use
3553	// values defined in the block.
3554
3555	BlocksSet NonLocalUseBlocks;
3556	BlocksSet ReachesNonLocalUseBlocks;
3557	if (!blockIsSimpleEnoughToThreadThrough(BB, NonLocalUseBlocks))
3558	return false;
3559
3560	// Jump-threading can only be done to destinations where no values defined
3561	// in BB are live.
3562
3563	// Quickly check if both destinations have uses. If so, jump-threading cannot
3564	// be done.
3565	if (NonLocalUseBlocks.contains(Ptr: BI->getSuccessor(i: `0`)) &&
3566	NonLocalUseBlocks.contains(Ptr: BI->getSuccessor(i: `1`)))
3567	return false;
3568
3569	// Search backward from NonLocalUseBlocks to find which blocks
3570	// reach non-local uses.
3571	for (BasicBlock *UseBB : NonLocalUseBlocks)
3572	// Give up if too many blocks are searched.
3573	if (!findReaching(BB: UseBB, DefBB: BB, ReachesNonLocalUses&: ReachesNonLocalUseBlocks))
3574	return false;
3575
3576	for (const auto &Pair : KnownValues) {
3577	ConstantInt *CB = Pair.first;
3578	ArrayRef<BasicBlock *> PredBBs = Pair.second.getArrayRef();
3579	BasicBlock *RealDest = BI->getSuccessor(i: !CB->getZExtValue());
3580
3581	// Okay, we now know that all edges from PredBB should be revectored to
3582	// branch to RealDest.
3583	if (RealDest == BB)
3584	continue; // Skip self loops.
3585
3586	// Skip if the predecessor's terminator is an indirect branch.
3587	if (any_of(Range&: PredBBs, P: [](BasicBlock *PredBB) {
3588	return isa<IndirectBrInst>(Val: PredBB->getTerminator());
3589	}))
3590	continue;
3591
3592	// Only revector to RealDest if no values defined in BB are live.
3593	if (ReachesNonLocalUseBlocks.contains(Ptr: RealDest))
3594	continue;
3595
3596	LLVM_DEBUG({
3597	dbgs() << "Condition " << *Cond << " in " << BB->getName()
3598	<< " has value " << *Pair.first << " in predecessors:\n";
3599	for (const BasicBlock *PredBB : Pair.second)
3600	dbgs() << " " << PredBB->getName() << "\n";
3601	dbgs() << "Threading to destination " << RealDest->getName() << ".\n";
3602	});
3603
3604	// Split the predecessors we are threading into a new edge block. We'll
3605	// clone the instructions into this block, and then redirect it to RealDest.
3606	BasicBlock *EdgeBB = SplitBlockPredecessors(BB, Preds: PredBBs, Suffix: ".critedge", DTU);
3607	if (!EdgeBB)
3608	continue;
3609
3610	// TODO: These just exist to reduce test diff, we can drop them if we like.
3611	EdgeBB->setName(RealDest->getName() + ".critedge");
3612	EdgeBB->moveBefore(MovePos: RealDest);
3613
3614	// Update PHI nodes.
3615	addPredecessorToBlock(Succ: RealDest, NewPred: EdgeBB, ExistPred: BB);
3616
3617	// BB may have instructions that are being threaded over. Clone these
3618	// instructions into EdgeBB. We know that there will be no uses of the
3619	// cloned instructions outside of EdgeBB.
3620	BasicBlock::iterator InsertPt = EdgeBB->getFirstInsertionPt();
3621	ValueToValueMapTy TranslateMap; // Track translated values.
3622	TranslateMap [Cond] = CB;
3623
3624	// RemoveDIs: track instructions that we optimise away while folding, so
3625	// that we can copy DbgVariableRecords from them later.
3626	BasicBlock::iterator SrcDbgCursor = BB->begin();
3627	for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) {
3628	if (PHINode *PN = dyn_cast<PHINode>(Val&: BBI)) {
3629	TranslateMap [PN] = PN->getIncomingValueForBlock(BB: EdgeBB);
3630	continue;
3631	}
3632	// Clone the instruction.
3633	Instruction *N = BBI ->clone();
3634	// Insert the new instruction into its new home.
3635	N->insertInto(ParentBB: EdgeBB, It: InsertPt);
3636
3637	if (BBI ->hasName())
3638	N->setName(BBI ->getName() + ".c");
3639
3640	// Update operands due to translation.
3641	// Key Instructions: Remap all the atom groups.
3642	if (const DebugLoc &DL = BBI ->getDebugLoc())
3643	mapAtomInstance(DL, VMap&: TranslateMap);
3644	RemapInstruction(I: N, VM&: TranslateMap,
3645	Flags: RF_IgnoreMissingLocals \| RF_NoModuleLevelChanges);
3646
3647	// Check for trivial simplification.
3648	if (Value V = simplifyInstruction(I: N, Q: {DL, nullptr, nullptr*, AC})) {
3649	if (!BBI ->use_empty())
3650	TranslateMap [&*BBI] = V;
3651	if (!N->mayHaveSideEffects()) {
3652	N->eraseFromParent(); // Instruction folded away, don't need actual
3653	// inst
3654	N = nullptr;
3655	}
3656	} else {
3657	if (!BBI ->use_empty())
3658	TranslateMap [&*BBI] = N;
3659	}
3660	if (N) {
3661	// Copy all debug-info attached to instructions from the last we
3662	// successfully clone, up to this instruction (they might have been
3663	// folded away).
3664	for (; SrcDbgCursor != BBI; ++SrcDbgCursor)
3665	N->cloneDebugInfoFrom(From: &*SrcDbgCursor);
3666	SrcDbgCursor = std::next(x: BBI);
3667	// Clone debug-info on this instruction too.
3668	N->cloneDebugInfoFrom(From: &*BBI);
3669
3670	// Register the new instruction with the assumption cache if necessary.
3671	if (auto *Assume = dyn_cast<AssumeInst>(Val: N))
3672	if (AC)
3673	AC->registerAssumption(CI: Assume);
3674	}
3675	}
3676
3677	for (; &*SrcDbgCursor != BI; ++SrcDbgCursor)
3678	InsertPt ->cloneDebugInfoFrom(From: &*SrcDbgCursor);
3679	InsertPt ->cloneDebugInfoFrom(From: BI);
3680
3681	BB->removePredecessor(Pred: EdgeBB);
3682	UncondBrInst *EdgeBI = cast<UncondBrInst>(Val: EdgeBB->getTerminator());
3683	EdgeBI->setSuccessor(idx: `0`, NewSucc: RealDest);
3684	EdgeBI->setDebugLoc(BI->getDebugLoc());
3685
3686	if (DTU) {
3687	SmallVector<DominatorTree::UpdateType, `2`> Updates;
3688	Updates.push_back(Elt: {DominatorTree::Delete, EdgeBB, BB});
3689	Updates.push_back(Elt: {DominatorTree::Insert, EdgeBB, RealDest});
3690	DTU->applyUpdates(Updates);
3691	}
3692
3693	// For simplicity, we created a separate basic block for the edge. Merge
3694	// it back into the predecessor if possible. This not only avoids
3695	// unnecessary SimplifyCFG iterations, but also makes sure that we don't
3696	// bypass the check for trivial cycles above.
3697	MergeBlockIntoPredecessor(BB: EdgeBB, DTU);
3698
3699	// Signal repeat, simplifying any other constants.
3700	return std::nullopt;
3701	}
3702
3703	return false;
3704	}
3705
3706	bool SimplifyCFGOpt::foldCondBranchOnValueKnownInPredecessor(CondBrInst *BI) {
3707	// Note: If BB is a loop header then there is a risk that threading introduces
3708	// a non-canonical loop by moving a back edge. So we avoid this optimization
3709	// for loop headers if NeedCanonicalLoop is set.
3710	if (Options.NeedCanonicalLoop && is_contained(Range&: LoopHeaders, Element: BI->getParent()))
3711	return false;
3712
3713	std::optional<bool> Result;
3714	bool EverChanged = false;
3715	do {
3716	// Note that None means "we changed things, but recurse further."
3717	Result =
3718	foldCondBranchOnValueKnownInPredecessorImpl(BI, DTU, DL, AC: Options.AC);
3719	EverChanged \|= Result == std::nullopt \|\| *Result;
3720	} while (Result == std::nullopt);
3721	return EverChanged;
3722	}
3723
3724	/// Given a BB that starts with the specified two-entry PHI node,
3725	/// see if we can eliminate it.
3726	static bool foldTwoEntryPHINode(PHINode PN, const* TargetTransformInfo &TTI,
3727	DomTreeUpdater DTU, AssumptionCache AC,
3728	const DataLayout &DL,
3729	bool SpeculateUnpredictables) {
3730	// Ok, this is a two entry PHI node. Check to see if this is a simple "if
3731	// statement", which has a very simple dominance structure. Basically, we
3732	// are trying to find the condition that is being branched on, which
3733	// subsequently causes this merge to happen. We really want control
3734	// dependence information for this check, but simplifycfg can't keep it up
3735	// to date, and this catches most of the cases we care about anyway.
3736	BasicBlock *BB = PN->getParent();
3737
3738	BasicBlock IfTrue, IfFalse;
3739	CondBrInst *DomBI = GetIfCondition(BB, IfTrue, IfFalse);
3740	if (!DomBI)
3741	return false;
3742	Value *IfCond = DomBI->getCondition();
3743	// Don't bother if the branch will be constant folded trivially.
3744	if (isa<ConstantInt>(Val: IfCond))
3745	return false;
3746
3747	BasicBlock *DomBlock = DomBI->getParent();
3748	SmallVector<BasicBlock *, `2`> IfBlocks;
3749	llvm::copy_if(Range: PN->blocks(), Out: std::back_inserter(x&: IfBlocks),
3750	P: [](BasicBlock *IfBlock) {
3751	return isa<UncondBrInst>(Val: IfBlock->getTerminator());
3752	});
3753	assert((IfBlocks.size() == `1` \|\| IfBlocks.size() == `2`) &&
3754	"Will have either one or two blocks to speculate.");
3755
3756	// If the branch is non-unpredictable, see if we either predictably jump to
3757	// the merge bb (if we have only a single 'then' block), or if we predictably
3758	// jump to one specific 'then' block (if we have two of them).
3759	// It isn't beneficial to speculatively execute the code
3760	// from the block that we know is predictably not entered.
3761	bool IsUnpredictable = DomBI->getMetadata(KindID: LLVMContext::MD_unpredictable);
3762	if (!IsUnpredictable) {
3763	uint64_t TWeight, FWeight;
3764	if (extractBranchWeights(I: *DomBI, TrueVal&: TWeight, FalseVal&: FWeight) &&
3765	(TWeight + FWeight) != `0`) {
3766	BranchProbability BITrueProb =
3767	BranchProbability::getBranchProbability(Numerator: TWeight, Denominator: TWeight + FWeight);
3768	BranchProbability Likely = TTI.getPredictableBranchThreshold();
3769	BranchProbability BIFalseProb = BITrueProb.getCompl();
3770	if (IfBlocks.size() == `1`) {
3771	BranchProbability BIBBProb =
3772	DomBI->getSuccessor(i: `0`) == BB ? BITrueProb : BIFalseProb;
3773	if (BIBBProb >= Likely)
3774	return false;
3775	} else {
3776	if (BITrueProb >= Likely \|\| BIFalseProb >= Likely)
3777	return false;
3778	}
3779	}
3780	}
3781
3782	// Don't try to fold an unreachable block. For example, the phi node itself
3783	// can't be the candidate if-condition for a select that we want to form.
3784	if (auto *IfCondPhiInst = dyn_cast<PHINode>(Val: IfCond))
3785	if (IfCondPhiInst->getParent() == BB)
3786	return false;
3787
3788	// Okay, we found that we can merge this two-entry phi node into a select.
3789	// Doing so would require us to fold all* two entry phi nodes in this block.*
3790	// At some point this becomes non-profitable (particularly if the target
3791	// doesn't support cmov's). Only do this transformation if there are two or
3792	// fewer PHI nodes in this block.
3793	unsigned NumPhis = `0`;
3794	for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(Val: I); ++NumPhis, ++I)
3795	if (NumPhis > `2`)
3796	return false;
3797
3798	// Loop over the PHI's seeing if we can promote them all to select
3799	// instructions. While we are at it, keep track of the instructions
3800	// that need to be moved to the dominating block.
3801	SmallPtrSet<Instruction *, `4`> AggressiveInsts;
3802	SmallPtrSet<Instruction *, `2`> ZeroCostInstructions;
3803	InstructionCost Cost = `0`;
3804	InstructionCost Budget =
3805	TwoEntryPHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic;
3806	if (SpeculateUnpredictables && IsUnpredictable)
3807	Budget += TTI.getBranchMispredictPenalty();
3808
3809	bool Changed = false;
3810	for (BasicBlock::iterator II = BB->begin(); isa<PHINode>(Val: II);) {
3811	PHINode *PN = cast<PHINode>(Val: II ++);
3812	if (Value *V = simplifyInstruction(I: PN, Q: {DL, PN})) {
3813	PN->replaceAllUsesWith(V);
3814	PN->eraseFromParent();
3815	Changed = true;
3816	continue;
3817	}
3818
3819	if (!dominatesMergePoint(V: PN->getIncomingValue(i: `0`), BB, InsertPt: DomBI,
3820	AggressiveInsts, Cost, Budget, TTI, AC,
3821	ZeroCostInstructions) \|\|
3822	!dominatesMergePoint(V: PN->getIncomingValue(i: `1`), BB, InsertPt: DomBI,
3823	AggressiveInsts, Cost, Budget, TTI, AC,
3824	ZeroCostInstructions))
3825	return Changed;
3826	}
3827
3828	// If we folded the first phi, PN dangles at this point. Refresh it. If
3829	// we ran out of PHIs then we simplified them all.
3830	PN = dyn_cast<PHINode>(Val: BB->begin());
3831	if (!PN)
3832	return true;
3833
3834	// Don't fold i1 branches on PHIs which contain binary operators or
3835	// (possibly inverted) select form of or/ands if their parameters are
3836	// an equality test.
3837	auto IsBinOpOrAndEq = [](Value *V) {
3838	CmpPredicate Pred;
3839	if (match(V, P: m_CombineOr(
3840	L: m_CombineOr(
3841	L: m_BinOp(L: m_Cmp(Pred, L: m_Value(), R: m_Value()), R: m_Value()),
3842	R: m_BinOp(L: m_Value(), R: m_Cmp(Pred, L: m_Value(), R: m_Value()))),
3843	R: m_c_Select(L: m_ImmConstant(),
3844	R: m_Cmp(Pred, L: m_Value(), R: m_Value()))))) {
3845	return CmpInst::isEquality(pred: Pred);
3846	}
3847	return false;
3848	};
3849	if (PN->getType()->isIntegerTy(Bitwidth: `1`) &&
3850	(IsBinOpOrAndEq (PN->getIncomingValue(i: `0`)) \|\|
3851	IsBinOpOrAndEq (PN->getIncomingValue(i: `1`)) \|\| IsBinOpOrAndEq (IfCond)))
3852	return Changed;
3853
3854	// If all PHI nodes are promotable, check to make sure that all instructions
3855	// in the predecessor blocks can be promoted as well. If not, we won't be able
3856	// to get rid of the control flow, so it's not worth promoting to select
3857	// instructions.
3858	for (BasicBlock *IfBlock : IfBlocks)
3859	for (BasicBlock::iterator I = IfBlock->begin(); !I ->isTerminator(); ++I)
3860	if (!AggressiveInsts.count(Ptr: &*I) && !I ->isDebugOrPseudoInst()) {
3861	// This is not an aggressive instruction that we can promote.
3862	// Because of this, we won't be able to get rid of the control flow, so
3863	// the xform is not worth it.
3864	return Changed;
3865	}
3866
3867	// If either of the blocks has it's address taken, we can't do this fold.
3868	if (any_of(Range&: IfBlocks,
3869	P: [](BasicBlock IfBlock) { return* IfBlock->hasAddressTaken(); }))
3870	return Changed;
3871
3872	LLVM_DEBUG(dbgs() << "FOUND IF CONDITION! " << *IfCond;
3873	if (IsUnpredictable) dbgs() << " (unpredictable)";
3874	dbgs() << " T: " << IfTrue->getName()
3875	<< " F: " << IfFalse->getName() << "\n");
3876
3877	// If we can still promote the PHI nodes after this gauntlet of tests,
3878	// do all of the PHI's now.
3879
3880	// Move all 'aggressive' instructions, which are defined in the
3881	// conditional parts of the if's up to the dominating block.
3882	for (BasicBlock *IfBlock : IfBlocks)
3883	hoistAllInstructionsInto(DomBlock, InsertPt: DomBI, BB: IfBlock);
3884
3885	IRBuilder<NoFolder> Builder(DomBI);
3886	// Propagate fast-math-flags from phi nodes to replacement selects.
3887	while (PHINode *PN = dyn_cast<PHINode>(Val: BB->begin())) {
3888	// Change the PHI node into a select instruction.
3889	Value *TrueVal = PN->getIncomingValueForBlock(BB: IfTrue);
3890	Value *FalseVal = PN->getIncomingValueForBlock(BB: IfFalse);
3891
3892	Value *Sel = Builder.CreateSelectFMF(C: IfCond, True: TrueVal, False: FalseVal,
3893	FMFSource: isa<FPMathOperator>(Val: PN) ? PN : nullptr,
3894	Name: "", MDFrom: DomBI);
3895	PN->replaceAllUsesWith(V: Sel);
3896	Sel->takeName(V: PN);
3897	PN->eraseFromParent();
3898	}
3899
3900	// At this point, all IfBlocks are empty, so our if statement
3901	// has been flattened. Change DomBlock to jump directly to our new block to
3902	// avoid other simplifycfg's kicking in on the diamond.
3903	Builder.CreateBr(Dest: BB);
3904
3905	SmallVector<DominatorTree::UpdateType, `3`> Updates;
3906	if (DTU) {
3907	Updates.push_back(Elt: {DominatorTree::Insert, DomBlock, BB});
3908	for (auto *Successor : successors(BB: DomBlock))
3909	Updates.push_back(Elt: {DominatorTree::Delete, DomBlock, Successor});
3910	}
3911
3912	DomBI->eraseFromParent();
3913	if (DTU)
3914	DTU->applyUpdates(Updates);
3915
3916	return true;
3917	}
3918
3919	static Value *createLogicalOp(IRBuilderBase &Builder,
3920	Instruction::BinaryOps Opc, Value *LHS,
3921	Value RHS, const* Twine &Name = "") {
3922	// Try to relax logical op to binary op.
3923	if (impliesPoison(ValAssumedPoison: RHS, V: LHS))
3924	return Builder.CreateBinOp(Opc, LHS, RHS, Name);
3925	if (Opc == Instruction::And)
3926	return Builder.CreateLogicalAnd(Cond1: LHS, Cond2: RHS, Name);
3927	if (Opc == Instruction::Or)
3928	return Builder.CreateLogicalOr(Cond1: LHS, Cond2: RHS, Name);
3929	llvm_unreachable("Invalid logical opcode");
3930	}
3931
3932	/// Return true if either PBI or BI has branch weight available, and store
3933	/// the weights in {Pred\|Succ}{True\|False}Weight. If one of PBI and BI does
3934	/// not have branch weight, use 1:1 as its weight.
3935	static bool extractPredSuccWeights(CondBrInst PBI, CondBrInst BI,
3936	uint64_t &PredTrueWeight,
3937	uint64_t &PredFalseWeight,
3938	uint64_t &SuccTrueWeight,
3939	uint64_t &SuccFalseWeight) {
3940	bool PredHasWeights =
3941	extractBranchWeights(I: *PBI, TrueVal&: PredTrueWeight, FalseVal&: PredFalseWeight);
3942	bool SuccHasWeights =
3943	extractBranchWeights(I: *BI, TrueVal&: SuccTrueWeight, FalseVal&: SuccFalseWeight);
3944	if (PredHasWeights \|\| SuccHasWeights) {
3945	if (!PredHasWeights)
3946	PredTrueWeight = PredFalseWeight = `1`;
3947	if (!SuccHasWeights)
3948	SuccTrueWeight = SuccFalseWeight = `1`;
3949	return true;
3950	} else {
3951	return false;
3952	}
3953	}
3954
3955	/// Determine if the two branches share a common destination and deduce a glue
3956	/// that joins the branches' conditions to arrive at the common destination if
3957	/// that would be profitable.
3958	static std::optional<std::tuple<BasicBlock , Instruction::BinaryOps, bool*>>
3959	shouldFoldCondBranchesToCommonDestination(CondBrInst BI, CondBrInst PBI,
3960	const TargetTransformInfo *TTI) {
3961	assert(BI && PBI && "Both blocks must end with a conditional branches.");
3962	assert(is_contained(predecessors(BI->getParent()), PBI->getParent()) &&
3963	"PredBB must be a predecessor of BB.");
3964
3965	// We have the potential to fold the conditions together, but if the
3966	// predecessor branch is predictable, we may not want to merge them.
3967	uint64_t PTWeight, PFWeight;
3968	BranchProbability PBITrueProb, Likely;
3969	if (TTI && !PBI->getMetadata(KindID: LLVMContext::MD_unpredictable) &&
3970	extractBranchWeights(I: *PBI, TrueVal&: PTWeight, FalseVal&: PFWeight) &&
3971	(PTWeight + PFWeight) != `0`) {
3972	PBITrueProb =
3973	BranchProbability::getBranchProbability(Numerator: PTWeight, Denominator: PTWeight + PFWeight);
3974	Likely = TTI->getPredictableBranchThreshold();
3975	}
3976
3977	if (PBI->getSuccessor(i: `0`) == BI->getSuccessor(i: `0`)) {
3978	// Speculate the 2nd condition unless the 1st is probably true.
3979	if (PBITrueProb.isUnknown() \|\| PBITrueProb < Likely)
3980	return {{BI->getSuccessor(i: `0`), Instruction::Or, false}};
3981	} else if (PBI->getSuccessor(i: `1`) == BI->getSuccessor(i: `1`)) {
3982	// Speculate the 2nd condition unless the 1st is probably false.
3983	if (PBITrueProb.isUnknown() \|\| PBITrueProb.getCompl() < Likely)
3984	return {{BI->getSuccessor(i: `1`), Instruction::And, false}};
3985	} else if (PBI->getSuccessor(i: `0`) == BI->getSuccessor(i: `1`)) {
3986	// Speculate the 2nd condition unless the 1st is probably true.
3987	if (PBITrueProb.isUnknown() \|\| PBITrueProb < Likely)
3988	return {{BI->getSuccessor(i: `1`), Instruction::And, true}};
3989	} else if (PBI->getSuccessor(i: `1`) == BI->getSuccessor(i: `0`)) {
3990	// Speculate the 2nd condition unless the 1st is probably false.
3991	if (PBITrueProb.isUnknown() \|\| PBITrueProb.getCompl() < Likely)
3992	return {{BI->getSuccessor(i: `0`), Instruction::Or, true}};
3993	}
3994	return std::nullopt;
3995	}
3996
3997	static bool performBranchToCommonDestFolding(CondBrInst BI, CondBrInst PBI,
3998	DomTreeUpdater *DTU,
3999	MemorySSAUpdater *MSSAU,
4000	const TargetTransformInfo *TTI) {
4001	BasicBlock *BB = BI->getParent();
4002	BasicBlock *PredBlock = PBI->getParent();
4003
4004	// Determine if the two branches share a common destination.
4005	BasicBlock *CommonSucc;
4006	Instruction::BinaryOps Opc;
4007	bool InvertPredCond;
4008	std::tie(args&: CommonSucc, args&: Opc, args&: InvertPredCond) =
4009	*shouldFoldCondBranchesToCommonDestination(BI, PBI, TTI);
4010
4011	LLVM_DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << PBI << BB);
4012
4013	IRBuilder<> Builder(PBI);
4014	// The builder is used to create instructions to eliminate the branch in BB.
4015	// If BB's terminator has !annotation metadata, add it to the new
4016	// instructions.
4017	Builder.CollectMetadataToCopy(Src: BB->getTerminator(),
4018	MetadataKinds: {LLVMContext::MD_annotation});
4019
4020	// If we need to invert the condition in the pred block to match, do so now.
4021	if (InvertPredCond) {
4022	InvertBranch(PBI, Builder);
4023	}
4024
4025	BasicBlock *UniqueSucc =
4026	PBI->getSuccessor(i: `0`) == BB ? BI->getSuccessor(i: `0`) : BI->getSuccessor(i: `1`);
4027
4028	// Before cloning instructions, notify the successor basic block that it
4029	// is about to have a new predecessor. This will update PHI nodes,
4030	// which will allow us to update live-out uses of bonus instructions.
4031	addPredecessorToBlock(Succ: UniqueSucc, NewPred: PredBlock, ExistPred: BB, MSSAU);
4032
4033	// Try to update branch weights.
4034	uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
4035	SmallVector<uint64_t, `2`> MDWeights;
4036	if (extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight,
4037	SuccTrueWeight, SuccFalseWeight)) {
4038
4039	if (PBI->getSuccessor(i: `0`) == BB) {
4040	// PBI: br i1 %x, BB, FalseDest
4041	// BI: br i1 %y, UniqueSucc, FalseDest
4042	// TrueWeight is TrueWeight for PBI TrueWeight for BI.*
4043	MDWeights.push_back(Elt: PredTrueWeight * SuccTrueWeight);
4044	// FalseWeight is FalseWeight for PBI TotalWeight for BI +*
4045	// TrueWeight for PBI FalseWeight for BI.*
4046	// We assume that total weights of a CondBrInst can fit into 32 bits.
4047	// Therefore, we will not have overflow using 64-bit arithmetic.
4048	MDWeights.push_back(Elt: PredFalseWeight * (SuccFalseWeight + SuccTrueWeight) +
4049	PredTrueWeight * SuccFalseWeight);
4050	} else {
4051	// PBI: br i1 %x, TrueDest, BB
4052	// BI: br i1 %y, TrueDest, UniqueSucc
4053	// TrueWeight is TrueWeight for PBI TotalWeight for BI +*
4054	// FalseWeight for PBI TrueWeight for BI.*
4055	MDWeights.push_back(Elt: PredTrueWeight * (SuccFalseWeight + SuccTrueWeight) +
4056	PredFalseWeight * SuccTrueWeight);
4057	// FalseWeight is FalseWeight for PBI FalseWeight for BI.*
4058	MDWeights.push_back(Elt: PredFalseWeight * SuccFalseWeight);
4059	}
4060
4061	setFittedBranchWeights(I&: PBI, Weights: MDWeights, /IsExpected=/*false,
4062	/ElideAllZero=/true);
4063
4064	// TODO: If BB is reachable from all paths through PredBlock, then we
4065	// could replace PBI's branch probabilities with BI's.
4066	} else
4067	PBI->setMetadata(KindID: LLVMContext::MD_prof, Node: nullptr);
4068
4069	// Now, update the CFG.
4070	PBI->setSuccessor(idx: PBI->getSuccessor(i: `0`) != BB, NewSucc: UniqueSucc);
4071
4072	if (DTU)
4073	DTU->applyUpdates(Updates: {{DominatorTree::Insert, PredBlock, UniqueSucc},
4074	{DominatorTree::Delete, PredBlock, BB}});
4075
4076	// If BI was a loop latch, it may have had associated loop metadata.
4077	// We need to copy it to the new latch, that is, PBI.
4078	if (MDNode *LoopMD = BI->getMetadata(KindID: LLVMContext::MD_loop))
4079	PBI->setMetadata(KindID: LLVMContext::MD_loop, Node: LoopMD);
4080
4081	ValueToValueMapTy VMap; // maps original values to cloned values
4082	cloneInstructionsIntoPredecessorBlockAndUpdateSSAUses(BB, PredBlock, VMap);
4083
4084	Module *M = BB->getModule();
4085
4086	PredBlock->getTerminator()->cloneDebugInfoFrom(From: BB->getTerminator());
4087	for (DbgVariableRecord &DVR :
4088	filterDbgVars(R: PredBlock->getTerminator()->getDbgRecordRange())) {
4089	RemapDbgRecord(M, DR: &DVR, VM&: VMap,
4090	Flags: RF_NoModuleLevelChanges \| RF_IgnoreMissingLocals);
4091	}
4092
4093	// Now that the Cond was cloned into the predecessor basic block,
4094	// or/and the two conditions together.
4095	Value *BICond = VMap [BI->getCondition()];
4096	PBI->setCondition(
4097	createLogicalOp(Builder, Opc, LHS: PBI->getCondition(), RHS: BICond, Name: "or.cond"));
4098	if (!ProfcheckDisableMetadataFixes)
4099	if (auto *SI = dyn_cast<SelectInst>(Val: PBI->getCondition()))
4100	if (!MDWeights.empty()) {
4101	assert(isSelectInRoleOfConjunctionOrDisjunction(SI));
4102	setFittedBranchWeights(I&: *SI, Weights: {MDWeights [`0`], MDWeights [`1`]},
4103	/IsExpected=/false, /ElideAllZero=/true);
4104	}
4105
4106	++NumFoldBranchToCommonDest;
4107	return true;
4108	}
4109
4110	/// Return if an instruction's type or any of its operands' types are a vector
4111	/// type.
4112	static bool isVectorOp(Instruction &I) {
4113	return I.getType()->isVectorTy() \|\| any_of(Range: I.operands(), P: [](Use &U) {
4114	return U ->getType()->isVectorTy();
4115	});
4116	}
4117
4118	/// If this basic block is simple enough, and if a predecessor branches to us
4119	/// and one of our successors, fold the block into the predecessor and use
4120	/// logical operations to pick the right destination.
4121	bool llvm::foldBranchToCommonDest(CondBrInst BI, DomTreeUpdater DTU,
4122	MemorySSAUpdater *MSSAU,
4123	const TargetTransformInfo *TTI,
4124	unsigned BonusInstThreshold) {
4125	BasicBlock *BB = BI->getParent();
4126	TargetTransformInfo::TargetCostKind CostKind =
4127	BB->getParent()->hasMinSize() ? TargetTransformInfo::TCK_CodeSize
4128	: TargetTransformInfo::TCK_SizeAndLatency;
4129
4130	Instruction *Cond = dyn_cast<Instruction>(Val: BI->getCondition());
4131
4132	if (!Cond \|\| !isa<CmpInst, BinaryOperator, SelectInst, TruncInst>(Val: Cond) \|\|
4133	Cond->getParent() != BB \|\| !Cond->hasOneUse())
4134	return false;
4135
4136	// Finally, don't infinitely unroll conditional loops.
4137	if (is_contained(Range: successors(BB), Element: BB))
4138	return false;
4139
4140	// With which predecessors will we want to deal with?
4141	SmallVector<BasicBlock *, `8`> Preds;
4142	for (BasicBlock *PredBlock : predecessors(BB)) {
4143	CondBrInst *PBI = dyn_cast<CondBrInst>(Val: PredBlock->getTerminator());
4144
4145	// Check that we have two conditional branches. If there is a PHI node in
4146	// the common successor, verify that the same value flows in from both
4147	// blocks.
4148	if (!PBI \|\| !safeToMergeTerminators(SI1: BI, SI2: PBI))
4149	continue;
4150
4151	// Determine if the two branches share a common destination.
4152	BasicBlock *CommonSucc;
4153	Instruction::BinaryOps Opc;
4154	bool InvertPredCond;
4155	if (auto Recipe = shouldFoldCondBranchesToCommonDestination(BI, PBI, TTI))
4156	std::tie(args&: CommonSucc, args&: Opc, args&: InvertPredCond) = *Recipe;
4157	else
4158	continue;
4159
4160	// Check the cost of inserting the necessary logic before performing the
4161	// transformation.
4162	if (TTI) {
4163	Type *Ty = BI->getCondition()->getType();
4164	InstructionCost Cost = TTI->getArithmeticInstrCost(Opcode: Opc, Ty, CostKind);
4165	if (InvertPredCond && (!PBI->getCondition()->hasOneUse() \|\|
4166	!isa<CmpInst>(Val: PBI->getCondition())))
4167	Cost += TTI->getArithmeticInstrCost(Opcode: Instruction::Xor, Ty, CostKind);
4168
4169	if (Cost > BranchFoldThreshold)
4170	continue;
4171	}
4172
4173	// Ok, we do want to deal with this predecessor. Record it.
4174	Preds.emplace_back(Args&: PredBlock);
4175	}
4176
4177	// If there aren't any predecessors into which we can fold,
4178	// don't bother checking the cost.
4179	if (Preds.empty())
4180	return false;
4181
4182	// Only allow this transformation if computing the condition doesn't involve
4183	// too many instructions and these involved instructions can be executed
4184	// unconditionally. We denote all involved instructions except the condition
4185	// as "bonus instructions", and only allow this transformation when the
4186	// number of the bonus instructions we'll need to create when cloning into
4187	// each predecessor does not exceed a certain threshold.
4188	unsigned NumBonusInsts = `0`;
4189	bool SawVectorOp = false;
4190	const unsigned PredCount = Preds.size();
4191	for (Instruction &I : *BB) {
4192	// Don't check the branch condition comparison itself.
4193	if (&I == Cond)
4194	continue;
4195	// Ignore the terminator.
4196	if (isa<UncondBrInst, CondBrInst>(Val: I))
4197	continue;
4198	// I must be safe to execute unconditionally.
4199	if (!isSafeToSpeculativelyExecute(I: &I))
4200	return false;
4201	SawVectorOp \|= isVectorOp(I);
4202
4203	// Account for the cost of duplicating this instruction into each
4204	// predecessor. Ignore free instructions.
4205	if (!TTI \|\| TTI->getInstructionCost(U: &I, CostKind) !=
4206	TargetTransformInfo::TCC_Free) {
4207	NumBonusInsts += PredCount;
4208
4209	// Early exits once we reach the limit.
4210	if (NumBonusInsts >
4211	BonusInstThreshold * BranchFoldToCommonDestVectorMultiplier)
4212	return false;
4213	}
4214
4215	auto IsBCSSAUse = [BB, &I](Use &U) {
4216	auto *UI = cast<Instruction>(Val: U.getUser());
4217	if (auto *PN = dyn_cast<PHINode>(Val: UI))
4218	return PN->getIncomingBlock(U) == BB;
4219	return UI->getParent() == BB && I.comesBefore(Other: UI);
4220	};
4221
4222	// Does this instruction require rewriting of uses?
4223	if (!all_of(Range: I.uses(), P: IsBCSSAUse))
4224	return false;
4225	}
4226	if (NumBonusInsts >
4227	BonusInstThreshold *
4228	(SawVectorOp ? BranchFoldToCommonDestVectorMultiplier : `1`))
4229	return false;
4230
4231	// Ok, we have the budget. Perform the transformation.
4232	for (BasicBlock *PredBlock : Preds) {
4233	auto *PBI = cast<CondBrInst>(Val: PredBlock->getTerminator());
4234	return performBranchToCommonDestFolding(BI, PBI, DTU, MSSAU, TTI);
4235	}
4236	return false;
4237	}
4238
4239	// If there is only one store in BB1 and BB2, return it, otherwise return
4240	// nullptr.
4241	static StoreInst findUniqueStoreInBlocks(BasicBlock BB1, BasicBlock *BB2) {
4242	StoreInst S = nullptr*;
4243	for (auto *BB : {BB1, BB2}) {
4244	if (!BB)
4245	continue;
4246	for (auto &I : *BB)
4247	if (auto *SI = dyn_cast<StoreInst>(Val: &I)) {
4248	if (S)
4249	// Multiple stores seen.
4250	return nullptr;
4251	else
4252	S = SI;
4253	}
4254	}
4255	return S;
4256	}
4257
4258	static Value ensureValueAvailableInSuccessor(Value V, BasicBlock *BB,
4259	Value AlternativeV = nullptr*) {
4260	// PHI is going to be a PHI node that allows the value V that is defined in
4261	// BB to be referenced in BB's only successor.
4262	//
4263	// If AlternativeV is nullptr, the only value we care about in PHI is V. It
4264	// doesn't matter to us what the other operand is (it'll never get used). We
4265	// could just create a new PHI with an undef incoming value, but that could
4266	// increase register pressure if EarlyCSE/InstCombine can't fold it with some
4267	// other PHI. So here we directly look for some PHI in BB's successor with V
4268	// as an incoming operand. If we find one, we use it, else we create a new
4269	// one.
4270	//
4271	// If AlternativeV is not nullptr, we care about both incoming values in PHI.
4272	// PHI must be exactly: phi <ty> [ %BB, %V ], [ %OtherBB, %AlternativeV]
4273	// where OtherBB is the single other predecessor of BB's only successor.
4274	PHINode PHI = nullptr*;
4275	BasicBlock *Succ = BB->getSingleSuccessor();
4276
4277	for (auto I = Succ->begin(); isa<PHINode>(Val: I); ++I)
4278	if (cast<PHINode>(Val&: I)->getIncomingValueForBlock(BB) == V) {
4279	PHI = cast<PHINode>(Val&: I);
4280	if (!AlternativeV)
4281	break;
4282
4283	assert(Succ->hasNPredecessors(`2`));
4284	auto PredI = pred_begin(BB: Succ);
4285	BasicBlock OtherPredBB = PredI == BB ? ++PredI : PredI;
4286	if (PHI->getIncomingValueForBlock(BB: OtherPredBB) == AlternativeV)
4287	break;
4288	PHI = nullptr;
4289	}
4290	if (PHI)
4291	return PHI;
4292
4293	// If V is not an instruction defined in BB, just return it.
4294	if (!AlternativeV &&
4295	(!isa<Instruction>(Val: V) \|\| cast<Instruction>(Val: V)->getParent() != BB))
4296	return V;
4297
4298	PHI = PHINode::Create(Ty: V->getType(), NumReservedValues: `2`, NameStr: "simplifycfg.merge");
4299	PHI->insertBefore(InsertPos: Succ->begin());
4300	PHI->addIncoming(V, BB);
4301	for (BasicBlock *PredBB : predecessors(BB: Succ))
4302	if (PredBB != BB)
4303	PHI->addIncoming(
4304	V: AlternativeV ? AlternativeV : PoisonValue::get(T: V->getType()), BB: PredBB);
4305	return PHI;
4306	}
4307
4308	static bool mergeConditionalStoreToAddress(
4309	BasicBlock PTB, BasicBlock PFB, BasicBlock QTB, BasicBlock QFB,
4310	BasicBlock PostBB, Value Address, bool InvertPCond, bool InvertQCond,
4311	DomTreeUpdater DTU, const* DataLayout &DL, const TargetTransformInfo &TTI) {
4312	// For every pointer, there must be exactly two stores, one coming from
4313	// PTB or PFB, and the other from QTB or QFB. We don't support more than one
4314	// store (to any address) in PTB,PFB or QTB,QFB.
4315	// FIXME: We could relax this restriction with a bit more work and performance
4316	// testing.
4317	StoreInst *PStore = findUniqueStoreInBlocks(BB1: PTB, BB2: PFB);
4318	StoreInst *QStore = findUniqueStoreInBlocks(BB1: QTB, BB2: QFB);
4319	if (!PStore \|\| !QStore)
4320	return false;
4321
4322	// Now check the stores are compatible.
4323	if (!QStore->isUnordered() \|\| !PStore->isUnordered() \|\|
4324	PStore->getValueOperand()->getType() !=
4325	QStore->getValueOperand()->getType())
4326	return false;
4327
4328	// Check that sinking the store won't cause program behavior changes. Sinking
4329	// the store out of the Q blocks won't change any behavior as we're sinking
4330	// from a block to its unconditional successor. But we're moving a store from
4331	// the P blocks down through the middle block (QBI) and past both QFB and QTB.
4332	// So we need to check that there are no aliasing loads or stores in
4333	// QBI, QTB and QFB. We also need to check there are no conflicting memory
4334	// operations between PStore and the end of its parent block.
4335	//
4336	// The ideal way to do this is to query AliasAnalysis, but we don't
4337	// preserve AA currently so that is dangerous. Be super safe and just
4338	// check there are no other memory operations at all.
4339	for (auto &I : *QFB->getSinglePredecessor())
4340	if (I.mayReadOrWriteMemory())
4341	return false;
4342	for (auto &I : *QFB)
4343	if (&I != QStore && I.mayReadOrWriteMemory())
4344	return false;
4345	if (QTB)
4346	for (auto &I : *QTB)
4347	if (&I != QStore && I.mayReadOrWriteMemory())
4348	return false;
4349	for (auto I = BasicBlock::iterator (PStore), E = PStore->getParent()->end();
4350	I != E; ++I)
4351	if (&*I != PStore && I ->mayReadOrWriteMemory())
4352	return false;
4353
4354	// If we're not in aggressive mode, we only optimize if we have some
4355	// confidence that by optimizing we'll allow P and/or Q to be if-converted.
4356	auto IsWorthwhile = [&](BasicBlock BB, ArrayRef<StoreInst > FreeStores) {
4357	if (!BB)
4358	return true;
4359	// Heuristic: if the block can be if-converted/phi-folded and the
4360	// instructions inside are all cheap (arithmetic/GEPs), it's worthwhile to
4361	// thread this store.
4362	InstructionCost Cost = `0`;
4363	InstructionCost Budget =
4364	PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic;
4365	for (auto &I : BB->instructionsWithoutDebug(SkipPseudoOp: false)) {
4366	// Consider terminator instruction to be free.
4367	if (I.isTerminator())
4368	continue;
4369	// If this is one the stores that we want to speculate out of this BB,
4370	// then don't count it's cost, consider it to be free.
4371	if (auto *S = dyn_cast<StoreInst>(Val: &I))
4372	if (llvm::find(Range&: FreeStores, Val: S))
4373	continue;
4374	// Else, we have a white-list of instructions that we are ak speculating.
4375	if (!isa<BinaryOperator>(Val: I) && !isa<GetElementPtrInst>(Val: I))
4376	return false; // Not in white-list - not worthwhile folding.
4377	// And finally, if this is a non-free instruction that we are okay
4378	// speculating, ensure that we consider the speculation budget.
4379	Cost +=
4380	TTI.getInstructionCost(U: &I, CostKind: TargetTransformInfo::TCK_SizeAndLatency);
4381	if (Cost > Budget)
4382	return false; // Eagerly refuse to fold as soon as we're out of budget.
4383	}
4384	assert(Cost <= Budget &&
4385	"When we run out of budget we will eagerly return from within the "
4386	"per-instruction loop.");
4387	return true;
4388	};
4389
4390	const std::array<StoreInst *, `2`> FreeStores = {PStore, QStore};
4391	if (!MergeCondStoresAggressively &&
4392	(!IsWorthwhile (PTB, FreeStores) \|\| !IsWorthwhile (PFB, FreeStores) \|\|
4393	!IsWorthwhile (QTB, FreeStores) \|\| !IsWorthwhile (QFB, FreeStores)))
4394	return false;
4395
4396	// If PostBB has more than two predecessors, we need to split it so we can
4397	// sink the store.
4398	if (std::next(x: pred_begin(BB: PostBB), n: `2`) != pred_end(BB: PostBB)) {
4399	// We know that QFB's only successor is PostBB. And QFB has a single
4400	// predecessor. If QTB exists, then its only successor is also PostBB.
4401	// If QTB does not exist, then QFB's only predecessor has a conditional
4402	// branch to QFB and PostBB.
4403	BasicBlock *TruePred = QTB ? QTB : QFB->getSinglePredecessor();
4404	BasicBlock *NewBB =
4405	SplitBlockPredecessors(BB: PostBB, Preds: {QFB, TruePred}, Suffix: "condstore.split", DTU);
4406	if (!NewBB)
4407	return false;
4408	PostBB = NewBB;
4409	}
4410
4411	// OK, we're going to sink the stores to PostBB. The store has to be
4412	// conditional though, so first create the predicate.
4413	CondBrInst *PBranch =
4414	cast<CondBrInst>(Val: PFB->getSinglePredecessor()->getTerminator());
4415	CondBrInst *QBranch =
4416	cast<CondBrInst>(Val: QFB->getSinglePredecessor()->getTerminator());
4417	Value *PCond = PBranch->getCondition();
4418	Value *QCond = QBranch->getCondition();
4419
4420	Value *PPHI = ensureValueAvailableInSuccessor(V: PStore->getValueOperand(),
4421	BB: PStore->getParent());
4422	Value *QPHI = ensureValueAvailableInSuccessor(V: QStore->getValueOperand(),
4423	BB: QStore->getParent(), AlternativeV: PPHI);
4424
4425	BasicBlock::iterator PostBBFirst = PostBB->getFirstInsertionPt();
4426	IRBuilder<> QB(PostBB, PostBBFirst);
4427	QB.SetCurrentDebugLocation(PostBBFirst ->getStableDebugLoc());
4428
4429	InvertPCond ^= (PStore->getParent() != PTB);
4430	InvertQCond ^= (QStore->getParent() != QTB);
4431	Value *PPred = InvertPCond ? QB.CreateNot(V: PCond) : PCond;
4432	Value *QPred = InvertQCond ? QB.CreateNot(V: QCond) : QCond;
4433
4434	Value *CombinedPred = QB.CreateOr(LHS: PPred, RHS: QPred);
4435
4436	BasicBlock::iterator InsertPt = QB.GetInsertPoint();
4437	auto *T = SplitBlockAndInsertIfThen(Cond: CombinedPred, SplitBefore: InsertPt,
4438	/Unreachable=/false,
4439	/BranchWeights=/nullptr, DTU);
4440	if (hasBranchWeightMD(I: PBranch) && hasBranchWeightMD(I: QBranch) &&
4441	!ProfcheckDisableMetadataFixes) {
4442	SmallVector<uint32_t, `2`> PWeights, QWeights;
4443	extractBranchWeights(I: *PBranch, Weights&: PWeights);
4444	extractBranchWeights(I: *QBranch, Weights&: QWeights);
4445	if (InvertPCond)
4446	std::swap(a&: PWeights [`0`], b&: PWeights [`1`]);
4447	if (InvertQCond)
4448	std::swap(a&: QWeights [`0`], b&: QWeights [`1`]);
4449	auto CombinedWeights = getDisjunctionWeights(B1: PWeights, B2: QWeights);
4450	setFittedBranchWeights(I&: *PostBB->getTerminator(),
4451	Weights: {CombinedWeights [`0`], CombinedWeights [`1`]},
4452	/IsExpected=/false, /ElideAllZero=/true);
4453	}
4454
4455	QB.SetInsertPoint(T);
4456	StoreInst *SI = cast<StoreInst>(Val: QB.CreateStore(Val: QPHI, Ptr: Address));
4457	SI->setAAMetadata(PStore->getAAMetadata().merge(Other: QStore->getAAMetadata()));
4458	// Choose the minimum alignment. If we could prove both stores execute, we
4459	// could use biggest one. In this case, though, we only know that one of the
4460	// stores executes. And we don't know it's safe to take the alignment from a
4461	// store that doesn't execute.
4462	SI->setAlignment(std::min(a: PStore->getAlign(), b: QStore->getAlign()));
4463
4464	QStore->eraseFromParent();
4465	PStore->eraseFromParent();
4466
4467	return true;
4468	}
4469
4470	static bool mergeConditionalStores(CondBrInst PBI, CondBrInst QBI,
4471	DomTreeUpdater DTU, const* DataLayout &DL,
4472	const TargetTransformInfo &TTI) {
4473	// The intention here is to find diamonds or triangles (see below) where each
4474	// conditional block contains a store to the same address. Both of these
4475	// stores are conditional, so they can't be unconditionally sunk. But it may
4476	// be profitable to speculatively sink the stores into one merged store at the
4477	// end, and predicate the merged store on the union of the two conditions of
4478	// PBI and QBI.
4479	//
4480	// This can reduce the number of stores executed if both of the conditions are
4481	// true, and can allow the blocks to become small enough to be if-converted.
4482	// This optimization will also chain, so that ladders of test-and-set
4483	// sequences can be if-converted away.
4484	//
4485	// We only deal with simple diamonds or triangles:
4486	//
4487	// PBI or PBI or a combination of the two
4488	// / \ \| \
4489	// PTB PFB \| PFB
4490	// \ / \| /
4491	// QBI QBI
4492	// / \ \| \
4493	// QTB QFB \| QFB
4494	// \ / \| /
4495	// PostBB PostBB
4496	//
4497	// We model triangles as a type of diamond with a nullptr "true" block.
4498	// Triangles are canonicalized so that the fallthrough edge is represented by
4499	// a true condition, as in the diagram above.
4500	BasicBlock *PTB = PBI->getSuccessor(i: `0`);
4501	BasicBlock *PFB = PBI->getSuccessor(i: `1`);
4502	BasicBlock *QTB = QBI->getSuccessor(i: `0`);
4503	BasicBlock *QFB = QBI->getSuccessor(i: `1`);
4504	BasicBlock *PostBB = QFB->getSingleSuccessor();
4505
4506	// Make sure we have a good guess for PostBB. If QTB's only successor is
4507	// QFB, then QFB is a better PostBB.
4508	if (QTB->getSingleSuccessor() == QFB)
4509	PostBB = QFB;
4510
4511	// If we couldn't find a good PostBB, stop.
4512	if (!PostBB)
4513	return false;
4514
4515	bool InvertPCond = false, InvertQCond = false;
4516	// Canonicalize fallthroughs to the true branches.
4517	if (PFB == QBI->getParent()) {
4518	std::swap(a&: PFB, b&: PTB);
4519	InvertPCond = true;
4520	}
4521	if (QFB == PostBB) {
4522	std::swap(a&: QFB, b&: QTB);
4523	InvertQCond = true;
4524	}
4525
4526	// From this point on we can assume PTB or QTB may be fallthroughs but PFB
4527	// and QFB may not. Model fallthroughs as a nullptr block.
4528	if (PTB == QBI->getParent())
4529	PTB = nullptr;
4530	if (QTB == PostBB)
4531	QTB = nullptr;
4532
4533	// Legality bailouts. We must have at least the non-fallthrough blocks and
4534	// the post-dominating block, and the non-fallthroughs must only have one
4535	// predecessor.
4536	auto HasOnePredAndOneSucc = [](BasicBlock BB, BasicBlock P, BasicBlock *S) {
4537	return BB->getSinglePredecessor() == P && BB->getSingleSuccessor() == S;
4538	};
4539	if (!HasOnePredAndOneSucc (PFB, PBI->getParent(), QBI->getParent()) \|\|
4540	!HasOnePredAndOneSucc (QFB, QBI->getParent(), PostBB))
4541	return false;
4542	if ((PTB && !HasOnePredAndOneSucc (PTB, PBI->getParent(), QBI->getParent())) \|\|
4543	(QTB && !HasOnePredAndOneSucc (QTB, QBI->getParent(), PostBB)))
4544	return false;
4545	if (!QBI->getParent()->hasNUses(N: `2`))
4546	return false;
4547
4548	// OK, this is a sequence of two diamonds or triangles.
4549	// Check if there are stores in PTB or PFB that are repeated in QTB or QFB.
4550	SmallPtrSet<Value *, `4`> PStoreAddresses, QStoreAddresses;
4551	for (auto *BB : {PTB, PFB}) {
4552	if (!BB)
4553	continue;
4554	for (auto &I : *BB)
4555	if (StoreInst *SI = dyn_cast<StoreInst>(Val: &I))
4556	PStoreAddresses.insert(Ptr: SI->getPointerOperand());
4557	}
4558	for (auto *BB : {QTB, QFB}) {
4559	if (!BB)
4560	continue;
4561	for (auto &I : *BB)
4562	if (StoreInst *SI = dyn_cast<StoreInst>(Val: &I))
4563	QStoreAddresses.insert(Ptr: SI->getPointerOperand());
4564	}
4565
4566	set_intersect(S1&: PStoreAddresses, S2: QStoreAddresses);
4567	// set_intersect mutates PStoreAddresses in place. Rename it here to make it
4568	// clear what it contains.
4569	auto &CommonAddresses = PStoreAddresses;
4570
4571	bool Changed = false;
4572	for (auto *Address : CommonAddresses)
4573	Changed \|=
4574	mergeConditionalStoreToAddress(PTB, PFB, QTB, QFB, PostBB, Address,
4575	InvertPCond, InvertQCond, DTU, DL, TTI);
4576	return Changed;
4577	}
4578
4579	/// If the previous block ended with a widenable branch, determine if reusing
4580	/// the target block is profitable and legal. This will have the effect of
4581	/// "widening" PBI, but doesn't require us to reason about hosting safety.
4582	static bool tryWidenCondBranchToCondBranch(CondBrInst PBI, CondBrInst BI,
4583	DomTreeUpdater *DTU) {
4584	// TODO: This can be generalized in two important ways:
4585	// 1) We can allow phi nodes in IfFalseBB and simply reuse all the input
4586	// values from the PBI edge.
4587	// 2) We can sink side effecting instructions into BI's fallthrough
4588	// successor provided they doesn't contribute to computation of
4589	// BI's condition.
4590	BasicBlock *IfTrueBB = PBI->getSuccessor(i: `0`);
4591	BasicBlock *IfFalseBB = PBI->getSuccessor(i: `1`);
4592	if (!isWidenableBranch(U: PBI) \|\| IfTrueBB != BI->getParent() \|\|
4593	!BI->getParent()->getSinglePredecessor())
4594	return false;
4595	if (!IfFalseBB->phis().empty())
4596	return false; // TODO
4597	// This helps avoid infinite loop with SimplifyCondBranchToCondBranch which
4598	// may undo the transform done here.
4599	// TODO: There might be a more fine-grained solution to this.
4600	if (!llvm::succ_empty(BB: IfFalseBB))
4601	return false;
4602	// Use lambda to lazily compute expensive condition after cheap ones.
4603	auto NoSideEffects = [](BasicBlock &BB) {
4604	return llvm::none_of(Range&: BB, P: [](const Instruction &I) {
4605	return I.mayWriteToMemory() \|\| I.mayHaveSideEffects();
4606	});
4607	};
4608	if (BI->getSuccessor(i: `1`) != IfFalseBB && // no inf looping
4609	BI->getSuccessor(i: `1`)->getTerminatingDeoptimizeCall() && // profitability
4610	NoSideEffects (*BI->getParent())) {
4611	auto *OldSuccessor = BI->getSuccessor(i: `1`);
4612	OldSuccessor->removePredecessor(Pred: BI->getParent());
4613	BI->setSuccessor(idx: `1`, NewSucc: IfFalseBB);
4614	if (DTU)
4615	DTU->applyUpdates(
4616	Updates: {{DominatorTree::Insert, BI->getParent(), IfFalseBB},
4617	{DominatorTree::Delete, BI->getParent(), OldSuccessor}});
4618	return true;
4619	}
4620	if (BI->getSuccessor(i: `0`) != IfFalseBB && // no inf looping
4621	BI->getSuccessor(i: `0`)->getTerminatingDeoptimizeCall() && // profitability
4622	NoSideEffects (*BI->getParent())) {
4623	auto *OldSuccessor = BI->getSuccessor(i: `0`);
4624	OldSuccessor->removePredecessor(Pred: BI->getParent());
4625	BI->setSuccessor(idx: `0`, NewSucc: IfFalseBB);
4626	if (DTU)
4627	DTU->applyUpdates(
4628	Updates: {{DominatorTree::Insert, BI->getParent(), IfFalseBB},
4629	{DominatorTree::Delete, BI->getParent(), OldSuccessor}});
4630	return true;
4631	}
4632	return false;
4633	}
4634
4635	/// If we have a conditional branch as a predecessor of another block,
4636	/// this function tries to simplify it. We know
4637	/// that PBI and BI are both conditional branches, and BI is in one of the
4638	/// successor blocks of PBI - PBI branches to BI.
4639	static bool SimplifyCondBranchToCondBranch(CondBrInst PBI, CondBrInst BI,
4640	DomTreeUpdater *DTU,
4641	const DataLayout &DL,
4642	const TargetTransformInfo &TTI) {
4643	BasicBlock *BB = BI->getParent();
4644
4645	// If this block ends with a branch instruction, and if there is a
4646	// predecessor that ends on a branch of the same condition, make
4647	// this conditional branch redundant.
4648	if (PBI->getCondition() == BI->getCondition() &&
4649	PBI->getSuccessor(i: `0`) != PBI->getSuccessor(i: `1`)) {
4650	// Okay, the outcome of this conditional branch is statically
4651	// knowable. If this block had a single pred, handle specially, otherwise
4652	// foldCondBranchOnValueKnownInPredecessor() will handle it.
4653	if (BB->getSinglePredecessor()) {
4654	// Turn this into a branch on constant.
4655	bool CondIsTrue = PBI->getSuccessor(i: `0`) == BB;
4656	BI->setCondition(
4657	ConstantInt::get(Ty: Type::getInt1Ty(C&: BB->getContext()), V: CondIsTrue));
4658	return true; // Nuke the branch on constant.
4659	}
4660	}
4661
4662	// If the previous block ended with a widenable branch, determine if reusing
4663	// the target block is profitable and legal. This will have the effect of
4664	// "widening" PBI, but doesn't require us to reason about hoisting safety.
4665	if (tryWidenCondBranchToCondBranch(PBI, BI, DTU))
4666	return true;
4667
4668	// If both branches are conditional and both contain stores to the same
4669	// address, remove the stores from the conditionals and create a conditional
4670	// merged store at the end.
4671	if (MergeCondStores && mergeConditionalStores(PBI, QBI: BI, DTU, DL, TTI))
4672	return true;
4673
4674	// If this is a conditional branch in an empty block, and if any
4675	// predecessors are a conditional branch to one of our destinations,
4676	// fold the conditions into logical ops and one cond br.
4677
4678	// Ignore dbg intrinsics.
4679	if (&BB->instructionsWithoutDebug(SkipPseudoOp: false*).begin() != BI)
4680	return false;
4681
4682	int PBIOp, BIOp;
4683	if (PBI->getSuccessor(i: `0`) == BI->getSuccessor(i: `0`)) {
4684	PBIOp = `0`;
4685	BIOp = `0`;
4686	} else if (PBI->getSuccessor(i: `0`) == BI->getSuccessor(i: `1`)) {
4687	PBIOp = `0`;
4688	BIOp = `1`;
4689	} else if (PBI->getSuccessor(i: `1`) == BI->getSuccessor(i: `0`)) {
4690	PBIOp = `1`;
4691	BIOp = `0`;
4692	} else if (PBI->getSuccessor(i: `1`) == BI->getSuccessor(i: `1`)) {
4693	PBIOp = `1`;
4694	BIOp = `1`;
4695	} else {
4696	return false;
4697	}
4698
4699	// Check to make sure that the other destination of this branch
4700	// isn't BB itself. If so, this is an infinite loop that will
4701	// keep getting unwound.
4702	if (PBI->getSuccessor(i: PBIOp) == BB)
4703	return false;
4704
4705	// If predecessor's branch probability to BB is too low don't merge branches.
4706	SmallVector<uint32_t, `2`> PredWeights;
4707	if (!PBI->getMetadata(KindID: LLVMContext::MD_unpredictable) &&
4708	extractBranchWeights(I: *PBI, Weights&: PredWeights) &&
4709	(static_cast<uint64_t>(PredWeights [`0`]) + PredWeights [`1`]) != `0`) {
4710
4711	BranchProbability CommonDestProb = BranchProbability::getBranchProbability(
4712	Numerator: PredWeights [PBIOp],
4713	Denominator: static_cast<uint64_t>(PredWeights [`0`]) + PredWeights [`1`]);
4714
4715	BranchProbability Likely = TTI.getPredictableBranchThreshold();
4716	if (CommonDestProb >= Likely)
4717	return false;
4718	}
4719
4720	// Do not perform this transformation if it would require
4721	// insertion of a large number of select instructions. For targets
4722	// without predication/cmovs, this is a big pessimization.
4723
4724	BasicBlock *CommonDest = PBI->getSuccessor(i: PBIOp);
4725	BasicBlock *RemovedDest = PBI->getSuccessor(i: PBIOp ^ `1`);
4726	unsigned NumPhis = `0`;
4727	for (BasicBlock::iterator II = CommonDest->begin(); isa<PHINode>(Val: II);
4728	++II, ++NumPhis) {
4729	if (NumPhis > `2`) // Disable this xform.
4730	return false;
4731	}
4732
4733	// Finally, if everything is ok, fold the branches to logical ops.
4734	BasicBlock *OtherDest = BI->getSuccessor(i: BIOp ^ `1`);
4735
4736	LLVM_DEBUG(dbgs() << "FOLDING BRs:" << *PBI->getParent()
4737	<< "AND: " << *BI->getParent());
4738
4739	SmallVector<DominatorTree::UpdateType, `5`> Updates;
4740
4741	// If OtherDest is* BB, then BB is a basic block with a single conditional*
4742	// branch in it, where one edge (OtherDest) goes back to itself but the other
4743	// exits. We don't know* that the program avoids the infinite loop*
4744	// (even though that seems likely). If we do this xform naively, we'll end up
4745	// recursively unpeeling the loop. Since we know that (after the xform is
4746	// done) that the block is* infinite if reached, we just make it an obviously*
4747	// infinite loop with no cond branch.
4748	if (OtherDest == BB) {
4749	// Insert it at the end of the function, because it's either code,
4750	// or it won't matter if it's hot. :)
4751	BasicBlock *InfLoopBlock =
4752	BasicBlock::Create(Context&: BB->getContext(), Name: "infloop", Parent: BB->getParent());
4753	UncondBrInst::Create(IfTrue: InfLoopBlock, InsertBefore: InfLoopBlock);
4754	if (DTU)
4755	Updates.push_back(Elt: {DominatorTree::Insert, InfLoopBlock, InfLoopBlock});
4756	OtherDest = InfLoopBlock;
4757	}
4758
4759	LLVM_DEBUG(dbgs() << *PBI->getParent()->getParent());
4760
4761	// BI may have other predecessors. Because of this, we leave
4762	// it alone, but modify PBI.
4763
4764	// Make sure we get to CommonDest on True&True directions.
4765	Value *PBICond = PBI->getCondition();
4766	IRBuilder<NoFolder> Builder(PBI);
4767	if (PBIOp)
4768	PBICond = Builder.CreateNot(V: PBICond, Name: PBICond->getName() + ".not");
4769
4770	Value *BICond = BI->getCondition();
4771	if (BIOp)
4772	BICond = Builder.CreateNot(V: BICond, Name: BICond->getName() + ".not");
4773
4774	// Merge the conditions.
4775	Value *Cond =
4776	createLogicalOp(Builder, Opc: Instruction::Or, LHS: PBICond, RHS: BICond, Name: "brmerge");
4777
4778	// Modify PBI to branch on the new condition to the new dests.
4779	PBI->setCondition(Cond);
4780	PBI->setSuccessor(idx: `0`, NewSucc: CommonDest);
4781	PBI->setSuccessor(idx: `1`, NewSucc: OtherDest);
4782
4783	if (DTU) {
4784	Updates.push_back(Elt: {DominatorTree::Insert, PBI->getParent(), OtherDest});
4785	Updates.push_back(Elt: {DominatorTree::Delete, PBI->getParent(), RemovedDest});
4786
4787	DTU->applyUpdates(Updates);
4788	}
4789
4790	// Update branch weight for PBI.
4791	uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
4792	uint64_t PredCommon, PredOther, SuccCommon, SuccOther;
4793	bool HasWeights =
4794	extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight,
4795	SuccTrueWeight, SuccFalseWeight);
4796	if (HasWeights) {
4797	PredCommon = PBIOp ? PredFalseWeight : PredTrueWeight;
4798	PredOther = PBIOp ? PredTrueWeight : PredFalseWeight;
4799	SuccCommon = BIOp ? SuccFalseWeight : SuccTrueWeight;
4800	SuccOther = BIOp ? SuccTrueWeight : SuccFalseWeight;
4801	// The weight to CommonDest should be PredCommon SuccTotal +*
4802	// PredOther SuccCommon.*
4803	// The weight to OtherDest should be PredOther SuccOther.*
4804	uint64_t NewWeights[`2`] = {PredCommon * (SuccCommon + SuccOther) +
4805	PredOther * SuccCommon,
4806	PredOther * SuccOther};
4807
4808	setFittedBranchWeights(I&: PBI, Weights: NewWeights, /IsExpected=/*false,
4809	/ElideAllZero=/true);
4810	// Cond may be a select instruction with the first operand set to "true", or
4811	// the second to "false" (see how createLogicalOp works for `and` and `or`)
4812	if (!ProfcheckDisableMetadataFixes)
4813	if (auto *SI = dyn_cast<SelectInst>(Val: Cond)) {
4814	assert(isSelectInRoleOfConjunctionOrDisjunction(SI));
4815	// The select is predicated on PBICond
4816	assert(dyn_cast<SelectInst>(SI)->getCondition() == PBICond);
4817	// The corresponding probabilities are what was referred to above as
4818	// PredCommon and PredOther.
4819	setFittedBranchWeights(I&: *SI, Weights: {PredCommon, PredOther},
4820	/IsExpected=/false, /ElideAllZero=/true);
4821	}
4822	}
4823
4824	// OtherDest may have phi nodes. If so, add an entry from PBI's
4825	// block that are identical to the entries for BI's block.
4826	addPredecessorToBlock(Succ: OtherDest, NewPred: PBI->getParent(), ExistPred: BB);
4827
4828	// We know that the CommonDest already had an edge from PBI to
4829	// it. If it has PHIs though, the PHIs may have different
4830	// entries for BB and PBI's BB. If so, insert a select to make
4831	// them agree.
4832	for (PHINode &PN : CommonDest->phis()) {
4833	Value *BIV = PN.getIncomingValueForBlock(BB);
4834	unsigned PBBIdx = PN.getBasicBlockIndex(BB: PBI->getParent());
4835	Value *PBIV = PN.getIncomingValue(i: PBBIdx);
4836	if (BIV != PBIV) {
4837	// Insert a select in PBI to pick the right value.
4838	SelectInst *NV = cast<SelectInst>(
4839	Val: Builder.CreateSelect(C: PBICond, True: PBIV, False: BIV, Name: PBIV->getName() + ".mux"));
4840	PN.setIncomingValue(i: PBBIdx, V: NV);
4841	// The select has the same condition as PBI, in the same BB. The
4842	// probabilities don't change.
4843	if (HasWeights) {
4844	uint64_t TrueWeight = PBIOp ? PredFalseWeight : PredTrueWeight;
4845	uint64_t FalseWeight = PBIOp ? PredTrueWeight : PredFalseWeight;
4846	setFittedBranchWeights(I&: *NV, Weights: {TrueWeight, FalseWeight},
4847	/IsExpected=/false, /ElideAllZero=/true);
4848	}
4849	}
4850	}
4851
4852	LLVM_DEBUG(dbgs() << "INTO: " << *PBI->getParent());
4853	LLVM_DEBUG(dbgs() << *PBI->getParent()->getParent());
4854
4855	// This basic block is probably dead. We know it has at least
4856	// one fewer predecessor.
4857	return true;
4858	}
4859
4860	// Simplifies a terminator by replacing it with a branch to TrueBB if Cond is
4861	// true or to FalseBB if Cond is false.
4862	// Takes care of updating the successors and removing the old terminator.
4863	// Also makes sure not to introduce new successors by assuming that edges to
4864	// non-successor TrueBBs and FalseBBs aren't reachable.
4865	bool SimplifyCFGOpt::simplifyTerminatorOnSelect(Instruction *OldTerm,
4866	Value Cond, BasicBlock TrueBB,
4867	BasicBlock *FalseBB,
4868	uint32_t TrueWeight,
4869	uint32_t FalseWeight) {
4870	auto *BB = OldTerm->getParent();
4871	// Remove any superfluous successor edges from the CFG.
4872	// First, figure out which successors to preserve.
4873	// If TrueBB and FalseBB are equal, only try to preserve one copy of that
4874	// successor.
4875	BasicBlock *KeepEdge1 = TrueBB;
4876	BasicBlock KeepEdge2 = TrueBB != FalseBB ? FalseBB : nullptr*;
4877
4878	SmallSetVector<BasicBlock *, `2`> RemovedSuccessors;
4879
4880	// Then remove the rest.
4881	for (BasicBlock *Succ : successors(I: OldTerm)) {
4882	// Make sure only to keep exactly one copy of each edge.
4883	if (Succ == KeepEdge1)
4884	KeepEdge1 = nullptr;
4885	else if (Succ == KeepEdge2)
4886	KeepEdge2 = nullptr;
4887	else {
4888	Succ->removePredecessor(Pred: BB,
4889	/KeepOneInputPHIs=/true);
4890
4891	if (Succ != TrueBB && Succ != FalseBB)
4892	RemovedSuccessors.insert(X: Succ);
4893	}
4894	}
4895
4896	IRBuilder<> Builder(OldTerm);
4897	Builder.SetCurrentDebugLocation(OldTerm->getDebugLoc());
4898
4899	// Insert an appropriate new terminator.
4900	if (!KeepEdge1 && !KeepEdge2) {
4901	if (TrueBB == FalseBB) {
4902	// We were only looking for one successor, and it was present.
4903	// Create an unconditional branch to it.
4904	Builder.CreateBr(Dest: TrueBB);
4905	} else {
4906	// We found both of the successors we were looking for.
4907	// Create a conditional branch sharing the condition of the select.
4908	CondBrInst *NewBI = Builder.CreateCondBr(Cond, True: TrueBB, False: FalseBB);
4909	setBranchWeights(I&: *NewBI, Weights: {TrueWeight, FalseWeight},
4910	/IsExpected=/false, /ElideAllZero=/true);
4911	}
4912	} else if (KeepEdge1 && (KeepEdge2 \|\| TrueBB == FalseBB)) {
4913	// Neither of the selected blocks were successors, so this
4914	// terminator must be unreachable.
4915	new UnreachableInst (OldTerm->getContext(), OldTerm->getIterator());
4916	} else {
4917	// One of the selected values was a successor, but the other wasn't.
4918	// Insert an unconditional branch to the one that was found;
4919	// the edge to the one that wasn't must be unreachable.
4920	if (!KeepEdge1) {
4921	// Only TrueBB was found.
4922	Builder.CreateBr(Dest: TrueBB);
4923	} else {
4924	// Only FalseBB was found.
4925	Builder.CreateBr(Dest: FalseBB);
4926	}
4927	}
4928
4929	eraseTerminatorAndDCECond(TI: OldTerm);
4930
4931	if (DTU) {
4932	SmallVector<DominatorTree::UpdateType, `2`> Updates;
4933	Updates.reserve(N: RemovedSuccessors.size());
4934	for (auto *RemovedSuccessor : RemovedSuccessors)
4935	Updates.push_back(Elt: {DominatorTree::Delete, BB, RemovedSuccessor});
4936	DTU->applyUpdates(Updates);
4937	}
4938
4939	return true;
4940	}
4941
4942	// Replaces
4943	// (switch (select cond, X, Y)) on constant X, Y
4944	// with a branch - conditional if X and Y lead to distinct BBs,
4945	// unconditional otherwise.
4946	bool SimplifyCFGOpt::simplifySwitchOnSelect(SwitchInst *SI,
4947	SelectInst *Select) {
4948	// Check for constant integer values in the select.
4949	ConstantInt *TrueVal = dyn_cast<ConstantInt>(Val: Select->getTrueValue());
4950	ConstantInt *FalseVal = dyn_cast<ConstantInt>(Val: Select->getFalseValue());
4951	if (!TrueVal \|\| !FalseVal)
4952	return false;
4953
4954	// Find the relevant condition and destinations.
4955	Value *Condition = Select->getCondition();
4956	BasicBlock *TrueBB = SI->findCaseValue(C: TrueVal)->getCaseSuccessor();
4957	BasicBlock *FalseBB = SI->findCaseValue(C: FalseVal)->getCaseSuccessor();
4958
4959	// Get weight for TrueBB and FalseBB.
4960	uint32_t TrueWeight = `0`, FalseWeight = `0`;
4961	SmallVector<uint64_t, `8`> Weights;
4962	bool HasWeights = hasBranchWeightMD(I: *SI);
4963	if (HasWeights) {
4964	getBranchWeights(TI: SI, Weights);
4965	if (Weights.size() == `1` + SI->getNumCases()) {
4966	TrueWeight =
4967	(uint32_t)Weights [SI->findCaseValue(C: TrueVal)->getSuccessorIndex()];
4968	FalseWeight =
4969	(uint32_t)Weights [SI->findCaseValue(C: FalseVal)->getSuccessorIndex()];
4970	}
4971	}
4972
4973	// Perform the actual simplification.
4974	return simplifyTerminatorOnSelect(OldTerm: SI, Cond: Condition, TrueBB, FalseBB, TrueWeight,
4975	FalseWeight);
4976	}
4977
4978	// Replaces
4979	// (indirectbr (select cond, blockaddress(@fn, BlockA),
4980	// blockaddress(@fn, BlockB)))
4981	// with
4982	// (br cond, BlockA, BlockB).
4983	bool SimplifyCFGOpt::simplifyIndirectBrOnSelect(IndirectBrInst *IBI,
4984	SelectInst *SI) {
4985	// Check that both operands of the select are block addresses.
4986	BlockAddress *TBA = dyn_cast<BlockAddress>(Val: SI->getTrueValue());
4987	BlockAddress *FBA = dyn_cast<BlockAddress>(Val: SI->getFalseValue());
4988	if (!TBA \|\| !FBA)
4989	return false;
4990
4991	// Extract the actual blocks.
4992	BasicBlock *TrueBB = TBA->getBasicBlock();
4993	BasicBlock *FalseBB = FBA->getBasicBlock();
4994
4995	// The select's profile becomes the profile of the conditional branch that
4996	// replaces the indirect branch.
4997	SmallVector<uint32_t> SelectBranchWeights(`2`);
4998	if (!ProfcheckDisableMetadataFixes)
4999	extractBranchWeights(I: *SI, Weights&: SelectBranchWeights);
5000	// Perform the actual simplification.
5001	return simplifyTerminatorOnSelect(OldTerm: IBI, Cond: SI->getCondition(), TrueBB, FalseBB,
5002	TrueWeight: SelectBranchWeights [`0`],
5003	FalseWeight: SelectBranchWeights [`1`]);
5004	}
5005
5006	/// This is called when we find an icmp instruction
5007	/// (a seteq/setne with a constant) as the only instruction in a
5008	/// block that ends with an uncond branch. We are looking for a very specific
5009	/// pattern that occurs when "A == 1 \|\| A == 2 \|\| A == 3" gets simplified. In
5010	/// this case, we merge the first two "or's of icmp" into a switch, but then the
5011	/// default value goes to an uncond block with a seteq in it, we get something
5012	/// like:
5013	///
5014	/// switch i8 %A, label %DEFAULT [ i8 1, label %end i8 2, label %end ]
5015	/// DEFAULT:
5016	/// %tmp = icmp eq i8 %A, 92
5017	/// br label %end
5018	/// end:
5019	/// ... = phi i1 [ true, %entry ], [ %tmp, %DEFAULT ], [ true, %entry ]
5020	///
5021	/// We prefer to split the edge to 'end' so that there is a true/false entry to
5022	/// the PHI, merging the third icmp into the switch.
5023	bool SimplifyCFGOpt::tryToSimplifyUncondBranchWithICmpInIt(
5024	ICmpInst *ICI, IRBuilder<> &Builder) {
5025	// Select == nullptr means we assume that there is a hidden no-op select
5026	// instruction of `_ = select %icmp, true, false` after `%icmp = icmp ...`
5027	return tryToSimplifyUncondBranchWithICmpSelectInIt(ICI, Select: nullptr, Builder);
5028	}
5029
5030	/// Similar to tryToSimplifyUncondBranchWithICmpInIt, but handle a more generic
5031	/// case. This is called when we find an icmp instruction (a seteq/setne with a
5032	/// constant) and its following select instruction as the only TWO instructions
5033	/// in a block that ends with an uncond branch. We are looking for a very
5034	/// specific pattern that occurs when "
5035	/// if (A == 1) return C1;
5036	/// if (A == 2) return C2;
5037	/// if (A < 3) return C3;
5038	/// return C4;
5039	/// " gets simplified. In this case, we merge the first two "branches of icmp"
5040	/// into a switch, but then the default value goes to an uncond block with a lt
5041	/// icmp and select in it, as InstCombine can not simplify "A < 3" as "A == 2".
5042	/// After SimplifyCFG and other subsequent optimizations (e.g., SCCP), we might
5043	/// get something like:
5044	///
5045	/// case1:
5046	/// switch i8 %A, label %DEFAULT [ i8 0, label %end i8 1, label %case2 ]
5047	/// case2:
5048	/// br label %end
5049	/// DEFAULT:
5050	/// %tmp = icmp eq i8 %A, 2
5051	/// %val = select i1 %tmp, i8 C3, i8 C4
5052	/// br label %end
5053	/// end:
5054	/// _ = phi i8 [ C1, %case1 ], [ C2, %case2 ], [ %val, %DEFAULT ]
5055	///
5056	/// We prefer to split the edge to 'end' so that there are TWO entries of V3/V4
5057	/// to the PHI, merging the icmp & select into the switch, as follows:
5058	///
5059	/// case1:
5060	/// switch i8 %A, label %DEFAULT [
5061	/// i8 0, label %end
5062	/// i8 1, label %case2
5063	/// i8 2, label %case3
5064	/// ]
5065	/// case2:
5066	/// br label %end
5067	/// case3:
5068	/// br label %end
5069	/// DEFAULT:
5070	/// br label %end
5071	/// end:
5072	/// _ = phi i8 [ C1, %case1 ], [ C2, %case2 ], [ C3, %case2 ], [ C4, %DEFAULT]
5073	bool SimplifyCFGOpt::tryToSimplifyUncondBranchWithICmpSelectInIt(
5074	ICmpInst ICI, SelectInst Select, IRBuilder<> &Builder) {
5075	BasicBlock *BB = ICI->getParent();
5076
5077	// If the block has any PHIs in it or the icmp/select has multiple uses, it is
5078	// too complex.
5079	/// TODO: support multi-phis in succ BB of select's BB.
5080	if (isa<PHINode>(Val: BB->begin()) \|\| !ICI->hasOneUse() \|\|
5081	(Select && !Select->hasOneUse()))
5082	return false;
5083
5084	// The pattern we're looking for is where our only predecessor is a switch on
5085	// 'V' and this block is the default case for the switch. In this case we can
5086	// fold the compared value into the switch to simplify things.
5087	BasicBlock *Pred = BB->getSinglePredecessor();
5088	if (!Pred \|\| !isa<SwitchInst>(Val: Pred->getTerminator()))
5089	return false;
5090
5091	Value *IcmpCond;
5092	ConstantInt *NewCaseVal;
5093	CmpPredicate Predicate;
5094
5095	// Match icmp X, C
5096	if (!match(V: ICI,
5097	P: m_ICmp(Pred&: Predicate, L: m_Value(V&: IcmpCond), R: m_ConstantInt(CI&: NewCaseVal))))
5098	return false;
5099
5100	Value SelectCond, SelectTrueVal, *SelectFalseVal;
5101	Instruction *User;
5102	if (!Select) {
5103	// If Select == nullptr, we can assume that there is a hidden no-op select
5104	// just after icmp
5105	SelectCond = ICI;
5106	SelectTrueVal = Builder.getTrue();
5107	SelectFalseVal = Builder.getFalse();
5108	User = ICI->user_back();
5109	} else {
5110	SelectCond = Select->getCondition();
5111	// Check if the select condition is the same as the icmp condition.
5112	if (SelectCond != ICI)
5113	return false;
5114	SelectTrueVal = Select->getTrueValue();
5115	SelectFalseVal = Select->getFalseValue();
5116	User = Select->user_back();
5117	}
5118
5119	SwitchInst *SI = cast<SwitchInst>(Val: Pred->getTerminator());
5120	if (SI->getCondition() != IcmpCond)
5121	return false;
5122
5123	// If BB is reachable on a non-default case, then we simply know the value of
5124	// V in this block. Substitute it and constant fold the icmp instruction
5125	// away.
5126	if (SI->getDefaultDest() != BB) {
5127	ConstantInt *VVal = SI->findCaseDest(BB);
5128	assert(VVal && "Should have a unique destination value");
5129	ICI->setOperand(i_nocapture: `0`, Val_nocapture: VVal);
5130
5131	if (Value *V = simplifyInstruction(I: ICI, Q: {DL, ICI})) {
5132	ICI->replaceAllUsesWith(V);
5133	ICI->eraseFromParent();
5134	}
5135	// BB is now empty, so it is likely to simplify away.
5136	return requestResimplify();
5137	}
5138
5139	// Ok, the block is reachable from the default dest. If the constant we're
5140	// comparing exists in one of the other edges, then we can constant fold ICI
5141	// and zap it.
5142	if (SI->findCaseValue(C: NewCaseVal) != SI->case_default()) {
5143	Value *V;
5144	if (Predicate == ICmpInst::ICMP_EQ)
5145	V = ConstantInt::getFalse(Context&: BB->getContext());
5146	else
5147	V = ConstantInt::getTrue(Context&: BB->getContext());
5148
5149	ICI->replaceAllUsesWith(V);
5150	ICI->eraseFromParent();
5151	// BB is now empty, so it is likely to simplify away.
5152	return requestResimplify();
5153	}
5154
5155	// The use of the select has to be in the 'end' block, by the only PHI node in
5156	// the block.
5157	BasicBlock *SuccBlock = BB->getTerminator()->getSuccessor(Idx: `0`);
5158	PHINode *PHIUse = dyn_cast<PHINode>(Val: User);
5159	if (PHIUse == nullptr \|\| PHIUse != &SuccBlock->front() \|\|
5160	isa<PHINode>(Val: ++BasicBlock::iterator (PHIUse)))
5161	return false;
5162
5163	// If the icmp is a SETEQ, then the default dest gets SelectFalseVal, the new
5164	// edge gets SelectTrueVal in the PHI.
5165	Value *DefaultCst = SelectFalseVal;
5166	Value *NewCst = SelectTrueVal;
5167
5168	if (ICI->getPredicate() == ICmpInst::ICMP_NE)
5169	std::swap(a&: DefaultCst, b&: NewCst);
5170
5171	// Replace Select (which is used by the PHI for the default value) with
5172	// SelectFalseVal or SelectTrueVal depending on if ICI is EQ or NE.
5173	if (Select) {
5174	Select->replaceAllUsesWith(V: DefaultCst);
5175	Select->eraseFromParent();
5176	} else {
5177	ICI->replaceAllUsesWith(V: DefaultCst);
5178	}
5179	ICI->eraseFromParent();
5180
5181	SmallVector<DominatorTree::UpdateType, `2`> Updates;
5182
5183	// Okay, the switch goes to this block on a default value. Add an edge from
5184	// the switch to the merge point on the compared value.
5185	BasicBlock *NewBB =
5186	BasicBlock::Create(Context&: BB->getContext(), Name: "switch.edge", Parent: BB->getParent(), InsertBefore: BB);
5187	{
5188	SwitchInstProfUpdateWrapper SIW(*SI);
5189	auto W0 = SIW.getSuccessorWeight(idx: `0`);
5190	SwitchInstProfUpdateWrapper::CaseWeightOpt NewW;
5191	if (W0) {
5192	NewW = ((uint64_t(*W0) + `1`) >> `1`);
5193	SIW.setSuccessorWeight(idx: `0`, W: *NewW);
5194	}
5195	SIW.addCase(OnVal: NewCaseVal, Dest: NewBB, W: NewW);
5196	if (DTU)
5197	Updates.push_back(Elt: {DominatorTree::Insert, Pred, NewBB});
5198	}
5199
5200	// NewBB branches to the phi block, add the uncond branch and the phi entry.
5201	Builder.SetInsertPoint(NewBB);
5202	Builder.SetCurrentDebugLocation(SI->getDebugLoc());
5203	Builder.CreateBr(Dest: SuccBlock);
5204	PHIUse->addIncoming(V: NewCst, BB: NewBB);
5205	if (DTU) {
5206	Updates.push_back(Elt: {DominatorTree::Insert, NewBB, SuccBlock});
5207	DTU->applyUpdates(Updates);
5208	}
5209	return true;
5210	}
5211
5212	/// Check to see if it is branching on an or/and chain of icmp instructions, and
5213	/// fold it into a switch instruction if so.
5214	bool SimplifyCFGOpt::simplifyBranchOnICmpChain(CondBrInst *BI,
5215	IRBuilder<> &Builder,
5216	const DataLayout &DL) {
5217	Instruction *Cond = dyn_cast<Instruction>(Val: BI->getCondition());
5218	if (!Cond)
5219	return false;
5220
5221	// Change br (X == 0 \| X == 1), T, F into a switch instruction.
5222	// If this is a bunch of seteq's or'd together, or if it's a bunch of
5223	// 'setne's and'ed together, collect them.
5224
5225	// Try to gather values from a chain of and/or to be turned into a switch
5226	ConstantComparesGatherer ConstantCompare(Cond, DL);
5227	// Unpack the result
5228	SmallVectorImpl<ConstantInt *> &Values = ConstantCompare.Vals;
5229	Value *CompVal = ConstantCompare.CompValue;
5230	unsigned UsedICmps = ConstantCompare.UsedICmps;
5231	Value *ExtraCase = ConstantCompare.Extra;
5232	bool TrueWhenEqual = ConstantCompare.IsEq;
5233
5234	// If we didn't have a multiply compared value, fail.
5235	if (!CompVal)
5236	return false;
5237
5238	// Avoid turning single icmps into a switch.
5239	if (UsedICmps <= `1`)
5240	return false;
5241
5242	// There might be duplicate constants in the list, which the switch
5243	// instruction can't handle, remove them now.
5244	array_pod_sort(Start: Values.begin(), End: Values.end(), Compare: constantIntSortPredicate);
5245	Values.erase(CS: llvm::unique(R&: Values), CE: Values.end());
5246
5247	// If Extra was used, we require at least two switch values to do the
5248	// transformation. A switch with one value is just a conditional branch.
5249	if (ExtraCase && Values.size() < `2`)
5250	return false;
5251
5252	SmallVector<uint32_t> BranchWeights;
5253	const bool HasProfile = !ProfcheckDisableMetadataFixes &&
5254	extractBranchWeights(I: *BI, Weights&: BranchWeights);
5255
5256	// Figure out which block is which destination.
5257	BasicBlock *DefaultBB = BI->getSuccessor(i: `1`);
5258	BasicBlock *EdgeBB = BI->getSuccessor(i: `0`);
5259	if (!TrueWhenEqual) {
5260	std::swap(a&: DefaultBB, b&: EdgeBB);
5261	if (HasProfile)
5262	std::swap(a&: BranchWeights [`0`], b&: BranchWeights [`1`]);
5263	}
5264
5265	BasicBlock *BB = BI->getParent();
5266
5267	LLVM_DEBUG(dbgs() << "Converting 'icmp' chain with " << Values.size()
5268	<< " cases into SWITCH. BB is:\n"
5269	<< *BB);
5270
5271	SmallVector<DominatorTree::UpdateType, `2`> Updates;
5272
5273	// If there are any extra values that couldn't be folded into the switch
5274	// then we evaluate them with an explicit branch first. Split the block
5275	// right before the condbr to handle it.
5276	if (ExtraCase) {
5277	BasicBlock NewBB = SplitBlock(Old: BB, SplitPt: BI, DTU, /LI=/*nullptr,
5278	/MSSAU=/nullptr, BBName: "switch.early.test");
5279
5280	// Remove the uncond branch added to the old block.
5281	Instruction *OldTI = BB->getTerminator();
5282	Builder.SetInsertPoint(OldTI);
5283
5284	// There can be an unintended UB if extra values are Poison. Before the
5285	// transformation, extra values may not be evaluated according to the
5286	// condition, and it will not raise UB. But after transformation, we are
5287	// evaluating extra values before checking the condition, and it will raise
5288	// UB. It can be solved by adding freeze instruction to extra values.
5289	AssumptionCache *AC = Options.AC;
5290
5291	if (!isGuaranteedNotToBeUndefOrPoison(V: ExtraCase, AC, CtxI: BI, DT: nullptr))
5292	ExtraCase = Builder.CreateFreeze(V: ExtraCase);
5293
5294	// We don't have any info about this condition.
5295	auto *Br = TrueWhenEqual ? Builder.CreateCondBr(Cond: ExtraCase, True: EdgeBB, False: NewBB)
5296	: Builder.CreateCondBr(Cond: ExtraCase, True: NewBB, False: EdgeBB);
5297	setExplicitlyUnknownBranchWeightsIfProfiled(I&: *Br, DEBUG_TYPE);
5298
5299	OldTI->eraseFromParent();
5300
5301	if (DTU)
5302	Updates.push_back(Elt: {DominatorTree::Insert, BB, EdgeBB});
5303
5304	// If there are PHI nodes in EdgeBB, then we need to add a new entry to them
5305	// for the edge we just added.
5306	addPredecessorToBlock(Succ: EdgeBB, NewPred: BB, ExistPred: NewBB);
5307
5308	LLVM_DEBUG(dbgs() << " ** 'icmp' chain unhandled condition: " << *ExtraCase
5309	<< "\nEXTRABB = " << *BB);
5310	BB = NewBB;
5311	}
5312
5313	Builder.SetInsertPoint(BI);
5314	// Convert pointer to int before we switch.
5315	if (CompVal->getType()->isPointerTy()) {
5316	assert(!DL.hasUnstableRepresentation(CompVal->getType()) &&
5317	"Should not end up here with unstable pointers");
5318	CompVal = Builder.CreatePtrToInt(
5319	V: CompVal, DestTy: DL.getIntPtrType(CompVal->getType()), Name: "magicptr");
5320	}
5321
5322	// Check if we can represent the values as a contiguous range. If so, we use a
5323	// range check + conditional branch instead of a switch.
5324	if (Values.front()->getValue() - Values.back()->getValue() ==
5325	Values.size() - `1`) {
5326	ConstantRange RangeToCheck = ConstantRange::getNonEmpty(
5327	Lower: Values.back()->getValue(), Upper: Values.front()->getValue() + `1`);
5328	APInt Offset, RHS;
5329	ICmpInst::Predicate Pred;
5330	RangeToCheck.getEquivalentICmp(Pred, RHS, Offset);
5331	Value *X = CompVal;
5332	if (!Offset.isZero())
5333	X = Builder.CreateAdd(LHS: X, RHS: ConstantInt::get(Ty: CompVal->getType(), V: Offset));
5334	Value *Cond =
5335	Builder.CreateICmp(P: Pred, LHS: X, RHS: ConstantInt::get(Ty: CompVal->getType(), V: RHS));
5336	CondBrInst *NewBI = Builder.CreateCondBr(Cond, True: EdgeBB, False: DefaultBB);
5337	if (HasProfile)
5338	setBranchWeights(I&: NewBI, Weights: BranchWeights, /IsExpected=/*false);
5339	// We don't need to update PHI nodes since we don't add any new edges.
5340	} else {
5341	// Create the new switch instruction now.
5342	SwitchInst *New = Builder.CreateSwitch(V: CompVal, Dest: DefaultBB, NumCases: Values.size());
5343	if (HasProfile) {
5344	// We know the weight of the default case. We don't know the weight of the
5345	// other cases, but rather than completely lose profiling info, we split
5346	// the remaining probability equally over them.
5347	SmallVector<uint32_t> NewWeights(Values.size() + `1`);
5348	NewWeights [`0`] = BranchWeights [`1`]; // this is the default, and we swapped
5349	// if TrueWhenEqual.
5350	for (auto &V : drop_begin(RangeOrContainer&: NewWeights))
5351	V = BranchWeights [`0`] / Values.size();
5352	setBranchWeights(I&: New, Weights: NewWeights, /IsExpected=/*false);
5353	}
5354
5355	// Add all of the 'cases' to the switch instruction.
5356	for (ConstantInt *Val : Values)
5357	New->addCase(OnVal: Val, Dest: EdgeBB);
5358
5359	// We added edges from PI to the EdgeBB. As such, if there were any
5360	// PHI nodes in EdgeBB, they need entries to be added corresponding to
5361	// the number of edges added.
5362	for (BasicBlock::iterator BBI = EdgeBB->begin(); isa<PHINode>(Val: BBI); ++BBI) {
5363	PHINode *PN = cast<PHINode>(Val&: BBI);
5364	Value *InVal = PN->getIncomingValueForBlock(BB);
5365	for (unsigned i = `0`, e = Values.size() - `1`; i != e; ++i)
5366	PN->addIncoming(V: InVal, BB);
5367	}
5368	}
5369
5370	// Erase the old branch instruction.
5371	eraseTerminatorAndDCECond(TI: BI);
5372	if (DTU)
5373	DTU->applyUpdates(Updates);
5374
5375	LLVM_DEBUG(dbgs() << " ** 'icmp' chain result is:\n" << *BB << `'\n'`);
5376	return true;
5377	}
5378
5379	bool SimplifyCFGOpt::simplifyResume(ResumeInst *RI, IRBuilder<> &Builder) {
5380	if (isa<PHINode>(Val: RI->getValue()))
5381	return simplifyCommonResume(RI);
5382	else if (isa<LandingPadInst>(Val: RI->getParent()->getFirstNonPHIIt()) &&
5383	RI->getValue() == &*RI->getParent()->getFirstNonPHIIt())
5384	// The resume must unwind the exception that caused control to branch here.
5385	return simplifySingleResume(RI);
5386
5387	return false;
5388	}
5389
5390	// Check if cleanup block is empty
5391	static bool isCleanupBlockEmpty(iterator_range<BasicBlock::iterator> R) {
5392	for (Instruction &I : R) {
5393	auto *II = dyn_cast<IntrinsicInst>(Val: &I);
5394	if (!II)
5395	return false;
5396
5397	Intrinsic::ID IntrinsicID = II->getIntrinsicID();
5398	switch (IntrinsicID) {
5399	case Intrinsic::dbg_declare:
5400	case Intrinsic::dbg_value:
5401	case Intrinsic::dbg_label:
5402	case Intrinsic::lifetime_end:
5403	break;
5404	default:
5405	return false;
5406	}
5407	}
5408	return true;
5409	}
5410
5411	// Simplify resume that is shared by several landing pads (phi of landing pad).
5412	bool SimplifyCFGOpt::simplifyCommonResume(ResumeInst *RI) {
5413	BasicBlock *BB = RI->getParent();
5414
5415	// Check that there are no other instructions except for debug and lifetime
5416	// intrinsics between the phi's and resume instruction.
5417	if (!isCleanupBlockEmpty(R: make_range(x: RI->getParent()->getFirstNonPHIIt(),
5418	y: BB->getTerminator()->getIterator())))
5419	return false;
5420
5421	SmallSetVector<BasicBlock *, `4`> TrivialUnwindBlocks;
5422	auto *PhiLPInst = cast<PHINode>(Val: RI->getValue());
5423
5424	// Check incoming blocks to see if any of them are trivial.
5425	for (unsigned Idx = `0`, End = PhiLPInst->getNumIncomingValues(); Idx != End;
5426	Idx++) {
5427	auto *IncomingBB = PhiLPInst->getIncomingBlock(i: Idx);
5428	auto *IncomingValue = PhiLPInst->getIncomingValue(i: Idx);
5429
5430	// If the block has other successors, we can not delete it because
5431	// it has other dependents.
5432	if (IncomingBB->getUniqueSuccessor() != BB)
5433	continue;
5434
5435	auto *LandingPad = dyn_cast<LandingPadInst>(Val: IncomingBB->getFirstNonPHIIt());
5436	// Not the landing pad that caused the control to branch here.
5437	if (IncomingValue != LandingPad)
5438	continue;
5439
5440	if (isCleanupBlockEmpty(
5441	R: make_range(x: LandingPad->getNextNode(), y: IncomingBB->getTerminator())))
5442	TrivialUnwindBlocks.insert(X: IncomingBB);
5443	}
5444
5445	// If no trivial unwind blocks, don't do any simplifications.
5446	if (TrivialUnwindBlocks.empty())
5447	return false;
5448
5449	// Turn all invokes that unwind here into calls.
5450	for (auto *TrivialBB : TrivialUnwindBlocks) {
5451	// Blocks that will be simplified should be removed from the phi node.
5452	// Note there could be multiple edges to the resume block, and we need
5453	// to remove them all.
5454	while (PhiLPInst->getBasicBlockIndex(BB: TrivialBB) != -`1`)
5455	BB->removePredecessor(Pred: TrivialBB, KeepOneInputPHIs: true);
5456
5457	for (BasicBlock *Pred :
5458	llvm::make_early_inc_range(Range: predecessors(BB: TrivialBB))) {
5459	removeUnwindEdge(BB: Pred, DTU);
5460	++NumInvokes;
5461	}
5462
5463	// In each SimplifyCFG run, only the current processed block can be erased.
5464	// Otherwise, it will break the iteration of SimplifyCFG pass. So instead
5465	// of erasing TrivialBB, we only remove the branch to the common resume
5466	// block so that we can later erase the resume block since it has no
5467	// predecessors.
5468	TrivialBB->getTerminator()->eraseFromParent();
5469	new UnreachableInst (RI->getContext(), TrivialBB);
5470	if (DTU)
5471	DTU->applyUpdates(Updates: {{DominatorTree::Delete, TrivialBB, BB}});
5472	}
5473
5474	// Delete the resume block if all its predecessors have been removed.
5475	if (pred_empty(BB))
5476	DeleteDeadBlock(BB, DTU);
5477
5478	return !TrivialUnwindBlocks.empty();
5479	}
5480
5481	// Simplify resume that is only used by a single (non-phi) landing pad.
5482	bool SimplifyCFGOpt::simplifySingleResume(ResumeInst *RI) {
5483	BasicBlock *BB = RI->getParent();
5484	auto *LPInst = cast<LandingPadInst>(Val: BB->getFirstNonPHIIt());
5485	assert(RI->getValue() == LPInst &&
5486	"Resume must unwind the exception that caused control to here");
5487
5488	// Check that there are no other instructions except for debug intrinsics.
5489	if (!isCleanupBlockEmpty(
5490	R: make_range<Instruction *>(x: LPInst->getNextNode(), y: RI)))
5491	return false;
5492
5493	// Turn all invokes that unwind here into calls and delete the basic block.
5494	for (BasicBlock *Pred : llvm::make_early_inc_range(Range: predecessors(BB))) {
5495	removeUnwindEdge(BB: Pred, DTU);
5496	++NumInvokes;
5497	}
5498
5499	// The landingpad is now unreachable. Zap it.
5500	DeleteDeadBlock(BB, DTU);
5501	return true;
5502	}
5503
5504	static bool removeEmptyCleanup(CleanupReturnInst RI, DomTreeUpdater DTU) {
5505	// If this is a trivial cleanup pad that executes no instructions, it can be
5506	// eliminated. If the cleanup pad continues to the caller, any predecessor
5507	// that is an EH pad will be updated to continue to the caller and any
5508	// predecessor that terminates with an invoke instruction will have its invoke
5509	// instruction converted to a call instruction. If the cleanup pad being
5510	// simplified does not continue to the caller, each predecessor will be
5511	// updated to continue to the unwind destination of the cleanup pad being
5512	// simplified.
5513	BasicBlock *BB = RI->getParent();
5514	CleanupPadInst *CPInst = RI->getCleanupPad();
5515	if (CPInst->getParent() != BB)
5516	// This isn't an empty cleanup.
5517	return false;
5518
5519	// We cannot kill the pad if it has multiple uses. This typically arises
5520	// from unreachable basic blocks.
5521	if (!CPInst->hasOneUse())
5522	return false;
5523
5524	// Check that there are no other instructions except for benign intrinsics.
5525	if (!isCleanupBlockEmpty(
5526	R: make_range<Instruction *>(x: CPInst->getNextNode(), y: RI)))
5527	return false;
5528
5529	// If the cleanup return we are simplifying unwinds to the caller, this will
5530	// set UnwindDest to nullptr.
5531	BasicBlock *UnwindDest = RI->getUnwindDest();
5532
5533	// We're about to remove BB from the control flow. Before we do, sink any
5534	// PHINodes into the unwind destination. Doing this before changing the
5535	// control flow avoids some potentially slow checks, since we can currently
5536	// be certain that UnwindDest and BB have no common predecessors (since they
5537	// are both EH pads).
5538	if (UnwindDest) {
5539	// First, go through the PHI nodes in UnwindDest and update any nodes that
5540	// reference the block we are removing
5541	for (PHINode &DestPN : UnwindDest->phis()) {
5542	int Idx = DestPN.getBasicBlockIndex(BB);
5543	// Since BB unwinds to UnwindDest, it has to be in the PHI node.
5544	assert(Idx != -`1`);
5545	// This PHI node has an incoming value that corresponds to a control
5546	// path through the cleanup pad we are removing. If the incoming
5547	// value is in the cleanup pad, it must be a PHINode (because we
5548	// verified above that the block is otherwise empty). Otherwise, the
5549	// value is either a constant or a value that dominates the cleanup
5550	// pad being removed.
5551	//
5552	// Because BB and UnwindDest are both EH pads, all of their
5553	// predecessors must unwind to these blocks, and since no instruction
5554	// can have multiple unwind destinations, there will be no overlap in
5555	// incoming blocks between SrcPN and DestPN.
5556	Value *SrcVal = DestPN.getIncomingValue(i: Idx);
5557	PHINode *SrcPN = dyn_cast<PHINode>(Val: SrcVal);
5558
5559	bool NeedPHITranslation = SrcPN && SrcPN->getParent() == BB;
5560	for (auto *Pred : predecessors(BB)) {
5561	Value *Incoming =
5562	NeedPHITranslation ? SrcPN->getIncomingValueForBlock(BB: Pred) : SrcVal;
5563	DestPN.addIncoming(V: Incoming, BB: Pred);
5564	}
5565	}
5566
5567	// Sink any remaining PHI nodes directly into UnwindDest.
5568	BasicBlock::iterator InsertPt = UnwindDest->getFirstNonPHIIt();
5569	for (PHINode &PN : make_early_inc_range(Range: BB->phis())) {
5570	if (PN.use_empty() \|\| !PN.isUsedOutsideOfBlock(BB))
5571	// If the PHI node has no uses or all of its uses are in this basic
5572	// block (meaning they are debug or lifetime intrinsics), just leave
5573	// it. It will be erased when we erase BB below.
5574	continue;
5575
5576	// Otherwise, sink this PHI node into UnwindDest.
5577	// Any predecessors to UnwindDest which are not already represented
5578	// must be back edges which inherit the value from the path through
5579	// BB. In this case, the PHI value must reference itself.
5580	for (auto *pred : predecessors(BB: UnwindDest))
5581	if (pred != BB)
5582	PN.addIncoming(V: &PN, BB: pred);
5583	PN.moveBefore(InsertPos: InsertPt);
5584	// Also, add a dummy incoming value for the original BB itself,
5585	// so that the PHI is well-formed until we drop said predecessor.
5586	PN.addIncoming(V: PoisonValue::get(T: PN.getType()), BB);
5587	}
5588	}
5589
5590	std::vector<DominatorTree::UpdateType> Updates;
5591
5592	// We use make_early_inc_range here because we will remove all predecessors.
5593	for (BasicBlock *PredBB : llvm::make_early_inc_range(Range: predecessors(BB))) {
5594	if (UnwindDest == nullptr) {
5595	if (DTU) {
5596	DTU->applyUpdates(Updates);
5597	Updates.clear();
5598	}
5599	removeUnwindEdge(BB: PredBB, DTU);
5600	++NumInvokes;
5601	} else {
5602	BB->removePredecessor(Pred: PredBB);
5603	Instruction *TI = PredBB->getTerminator();
5604	TI->replaceUsesOfWith(From: BB, To: UnwindDest);
5605	if (DTU) {
5606	Updates.push_back(x: {DominatorTree::Insert, PredBB, UnwindDest});
5607	Updates.push_back(x: {DominatorTree::Delete, PredBB, BB});
5608	}
5609	}
5610	}
5611
5612	if (DTU)
5613	DTU->applyUpdates(Updates);
5614
5615	DeleteDeadBlock(BB, DTU);
5616
5617	return true;
5618	}
5619
5620	// Try to merge two cleanuppads together.
5621	static bool mergeCleanupPad(CleanupReturnInst *RI) {
5622	// Skip any cleanuprets which unwind to caller, there is nothing to merge
5623	// with.
5624	BasicBlock *UnwindDest = RI->getUnwindDest();
5625	if (!UnwindDest)
5626	return false;
5627
5628	// This cleanupret isn't the only predecessor of this cleanuppad, it wouldn't
5629	// be safe to merge without code duplication.
5630	if (UnwindDest->getSinglePredecessor() != RI->getParent())
5631	return false;
5632
5633	// Verify that our cleanuppad's unwind destination is another cleanuppad.
5634	auto *SuccessorCleanupPad = dyn_cast<CleanupPadInst>(Val: &UnwindDest->front());
5635	if (!SuccessorCleanupPad)
5636	return false;
5637
5638	CleanupPadInst *PredecessorCleanupPad = RI->getCleanupPad();
5639	// Replace any uses of the successor cleanupad with the predecessor pad
5640	// The only cleanuppad uses should be this cleanupret, it's cleanupret and
5641	// funclet bundle operands.
5642	SuccessorCleanupPad->replaceAllUsesWith(V: PredecessorCleanupPad);
5643	// Remove the old cleanuppad.
5644	SuccessorCleanupPad->eraseFromParent();
5645	// Now, we simply replace the cleanupret with a branch to the unwind
5646	// destination.
5647	UncondBrInst::Create(IfTrue: UnwindDest, InsertBefore: RI->getParent());
5648	RI->eraseFromParent();
5649
5650	return true;
5651	}
5652
5653	bool SimplifyCFGOpt::simplifyCleanupReturn(CleanupReturnInst *RI) {
5654	// It is possible to transiantly have an undef cleanuppad operand because we
5655	// have deleted some, but not all, dead blocks.
5656	// Eventually, this block will be deleted.
5657	if (isa<UndefValue>(Val: RI->getOperand(i_nocapture: `0`)))
5658	return false;
5659
5660	if (mergeCleanupPad(RI))
5661	return true;
5662
5663	if (removeEmptyCleanup(RI, DTU))
5664	return true;
5665
5666	return false;
5667	}
5668
5669	// WARNING: keep in sync with InstCombinerImpl::visitUnreachableInst()!
5670	bool SimplifyCFGOpt::simplifyUnreachable(UnreachableInst *UI) {
5671	BasicBlock *BB = UI->getParent();
5672
5673	bool Changed = false;
5674
5675	// Ensure that any debug-info records that used to occur after the Unreachable
5676	// are moved to in front of it -- otherwise they'll "dangle" at the end of
5677	// the block.
5678	BB->flushTerminatorDbgRecords();
5679
5680	// Debug-info records on the unreachable inst itself should be deleted, as
5681	// below we delete everything past the final executable instruction.
5682	UI->dropDbgRecords();
5683
5684	// If there are any instructions immediately before the unreachable that can
5685	// be removed, do so.
5686	while (UI->getIterator() != BB->begin()) {
5687	BasicBlock::iterator BBI = UI->getIterator();
5688	--BBI;
5689
5690	if (!isGuaranteedToTransferExecutionToSuccessor(I: &*BBI))
5691	break; // Can not drop any more instructions. We're done here.
5692	// Otherwise, this instruction can be freely erased,
5693	// even if it is not side-effect free.
5694
5695	// Note that deleting EH's here is in fact okay, although it involves a bit
5696	// of subtle reasoning. If this inst is an EH, all the predecessors of this
5697	// block will be the unwind edges of Invoke/CatchSwitch/CleanupReturn,
5698	// and we can therefore guarantee this block will be erased.
5699
5700	// If we're deleting this, we're deleting any subsequent debug info, so
5701	// delete DbgRecords.
5702	BBI ->dropDbgRecords();
5703
5704	// Delete this instruction (any uses are guaranteed to be dead)
5705	BBI ->replaceAllUsesWith(V: PoisonValue::get(T: BBI ->getType()));
5706	BBI ->eraseFromParent();
5707	Changed = true;
5708	}
5709
5710	// If the unreachable instruction is the first in the block, take a gander
5711	// at all of the predecessors of this instruction, and simplify them.
5712	if (&BB->front() != UI)
5713	return Changed;
5714
5715	std::vector<DominatorTree::UpdateType> Updates;
5716
5717	SmallSetVector<BasicBlock *, `8`> Preds(pred_begin(BB), pred_end(BB));
5718	for (BasicBlock *Predecessor : Preds) {
5719	Instruction *TI = Predecessor->getTerminator();
5720	IRBuilder<> Builder(TI);
5721	if (isa<UncondBrInst>(Val: TI)) {
5722	new UnreachableInst (TI->getContext(), TI->getIterator());
5723	TI->eraseFromParent();
5724	Changed = true;
5725	if (DTU)
5726	Updates.push_back(x: {DominatorTree::Delete, Predecessor, BB});
5727	} else if (auto *BI = dyn_cast<CondBrInst>(Val: TI)) {
5728	// We could either have a proper unconditional branch,
5729	// or a degenerate conditional branch with matching destinations.
5730	if (BI->getSuccessor(i: `0`) == BI->getSuccessor(i: `1`)) {
5731	new UnreachableInst (TI->getContext(), TI->getIterator());
5732	TI->eraseFromParent();
5733	Changed = true;
5734	} else {
5735	Value* Cond = BI->getCondition();
5736	assert(BI->getSuccessor(`0`) != BI->getSuccessor(`1`) &&
5737	"The destinations are guaranteed to be different here.");
5738	CallInst *Assumption;
5739	if (BI->getSuccessor(i: `0`) == BB) {
5740	Assumption = Builder.CreateAssumption(Cond: Builder.CreateNot(V: Cond));
5741	Builder.CreateBr(Dest: BI->getSuccessor(i: `1`));
5742	} else {
5743	assert(BI->getSuccessor(`1`) == BB && "Incorrect CFG");
5744	Assumption = Builder.CreateAssumption(Cond);
5745	Builder.CreateBr(Dest: BI->getSuccessor(i: `0`));
5746	}
5747	if (Options.AC)
5748	Options.AC->registerAssumption(CI: cast<AssumeInst>(Val: Assumption));
5749
5750	eraseTerminatorAndDCECond(TI: BI);
5751	Changed = true;
5752	}
5753	if (DTU)
5754	Updates.push_back(x: {DominatorTree::Delete, Predecessor, BB});
5755	} else if (auto *SI = dyn_cast<SwitchInst>(Val: TI)) {
5756	SwitchInstProfUpdateWrapper SU(*SI);
5757	for (auto i = SU ->case_begin(), e = SU ->case_end(); i != e;) {
5758	if (i ->getCaseSuccessor() != BB) {
5759	++i;
5760	continue;
5761	}
5762	BB->removePredecessor(Pred: SU ->getParent());
5763	i = SU.removeCase(I: i);
5764	e = SU ->case_end();
5765	Changed = true;
5766	}
5767	// Note that the default destination can't be removed!
5768	if (DTU && SI->getDefaultDest() != BB)
5769	Updates.push_back(x: {DominatorTree::Delete, Predecessor, BB});
5770	} else if (auto *II = dyn_cast<InvokeInst>(Val: TI)) {
5771	if (II->getUnwindDest() == BB) {
5772	if (DTU) {
5773	DTU->applyUpdates(Updates);
5774	Updates.clear();
5775	}
5776	auto *CI = cast<CallInst>(Val: removeUnwindEdge(BB: TI->getParent(), DTU));
5777	if (!CI->doesNotThrow())
5778	CI->setDoesNotThrow();
5779	Changed = true;
5780	}
5781	} else if (auto *CSI = dyn_cast<CatchSwitchInst>(Val: TI)) {
5782	if (CSI->getUnwindDest() == BB) {
5783	if (DTU) {
5784	DTU->applyUpdates(Updates);
5785	Updates.clear();
5786	}
5787	removeUnwindEdge(BB: TI->getParent(), DTU);
5788	Changed = true;
5789	continue;
5790	}
5791
5792	for (CatchSwitchInst::handler_iterator I = CSI->handler_begin(),
5793	E = CSI->handler_end();
5794	I != E; ++I) {
5795	if (*I == BB) {
5796	CSI->removeHandler(HI: I);
5797	--I;
5798	--E;
5799	Changed = true;
5800	}
5801	}
5802	if (DTU)
5803	Updates.push_back(x: {DominatorTree::Delete, Predecessor, BB});
5804	if (CSI->getNumHandlers() == `0`) {
5805	if (CSI->hasUnwindDest()) {
5806	// Redirect all predecessors of the block containing CatchSwitchInst
5807	// to instead branch to the CatchSwitchInst's unwind destination.
5808	if (DTU) {
5809	for (auto *PredecessorOfPredecessor : predecessors(BB: Predecessor)) {
5810	Updates.push_back(x: {DominatorTree::Insert,
5811	PredecessorOfPredecessor,
5812	CSI->getUnwindDest()});
5813	Updates.push_back(x: {DominatorTree::Delete,
5814	PredecessorOfPredecessor, Predecessor});
5815	}
5816	}
5817	Predecessor->replaceAllUsesWith(V: CSI->getUnwindDest());
5818	} else {
5819	// Rewrite all preds to unwind to caller (or from invoke to call).
5820	if (DTU) {
5821	DTU->applyUpdates(Updates);
5822	Updates.clear();
5823	}
5824	SmallVector<BasicBlock *, `8`> EHPreds(predecessors(BB: Predecessor));
5825	for (BasicBlock *EHPred : EHPreds)
5826	removeUnwindEdge(BB: EHPred, DTU);
5827	}
5828	// The catchswitch is no longer reachable.
5829	new UnreachableInst (CSI->getContext(), CSI->getIterator());
5830	CSI->eraseFromParent();
5831	Changed = true;
5832	}
5833	} else if (auto *CRI = dyn_cast<CleanupReturnInst>(Val: TI)) {
5834	(void)CRI;
5835	assert(CRI->hasUnwindDest() && CRI->getUnwindDest() == BB &&
5836	"Expected to always have an unwind to BB.");
5837	if (DTU)
5838	Updates.push_back(x: {DominatorTree::Delete, Predecessor, BB});
5839	new UnreachableInst (TI->getContext(), TI->getIterator());
5840	TI->eraseFromParent();
5841	Changed = true;
5842	}
5843	}
5844
5845	if (DTU)
5846	DTU->applyUpdates(Updates);
5847
5848	// If this block is now dead, remove it.
5849	if (pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) {
5850	DeleteDeadBlock(BB, DTU);
5851	return true;
5852	}
5853
5854	return Changed;
5855	}
5856
5857	struct ContiguousCasesResult {
5858	ConstantInt *Min;
5859	ConstantInt *Max;
5860	BasicBlock *Dest;
5861	BasicBlock *OtherDest;
5862	SmallVectorImpl<ConstantInt > Cases;
5863	SmallVectorImpl<ConstantInt > OtherCases;
5864	};
5865
5866	static std::optional<ContiguousCasesResult>
5867	findContiguousCases(Value Condition, SmallVectorImpl<ConstantInt > &Cases,
5868	SmallVectorImpl<ConstantInt *> &OtherCases,
5869	BasicBlock Dest, BasicBlock OtherDest) {
5870	assert(Cases.size() >= `1`);
5871
5872	array_pod_sort(Start: Cases.begin(), End: Cases.end(), Compare: constantIntSortPredicate);
5873	const APInt &Min = Cases.back()->getValue();
5874	const APInt &Max = Cases.front()->getValue();
5875	APInt Offset = Max - Min;
5876	size_t ContiguousOffset = Cases.size() - `1`;
5877	if (Offset == ContiguousOffset) {
5878	return ContiguousCasesResult{
5879	/Min=/Cases.back(),
5880	/Max=/Cases.front(),
5881	/Dest=/Dest,
5882	/OtherDest=/OtherDest,
5883	/Cases=/&Cases,
5884	/OtherCases=/&OtherCases,
5885	};
5886	}
5887	ConstantRange CR = computeConstantRange(V: Condition, /ForSigned=/false);
5888	// If this is a wrapping contiguous range, that is, [Min, OtherMin] +
5889	// [OtherMax, Max] (also [OtherMax, OtherMin]), [OtherMin+1, OtherMax-1] is a
5890	// contiguous range for the other destination. N.B. If CR is not a full range,
5891	// Max+1 is not equal to Min. It's not continuous in arithmetic.
5892	if (Max == CR.getUnsignedMax() && Min == CR.getUnsignedMin()) {
5893	assert(Cases.size() >= `2`);
5894	auto *It =
5895	std::adjacent_find(first: Cases.begin(), last: Cases.end(), binary_pred: [](auto L, auto R) {
5896	return L->getValue() != R->getValue() + `1`;
5897	});
5898	if (It == Cases.end())
5899	return std::nullopt;
5900	auto [OtherMax, OtherMin] = std::make_pair(x&: It, y&: std::next(x: It));
5901	if ((Max - OtherMax->getValue()) + (OtherMin->getValue() - Min) ==
5902	Cases.size() - `2`) {
5903	return ContiguousCasesResult{
5904	/Min=/cast<ConstantInt>(
5905	Val: ConstantInt::get(Ty: OtherMin->getType(), V: OtherMin->getValue() + `1`)),
5906	/Max=/
5907	cast<ConstantInt>(
5908	Val: ConstantInt::get(Ty: OtherMax->getType(), V: OtherMax->getValue() - `1`)),
5909	/Dest=/OtherDest,
5910	/OtherDest=/Dest,
5911	/Cases=/&OtherCases,
5912	/OtherCases=/&Cases,
5913	};
5914	}
5915	}
5916	return std::nullopt;
5917	}
5918
5919	static void createUnreachableSwitchDefault(SwitchInst *Switch,
5920	DomTreeUpdater *DTU,
5921	bool RemoveOrigDefaultBlock = true) {
5922	LLVM_DEBUG(dbgs() << "SimplifyCFG: switch default is dead.\n");
5923	auto *BB = Switch->getParent();
5924	auto *OrigDefaultBlock = Switch->getDefaultDest();
5925	if (RemoveOrigDefaultBlock)
5926	OrigDefaultBlock->removePredecessor(Pred: BB);
5927	BasicBlock *NewDefaultBlock = BasicBlock::Create(
5928	Context&: BB->getContext(), Name: BB->getName() + ".unreachabledefault", Parent: BB->getParent(),
5929	InsertBefore: OrigDefaultBlock);
5930	auto UI = new* UnreachableInst (Switch->getContext(), NewDefaultBlock);
5931	UI->setDebugLoc(DebugLoc::getTemporary());
5932	Switch->setDefaultDest(&*NewDefaultBlock);
5933	if (DTU) {
5934	SmallVector<DominatorTree::UpdateType, `2`> Updates;
5935	Updates.push_back(Elt: {DominatorTree::Insert, BB, &*NewDefaultBlock});
5936	if (RemoveOrigDefaultBlock &&
5937	!is_contained(Range: successors(BB), Element: OrigDefaultBlock))
5938	Updates.push_back(Elt: {DominatorTree::Delete, BB, &*OrigDefaultBlock});
5939	DTU->applyUpdates(Updates);
5940	}
5941	}
5942
5943	/// Turn a switch into an integer range comparison and branch.
5944	/// Switches with more than 2 destinations are ignored.
5945	/// Switches with 1 destination are also ignored.
5946	bool SimplifyCFGOpt::turnSwitchRangeIntoICmp(SwitchInst *SI,
5947	IRBuilder<> &Builder) {
5948	assert(SI->getNumCases() > `1` && "Degenerate switch?");
5949
5950	bool HasDefault = !SI->defaultDestUnreachable();
5951
5952	auto *BB = SI->getParent();
5953	// Partition the cases into two sets with different destinations.
5954	BasicBlock DestA = HasDefault ? SI->getDefaultDest() : nullptr*;
5955	BasicBlock DestB = nullptr*;
5956	SmallVector<ConstantInt *, `16`> CasesA;
5957	SmallVector<ConstantInt *, `16`> CasesB;
5958
5959	for (auto Case : SI->cases()) {
5960	BasicBlock *Dest = Case.getCaseSuccessor();
5961	if (!DestA)
5962	DestA = Dest;
5963	if (Dest == DestA) {
5964	CasesA.push_back(Elt: Case.getCaseValue());
5965	continue;
5966	}
5967	if (!DestB)
5968	DestB = Dest;
5969	if (Dest == DestB) {
5970	CasesB.push_back(Elt: Case.getCaseValue());
5971	continue;
5972	}
5973	return false; // More than two destinations.
5974	}
5975	if (!DestB)
5976	return false; // All destinations are the same and the default is unreachable
5977
5978	assert(DestA && DestB &&
5979	"Single-destination switch should have been folded.");
5980	assert(DestA != DestB);
5981	assert(DestB != SI->getDefaultDest());
5982	assert(!CasesB.empty() && "There must be non-default cases.");
5983	assert(!CasesA.empty() \|\| HasDefault);
5984
5985	// Figure out if one of the sets of cases form a contiguous range.
5986	std::optional<ContiguousCasesResult> ContiguousCases;
5987
5988	// Only one icmp is needed when there is only one case.
5989	if (!HasDefault && CasesA.size() == `1`)
5990	ContiguousCases = ContiguousCasesResult{
5991	/Min=/CasesA [`0`],
5992	/Max=/CasesA [`0`],
5993	/Dest=/DestA,
5994	/OtherDest=/DestB,
5995	/Cases=/&CasesA,
5996	/OtherCases=/&CasesB,
5997	};
5998	else if (CasesB.size() == `1`)
5999	ContiguousCases = ContiguousCasesResult{
6000	/Min=/CasesB [`0`],
6001	/Max=/CasesB [`0`],
6002	/Dest=/DestB,
6003	/OtherDest=/DestA,
6004	/Cases=/&CasesB,
6005	/OtherCases=/&CasesA,
6006	};
6007	// Correctness: Cases to the default destination cannot be contiguous cases.
6008	else if (!HasDefault)
6009	ContiguousCases =
6010	findContiguousCases(Condition: SI->getCondition(), Cases&: CasesA, OtherCases&: CasesB, Dest: DestA, OtherDest: DestB);
6011
6012	if (!ContiguousCases)
6013	ContiguousCases =
6014	findContiguousCases(Condition: SI->getCondition(), Cases&: CasesB, OtherCases&: CasesA, Dest: DestB, OtherDest: DestA);
6015
6016	if (!ContiguousCases)
6017	return false;
6018
6019	auto [Min, Max, Dest, OtherDest, Cases, OtherCases] = *ContiguousCases;
6020
6021	// Start building the compare and branch.
6022
6023	Constant *Offset = ConstantExpr::getNeg(C: Min);
6024	Constant *NumCases = ConstantInt::get(Ty: Offset->getType(),
6025	V: Max->getValue() - Min->getValue() + `1`);
6026	Instruction *NewBI;
6027	if (NumCases->isOneValue()) {
6028	assert(Max->getValue() == Min->getValue());
6029	Value *Cmp = Builder.CreateICmpEQ(LHS: SI->getCondition(), RHS: Min);
6030	NewBI = Builder.CreateCondBr(Cond: Cmp, True: Dest, False: OtherDest);
6031	}
6032	// If NumCases overflowed, then all possible values jump to the successor.
6033	else if (NumCases->isNullValue() && !Cases->empty()) {
6034	NewBI = Builder.CreateBr(Dest);
6035	} else {
6036	Value *Sub = SI->getCondition();
6037	if (!Offset->isNullValue())
6038	Sub = Builder.CreateAdd(LHS: Sub, RHS: Offset, Name: Sub->getName() + ".off");
6039	Value *Cmp = Builder.CreateICmpULT(LHS: Sub, RHS: NumCases, Name: "switch");
6040	NewBI = Builder.CreateCondBr(Cond: Cmp, True: Dest, False: OtherDest);
6041	}
6042
6043	// Update weight for the newly-created conditional branch.
6044	if (hasBranchWeightMD(I: *SI) && isa<CondBrInst>(Val: NewBI)) {
6045	SmallVector<uint64_t, `8`> Weights;
6046	getBranchWeights(TI: SI, Weights);
6047	if (Weights.size() == `1` + SI->getNumCases()) {
6048	uint64_t TrueWeight = `0`;
6049	uint64_t FalseWeight = `0`;
6050	for (size_t I = `0`, E = Weights.size(); I != E; ++I) {
6051	if (SI->getSuccessor(idx: I) == Dest)
6052	TrueWeight += Weights [I];
6053	else
6054	FalseWeight += Weights [I];
6055	}
6056	while (TrueWeight > UINT32_MAX \|\| FalseWeight > UINT32_MAX) {
6057	TrueWeight /= `2`;
6058	FalseWeight /= `2`;
6059	}
6060	setFittedBranchWeights(I&: *NewBI, Weights: {TrueWeight, FalseWeight},
6061	/IsExpected=/false, /ElideAllZero=/true);
6062	}
6063	}
6064
6065	// Prune obsolete incoming values off the successors' PHI nodes.
6066	for (auto &PHI : make_early_inc_range(Range: Dest->phis())) {
6067	unsigned PreviousEdges = Cases->size();
6068	if (Dest == SI->getDefaultDest())
6069	++PreviousEdges;
6070	for (unsigned I = `0`, E = PreviousEdges - `1`; I != E; ++I)
6071	PHI.removeIncomingValue(BB: SI->getParent());
6072	}
6073	for (auto &PHI : make_early_inc_range(Range: OtherDest->phis())) {
6074	unsigned PreviousEdges = OtherCases->size();
6075	if (OtherDest == SI->getDefaultDest())
6076	++PreviousEdges;
6077	unsigned E = PreviousEdges - `1`;
6078	// Remove all incoming values from OtherDest if OtherDest is unreachable.
6079	if (isa<UncondBrInst>(Val: NewBI))
6080	++E;
6081	for (unsigned I = `0`; I != E; ++I)
6082	PHI.removeIncomingValue(BB: SI->getParent());
6083	}
6084
6085	// Clean up the default block - it may have phis or other instructions before
6086	// the unreachable terminator.
6087	if (!HasDefault)
6088	createUnreachableSwitchDefault(Switch: SI, DTU);
6089
6090	auto *UnreachableDefault = SI->getDefaultDest();
6091
6092	// Drop the switch.
6093	SI->eraseFromParent();
6094
6095	if (!HasDefault && DTU)
6096	DTU->applyUpdates(Updates: {{DominatorTree::Delete, BB, UnreachableDefault}});
6097
6098	return true;
6099	}
6100
6101	/// Compute masked bits for the condition of a switch
6102	/// and use it to remove dead cases.
6103	static bool eliminateDeadSwitchCases(SwitchInst SI, DomTreeUpdater DTU,
6104	AssumptionCache *AC,
6105	const DataLayout &DL) {
6106	Value *Cond = SI->getCondition();
6107	KnownBits Known = computeKnownBits(V: Cond, DL, AC, CxtI: SI);
6108	SmallPtrSet<const Constant *, `4`> KnownValues;
6109	bool IsKnownValuesValid = collectPossibleValues(V: Cond, Constants&: KnownValues, MaxCount: `4`);
6110
6111	// We can also eliminate cases by determining that their values are outside of
6112	// the limited range of the condition based on how many significant (non-sign)
6113	// bits are in the condition value.
6114	unsigned MaxSignificantBitsInCond =
6115	ComputeMaxSignificantBits(Op: Cond, DL, AC, CxtI: SI);
6116
6117	// Gather dead cases.
6118	SmallVector<ConstantInt *, `8`> DeadCases;
6119	SmallDenseMap<BasicBlock , int*, `8`> NumPerSuccessorCases;
6120	SmallVector<BasicBlock *, `8`> UniqueSuccessors;
6121	for (const auto &Case : SI->cases()) {
6122	auto *Successor = Case.getCaseSuccessor();
6123	if (DTU) {
6124	auto [It, Inserted] = NumPerSuccessorCases.try_emplace(Key: Successor);
6125	if (Inserted)
6126	UniqueSuccessors.push_back(Elt: Successor);
6127	++It ->second;
6128	}
6129	ConstantInt *CaseC = Case.getCaseValue();
6130	const APInt &CaseVal = CaseC->getValue();
6131	if (Known.Zero.intersects(RHS: CaseVal) \|\| !Known.One.isSubsetOf(RHS: CaseVal) \|\|
6132	(CaseVal.getSignificantBits() > MaxSignificantBitsInCond) \|\|
6133	(IsKnownValuesValid && !KnownValues.contains(Ptr: CaseC))) {
6134	DeadCases.push_back(Elt: CaseC);
6135	if (DTU)
6136	--NumPerSuccessorCases [Successor];
6137	LLVM_DEBUG(dbgs() << "SimplifyCFG: switch case " << CaseVal
6138	<< " is dead.\n");
6139	} else if (IsKnownValuesValid)
6140	KnownValues.erase(Ptr: CaseC);
6141	}
6142
6143	// If we can prove that the cases must cover all possible values, the
6144	// default destination becomes dead and we can remove it. If we know some
6145	// of the bits in the value, we can use that to more precisely compute the
6146	// number of possible unique case values.
6147	bool HasDefault = !SI->defaultDestUnreachable();
6148	const unsigned NumUnknownBits =
6149	Known.getBitWidth() - (Known.Zero \| Known.One).popcount();
6150	assert(NumUnknownBits <= Known.getBitWidth());
6151	if (HasDefault && DeadCases.empty()) {
6152	if (IsKnownValuesValid && all_of(Range&: KnownValues, P: IsaPred<UndefValue>)) {
6153	createUnreachableSwitchDefault(Switch: SI, DTU);
6154	return true;
6155	}
6156
6157	if (NumUnknownBits < `64` / avoid overflow /) {
6158	uint64_t AllNumCases = `1ULL` << NumUnknownBits;
6159	if (SI->getNumCases() == AllNumCases) {
6160	createUnreachableSwitchDefault(Switch: SI, DTU);
6161	return true;
6162	}
6163	// When only one case value is missing, replace default with that case.
6164	// Eliminating the default branch will provide more opportunities for
6165	// optimization, such as lookup tables.
6166	if (SI->getNumCases() == AllNumCases - `1`) {
6167	assert(NumUnknownBits > `1` && "Should be canonicalized to a branch");
6168	IntegerType *CondTy = cast<IntegerType>(Val: Cond->getType());
6169	if (CondTy->getIntegerBitWidth() > `64` \|\|
6170	!DL.fitsInLegalInteger(Width: CondTy->getIntegerBitWidth()))
6171	return false;
6172
6173	uint64_t MissingCaseVal = `0`;
6174	for (const auto &Case : SI->cases())
6175	MissingCaseVal ^= Case.getCaseValue()->getValue().getLimitedValue();
6176	auto *MissingCase = cast<ConstantInt>(
6177	Val: ConstantInt::get(Ty: Cond->getType(), V: MissingCaseVal));
6178	SwitchInstProfUpdateWrapper SIW(*SI);
6179	SIW.addCase(OnVal: MissingCase, Dest: SI->getDefaultDest(),
6180	W: SIW.getSuccessorWeight(idx: `0`));
6181	createUnreachableSwitchDefault(Switch: SI, DTU,
6182	/RemoveOrigDefaultBlock/ false);
6183	SIW.setSuccessorWeight(idx: `0`, W: `0`);
6184	return true;
6185	}
6186	}
6187	}
6188
6189	if (DeadCases.empty())
6190	return false;
6191
6192	SwitchInstProfUpdateWrapper SIW(*SI);
6193	for (ConstantInt *DeadCase : DeadCases) {
6194	SwitchInst::CaseIt CaseI = SI->findCaseValue(C: DeadCase);
6195	assert(CaseI != SI->case_default() &&
6196	"Case was not found. Probably mistake in DeadCases forming.");
6197	// Prune unused values from PHI nodes.
6198	CaseI ->getCaseSuccessor()->removePredecessor(Pred: SI->getParent());
6199	SIW.removeCase(I: CaseI);
6200	}
6201
6202	if (DTU) {
6203	std::vector<DominatorTree::UpdateType> Updates;
6204	for (auto *Successor : UniqueSuccessors)
6205	if (NumPerSuccessorCases [Successor] == `0`)
6206	Updates.push_back(x: {DominatorTree::Delete, SI->getParent(), Successor});
6207	DTU->applyUpdates(Updates);
6208	}
6209
6210	return true;
6211	}
6212
6213	/// If BB would be eligible for simplification by
6214	/// TryToSimplifyUncondBranchFromEmptyBlock (i.e. it is empty and terminated
6215	/// by an unconditional branch), look at the phi node for BB in the successor
6216	/// block and see if the incoming value is equal to CaseValue. If so, return
6217	/// the phi node, and set PhiIndex to BB's index in the phi node.
6218	static PHINode findPHIForConditionForwarding(ConstantInt CaseValue,
6219	BasicBlock BB, int* *PhiIndex) {
6220	if (&*BB->getFirstNonPHIIt() != BB->getTerminator())
6221	return nullptr; // BB must be empty to be a candidate for simplification.
6222	if (!BB->getSinglePredecessor())
6223	return nullptr; // BB must be dominated by the switch.
6224
6225	UncondBrInst *Branch = dyn_cast<UncondBrInst>(Val: BB->getTerminator());
6226	if (!Branch)
6227	return nullptr; // Terminator must be unconditional branch.
6228
6229	BasicBlock *Succ = Branch->getSuccessor();
6230
6231	for (PHINode &PHI : Succ->phis()) {
6232	int Idx = PHI.getBasicBlockIndex(BB);
6233	assert(Idx >= `0` && "PHI has no entry for predecessor?");
6234
6235	Value *InValue = PHI.getIncomingValue(i: Idx);
6236	if (InValue != CaseValue)
6237	continue;
6238
6239	*PhiIndex = Idx;
6240	return &PHI;
6241	}
6242
6243	return nullptr;
6244	}
6245
6246	/// Try to forward the condition of a switch instruction to a phi node
6247	/// dominated by the switch, if that would mean that some of the destination
6248	/// blocks of the switch can be folded away. Return true if a change is made.
6249	static bool forwardSwitchConditionToPHI(SwitchInst *SI) {
6250	using ForwardingNodesMap = DenseMap<PHINode , SmallVector<int*, `4`>>;
6251
6252	ForwardingNodesMap ForwardingNodes;
6253	BasicBlock *SwitchBlock = SI->getParent();
6254	bool Changed = false;
6255	for (const auto &Case : SI->cases()) {
6256	ConstantInt *CaseValue = Case.getCaseValue();
6257	BasicBlock *CaseDest = Case.getCaseSuccessor();
6258
6259	// Replace phi operands in successor blocks that are using the constant case
6260	// value rather than the switch condition variable:
6261	// switchbb:
6262	// switch i32 %x, label %default [
6263	// i32 17, label %succ
6264	// ...
6265	// succ:
6266	// %r = phi i32 ... [ 17, %switchbb ] ...
6267	// -->
6268	// %r = phi i32 ... [ %x, %switchbb ] ...
6269
6270	for (PHINode &Phi : CaseDest->phis()) {
6271	// This only works if there is exactly 1 incoming edge from the switch to
6272	// a phi. If there is >1, that means multiple cases of the switch map to 1
6273	// value in the phi, and that phi value is not the switch condition. Thus,
6274	// this transform would not make sense (the phi would be invalid because
6275	// a phi can't have different incoming values from the same block).
6276	int SwitchBBIdx = Phi.getBasicBlockIndex(BB: SwitchBlock);
6277	if (Phi.getIncomingValue(i: SwitchBBIdx) == CaseValue &&
6278	count(Range: Phi.blocks(), Element: SwitchBlock) == `1`) {
6279	Phi.setIncomingValue(i: SwitchBBIdx, V: SI->getCondition());
6280	Changed = true;
6281	}
6282	}
6283
6284	// Collect phi nodes that are indirectly using this switch's case constants.
6285	int PhiIdx;
6286	if (auto *Phi = findPHIForConditionForwarding(CaseValue, BB: CaseDest, PhiIndex: &PhiIdx))
6287	ForwardingNodes [Phi].push_back(Elt: PhiIdx);
6288	}
6289
6290	for (auto &ForwardingNode : ForwardingNodes) {
6291	PHINode *Phi = ForwardingNode.first;
6292	SmallVectorImpl<int> &Indexes = ForwardingNode.second;
6293	// Check if it helps to fold PHI.
6294	if (Indexes.size() < `2` && !llvm::is_contained(Range: Phi->incoming_values(), Element: SI->getCondition()))
6295	continue;
6296
6297	for (int Index : Indexes)
6298	Phi->setIncomingValue(i: Index, V: SI->getCondition());
6299	Changed = true;
6300	}
6301
6302	return Changed;
6303	}
6304
6305	/// Return true if the backend will be able to handle
6306	/// initializing an array of constants like C.
6307	static bool validLookupTableConstant(Constant C, const* TargetTransformInfo &TTI) {
6308	if (C->isThreadDependent())
6309	return false;
6310	if (C->isDLLImportDependent())
6311	return false;
6312
6313	if (!isa<ConstantDataVector, ConstantExpr, ConstantFP, ConstantInt,
6314	ConstantPointerNull, GlobalValue, UndefValue>(Val: C))
6315	return false;
6316
6317	// Globals cannot contain scalable types.
6318	if (C->getType()->isScalableTy())
6319	return false;
6320
6321	if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Val: C)) {
6322	// Pointer casts and in-bounds GEPs will not prohibit the backend from
6323	// materializing the array of constants.
6324	Constant *StrippedC = cast<Constant>(Val: CE->stripInBoundsConstantOffsets());
6325	if (StrippedC == C \|\| !validLookupTableConstant(C: StrippedC, TTI))
6326	return false;
6327	}
6328
6329	if (!TTI.shouldBuildLookupTablesForConstant(C))
6330	return false;
6331
6332	return true;
6333	}
6334
6335	/// If V is a Constant, return it. Otherwise, try to look up
6336	/// its constant value in ConstantPool, returning 0 if it's not there.
6337	static Constant *
6338	lookupConstant(Value *V,
6339	const SmallDenseMap<Value , Constant > &ConstantPool) {
6340	if (Constant *C = dyn_cast<Constant>(Val: V))
6341	return C;
6342	return ConstantPool.lookup(Val: V);
6343	}
6344
6345	/// Try to fold instruction I into a constant. This works for
6346	/// simple instructions such as binary operations where both operands are
6347	/// constant or can be replaced by constants from the ConstantPool. Returns the
6348	/// resulting constant on success, 0 otherwise.
6349	static Constant *
6350	constantFold(Instruction I, const* DataLayout &DL,
6351	const SmallDenseMap<Value , Constant > &ConstantPool) {
6352	if (SelectInst *Select = dyn_cast<SelectInst>(Val: I)) {
6353	Constant *A = lookupConstant(V: Select->getCondition(), ConstantPool);
6354	if (!A)
6355	return nullptr;
6356	if (A->isAllOnesValue())
6357	return lookupConstant(V: Select->getTrueValue(), ConstantPool);
6358	if (A->isNullValue())
6359	return lookupConstant(V: Select->getFalseValue(), ConstantPool);
6360	return nullptr;
6361	}
6362
6363	SmallVector<Constant *, `4`> COps;
6364	for (unsigned N = `0`, E = I->getNumOperands(); N != E; ++N) {
6365	if (Constant *A = lookupConstant(V: I->getOperand(i: N), ConstantPool))
6366	COps.push_back(Elt: A);
6367	else
6368	return nullptr;
6369	}
6370
6371	return ConstantFoldInstOperands(I, Ops: COps, DL);
6372	}
6373
6374	/// Try to determine the resulting constant values in phi nodes
6375	/// at the common destination basic block, CommonDest, for one of the case*
6376	/// destinations CaseDest corresponding to value CaseVal (nullptr for the
6377	/// default case), of a switch instruction SI.
6378	static bool
6379	getCaseResults(SwitchInst SI, ConstantInt CaseVal, BasicBlock *CaseDest,
6380	BasicBlock **CommonDest,
6381	SmallVectorImpl<std::pair<PHINode , Constant >> &Res,
6382	const DataLayout &DL, const TargetTransformInfo &TTI) {
6383	// The block from which we enter the common destination.
6384	BasicBlock *Pred = SI->getParent();
6385
6386	// If CaseDest is empty except for some side-effect free instructions through
6387	// which we can constant-propagate the CaseVal, continue to its successor.
6388	SmallDenseMap<Value , Constant > ConstantPool;
6389	ConstantPool.insert(KV: std::make_pair(x: SI->getCondition(), y&: CaseVal));
6390	for (Instruction &I : CaseDest->instructionsWithoutDebug(SkipPseudoOp: false)) {
6391	if (I.isTerminator()) {
6392	// If the terminator is a simple branch, continue to the next block.
6393	if (I.getNumSuccessors() != `1` \|\| I.isSpecialTerminator())
6394	return false;
6395	Pred = CaseDest;
6396	CaseDest = I.getSuccessor(Idx: `0`);
6397	} else if (Constant *C = constantFold(I: &I, DL, ConstantPool)) {
6398	// Instruction is side-effect free and constant.
6399
6400	// If the instruction has uses outside this block or a phi node slot for
6401	// the block, it is not safe to bypass the instruction since it would then
6402	// no longer dominate all its uses.
6403	for (auto &Use : I.uses()) {
6404	User *User = Use.getUser();
6405	if (Instruction *I = dyn_cast<Instruction>(Val: User))
6406	if (I->getParent() == CaseDest)
6407	continue;
6408	if (PHINode *Phi = dyn_cast<PHINode>(Val: User))
6409	if (Phi->getIncomingBlock(U: Use) == CaseDest)
6410	continue;
6411	return false;
6412	}
6413
6414	ConstantPool.insert(KV: std::make_pair(x: &I, y&: C));
6415	} else {
6416	break;
6417	}
6418	}
6419
6420	// If we did not have a CommonDest before, use the current one.
6421	if (!*CommonDest)
6422	*CommonDest = CaseDest;
6423	// If the destination isn't the common one, abort.
6424	if (CaseDest != *CommonDest)
6425	return false;
6426
6427	// Get the values for this case from phi nodes in the destination block.
6428	for (PHINode &PHI : (*CommonDest)->phis()) {
6429	int Idx = PHI.getBasicBlockIndex(BB: Pred);
6430	if (Idx == -`1`)
6431	continue;
6432
6433	Constant *ConstVal =
6434	lookupConstant(V: PHI.getIncomingValue(i: Idx), ConstantPool);
6435	if (!ConstVal)
6436	return false;
6437
6438	// Be conservative about which kinds of constants we support.
6439	if (!validLookupTableConstant(C: ConstVal, TTI))
6440	return false;
6441
6442	Res.push_back(Elt: std::make_pair(x: &PHI, y&: ConstVal));
6443	}
6444
6445	return Res.size() > `0`;
6446	}
6447
6448	// Helper function used to add CaseVal to the list of cases that generate
6449	// Result. Returns the updated number of cases that generate this result.
6450	static size_t mapCaseToResult(ConstantInt *CaseVal,
6451	SwitchCaseResultVectorTy &UniqueResults,
6452	Constant *Result) {
6453	for (auto &I : UniqueResults) {
6454	if (I.first == Result) {
6455	I.second.push_back(Elt: CaseVal);
6456	return I.second.size();
6457	}
6458	}
6459	UniqueResults.push_back(
6460	Elt: std::make_pair(x&: Result, y: SmallVector<ConstantInt *, `4`>(`1`, CaseVal)));
6461	return `1`;
6462	}
6463
6464	// Helper function that initializes a map containing
6465	// results for the PHI node of the common destination block for a switch
6466	// instruction. Returns false if multiple PHI nodes have been found or if
6467	// there is not a common destination block for the switch.
6468	static bool initializeUniqueCases(SwitchInst SI, PHINode &PHI,
6469	BasicBlock *&CommonDest,
6470	SwitchCaseResultVectorTy &UniqueResults,
6471	Constant *&DefaultResult,
6472	const DataLayout &DL,
6473	const TargetTransformInfo &TTI,
6474	uintptr_t MaxUniqueResults) {
6475	for (const auto &I : SI->cases()) {
6476	ConstantInt *CaseVal = I.getCaseValue();
6477
6478	// Resulting value at phi nodes for this case value.
6479	SwitchCaseResultsTy Results;
6480	if (!getCaseResults(SI, CaseVal, CaseDest: I.getCaseSuccessor(), CommonDest: &CommonDest, Res&: Results,
6481	DL, TTI))
6482	return false;
6483
6484	// Only one value per case is permitted.
6485	if (Results.size() > `1`)
6486	return false;
6487
6488	// Add the case->result mapping to UniqueResults.
6489	const size_t NumCasesForResult =
6490	mapCaseToResult(CaseVal, UniqueResults, Result: Results.begin()->second);
6491
6492	// Early out if there are too many cases for this result.
6493	if (NumCasesForResult > MaxSwitchCasesPerResult)
6494	return false;
6495
6496	// Early out if there are too many unique results.
6497	if (UniqueResults.size() > MaxUniqueResults)
6498	return false;
6499
6500	// Check the PHI consistency.
6501	if (!PHI)
6502	PHI = Results [`0`].first;
6503	else if (PHI != Results [`0`].first)
6504	return false;
6505	}
6506	// Find the default result value.
6507	SmallVector<std::pair<PHINode , Constant >, `1`> DefaultResults;
6508	getCaseResults(SI, CaseVal: nullptr, CaseDest: SI->getDefaultDest(), CommonDest: &CommonDest, Res&: DefaultResults,
6509	DL, TTI);
6510	// If the default value is not found abort unless the default destination
6511	// is unreachable.
6512	DefaultResult =
6513	DefaultResults.size() == `1` ? DefaultResults.begin()->second : nullptr;
6514
6515	return DefaultResult \|\| SI->defaultDestUnreachable();
6516	}
6517
6518	// Helper function that checks if it is possible to transform a switch with only
6519	// two cases (or two cases + default) that produces a result into a select.
6520	// TODO: Handle switches with more than 2 cases that map to the same result.
6521	// The branch weights correspond to the provided Condition (i.e. if Condition is
6522	// modified from the original SwitchInst, the caller must adjust the weights)
6523	static Value foldSwitchToSelect(const* SwitchCaseResultVectorTy &ResultVector,
6524	Constant DefaultResult, Value Condition,
6525	IRBuilder<> &Builder, const DataLayout &DL,
6526	ArrayRef<uint32_t> BranchWeights) {
6527	// If we are selecting between only two cases transform into a simple
6528	// select or a two-way select if default is possible.
6529	// Example:
6530	// switch (a) { %0 = icmp eq i32 %a, 10
6531	// case 10: return 42; %1 = select i1 %0, i32 42, i32 4
6532	// case 20: return 2; ----> %2 = icmp eq i32 %a, 20
6533	// default: return 4; %3 = select i1 %2, i32 2, i32 %1
6534	// }
6535
6536	const bool HasBranchWeights =
6537	!BranchWeights.empty() && !ProfcheckDisableMetadataFixes;
6538
6539	if (ResultVector.size() == `2` && ResultVector [`0`].second.size() == `1` &&
6540	ResultVector [`1`].second.size() == `1`) {
6541	ConstantInt *FirstCase = ResultVector [`0`].second [`0`];
6542	ConstantInt *SecondCase = ResultVector [`1`].second [`0`];
6543	Value *SelectValue = ResultVector [`1`].first;
6544	if (DefaultResult) {
6545	Value *ValueCompare =
6546	Builder.CreateICmpEQ(LHS: Condition, RHS: SecondCase, Name: "switch.selectcmp");
6547	SelectValue = Builder.CreateSelect(C: ValueCompare, True: ResultVector [`1`].first,
6548	False: DefaultResult, Name: "switch.select");
6549	if (auto *SI = dyn_cast<SelectInst>(Val: SelectValue);
6550	SI && HasBranchWeights) {
6551	// We start with 3 probabilities, where the numerator is the
6552	// corresponding BranchWeights[i], and the denominator is the sum over
6553	// BranchWeights. We want the probability and negative probability of
6554	// Condition == SecondCase.
6555	assert(BranchWeights.size() == `3`);
6556	setBranchWeights(
6557	I&: *SI, Weights: {BranchWeights [`2`], BranchWeights [`0`] + BranchWeights [`1`]},
6558	/IsExpected=/false, /ElideAllZero=/true);
6559	}
6560	}
6561	Value *ValueCompare =
6562	Builder.CreateICmpEQ(LHS: Condition, RHS: FirstCase, Name: "switch.selectcmp");
6563	Value *Ret = Builder.CreateSelect(C: ValueCompare, True: ResultVector [`0`].first,
6564	False: SelectValue, Name: "switch.select");
6565	if (auto *SI = dyn_cast<SelectInst>(Val: Ret); SI && HasBranchWeights) {
6566	// We may have had a DefaultResult. Base the position of the first and
6567	// second's branch weights accordingly. Also the proability that Condition
6568	// != FirstCase needs to take that into account.
6569	assert(BranchWeights.size() >= `2`);
6570	size_t FirstCasePos = (Condition != nullptr);
6571	size_t SecondCasePos = FirstCasePos + `1`;
6572	uint32_t DefaultCase = (Condition != nullptr) ? BranchWeights [`0`] : `0`;
6573	setBranchWeights(I&: *SI,
6574	Weights: {BranchWeights [FirstCasePos],
6575	DefaultCase + BranchWeights [SecondCasePos]},
6576	/IsExpected=/false, /ElideAllZero=/true);
6577	}
6578	return Ret;
6579	}
6580
6581	// Handle the degenerate case where two cases have the same result value.
6582	if (ResultVector.size() == `1` && DefaultResult) {
6583	ArrayRef<ConstantInt *> CaseValues = ResultVector [`0`].second;
6584	unsigned CaseCount = CaseValues.size();
6585	// n bits group cases map to the same result:
6586	// case 0,4 -> Cond & 0b1..1011 == 0 ? result : default
6587	// case 0,2,4,6 -> Cond & 0b1..1001 == 0 ? result : default
6588	// case 0,2,8,10 -> Cond & 0b1..0101 == 0 ? result : default
6589	if (isPowerOf2_32(Value: CaseCount)) {
6590	ConstantInt *MinCaseVal = CaseValues [`0`];
6591	// If there are bits that are set exclusively by CaseValues, we
6592	// can transform the switch into a select if the conjunction of
6593	// all the values uniquely identify CaseValues.
6594	APInt AndMask = APInt::getAllOnes(numBits: MinCaseVal->getBitWidth());
6595
6596	// Find the minimum value and compute the and of all the case values.
6597	for (auto *Case : CaseValues) {
6598	if (Case->getValue().slt(RHS: MinCaseVal->getValue()))
6599	MinCaseVal = Case;
6600	AndMask &= Case->getValue();
6601	}
6602	KnownBits Known = computeKnownBits(V: Condition, DL);
6603
6604	if (!AndMask.isZero() && Known.getMaxValue().uge(RHS: AndMask)) {
6605	// Compute the number of bits that are free to vary.
6606	unsigned FreeBits = Known.countMaxActiveBits() - AndMask.popcount();
6607
6608	// Check if the number of values covered by the mask is equal
6609	// to the number of cases.
6610	if (FreeBits == Log2_32(Value: CaseCount)) {
6611	Value *And = Builder.CreateAnd(LHS: Condition, RHS: AndMask);
6612	Value *Cmp = Builder.CreateICmpEQ(
6613	LHS: And, RHS: Constant::getIntegerValue(Ty: And->getType(), V: AndMask));
6614	Value *Ret =
6615	Builder.CreateSelect(C: Cmp, True: ResultVector [`0`].first, False: DefaultResult);
6616	if (auto *SI = dyn_cast<SelectInst>(Val: Ret); SI && HasBranchWeights) {
6617	// We know there's a Default case. We base the resulting branch
6618	// weights off its probability.
6619	assert(BranchWeights.size() >= `2`);
6620	setBranchWeights(
6621	I&: *SI,
6622	Weights: {accumulate(Range: drop_begin(RangeOrContainer&: BranchWeights), Init: `0U`), BranchWeights [`0`]},
6623	/IsExpected=/false, /ElideAllZero=/true);
6624	}
6625	return Ret;
6626	}
6627	}
6628
6629	// Mark the bits case number touched.
6630	APInt BitMask = APInt::getZero(numBits: MinCaseVal->getBitWidth());
6631	for (auto *Case : CaseValues)
6632	BitMask \|= (Case->getValue() - MinCaseVal->getValue());
6633
6634	// Check if cases with the same result can cover all number
6635	// in touched bits.
6636	if (BitMask.popcount() == Log2_32(Value: CaseCount)) {
6637	if (!MinCaseVal->isNullValue())
6638	Condition = Builder.CreateSub(LHS: Condition, RHS: MinCaseVal);
6639	Value *And = Builder.CreateAnd(LHS: Condition, RHS: ~BitMask, Name: "switch.and");
6640	Value *Cmp = Builder.CreateICmpEQ(
6641	LHS: And, RHS: Constant::getNullValue(Ty: And->getType()), Name: "switch.selectcmp");
6642	Value *Ret =
6643	Builder.CreateSelect(C: Cmp, True: ResultVector [`0`].first, False: DefaultResult);
6644	if (auto *SI = dyn_cast<SelectInst>(Val: Ret); SI && HasBranchWeights) {
6645	assert(BranchWeights.size() >= `2`);
6646	setBranchWeights(
6647	I&: *SI,
6648	Weights: {accumulate(Range: drop_begin(RangeOrContainer&: BranchWeights), Init: `0U`), BranchWeights [`0`]},
6649	/IsExpected=/false, /ElideAllZero=/true);
6650	}
6651	return Ret;
6652	}
6653	}
6654
6655	// Handle the degenerate case where two cases have the same value.
6656	if (CaseValues.size() == `2`) {
6657	Value *Cmp1 = Builder.CreateICmpEQ(LHS: Condition, RHS: CaseValues [`0`],
6658	Name: "switch.selectcmp.case1");
6659	Value *Cmp2 = Builder.CreateICmpEQ(LHS: Condition, RHS: CaseValues [`1`],
6660	Name: "switch.selectcmp.case2");
6661	Value *Cmp = Builder.CreateOr(LHS: Cmp1, RHS: Cmp2, Name: "switch.selectcmp");
6662	Value *Ret =
6663	Builder.CreateSelect(C: Cmp, True: ResultVector [`0`].first, False: DefaultResult);
6664	if (auto *SI = dyn_cast<SelectInst>(Val: Ret); SI && HasBranchWeights) {
6665	assert(BranchWeights.size() >= `2`);
6666	setBranchWeights(
6667	I&: *SI, Weights: {accumulate(Range: drop_begin(RangeOrContainer&: BranchWeights), Init: `0U`), BranchWeights [`0`]},
6668	/IsExpected=/false, /ElideAllZero=/true);
6669	}
6670	return Ret;
6671	}
6672	}
6673
6674	return nullptr;
6675	}
6676
6677	// Helper function to cleanup a switch instruction that has been converted into
6678	// a select, fixing up PHI nodes and basic blocks.
6679	static void removeSwitchAfterSelectFold(SwitchInst SI, PHINode PHI,
6680	Value *SelectValue,
6681	IRBuilder<> &Builder,
6682	DomTreeUpdater *DTU) {
6683	std::vector<DominatorTree::UpdateType> Updates;
6684
6685	BasicBlock *SelectBB = SI->getParent();
6686	BasicBlock *DestBB = PHI->getParent();
6687
6688	if (DTU && !is_contained(Range: predecessors(BB: DestBB), Element: SelectBB))
6689	Updates.push_back(x: {DominatorTree::Insert, SelectBB, DestBB});
6690	Builder.CreateBr(Dest: DestBB);
6691
6692	// Remove the switch.
6693
6694	PHI->removeIncomingValueIf(
6695	Predicate: [&](unsigned Idx) { return PHI->getIncomingBlock(i: Idx) == SelectBB; });
6696	PHI->addIncoming(V: SelectValue, BB: SelectBB);
6697
6698	SmallPtrSet<BasicBlock *, `4`> RemovedSuccessors;
6699	for (unsigned i = `0`, e = SI->getNumSuccessors(); i < e; ++i) {
6700	BasicBlock *Succ = SI->getSuccessor(idx: i);
6701
6702	if (Succ == DestBB)
6703	continue;
6704	Succ->removePredecessor(Pred: SelectBB);
6705	if (DTU && RemovedSuccessors.insert(Ptr: Succ).second)
6706	Updates.push_back(x: {DominatorTree::Delete, SelectBB, Succ});
6707	}
6708	SI->eraseFromParent();
6709	if (DTU)
6710	DTU->applyUpdates(Updates);
6711	}
6712
6713	/// If a switch is only used to initialize one or more phi nodes in a common
6714	/// successor block with only two different constant values, try to replace the
6715	/// switch with a select. Returns true if the fold was made.
6716	static bool trySwitchToSelect(SwitchInst *SI, IRBuilder<> &Builder,
6717	DomTreeUpdater DTU, const* DataLayout &DL,
6718	const TargetTransformInfo &TTI) {
6719	Value *const Cond = SI->getCondition();
6720	PHINode PHI = nullptr*;
6721	BasicBlock CommonDest = nullptr*;
6722	Constant *DefaultResult;
6723	SwitchCaseResultVectorTy UniqueResults;
6724	// Collect all the cases that will deliver the same value from the switch.
6725	if (!initializeUniqueCases(SI, PHI, CommonDest, UniqueResults, DefaultResult,
6726	DL, TTI, /MaxUniqueResults/ `2`))
6727	return false;
6728
6729	assert(PHI != nullptr && "PHI for value select not found");
6730	Builder.SetInsertPoint(SI);
6731	SmallVector<uint32_t, `4`> BranchWeights;
6732	if (!ProfcheckDisableMetadataFixes) {
6733	[[maybe_unused]] auto HasWeights =
6734	extractBranchWeights(ProfileData: getBranchWeightMDNode(I: *SI), Weights&: BranchWeights);
6735	assert(!HasWeights == (BranchWeights.empty()));
6736	}
6737	assert(BranchWeights.empty() \|\|
6738	(BranchWeights.size() >=
6739	UniqueResults.size() + (DefaultResult != nullptr)));
6740
6741	Value *SelectValue = foldSwitchToSelect(ResultVector: UniqueResults, DefaultResult, Condition: Cond,
6742	Builder, DL, BranchWeights);
6743	if (!SelectValue)
6744	return false;
6745
6746	removeSwitchAfterSelectFold(SI, PHI, SelectValue, Builder, DTU);
6747	return true;
6748	}
6749
6750	namespace {
6751
6752	/// This class finds alternatives for switches to ultimately
6753	/// replace the switch.
6754	class SwitchReplacement {
6755	public:
6756	/// Create a helper for optimizations to use as a switch replacement.
6757	/// Find a better representation for the content of Values,
6758	/// using DefaultValue to fill any holes in the table.
6759	SwitchReplacement(
6760	Module &M, uint64_t TableSize, ConstantInt *Offset,
6761	const SmallVectorImpl<std::pair<ConstantInt , Constant >> &Values,
6762	Constant DefaultValue, const* DataLayout &DL, const StringRef &FuncName);
6763
6764	/// Build instructions with Builder to retrieve values using Index
6765	/// and replace the switch.
6766	Value replaceSwitch(Value Index, IRBuilder<> &Builder, const DataLayout &DL,
6767	Function *Func);
6768
6769	/// Return true if a table with TableSize elements of
6770	/// type ElementType would fit in a target-legal register.
6771	static bool wouldFitInRegister(const DataLayout &DL, uint64_t TableSize,
6772	Type *ElementType);
6773
6774	/// Return the default value of the switch.
6775	Constant *getDefaultValue();
6776
6777	/// Return true if the replacement is a lookup table.
6778	bool isLookupTable();
6779
6780	/// Return true if the replacement is a bit map.
6781	bool isBitMap();
6782
6783	private:
6784	// Depending on the switch, there are different alternatives.
6785	enum {
6786	// For switches where each case contains the same value, we just have to
6787	// store that single value and return it for each lookup.
6788	SingleValueKind,
6789
6790	// For switches where there is a linear relationship between table index
6791	// and values. We calculate the result with a simple multiplication
6792	// and addition instead of a table lookup.
6793	LinearMapKind,
6794
6795	// For small tables with integer elements, we can pack them into a bitmap
6796	// that fits into a target-legal register. Values are retrieved by
6797	// shift and mask operations.
6798	BitMapKind,
6799
6800	// The table is stored as an array of values. Values are retrieved by load
6801	// instructions from the table.
6802	LookupTableKind
6803	} Kind;
6804
6805	// The default value of the switch.
6806	Constant *DefaultValue;
6807
6808	// The type of the output values.
6809	Type *ValueType;
6810
6811	// For SingleValueKind, this is the single value.
6812	Constant SingleValue = nullptr*;
6813
6814	// For BitMapKind, this is the bitmap.
6815	ConstantInt BitMap = nullptr*;
6816	IntegerType BitMapElementTy = nullptr*;
6817
6818	// For LinearMapKind, these are the constants used to derive the value.
6819	ConstantInt LinearOffset = nullptr*;
6820	ConstantInt LinearMultiplier = nullptr*;
6821	bool LinearMapValWrapped = false;
6822
6823	// For LookupTableKind, this is the table.
6824	Constant Initializer = nullptr*;
6825	};
6826
6827	} // end anonymous namespace
6828
6829	SwitchReplacement::SwitchReplacement(
6830	Module &M, uint64_t TableSize, ConstantInt *Offset,
6831	const SmallVectorImpl<std::pair<ConstantInt , Constant >> &Values,
6832	Constant DefaultValue, const* DataLayout &DL, const StringRef &FuncName)
6833	: DefaultValue(DefaultValue) {
6834	assert(Values.size() && "Can't build lookup table without values!");
6835	assert(TableSize >= Values.size() && "Can't fit values in table!");
6836
6837	// If all values in the table are equal, this is that value.
6838	SingleValue = Values.begin()->second;
6839
6840	ValueType = Values.begin()->second->getType();
6841
6842	// Build up the table contents.
6843	SmallVector<Constant *, `64`> TableContents(TableSize);
6844	for (const auto &[CaseVal, CaseRes] : Values) {
6845	assert(CaseRes->getType() == ValueType);
6846
6847	uint64_t Idx = (CaseVal->getValue() - Offset->getValue()).getLimitedValue();
6848	TableContents [Idx] = CaseRes;
6849
6850	if (SingleValue && !isa<PoisonValue>(Val: CaseRes) && CaseRes != SingleValue)
6851	SingleValue = isa<PoisonValue>(Val: SingleValue) ? CaseRes : nullptr;
6852	}
6853
6854	// Fill in any holes in the table with the default result.
6855	if (Values.size() < TableSize) {
6856	assert(DefaultValue &&
6857	"Need a default value to fill the lookup table holes.");
6858	assert(DefaultValue->getType() == ValueType);
6859	for (uint64_t I = `0`; I < TableSize; ++I) {
6860	if (!TableContents [I])
6861	TableContents [I] = DefaultValue;
6862	}
6863
6864	// If the default value is poison, all the holes are poison.
6865	bool DefaultValueIsPoison = isa<PoisonValue>(Val: DefaultValue);
6866
6867	if (DefaultValue != SingleValue && !DefaultValueIsPoison)
6868	SingleValue = nullptr;
6869	}
6870
6871	// If each element in the table contains the same value, we only need to store
6872	// that single value.
6873	if (SingleValue) {
6874	Kind = SingleValueKind;
6875	return;
6876	}
6877
6878	// Check if we can derive the value with a linear transformation from the
6879	// table index.
6880	if (isa<IntegerType>(Val: ValueType)) {
6881	bool LinearMappingPossible = true;
6882	APInt PrevVal;
6883	APInt DistToPrev;
6884	// When linear map is monotonic and signed overflow doesn't happen on
6885	// maximum index, we can attach nsw on Add and Mul.
6886	bool NonMonotonic = false;
6887	assert(TableSize >= `2` && "Should be a SingleValue table.");
6888	// Check if there is the same distance between two consecutive values.
6889	for (uint64_t I = `0`; I < TableSize; ++I) {
6890	ConstantInt *ConstVal = dyn_cast<ConstantInt>(Val: TableContents [I]);
6891
6892	if (!ConstVal && isa<PoisonValue>(Val: TableContents [I])) {
6893	// This is an poison, so it's (probably) a lookup table hole.
6894	// To prevent any regressions from before we switched to using poison as
6895	// the default value, holes will fall back to using the first value.
6896	// This can be removed once we add proper handling for poisons in lookup
6897	// tables.
6898	ConstVal = dyn_cast<ConstantInt>(Val: Values [`0`].second);
6899	}
6900
6901	if (!ConstVal) {
6902	// This is an undef. We could deal with it, but undefs in lookup tables
6903	// are very seldom. It's probably not worth the additional complexity.
6904	LinearMappingPossible = false;
6905	break;
6906	}
6907	const APInt &Val = ConstVal->getValue();
6908	if (I != `0`) {
6909	APInt Dist = Val - PrevVal;
6910	if (I == `1`) {
6911	DistToPrev = Dist;
6912	} else if (Dist != DistToPrev) {
6913	LinearMappingPossible = false;
6914	break;
6915	}
6916	NonMonotonic \|=
6917	Dist.isStrictlyPositive() ? Val.sle(RHS: PrevVal) : Val.sgt(RHS: PrevVal);
6918	}
6919	PrevVal = Val;
6920	}
6921	if (LinearMappingPossible) {
6922	LinearOffset = cast<ConstantInt>(Val: TableContents [`0`]);
6923	LinearMultiplier = ConstantInt::get(Context&: M.getContext(), V: DistToPrev);
6924	APInt M = LinearMultiplier->getValue();
6925	bool MayWrap = true;
6926	if (isIntN(N: M.getBitWidth(), x: TableSize - `1`))
6927	(void)M.smul_ov(RHS: APInt (M.getBitWidth(), TableSize - `1`), Overflow&: MayWrap);
6928	LinearMapValWrapped = NonMonotonic \|\| MayWrap;
6929	Kind = LinearMapKind;
6930	return;
6931	}
6932	}
6933
6934	// If the type is integer and the table fits in a register, build a bitmap.
6935	if (wouldFitInRegister(DL, TableSize, ElementType: ValueType)) {
6936	IntegerType *IT = cast<IntegerType>(Val: ValueType);
6937	APInt TableInt(TableSize * IT->getBitWidth(), `0`);
6938	for (uint64_t I = TableSize; I > `0`; --I) {
6939	TableInt <<= IT->getBitWidth();
6940	// Insert values into the bitmap. Undef values are set to zero.
6941	if (!isa<UndefValue>(Val: TableContents [I - `1`])) {
6942	ConstantInt *Val = cast<ConstantInt>(Val: TableContents [I - `1`]);
6943	TableInt \|= Val->getValue().zext(width: TableInt.getBitWidth());
6944	}
6945	}
6946	BitMap = ConstantInt::get(Context&: M.getContext(), V: TableInt);
6947	BitMapElementTy = IT;
6948	Kind = BitMapKind;
6949	return;
6950	}
6951
6952	// Store the table in an array.
6953	auto *TableTy = ArrayType::get(ElementType: ValueType, NumElements: TableSize);
6954	Initializer = ConstantArray::get(T: TableTy, V: TableContents);
6955
6956	Kind = LookupTableKind;
6957	}
6958
6959	Value SwitchReplacement::replaceSwitch(Value Index, IRBuilder<> &Builder,
6960	const DataLayout &DL, Function *Func) {
6961	switch (Kind) {
6962	case SingleValueKind:
6963	return SingleValue;
6964	case LinearMapKind: {
6965	++NumLinearMaps;
6966	// Derive the result value from the input value.
6967	Value *Result = Builder.CreateIntCast(V: Index, DestTy: LinearMultiplier->getType(),
6968	isSigned: false, Name: "switch.idx.cast");
6969	if (!LinearMultiplier->isOne())
6970	Result = Builder.CreateMul(LHS: Result, RHS: LinearMultiplier, Name: "switch.idx.mult",
6971	/HasNUW = / false,
6972	/HasNSW = / !LinearMapValWrapped);
6973
6974	if (!LinearOffset->isZero())
6975	Result = Builder.CreateAdd(LHS: Result, RHS: LinearOffset, Name: "switch.offset",
6976	/HasNUW = / false,
6977	/HasNSW = / !LinearMapValWrapped);
6978	return Result;
6979	}
6980	case BitMapKind: {
6981	++NumBitMaps;
6982	// Type of the bitmap (e.g. i59).
6983	IntegerType *MapTy = BitMap->getIntegerType();
6984
6985	// Cast Index to the same type as the bitmap.
6986	// Note: The Index is <= the number of elements in the table, so
6987	// truncating it to the width of the bitmask is safe.
6988	Value *ShiftAmt = Builder.CreateZExtOrTrunc(V: Index, DestTy: MapTy, Name: "switch.cast");
6989
6990	// Multiply the shift amount by the element width. NUW/NSW can always be
6991	// set, because wouldFitInRegister guarantees Index ShiftAmt is in*
6992	// BitMap's bit width.
6993	ShiftAmt = Builder.CreateMul(
6994	LHS: ShiftAmt, RHS: ConstantInt::get(Ty: MapTy, V: BitMapElementTy->getBitWidth()),
6995	Name: "switch.shiftamt",/HasNUW =/true,/HasNSW =/true);
6996
6997	// Shift down.
6998	Value *DownShifted =
6999	Builder.CreateLShr(LHS: BitMap, RHS: ShiftAmt, Name: "switch.downshift");
7000	// Mask off.
7001	return Builder.CreateTrunc(V: DownShifted, DestTy: BitMapElementTy, Name: "switch.masked");
7002	}
7003	case LookupTableKind: {
7004	++NumLookupTables;
7005	auto *Table =
7006	new GlobalVariable (*Func->getParent(), Initializer->getType(),
7007	/isConstant=/true, GlobalVariable::PrivateLinkage,
7008	Initializer, "switch.table." + Func->getName());
7009	Table->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
7010	// Set the alignment to that of an array items. We will be only loading one
7011	// value out of it.
7012	Table->setAlignment(DL.getPrefTypeAlign(Ty: ValueType));
7013	Type *IndexTy = DL.getIndexType(PtrTy: Table->getType());
7014	auto *ArrayTy = cast<ArrayType>(Val: Table->getValueType());
7015
7016	if (Index->getType() != IndexTy) {
7017	unsigned OldBitWidth = Index->getType()->getIntegerBitWidth();
7018	Index = Builder.CreateZExtOrTrunc(V: Index, DestTy: IndexTy);
7019	if (auto *Zext = dyn_cast<ZExtInst>(Val: Index))
7020	Zext->setNonNeg(
7021	isUIntN(N: OldBitWidth - `1`, x: ArrayTy->getNumElements() - `1`));
7022	}
7023
7024	Value *GEPIndices[] = {ConstantInt::get(Ty: IndexTy, V: `0`), Index};
7025	Value *GEP =
7026	Builder.CreateInBoundsGEP(Ty: ArrayTy, Ptr: Table, IdxList: GEPIndices, Name: "switch.gep");
7027	return Builder.CreateLoad(Ty: ArrayTy->getElementType(), Ptr: GEP, Name: "switch.load");
7028	}
7029	}
7030	llvm_unreachable("Unknown helper kind!");
7031	}
7032
7033	bool SwitchReplacement::wouldFitInRegister(const DataLayout &DL,
7034	uint64_t TableSize,
7035	Type *ElementType) {
7036	auto *IT = dyn_cast<IntegerType>(Val: ElementType);
7037	if (!IT)
7038	return false;
7039	// FIXME: If the type is wider than it needs to be, e.g. i8 but all values
7040	// are <= 15, we could try to narrow the type.
7041
7042	// Avoid overflow, fitsInLegalInteger uses unsigned int for the width.
7043	if (TableSize >= UINT_MAX / IT->getBitWidth())
7044	return false;
7045	return DL.fitsInLegalInteger(Width: TableSize * IT->getBitWidth());
7046	}
7047
7048	static bool isTypeLegalForLookupTable(Type Ty, const* TargetTransformInfo &TTI,
7049	const DataLayout &DL) {
7050	// Allow any legal type.
7051	if (TTI.isTypeLegal(Ty))
7052	return true;
7053
7054	auto *IT = dyn_cast<IntegerType>(Val: Ty);
7055	if (!IT)
7056	return false;
7057
7058	// Also allow power of 2 integer types that have at least 8 bits and fit in
7059	// a register. These types are common in frontend languages and targets
7060	// usually support loads of these types.
7061	// TODO: We could relax this to any integer that fits in a register and rely
7062	// on ABI alignment and padding in the table to allow the load to be widened.
7063	// Or we could widen the constants and truncate the load.
7064	unsigned BitWidth = IT->getBitWidth();
7065	return BitWidth >= `8` && isPowerOf2_32(Value: BitWidth) &&
7066	DL.fitsInLegalInteger(Width: IT->getBitWidth());
7067	}
7068
7069	Constant SwitchReplacement::getDefaultValue() { return* DefaultValue; }
7070
7071	bool SwitchReplacement::isLookupTable() { return Kind == LookupTableKind; }
7072
7073	bool SwitchReplacement::isBitMap() { return Kind == BitMapKind; }
7074
/// Return true if \p NumCases cases spread over a span of \p CaseRange values
/// reach the minimum density of 40%, i.e. NumCases / CaseRange >= 0.4.
/// Returns false when \p CaseRange is so large that the density computation
/// would overflow.
static bool isSwitchDense(uint64_t NumCases, uint64_t CaseRange) {
  // 40% is the default density for building a jump table in optsize/minsize
  // mode. See also TargetLoweringBase::isSuitableForJumpTable(), which this
  // function was based on.
  const uint64_t MinDensity = 40;

  if (CaseRange >= UINT64_MAX / 100)
    return false; // Avoid multiplication overflows below.

  return NumCases * 100 >= CaseRange * MinDensity;
}
7086
7087	static bool isSwitchDense(ArrayRef<int64_t> Values) {
7088	uint64_t Diff = (uint64_t)Values.back() - (uint64_t)Values.front();
7089	uint64_t Range = Diff + `1`;
7090	if (Range < Diff)
7091	return false; // Overflow.
7092
7093	return isSwitchDense(NumCases: Values.size(), CaseRange: Range);
7094	}
7095
7096	/// Determine whether a lookup table should be built for this switch, based on
7097	/// the number of cases, size of the table, and the types of the results.
7098	// TODO: We could support larger than legal types by limiting based on the
7099	// number of loads required and/or table size. If the constants are small we
7100	// could use smaller table entries and extend after the load.
7101	static bool shouldBuildLookupTable(SwitchInst *SI, uint64_t TableSize,
7102	const TargetTransformInfo &TTI,
7103	const DataLayout &DL,
7104	const SmallVector<Type *> &ResultTypes) {
7105	if (SI->getNumCases() > TableSize)
7106	return false; // TableSize overflowed.
7107
7108	bool AllTablesFitInRegister = true;
7109	bool HasIllegalType = false;
7110	for (const auto &Ty : ResultTypes) {
7111	// Saturate this flag to true.
7112	HasIllegalType = HasIllegalType \|\| !isTypeLegalForLookupTable(Ty, TTI, DL);
7113
7114	// Saturate this flag to false.
7115	AllTablesFitInRegister =
7116	AllTablesFitInRegister &&
7117	SwitchReplacement::wouldFitInRegister(DL, TableSize, ElementType: Ty);
7118
7119	// If both flags saturate, we're done. NOTE: This only* works with*
7120	// saturating flags, and all flags have to saturate first due to the
7121	// non-deterministic behavior of iterating over a dense map.
7122	if (HasIllegalType && !AllTablesFitInRegister)
7123	break;
7124	}
7125
7126	// If each table would fit in a register, we should build it anyway.
7127	if (AllTablesFitInRegister)
7128	return true;
7129
7130	// Don't build a table that doesn't fit in-register if it has illegal types.
7131	if (HasIllegalType)
7132	return false;
7133
7134	return isSwitchDense(NumCases: SI->getNumCases(), CaseRange: TableSize);
7135	}
7136
7137	static bool shouldUseSwitchConditionAsTableIndex(
7138	ConstantInt &MinCaseVal, const ConstantInt &MaxCaseVal,
7139	bool HasDefaultResults, const SmallVector<Type *> &ResultTypes,
7140	const DataLayout &DL, const TargetTransformInfo &TTI) {
7141	if (MinCaseVal.isNullValue())
7142	return true;
7143	if (MinCaseVal.isNegative() \|\|
7144	MaxCaseVal.getLimitedValue() == std::numeric_limits<uint64_t>::max() \|\|
7145	!HasDefaultResults)
7146	return false;
7147	return all_of(Range: ResultTypes, P: [&](const auto &ResultType) {
7148	return SwitchReplacement::wouldFitInRegister(
7149	DL, TableSize: MaxCaseVal.getLimitedValue() + `1` / TableSize /, ElementType: ResultType);
7150	});
7151	}
7152
7153	/// Try to reuse the switch table index compare. Following pattern:
7154	/// \code
7155	/// if (idx < tablesize)
7156	/// r = table[idx]; // table does not contain default_value
7157	/// else
7158	/// r = default_value;
7159	/// if (r != default_value)
7160	/// ...
7161	/// \endcode
7162	/// Is optimized to:
7163	/// \code
7164	/// cond = idx < tablesize;
7165	/// if (cond)
7166	/// r = table[idx];
7167	/// else
7168	/// r = default_value;
7169	/// if (cond)
7170	/// ...
7171	/// \endcode
7172	/// Jump threading will then eliminate the second if(cond).
7173	static void reuseTableCompare(
7174	User PhiUser, BasicBlock PhiBlock, CondBrInst *RangeCheckBranch,
7175	Constant *DefaultValue,
7176	const SmallVectorImpl<std::pair<ConstantInt , Constant >> &Values) {
7177	ICmpInst *CmpInst = dyn_cast<ICmpInst>(Val: PhiUser);
7178	if (!CmpInst)
7179	return;
7180
7181	// We require that the compare is in the same block as the phi so that jump
7182	// threading can do its work afterwards.
7183	if (CmpInst->getParent() != PhiBlock)
7184	return;
7185
7186	Constant *CmpOp1 = dyn_cast<Constant>(Val: CmpInst->getOperand(i_nocapture: `1`));
7187	if (!CmpOp1)
7188	return;
7189
7190	Value *RangeCmp = RangeCheckBranch->getCondition();
7191	Constant *TrueConst = ConstantInt::getTrue(Ty: RangeCmp->getType());
7192	Constant *FalseConst = ConstantInt::getFalse(Ty: RangeCmp->getType());
7193
7194	// Check if the compare with the default value is constant true or false.
7195	const DataLayout &DL = PhiBlock->getDataLayout();
7196	Constant *DefaultConst = ConstantFoldCompareInstOperands(
7197	Predicate: CmpInst->getPredicate(), LHS: DefaultValue, RHS: CmpOp1, DL);
7198	if (DefaultConst != TrueConst && DefaultConst != FalseConst)
7199	return;
7200
7201	// Check if the compare with the case values is distinct from the default
7202	// compare result.
7203	for (auto ValuePair : Values) {
7204	Constant *CaseConst = ConstantFoldCompareInstOperands(
7205	Predicate: CmpInst->getPredicate(), LHS: ValuePair.second, RHS: CmpOp1, DL);
7206	if (!CaseConst \|\| CaseConst == DefaultConst \|\|
7207	(CaseConst != TrueConst && CaseConst != FalseConst))
7208	return;
7209	}
7210
7211	// Check if the branch instruction dominates the phi node. It's a simple
7212	// dominance check, but sufficient for our needs.
7213	// Although this check is invariant in the calling loops, it's better to do it
7214	// at this late stage. Practically we do it at most once for a switch.
7215	BasicBlock *BranchBlock = RangeCheckBranch->getParent();
7216	for (BasicBlock *Pred : predecessors(BB: PhiBlock)) {
7217	if (Pred != BranchBlock && Pred->getUniquePredecessor() != BranchBlock)
7218	return;
7219	}
7220
7221	if (DefaultConst == FalseConst) {
7222	// The compare yields the same result. We can replace it.
7223	CmpInst->replaceAllUsesWith(V: RangeCmp);
7224	++NumTableCmpReuses;
7225	} else {
7226	// The compare yields the same result, just inverted. We can replace it.
7227	Value *InvertedTableCmp = BinaryOperator::CreateXor(
7228	V1: RangeCmp, V2: ConstantInt::get(Ty: RangeCmp->getType(), V: `1`), Name: "inverted.cmp",
7229	InsertBefore: RangeCheckBranch->getIterator());
7230	CmpInst->replaceAllUsesWith(V: InvertedTableCmp);
7231	++NumTableCmpReuses;
7232	}
7233	}
7234
7235	/// If the switch is only used to initialize one or more phi nodes in a common
7236	/// successor block with different constant values, replace the switch with
7237	/// lookup tables.
7238	static bool simplifySwitchLookup(SwitchInst *SI, IRBuilder<> &Builder,
7239	DomTreeUpdater DTU, const* DataLayout &DL,
7240	const TargetTransformInfo &TTI,
7241	bool ConvertSwitchToLookupTable) {
7242	assert(SI->getNumCases() > `1` && "Degenerate switch?");
7243
7244	BasicBlock *BB = SI->getParent();
7245	Function *Fn = BB->getParent();
7246
7247	// FIXME: If the switch is too sparse for a lookup table, perhaps we could
7248	// split off a dense part and build a lookup table for that.
7249
7250	// FIXME: This creates arrays of GEPs to constant strings, which means each
7251	// GEP needs a runtime relocation in PIC code. We should just build one big
7252	// string and lookup indices into that.
7253
7254	// Ignore switches with less than three cases. Lookup tables will not make
7255	// them faster, so we don't analyze them.
7256	if (SI->getNumCases() < `3`)
7257	return false;
7258
7259	// Figure out the corresponding result for each case value and phi node in the
7260	// common destination, as well as the min and max case values.
7261	assert(!SI->cases().empty());
7262	SwitchInst::CaseIt CI = SI->case_begin();
7263	ConstantInt *MinCaseVal = CI ->getCaseValue();
7264	ConstantInt *MaxCaseVal = CI ->getCaseValue();
7265
7266	BasicBlock CommonDest = nullptr*;
7267
7268	using ResultListTy = SmallVector<std::pair<ConstantInt , Constant >, `4`>;
7269	SmallDenseMap<PHINode *, ResultListTy> ResultLists;
7270
7271	SmallDenseMap<PHINode , Constant > DefaultResults;
7272	SmallVector<Type *> ResultTypes;
7273	SmallVector<PHINode *, `4`> PHIs;
7274
7275	for (SwitchInst::CaseIt E = SI->case_end(); CI != E; ++CI) {
7276	ConstantInt *CaseVal = CI ->getCaseValue();
7277	if (CaseVal->getValue().slt(RHS: MinCaseVal->getValue()))
7278	MinCaseVal = CaseVal;
7279	if (CaseVal->getValue().sgt(RHS: MaxCaseVal->getValue()))
7280	MaxCaseVal = CaseVal;
7281
7282	// Resulting value at phi nodes for this case value.
7283	using ResultsTy = SmallVector<std::pair<PHINode , Constant >, `4`>;
7284	ResultsTy Results;
7285	if (!getCaseResults(SI, CaseVal, CaseDest: CI ->getCaseSuccessor(), CommonDest: &CommonDest,
7286	Res&: Results, DL, TTI))
7287	return false;
7288
7289	// Append the result and result types from this case to the list for each
7290	// phi.
7291	for (const auto &I : Results) {
7292	PHINode *PHI = I.first;
7293	Constant *Value = I.second;
7294	auto [It, Inserted] = ResultLists.try_emplace(Key: PHI);
7295	if (Inserted)
7296	PHIs.push_back(Elt: PHI);
7297	It ->second.push_back(Elt: std::make_pair(x&: CaseVal, y&: Value));
7298	ResultTypes.push_back(Elt: PHI->getType());
7299	}
7300	}
7301
7302	// If the table has holes, we need a constant result for the default case
7303	// or a bitmask that fits in a register.
7304	SmallVector<std::pair<PHINode , Constant >, `4`> DefaultResultsList;
7305	bool HasDefaultResults =
7306	getCaseResults(SI, CaseVal: nullptr, CaseDest: SI->getDefaultDest(), CommonDest: &CommonDest,
7307	Res&: DefaultResultsList, DL, TTI);
7308	for (const auto &I : DefaultResultsList) {
7309	PHINode *PHI = I.first;
7310	Constant *Result = I.second;
7311	DefaultResults [PHI] = Result;
7312	}
7313
7314	bool UseSwitchConditionAsTableIndex = shouldUseSwitchConditionAsTableIndex(
7315	MinCaseVal&: MinCaseVal, MaxCaseVal: MaxCaseVal, HasDefaultResults, ResultTypes, DL, TTI);
7316	uint64_t TableSize;
7317	ConstantInt *TableIndexOffset;
7318	if (UseSwitchConditionAsTableIndex) {
7319	TableSize = MaxCaseVal->getLimitedValue() + `1`;
7320	TableIndexOffset = ConstantInt::get(Ty: MaxCaseVal->getIntegerType(), V: `0`);
7321	} else {
7322	TableSize =
7323	(MaxCaseVal->getValue() - MinCaseVal->getValue()).getLimitedValue() + `1`;
7324
7325	TableIndexOffset = MinCaseVal;
7326	}
7327
7328	// If the default destination is unreachable, or if the lookup table covers
7329	// all values of the conditional variable, branch directly to the lookup table
7330	// BB. Otherwise, check that the condition is within the case range.
7331	uint64_t NumResults = ResultLists [PHIs [`0`]].size();
7332	bool DefaultIsReachable = !SI->defaultDestUnreachable();
7333
7334	bool TableHasHoles = (NumResults < TableSize);
7335
7336	// If the table has holes but the default destination doesn't produce any
7337	// constant results, the lookup table entries corresponding to the holes will
7338	// contain poison.
7339	bool AllHolesArePoison = TableHasHoles && !HasDefaultResults;
7340
7341	// If the default destination doesn't produce a constant result but is still
7342	// reachable, and the lookup table has holes, we need to use a mask to
7343	// determine if the current index should load from the lookup table or jump
7344	// to the default case.
7345	// The mask is unnecessary if the table has holes but the default destination
7346	// is unreachable, as in that case the holes must also be unreachable.
7347	bool NeedMask = AllHolesArePoison && DefaultIsReachable;
7348	if (NeedMask) {
7349	// As an extra penalty for the validity test we require more cases.
7350	if (SI->getNumCases() < `4`) // FIXME: Find best threshold value (benchmark).
7351	return false;
7352	if (!DL.fitsInLegalInteger(Width: TableSize))
7353	return false;
7354	}
7355
7356	if (!shouldBuildLookupTable(SI, TableSize, TTI, DL, ResultTypes))
7357	return false;
7358
7359	// Compute the table index value.
7360	Value *TableIndex;
7361	if (UseSwitchConditionAsTableIndex) {
7362	TableIndex = SI->getCondition();
7363	if (HasDefaultResults) {
7364	// Grow the table to cover all possible index values to avoid the range
7365	// check. It will use the default result to fill in the table hole later,
7366	// so make sure it exist.
7367	ConstantRange CR =
7368	computeConstantRange(V: TableIndex, / ForSigned / false);
7369	// Grow the table shouldn't have any size impact by checking
7370	// wouldFitInRegister.
7371	// TODO: Consider growing the table also when it doesn't fit in a register
7372	// if no optsize is specified.
7373	const uint64_t UpperBound = CR.getUpper().getLimitedValue();
7374	if (!CR.isUpperWrapped() &&
7375	all_of(Range&: ResultTypes, P: [&](const auto &ResultType) {
7376	return SwitchReplacement::wouldFitInRegister(DL, TableSize: UpperBound,
7377	ElementType: ResultType);
7378	})) {
7379	// There may be some case index larger than the UpperBound (unreachable
7380	// case), so make sure the table size does not get smaller.
7381	TableSize = std::max(a: UpperBound, b: TableSize);
7382	// The default branch is unreachable after we enlarge the lookup table.
7383	// Adjust DefaultIsReachable to reuse code path.
7384	DefaultIsReachable = false;
7385	}
7386	}
7387	}
7388
7389	// Keep track of the switch replacement for each phi
7390	SmallDenseMap<PHINode *, SwitchReplacement> PhiToReplacementMap;
7391	for (PHINode *PHI : PHIs) {
7392	const auto &ResultList = ResultLists [PHI];
7393
7394	Type *ResultType = ResultList.begin()->second->getType();
7395	// Use any value to fill the lookup table holes.
7396	Constant *DefaultVal =
7397	AllHolesArePoison ? PoisonValue::get(T: ResultType) : DefaultResults [PHI];
7398	StringRef FuncName = Fn->getName();
7399	SwitchReplacement Replacement(*Fn->getParent(), TableSize, TableIndexOffset,
7400	ResultList, DefaultVal, DL, FuncName);
7401	PhiToReplacementMap.insert(KV: {PHI, Replacement});
7402	}
7403
7404	bool AnyLookupTables = any_of(
7405	Range&: PhiToReplacementMap, P: [](auto &KV) { return KV.second.isLookupTable(); });
7406	bool AnyBitMaps = any_of(Range&: PhiToReplacementMap,
7407	P: [](auto &KV) { return KV.second.isBitMap(); });
7408
7409	// A few conditions prevent the generation of lookup tables:
7410	// 1. The target does not support lookup tables.
7411	// 2. The "no-jump-tables" function attribute is set.
7412	// However, these objections do not apply to other switch replacements, like
7413	// the bitmap, so we only stop here if any of these conditions are met and we
7414	// want to create a LUT. Otherwise, continue with the switch replacement.
7415	if (AnyLookupTables &&
7416	(!TTI.shouldBuildLookupTables() \|\|
7417	Fn->getFnAttribute(Kind: "no-jump-tables").getValueAsBool()))
7418	return false;
7419
7420	// In the early optimization pipeline, disable formation of lookup tables,
7421	// bit maps and mask checks, as they may inhibit further optimization.
7422	if (!ConvertSwitchToLookupTable &&
7423	(AnyLookupTables \|\| AnyBitMaps \|\| NeedMask))
7424	return false;
7425
7426	Builder.SetInsertPoint(SI);
7427	// TableIndex is the switch condition - TableIndexOffset if we don't
7428	// use the condition directly
7429	if (!UseSwitchConditionAsTableIndex) {
7430	// If the default is unreachable, all case values are s>= MinCaseVal. Then
7431	// we can try to attach nsw.
7432	bool MayWrap = true;
7433	if (!DefaultIsReachable) {
7434	APInt Res =
7435	MaxCaseVal->getValue().ssub_ov(RHS: MinCaseVal->getValue(), Overflow&: MayWrap);
7436	(void)Res;
7437	}
7438	TableIndex = Builder.CreateSub(LHS: SI->getCondition(), RHS: TableIndexOffset,
7439	Name: "switch.tableidx", /HasNUW =/false,
7440	/HasNSW =/!MayWrap);
7441	}
7442
7443	std::vector<DominatorTree::UpdateType> Updates;
7444
7445	// Compute the maximum table size representable by the integer type we are
7446	// switching upon.
7447	unsigned CaseSize = MinCaseVal->getType()->getPrimitiveSizeInBits();
7448	uint64_t MaxTableSize = CaseSize > `63` ? UINT64_MAX : `1ULL` << CaseSize;
7449	assert(MaxTableSize >= TableSize &&
7450	"It is impossible for a switch to have more entries than the max "
7451	"representable value of its input integer type's size.");
7452
7453	// Create the BB that does the lookups.
7454	Module &Mod = *CommonDest->getParent()->getParent();
7455	BasicBlock *LookupBB = BasicBlock::Create(
7456	Context&: Mod.getContext(), Name: "switch.lookup", Parent: CommonDest->getParent(), InsertBefore: CommonDest);
7457
7458	CondBrInst RangeCheckBranch = nullptr*;
7459	CondBrInst CondBranch = nullptr*;
7460
7461	Builder.SetInsertPoint(SI);
7462	const bool GeneratingCoveredLookupTable = (MaxTableSize == TableSize);
7463	if (!DefaultIsReachable \|\| GeneratingCoveredLookupTable) {
7464	Builder.CreateBr(Dest: LookupBB);
7465	if (DTU)
7466	Updates.push_back(x: {DominatorTree::Insert, BB, LookupBB});
7467	// Note: We call removeProdecessor later since we need to be able to get the
7468	// PHI value for the default case in case we're using a bit mask.
7469	} else {
7470	Value *Cmp = Builder.CreateICmpULT(
7471	LHS: TableIndex, RHS: ConstantInt::get(Ty: MinCaseVal->getType(), V: TableSize));
7472	RangeCheckBranch =
7473	Builder.CreateCondBr(Cond: Cmp, True: LookupBB, False: SI->getDefaultDest());
7474	CondBranch = RangeCheckBranch;
7475	if (DTU)
7476	Updates.push_back(x: {DominatorTree::Insert, BB, LookupBB});
7477	}
7478
7479	// Populate the BB that does the lookups.
7480	Builder.SetInsertPoint(LookupBB);
7481
7482	if (NeedMask) {
7483	// Before doing the lookup, we do the hole check. The LookupBB is therefore
7484	// re-purposed to do the hole check, and we create a new LookupBB.
7485	BasicBlock *MaskBB = LookupBB;
7486	MaskBB->setName("switch.hole_check");
7487	LookupBB = BasicBlock::Create(Context&: Mod.getContext(), Name: "switch.lookup",
7488	Parent: CommonDest->getParent(), InsertBefore: CommonDest);
7489
7490	// Make the mask's bitwidth at least 8-bit and a power-of-2 to avoid
7491	// unnecessary illegal types.
7492	uint64_t TableSizePowOf2 = NextPowerOf2(A: std::max(a: `7ULL`, b: TableSize - `1ULL`));
7493	APInt MaskInt(TableSizePowOf2, `0`);
7494	APInt One(TableSizePowOf2, `1`);
7495	// Build bitmask; fill in a 1 bit for every case.
7496	const ResultListTy &ResultList = ResultLists [PHIs [`0`]];
7497	for (const auto &Result : ResultList) {
7498	uint64_t Idx = (Result.first->getValue() - TableIndexOffset->getValue())
7499	.getLimitedValue();
7500	MaskInt \|= One << Idx;
7501	}
7502	ConstantInt *TableMask = ConstantInt::get(Context&: Mod.getContext(), V: MaskInt);
7503
7504	// Get the TableIndex'th bit of the bitmask.
7505	// If this bit is 0 (meaning hole) jump to the default destination,
7506	// else continue with table lookup.
7507	IntegerType *MapTy = TableMask->getIntegerType();
7508	Value *MaskIndex =
7509	Builder.CreateZExtOrTrunc(V: TableIndex, DestTy: MapTy, Name: "switch.maskindex");
7510	Value *Shifted = Builder.CreateLShr(LHS: TableMask, RHS: MaskIndex, Name: "switch.shifted");
7511	Value *LoBit = Builder.CreateTrunc(
7512	V: Shifted, DestTy: Type::getInt1Ty(C&: Mod.getContext()), Name: "switch.lobit");
7513	CondBranch = Builder.CreateCondBr(Cond: LoBit, True: LookupBB, False: SI->getDefaultDest());
7514	if (DTU) {
7515	Updates.push_back(x: {DominatorTree::Insert, MaskBB, LookupBB});
7516	Updates.push_back(x: {DominatorTree::Insert, MaskBB, SI->getDefaultDest()});
7517	}
7518	Builder.SetInsertPoint(LookupBB);
7519	addPredecessorToBlock(Succ: SI->getDefaultDest(), NewPred: MaskBB, ExistPred: BB);
7520	}
7521
7522	if (!DefaultIsReachable \|\| GeneratingCoveredLookupTable) {
7523	// We cached PHINodes in PHIs. To avoid accessing deleted PHINodes later,
7524	// do not delete PHINodes here.
7525	SI->getDefaultDest()->removePredecessor(Pred: BB,
7526	/KeepOneInputPHIs=/true);
7527	if (DTU)
7528	Updates.push_back(x: {DominatorTree::Delete, BB, SI->getDefaultDest()});
7529	}
7530
7531	for (PHINode *PHI : PHIs) {
7532	const ResultListTy &ResultList = ResultLists [PHI];
7533	auto Replacement = PhiToReplacementMap.at(Val: PHI);
7534	auto *Result = Replacement.replaceSwitch(Index: TableIndex, Builder, DL, Func: Fn);
7535	// Do a small peephole optimization: re-use the switch table compare if
7536	// possible.
7537	if (!TableHasHoles && HasDefaultResults && RangeCheckBranch) {
7538	BasicBlock *PhiBlock = PHI->getParent();
7539	// Search for compare instructions which use the phi.
7540	for (auto *User : PHI->users()) {
7541	reuseTableCompare(PhiUser: User, PhiBlock, RangeCheckBranch,
7542	DefaultValue: Replacement.getDefaultValue(), Values: ResultList);
7543	}
7544	}
7545
7546	PHI->addIncoming(V: Result, BB: LookupBB);
7547	}
7548
7549	Builder.CreateBr(Dest: CommonDest);
7550	if (DTU)
7551	Updates.push_back(x: {DominatorTree::Insert, LookupBB, CommonDest});
7552
7553	SmallVector<uint32_t> BranchWeights;
7554	const bool HasBranchWeights = CondBranch && !ProfcheckDisableMetadataFixes &&
7555	extractBranchWeights(I: *SI, Weights&: BranchWeights);
7556	uint64_t ToLookupWeight = `0`;
7557	uint64_t ToDefaultWeight = `0`;
7558
7559	// Remove the switch.
7560	SmallPtrSet<BasicBlock *, `8`> RemovedSuccessors;
7561	for (unsigned I = `0`, E = SI->getNumSuccessors(); I < E; ++I) {
7562	BasicBlock *Succ = SI->getSuccessor(idx: I);
7563
7564	if (Succ == SI->getDefaultDest()) {
7565	if (HasBranchWeights)
7566	ToDefaultWeight += BranchWeights [I];
7567	continue;
7568	}
7569	Succ->removePredecessor(Pred: BB);
7570	if (DTU && RemovedSuccessors.insert(Ptr: Succ).second)
7571	Updates.push_back(x: {DominatorTree::Delete, BB, Succ});
7572	if (HasBranchWeights)
7573	ToLookupWeight += BranchWeights [I];
7574	}
7575	SI->eraseFromParent();
7576	if (HasBranchWeights)
7577	setFittedBranchWeights(I&: *CondBranch, Weights: {ToLookupWeight, ToDefaultWeight},
7578	/IsExpected=/false);
7579	if (DTU)
7580	DTU->applyUpdates(Updates);
7581
7582	if (NeedMask)
7583	++NumLookupTablesHoles;
7584	return true;
7585	}
7586
7587	/// Try to transform a switch that has "holes" in it to a contiguous sequence
7588	/// of cases.
7589	///
7590	/// A switch such as: switch(i) {case 5: case 9: case 13: case 17:} can be
7591	/// range-reduced to: switch ((i-5) / 4) {case 0: case 1: case 2: case 3:}.
7592	///
7593	/// This converts a sparse switch into a dense switch which allows better
7594	/// lowering and could also allow transforming into a lookup table.
7595	static bool reduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder,
7596	const DataLayout &DL,
7597	const TargetTransformInfo &TTI) {
7598	auto *CondTy = cast<IntegerType>(Val: SI->getCondition()->getType());
7599	if (CondTy->getIntegerBitWidth() > `64` \|\|
7600	!DL.fitsInLegalInteger(Width: CondTy->getIntegerBitWidth()))
7601	return false;
7602	// Only bother with this optimization if there are more than 3 switch cases;
7603	// SDAG will only bother creating jump tables for 4 or more cases.
7604	if (SI->getNumCases() < `4`)
7605	return false;
7606
7607	// This transform is agnostic to the signedness of the input or case values. We
7608	// can treat the case values as signed or unsigned. We can optimize more common
7609	// cases such as a sequence crossing zero {-4,0,4,8} if we interpret case values
7610	// as signed.
7611	SmallVector<int64_t,`4`> Values;
7612	for (const auto &C : SI->cases())
7613	Values.push_back(Elt: C.getCaseValue()->getValue().getSExtValue());
7614	llvm::sort(C&: Values);
7615
7616	// If the switch is already dense, there's nothing useful to do here.
7617	if (isSwitchDense(Values))
7618	return false;
7619
7620	// First, transform the values such that they start at zero and ascend.
7621	int64_t Base = Values [`0`];
7622	for (auto &V : Values)
7623	V -= (uint64_t)(Base);
7624
7625	// Now we have signed numbers that have been shifted so that, given enough
7626	// precision, there are no negative values. Since the rest of the transform
7627	// is bitwise only, we switch now to an unsigned representation.
7628
7629	// This transform can be done speculatively because it is so cheap - it
7630	// results in a single rotate operation being inserted.
7631
7632	// countTrailingZeros(0) returns 64. As Values is guaranteed to have more than
7633	// one element and LLVM disallows duplicate cases, Shift is guaranteed to be
7634	// less than 64.
7635	unsigned Shift = `64`;
7636	for (auto &V : Values)
7637	Shift = std::min(a: Shift, b: (unsigned)llvm::countr_zero(Val: (uint64_t)V));
7638	assert(Shift < `64`);
7639	if (Shift > `0`)
7640	for (auto &V : Values)
7641	V = (int64_t)((uint64_t)V >> Shift);
7642
7643	if (!isSwitchDense(Values))
7644	// Transform didn't create a dense switch.
7645	return false;
7646
7647	// The obvious transform is to shift the switch condition right and emit a
7648	// check that the condition actually cleanly divided by GCD, i.e.
7649	// C & (1 << Shift - 1) == 0
7650	// inserting a new CFG edge to handle the case where it didn't divide cleanly.
7651	//
7652	// A cheaper way of doing this is a simple ROTR(C, Shift). This performs the
7653	// shift and puts the shifted-off bits in the uppermost bits. If any of these
7654	// are nonzero then the switch condition will be very large and will hit the
7655	// default case.
7656
7657	auto *Ty = cast<IntegerType>(Val: SI->getCondition()->getType());
7658	Builder.SetInsertPoint(SI);
7659	Value *Sub =
7660	Builder.CreateSub(LHS: SI->getCondition(), RHS: ConstantInt::getSigned(Ty, V: Base));
7661	Value *Rot = Builder.CreateIntrinsic(
7662	RetTy: Ty, ID: Intrinsic::fshl,
7663	Args: {Sub, Sub, ConstantInt::get(Ty, V: Ty->getBitWidth() - Shift)});
7664	SI->replaceUsesOfWith(From: SI->getCondition(), To: Rot);
7665
7666	for (auto Case : SI->cases()) {
7667	auto *Orig = Case.getCaseValue();
7668	auto Sub = Orig->getValue() - APInt (Ty->getBitWidth(), Base, true);
7669	Case.setValue(cast<ConstantInt>(Val: ConstantInt::get(Ty, V: Sub.lshr(shiftAmt: Shift))));
7670	}
7671	return true;
7672	}
7673
7674	/// Tries to transform the switch when the condition is umin with a constant.
7675	/// In that case, the default branch can be replaced by the constant's branch.
7676	/// This method also removes dead cases when the simplification cannot replace
7677	/// the default branch.
7678	///
7679	/// For example:
7680	/// switch(umin(a, 3)) {
7681	/// case 0:
7682	/// case 1:
7683	/// case 2:
7684	/// case 3:
7685	/// case 4:
7686	/// // ...
7687	/// default:
7688	/// unreachable
7689	/// }
7690	///
7691	/// Transforms into:
7692	///
7693	/// switch(a) {
7694	/// case 0:
7695	/// case 1:
7696	/// case 2:
7697	/// default:
7698	/// // This is case 3
7699	/// }
7700	static bool simplifySwitchWhenUMin(SwitchInst SI, DomTreeUpdater DTU) {
7701	Value *A;
7702	ConstantInt *Constant;
7703
7704	if (!match(V: SI->getCondition(), P: m_UMin(L: m_Value(V&: A), R: m_ConstantInt(CI&: Constant))))
7705	return false;
7706
7707	SmallVector<DominatorTree::UpdateType> Updates;
7708	SwitchInstProfUpdateWrapper SIW(*SI);
7709	BasicBlock *BB = SIW ->getParent();
7710
7711	// Dead cases are removed even when the simplification fails.
7712	// A case is dead when its value is higher than the Constant.
7713	for (auto I = SI->case_begin(), E = SI->case_end(); I != E;) {
7714	if (!I ->getCaseValue()->getValue().ugt(RHS: Constant->getValue())) {
7715	++I;
7716	continue;
7717	}
7718	BasicBlock *DeadCaseBB = I ->getCaseSuccessor();
7719	DeadCaseBB->removePredecessor(Pred: BB);
7720	Updates.push_back(Elt: {DominatorTree::Delete, BB, DeadCaseBB});
7721	I = SIW.removeCase(I);
7722	E = SIW ->case_end();
7723	}
7724
7725	auto Case = SI->findCaseValue(C: Constant);
7726	// If the case value is not found, `findCaseValue` returns the default case.
7727	// In this scenario, since there is no explicit `case 3:`, the simplification
7728	// fails. The simplification also fails when the switch’s default destination
7729	// is reachable.
7730	if (!SI->defaultDestUnreachable() \|\| Case == SI->case_default()) {
7731	if (DTU)
7732	DTU->applyUpdates(Updates);
7733	return !Updates.empty();
7734	}
7735
7736	BasicBlock *Unreachable = SI->getDefaultDest();
7737	SIW.replaceDefaultDest(I: Case);
7738	SIW.removeCase(I: Case);
7739	SIW ->setCondition(A);
7740
7741	Updates.push_back(Elt: {DominatorTree::Delete, BB, Unreachable});
7742
7743	if (DTU)
7744	DTU->applyUpdates(Updates);
7745
7746	return true;
7747	}
7748
7749	/// Tries to transform switch of powers of two to reduce switch range.
7750	/// For example, switch like:
7751	/// switch (C) { case 1: case 2: case 64: case 128: }
7752	/// will be transformed to:
7753	/// switch (count_trailing_zeros(C)) { case 0: case 1: case 6: case 7: }
7754	///
7755	/// This transformation allows better lowering and may transform the switch
7756	/// instruction into a sequence of bit manipulation and a smaller
7757	/// log2(C)-indexed value table (instead of traditionally emitting a load of the
7758	/// address of the jump target, and indirectly jump to it).
7759	static bool simplifySwitchOfPowersOfTwo(SwitchInst *SI, IRBuilder<> &Builder,
7760	DomTreeUpdater *DTU,
7761	const DataLayout &DL,
7762	const TargetTransformInfo &TTI) {
7763	Value *Condition = SI->getCondition();
7764	LLVMContext &Context = SI->getContext();
7765	auto *CondTy = cast<IntegerType>(Val: Condition->getType());
7766
7767	if (CondTy->getIntegerBitWidth() > `64` \|\|
7768	!DL.fitsInLegalInteger(Width: CondTy->getIntegerBitWidth()))
7769	return false;
7770
7771	// Ensure trailing zeroes count intrinsic emission is not too expensive.
7772	IntrinsicCostAttributes Attrs(Intrinsic::cttz, CondTy,
7773	{Condition, ConstantInt::getTrue(Context)});
7774	if (TTI.getIntrinsicInstrCost(ICA: Attrs, CostKind: TTI::TCK_SizeAndLatency) >
7775	TTI::TCC_Basic * `2`)
7776	return false;
7777
7778	// Only bother with this optimization if there are more than 3 switch cases.
7779	// SDAG will start emitting jump tables for 4 or more cases.
7780	if (SI->getNumCases() < `4`)
7781	return false;
7782
7783	// Check that switch cases are powers of two.
7784	SmallVector<uint64_t, `4`> Values;
7785	for (const auto &Case : SI->cases()) {
7786	uint64_t CaseValue = Case.getCaseValue()->getValue().getZExtValue();
7787	if (llvm::has_single_bit(Value: CaseValue))
7788	Values.push_back(Elt: CaseValue);
7789	else
7790	return false;
7791	}
7792
7793	// isSwitchDense requires case values to be sorted.
7794	llvm::sort(C&: Values);
7795	if (!isSwitchDense(NumCases: Values.size(), CaseRange: llvm::countr_zero(Val: Values.back()) -
7796	llvm::countr_zero(Val: Values.front()) + `1`))
7797	// Transform is unable to generate dense switch.
7798	return false;
7799
7800	Builder.SetInsertPoint(SI);
7801
7802	if (!SI->defaultDestUnreachable()) {
7803	// Let non-power-of-two inputs jump to the default case, when the latter is
7804	// reachable.
7805	auto *PopC = Builder.CreateUnaryIntrinsic(ID: Intrinsic::ctpop, V: Condition);
7806	auto *IsPow2 = Builder.CreateICmpEQ(LHS: PopC, RHS: ConstantInt::get(Ty: CondTy, V: `1`));
7807
7808	auto *OrigBB = SI->getParent();
7809	auto *DefaultCaseBB = SI->getDefaultDest();
7810	BasicBlock *SplitBB = SplitBlock(Old: OrigBB, SplitPt: SI, DTU);
7811	auto It = OrigBB->getTerminator()->getIterator();
7812	SmallVector<uint32_t> Weights;
7813	auto HasWeights =
7814	!ProfcheckDisableMetadataFixes && extractBranchWeights(I: *SI, Weights);
7815	auto *BI = CondBrInst::Create(Cond: IsPow2, IfTrue: SplitBB, IfFalse: DefaultCaseBB, InsertBefore: It);
7816	if (HasWeights && any_of(Range&: Weights, P: not_equal_to(Arg: `0`))) {
7817	// IsPow2 covers a subset of the cases in which we'd go to the default
7818	// label. The other is those powers of 2 that don't appear in the case
7819	// statement. We don't know the distribution of the values coming in, so
7820	// the safest is to split 50-50 the original probability to `default`.
7821	uint64_t OrigDenominator =
7822	sum_of(Range: map_range(C&: Weights, F: StaticCastTo<uint64_t>));
7823	SmallVector<uint64_t> NewWeights(`2`);
7824	NewWeights [`1`] = Weights [`0`] / `2`;
7825	NewWeights [`0`] = OrigDenominator - NewWeights [`1`];
7826	setFittedBranchWeights(I&: BI, Weights: NewWeights, /IsExpected=/*false);
7827	// The probability of executing the default block stays constant. It was
7828	// p_d = Weights[0] / OrigDenominator
7829	// we rewrite as W/D
7830	// We want to find the probability of the default branch of the switch
7831	// statement. Let's call it X. We have W/D = W/2D + X (1-W/2D)*
7832	// i.e. the original probability is the probability we go to the default
7833	// branch from the BI branch, or we take the default branch on the SI.
7834	// Meaning X = W / (2D - W), or (W/2) / (D - W/2)
7835	// This matches using W/2 for the default branch probability numerator and
7836	// D-W/2 as the denominator.
7837	Weights [`0`] = NewWeights [`1`];
7838	uint64_t CasesDenominator = OrigDenominator - Weights [`0`];
7839	for (auto &W : drop_begin(RangeOrContainer&: Weights))
7840	W = NewWeights [`0`] * static_cast<double>(W) / CasesDenominator;
7841
7842	setBranchWeights(I&: SI, Weights, /IsExpected=/*false);
7843	}
7844	// BI is handling the default case for SI, and so should share its DebugLoc.
7845	BI->setDebugLoc(SI->getDebugLoc());
7846	It ->eraseFromParent();
7847
7848	addPredecessorToBlock(Succ: DefaultCaseBB, NewPred: OrigBB, ExistPred: SplitBB);
7849	if (DTU)
7850	DTU->applyUpdates(Updates: {{DominatorTree::Insert, OrigBB, DefaultCaseBB}});
7851	}
7852
7853	// Replace each case with its trailing zeros number.
7854	for (auto &Case : SI->cases()) {
7855	auto *OrigValue = Case.getCaseValue();
7856	Case.setValue(ConstantInt::get(Ty: OrigValue->getIntegerType(),
7857	V: OrigValue->getValue().countr_zero()));
7858	}
7859
7860	// Replace condition with its trailing zeros number.
7861	auto *ConditionTrailingZeros = Builder.CreateIntrinsic(
7862	ID: Intrinsic::cttz, Types: {CondTy}, Args: {Condition, ConstantInt::getTrue(Context)});
7863
7864	SI->setCondition(ConditionTrailingZeros);
7865
7866	return true;
7867	}
7868
7869	/// Fold switch over ucmp/scmp intrinsic to br if two of the switch arms have
7870	/// the same destination.
7871	static bool simplifySwitchOfCmpIntrinsic(SwitchInst *SI, IRBuilderBase &Builder,
7872	DomTreeUpdater *DTU) {
7873	auto *Cmp = dyn_cast<CmpIntrinsic>(Val: SI->getCondition());
7874	if (!Cmp \|\| !Cmp->hasOneUse())
7875	return false;
7876
7877	SmallVector<uint32_t, `4`> Weights;
7878	bool HasWeights = extractBranchWeights(ProfileData: getBranchWeightMDNode(I: *SI), Weights);
7879	if (!HasWeights)
7880	Weights.resize(N: `4`); // Avoid checking HasWeights everywhere.
7881
7882	// Normalize to [us]cmp == Res ? Succ : OtherSucc.
7883	int64_t Res;
7884	BasicBlock Succ, OtherSucc;
7885	uint32_t SuccWeight = `0`, OtherSuccWeight = `0`;
7886	BasicBlock Unreachable = nullptr*;
7887
7888	if (SI->getNumCases() == `2`) {
7889	// Find which of 1, 0 or -1 is missing (handled by default dest).
7890	SmallSet<int64_t, `3`> Missing;
7891	Missing.insert(V: `1`);
7892	Missing.insert(V: `0`);
7893	Missing.insert(V: -`1`);
7894
7895	Succ = SI->getDefaultDest();
7896	SuccWeight = Weights [`0`];
7897	OtherSucc = nullptr;
7898	for (auto &Case : SI->cases()) {
7899	std::optional<int64_t> Val =
7900	Case.getCaseValue()->getValue().trySExtValue();
7901	if (!Val)
7902	return false;
7903	if (!Missing.erase(V: *Val))
7904	return false;
7905	if (OtherSucc && OtherSucc != Case.getCaseSuccessor())
7906	return false;
7907	OtherSucc = Case.getCaseSuccessor();
7908	OtherSuccWeight += Weights [Case.getSuccessorIndex()];
7909	}
7910
7911	assert(Missing.size() == `1` && "Should have one case left");
7912	Res = *Missing.begin();
7913	} else if (SI->getNumCases() == `3` && SI->defaultDestUnreachable()) {
7914	// Normalize so that Succ is taken once and OtherSucc twice.
7915	Unreachable = SI->getDefaultDest();
7916	Succ = OtherSucc = nullptr;
7917	for (auto &Case : SI->cases()) {
7918	BasicBlock *NewSucc = Case.getCaseSuccessor();
7919	uint32_t Weight = Weights [Case.getSuccessorIndex()];
7920	if (!OtherSucc \|\| OtherSucc == NewSucc) {
7921	OtherSucc = NewSucc;
7922	OtherSuccWeight += Weight;
7923	} else if (!Succ) {
7924	Succ = NewSucc;
7925	SuccWeight = Weight;
7926	} else if (Succ == NewSucc) {
7927	std::swap(a&: Succ, b&: OtherSucc);
7928	std::swap(a&: SuccWeight, b&: OtherSuccWeight);
7929	} else
7930	return false;
7931	}
7932	for (auto &Case : SI->cases()) {
7933	std::optional<int64_t> Val =
7934	Case.getCaseValue()->getValue().trySExtValue();
7935	if (!Val \|\| (Val != `1` && Val != `0` && Val != -`1`))
7936	return false;
7937	if (Case.getCaseSuccessor() == Succ) {
7938	Res = *Val;
7939	break;
7940	}
7941	}
7942	} else {
7943	return false;
7944	}
7945
7946	// Determine predicate for the missing case.
7947	ICmpInst::Predicate Pred;
7948	switch (Res) {
7949	case `1`:
7950	Pred = ICmpInst::ICMP_UGT;
7951	break;
7952	case `0`:
7953	Pred = ICmpInst::ICMP_EQ;
7954	break;
7955	case -`1`:
7956	Pred = ICmpInst::ICMP_ULT;
7957	break;
7958	}
7959	if (Cmp->isSigned())
7960	Pred = ICmpInst::getSignedPredicate(Pred);
7961
7962	MDNode NewWeights = nullptr*;
7963	if (HasWeights)
7964	NewWeights = MDBuilder (SI->getContext())
7965	.createBranchWeights(TrueWeight: SuccWeight, FalseWeight: OtherSuccWeight);
7966
7967	BasicBlock *BB = SI->getParent();
7968	Builder.SetInsertPoint(SI->getIterator());
7969	Value *ICmp = Builder.CreateICmp(P: Pred, LHS: Cmp->getLHS(), RHS: Cmp->getRHS());
7970	Builder.CreateCondBr(Cond: ICmp, True: Succ, False: OtherSucc, BranchWeights: NewWeights,
7971	Unpredictable: SI->getMetadata(KindID: LLVMContext::MD_unpredictable));
7972	OtherSucc->removePredecessor(Pred: BB);
7973	if (Unreachable)
7974	Unreachable->removePredecessor(Pred: BB);
7975	SI->eraseFromParent();
7976	Cmp->eraseFromParent();
7977	if (DTU && Unreachable)
7978	DTU->applyUpdates(Updates: {{DominatorTree::Delete, BB, Unreachable}});
7979	return true;
7980	}
7981
7982	/// Checking whether two BBs are equal depends on the contents of the
7983	/// BasicBlock and the incoming values of their successor PHINodes.
7984	/// PHINode::getIncomingValueForBlock is O(\|Preds\|), so we'd like to avoid
7985	/// calling this function on each BasicBlock every time isEqual is called,
7986	/// especially since the same BasicBlock may be passed as an argument multiple
7987	/// times. To do this, we can precompute a map of PHINode -> Pred BasicBlock ->
7988	/// IncomingValue and add it in the Wrapper so isEqual can do O(1) checking
7989	/// of the incoming values.
7990	struct EqualBBWrapper {
7991	BasicBlock *BB;
7992
7993	// One Phi usually has < 8 incoming values.
7994	using BB2ValueMap = SmallDenseMap<BasicBlock , Value , `8`>;
7995	using Phi2IVsMap = DenseMap<PHINode *, BB2ValueMap>;
7996	Phi2IVsMap *PhiPredIVs;
7997
7998	// We only merge the identical non-entry BBs with
7999	// - terminator unconditional br to Succ (pending relaxation),
8000	// - does not have address taken / weird control.
8001	static bool canBeMerged(const BasicBlock *BB) {
8002	assert(BB && "Expected non-null BB");
8003	// Entry block cannot be eliminated or have predecessors.
8004	if (BB->isEntryBlock())
8005	return false;
8006
8007	// Single successor and must be Succ.
8008	// FIXME: Relax that the terminator is a BranchInst by checking for equality
8009	// on other kinds of terminators. We decide to only support unconditional
8010	// branches for now for compile time reasons.
8011	auto *BI = dyn_cast<UncondBrInst>(Val: BB->getTerminator());
8012	if (!BI)
8013	return false;
8014
8015	// Avoid blocks that are "address-taken" (blockaddress) or have unusual
8016	// uses.
8017	if (BB->hasAddressTaken() \|\| BB->isEHPad())
8018	return false;
8019
8020	// TODO: relax this condition to merge equal blocks with >1 instructions?
8021	// Here, we use a O(1) form of the O(n) comparison of `size() != 1`.
8022	if (&BB->front() != &BB->back())
8023	return false;
8024
8025	// The BB must have at least one predecessor.
8026	if (pred_empty(BB))
8027	return false;
8028
8029	return true;
8030	}
8031	};
8032
8033	template <> struct llvm::DenseMapInfo<const EqualBBWrapper *> {
8034	static const EqualBBWrapper *getEmptyKey() {
8035	return static_cast<EqualBBWrapper >(DenseMapInfo<void* *>::getEmptyKey());
8036	}
8037	static const EqualBBWrapper *getTombstoneKey() {
8038	return static_cast<EqualBBWrapper *>(
8039	DenseMapInfo<void *>::getTombstoneKey());
8040	}
8041	static unsigned getHashValue(const EqualBBWrapper *EBW) {
8042	BasicBlock *BB = EBW->BB;
8043	UncondBrInst *BI = cast<UncondBrInst>(Val: BB->getTerminator());
8044	assert(BB->size() == `1` && "Expected just a single branch in the BB");
8045
8046	// Since we assume the BB is just a single UncondBrInst with a single
8047	// successor, we hash as the BB and the incoming Values of its successor
8048	// PHIs. Initially, we tried to just use the successor BB as the hash, but
8049	// including the incoming PHI values leads to better performance.
8050	// We also tried to build a map from BB -> Succs.IncomingValues ahead of
8051	// time and passing it in EqualBBWrapper, but this slowed down the average
8052	// compile time without having any impact on the worst case compile time.
8053	BasicBlock *Succ = BI->getSuccessor();
8054	auto PhiValsForBB = map_range(C: Succ->phis(), F: [&](PHINode &Phi) {
8055	return (*EBW->PhiPredIVs)[&Phi][BB];
8056	});
8057	return hash_combine(args: Succ, args: hash_combine_range(R&: PhiValsForBB));
8058	}
8059	static bool isEqual(const EqualBBWrapper LHS, const* EqualBBWrapper *RHS) {
8060	auto EKey = DenseMapInfo<EqualBBWrapper >::getEmptyKey();
8061	auto TKey = DenseMapInfo<EqualBBWrapper >::getTombstoneKey();
8062	if (LHS == EKey \|\| RHS == EKey \|\| LHS == TKey \|\| RHS == TKey)
8063	return LHS == RHS;
8064
8065	BasicBlock *A = LHS->BB;
8066	BasicBlock *B = RHS->BB;
8067
8068	// FIXME: we checked that the size of A and B are both 1 in
8069	// mergeIdenticalUncondBBs to make the Case list smaller to
8070	// improve performance. If we decide to support BasicBlocks with more
8071	// than just a single instruction, we need to check that A.size() ==
8072	// B.size() here, and we need to check more than just the BranchInsts
8073	// for equality.
8074
8075	UncondBrInst *ABI = cast<UncondBrInst>(Val: A->getTerminator());
8076	UncondBrInst *BBI = cast<UncondBrInst>(Val: B->getTerminator());
8077	if (ABI->getSuccessor() != BBI->getSuccessor())
8078	return false;
8079
8080	// Need to check that PHIs in successor have matching values.
8081	BasicBlock *Succ = ABI->getSuccessor();
8082	auto IfPhiIVMatch = [&](PHINode &Phi) {
8083	// Replace O(\|Pred\|) Phi.getIncomingValueForBlock with this O(1) hashmap
8084	// query.
8085	auto &PredIVs = (*LHS->PhiPredIVs)[&Phi];
8086	return PredIVs [A] == PredIVs [B];
8087	};
8088	return all_of(Range: Succ->phis(), P: IfPhiIVMatch);
8089	}
8090	};
8091
8092	// Merge identical BBs into one of them.
8093	static bool mergeIdenticalBBs(ArrayRef<BasicBlock *> Candidates,
8094	DomTreeUpdater *DTU) {
8095	if (Candidates.size() < `2`)
8096	return false;
8097
8098	// Build Cases. Skip BBs that are not candidates for simplification. Mark
8099	// PHINodes which need to be processed into PhiPredIVs. We decide to process
8100	// an entire PHI at once after the loop, opposed to calling
8101	// getIncomingValueForBlock inside this loop, since each call to
8102	// getIncomingValueForBlock is O(\|Preds\|).
8103	EqualBBWrapper::Phi2IVsMap PhiPredIVs;
8104	SmallVector<EqualBBWrapper> BBs2Merge;
8105	BBs2Merge.reserve(N: Candidates.size());
8106	SmallSetVector<PHINode *, `8`> Phis;
8107
8108	for (BasicBlock *BB : Candidates) {
8109	BasicBlock *Succ = BB->getSingleSuccessor();
8110	assert(Succ && "Expected unconditional BB");
8111	BBs2Merge.emplace_back(Args: EqualBBWrapper{.BB: BB, .PhiPredIVs: &PhiPredIVs});
8112	Phis.insert_range(R: make_pointer_range(Range: Succ->phis()));
8113	}
8114
8115	// Precompute a data structure to improve performance of isEqual for
8116	// EqualBBWrapper.
8117	PhiPredIVs.reserve(NumEntries: Phis.size());
8118	for (PHINode *Phi : Phis) {
8119	auto &IVs =
8120	PhiPredIVs.try_emplace(Key: Phi, Args: Phi->getNumIncomingValues()).first ->second;
8121	// Pre-fill all incoming for O(1) lookup as Phi.getIncomingValueForBlock is
8122	// O(\|Pred\|).
8123	for (auto &IV : Phi->incoming_values())
8124	IVs.insert(KV: {Phi->getIncomingBlock(U: IV), IV.get()});
8125	}
8126
8127	// Group duplicates using DenseSet with custom equality/hashing.
8128	// Build a set such that if the EqualBBWrapper exists in the set and another
8129	// EqualBBWrapper isEqual, then the equivalent EqualBBWrapper which is not in
8130	// the set should be replaced with the one in the set. If the EqualBBWrapper
8131	// is not in the set, then it should be added to the set so other
8132	// EqualBBWrapper can check against it in the same manner. We use
8133	// EqualBBWrapper instead of just BasicBlock because we'd like to pass around
8134	// information to isEquality, getHashValue, and when doing the replacement
8135	// with better performance.
8136	DenseSet<const EqualBBWrapper *> Keep;
8137	Keep.reserve(Size: BBs2Merge.size());
8138
8139	SmallVector<DominatorTree::UpdateType> Updates;
8140	Updates.reserve(N: BBs2Merge.size() * `2`);
8141
8142	bool MadeChange = false;
8143
8144	// Helper: redirect all edges X -> DeadPred to X -> LivePred.
8145	auto RedirectIncomingEdges = [&](BasicBlock Dead, BasicBlock Live) {
8146	SmallSetVector<BasicBlock *, `8`> DeadPreds(llvm::from_range,
8147	predecessors(BB: Dead));
8148	if (DTU) {
8149	// All predecessors of DeadPred (except the common predecessor) will be
8150	// moved to LivePred.
8151	Updates.reserve(N: Updates.size() + DeadPreds.size() * `2`);
8152	SmallPtrSet<BasicBlock *, `16`> LivePreds(llvm::from_range,
8153	predecessors(BB: Live));
8154	for (BasicBlock *PredOfDead : DeadPreds) {
8155	// Do not modify those common predecessors of DeadPred and LivePred.
8156	if (!LivePreds.contains(Ptr: PredOfDead))
8157	Updates.push_back(Elt: {DominatorTree::Insert, PredOfDead, Live});
8158	Updates.push_back(Elt: {DominatorTree::Delete, PredOfDead, Dead});
8159	}
8160	}
8161	LLVM_DEBUG(dbgs() << "Replacing duplicate pred BB ";
8162	Dead->printAsOperand(dbgs()); dbgs() << " with pred ";
8163	Live->printAsOperand(dbgs()); dbgs() << " for ";
8164	Live->getSingleSuccessor()->printAsOperand(dbgs());
8165	dbgs() << "\n");
8166	// Replace successors in all predecessors of DeadPred.
8167	for (BasicBlock *PredOfDead : DeadPreds) {
8168	Instruction *T = PredOfDead->getTerminator();
8169	T->replaceSuccessorWith(OldBB: Dead, NewBB: Live);
8170	}
8171	};
8172
8173	// Try to eliminate duplicate predecessors.
8174	for (const auto &EBW : BBs2Merge) {
8175	// EBW is a candidate for simplification. If we find a duplicate BB,
8176	// replace it.
8177	const auto &[It, Inserted] = Keep.insert(V: &EBW);
8178	if (Inserted)
8179	continue;
8180
8181	// Found duplicate: merge P into canonical predecessor It->Pred.
8182	BasicBlock KeepBB = (It)->BB;
8183	BasicBlock *DeadBB = EBW.BB;
8184
8185	// Avoid merging a BB with itself.
8186	if (KeepBB == DeadBB)
8187	continue;
8188
8189	// Redirect all edges into DeadPred to KeepPred.
8190	RedirectIncomingEdges (DeadBB, KeepBB);
8191
8192	// Now DeadBB should become unreachable; leave DCE to later,
8193	// but we can try to simplify it if it only branches to Succ.
8194	// (We won't erase here to keep the routine simple and DT-safe.)
8195	assert(pred_empty(DeadBB) && "DeadBB should be unreachable.");
8196	MadeChange = true;
8197	}
8198
8199	if (DTU && !Updates.empty())
8200	DTU->applyUpdates(Updates);
8201
8202	return MadeChange;
8203	}
8204
8205	bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI,
8206	DomTreeUpdater *DTU) {
8207	// Collect candidate switch-arms top-down.
8208	SmallSetVector<BasicBlock *, `16`> FilteredArms(
8209	llvm::from_range,
8210	make_filter_range(Range: successors(I: SI), Pred: EqualBBWrapper::canBeMerged));
8211	return mergeIdenticalBBs(Candidates: FilteredArms.getArrayRef(), DTU);
8212	}
8213
8214	bool SimplifyCFGOpt::simplifyDuplicatePredecessors(BasicBlock *BB,
8215	DomTreeUpdater *DTU) {
8216	// Need at least 2 predecessors to do anything.
8217	if (!BB \|\| !BB->hasNPredecessorsOrMore(N: `2`))
8218	return false;
8219
8220	// Compilation time consideration: retain the canonical loop, otherwise, we
8221	// require more time in the later loop canonicalization.
8222	if (Options.NeedCanonicalLoop && is_contained(Range&: LoopHeaders, Element: BB))
8223	return false;
8224
8225	// Collect candidate predecessors bottom-up.
8226	SmallSetVector<BasicBlock *, `8`> FilteredPreds(
8227	llvm::from_range,
8228	make_filter_range(Range: predecessors(BB), Pred: EqualBBWrapper::canBeMerged));
8229	return mergeIdenticalBBs(Candidates: FilteredPreds.getArrayRef(), DTU);
8230	}
8231
8232	bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
8233	BasicBlock *BB = SI->getParent();
8234
8235	if (isValueEqualityComparison(TI: SI)) {
8236	// If we only have one predecessor, and if it is a branch on this value,
8237	// see if that predecessor totally determines the outcome of this switch.
8238	if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
8239	if (simplifyEqualityComparisonWithOnlyPredecessor(TI: SI, Pred: OnlyPred, Builder))
8240	return requestResimplify();
8241
8242	Value *Cond = SI->getCondition();
8243	if (SelectInst *Select = dyn_cast<SelectInst>(Val: Cond))
8244	if (simplifySwitchOnSelect(SI, Select))
8245	return requestResimplify();
8246
8247	// If the block only contains the switch, see if we can fold the block
8248	// away into any preds.
8249	if (SI == &BB->instructionsWithoutDebug(SkipPseudoOp: false*).begin())
8250	if (foldValueComparisonIntoPredecessors(TI: SI, Builder))
8251	return requestResimplify();
8252	}
8253
8254	// Try to transform the switch into an icmp and a branch.
8255	// The conversion from switch to comparison may lose information on
8256	// impossible switch values, so disable it early in the pipeline.
8257	if (Options.ConvertSwitchRangeToICmp && turnSwitchRangeIntoICmp(SI, Builder))
8258	return requestResimplify();
8259
8260	// Remove unreachable cases.
8261	if (eliminateDeadSwitchCases(SI, DTU, AC: Options.AC, DL))
8262	return requestResimplify();
8263
8264	if (simplifySwitchOfCmpIntrinsic(SI, Builder, DTU))
8265	return requestResimplify();
8266
8267	if (trySwitchToSelect(SI, Builder, DTU, DL, TTI))
8268	return requestResimplify();
8269
8270	if (Options.ForwardSwitchCondToPhi && forwardSwitchConditionToPHI(SI))
8271	return requestResimplify();
8272
8273	// The conversion of switches to arithmetic or lookup table is disabled in
8274	// the early optimization pipeline, as it may lose information or make the
8275	// resulting code harder to analyze.
8276	if (Options.ConvertSwitchToArithmetic \|\| Options.ConvertSwitchToLookupTable)
8277	if (simplifySwitchLookup(SI, Builder, DTU, DL, TTI,
8278	ConvertSwitchToLookupTable: Options.ConvertSwitchToLookupTable))
8279	return requestResimplify();
8280
8281	if (simplifySwitchOfPowersOfTwo(SI, Builder, DTU, DL, TTI))
8282	return requestResimplify();
8283
8284	if (reduceSwitchRange(SI, Builder, DL, TTI))
8285	return requestResimplify();
8286
8287	if (HoistCommon &&
8288	hoistCommonCodeFromSuccessors(TI: SI, AllInstsEqOnly: !Options.HoistCommonInsts))
8289	return requestResimplify();
8290
8291	// We can merge identical switch arms early to enhance more aggressive
8292	// optimization on switch.
8293	if (simplifyDuplicateSwitchArms(SI, DTU))
8294	return requestResimplify();
8295
8296	if (simplifySwitchWhenUMin(SI, DTU))
8297	return requestResimplify();
8298
8299	return false;
8300	}
8301
8302	bool SimplifyCFGOpt::simplifyIndirectBr(IndirectBrInst *IBI) {
8303	BasicBlock *BB = IBI->getParent();
8304	bool Changed = false;
8305	SmallVector<uint32_t> BranchWeights;
8306	const bool HasBranchWeights = !ProfcheckDisableMetadataFixes &&
8307	extractBranchWeights(I: *IBI, Weights&: BranchWeights);
8308
8309	DenseMap<const BasicBlock *, uint64_t> TargetWeight;
8310	if (HasBranchWeights)
8311	for (size_t I = `0`, E = IBI->getNumDestinations(); I < E; ++I)
8312	TargetWeight [IBI->getDestination(i: I)] += BranchWeights [I];
8313
8314	// Eliminate redundant destinations.
8315	SmallPtrSet<Value *, `8`> Succs;
8316	SmallSetVector<BasicBlock *, `8`> RemovedSuccs;
8317	for (unsigned I = `0`, E = IBI->getNumDestinations(); I != E; ++I) {
8318	BasicBlock *Dest = IBI->getDestination(i: I);
8319	if (!Dest->hasAddressTaken() \|\| !Succs.insert(Ptr: Dest).second) {
8320	if (!Dest->hasAddressTaken())
8321	RemovedSuccs.insert(X: Dest);
8322	Dest->removePredecessor(Pred: BB);
8323	IBI->removeDestination(i: I);
8324	--I;
8325	--E;
8326	Changed = true;
8327	}
8328	}
8329
8330	if (DTU) {
8331	std::vector<DominatorTree::UpdateType> Updates;
8332	Updates.reserve(n: RemovedSuccs.size());
8333	for (auto *RemovedSucc : RemovedSuccs)
8334	Updates.push_back(x: {DominatorTree::Delete, BB, RemovedSucc});
8335	DTU->applyUpdates(Updates);
8336	}
8337
8338	if (IBI->getNumDestinations() == `0`) {
8339	// If the indirectbr has no successors, change it to unreachable.
8340	new UnreachableInst (IBI->getContext(), IBI->getIterator());
8341	eraseTerminatorAndDCECond(TI: IBI);
8342	return true;
8343	}
8344
8345	if (IBI->getNumDestinations() == `1`) {
8346	// If the indirectbr has one successor, change it to a direct branch.
8347	UncondBrInst::Create(IfTrue: IBI->getDestination(i: `0`), InsertBefore: IBI->getIterator());
8348	eraseTerminatorAndDCECond(TI: IBI);
8349	return true;
8350	}
8351	if (HasBranchWeights) {
8352	SmallVector<uint64_t> NewBranchWeights(IBI->getNumDestinations());
8353	for (size_t I = `0`, E = IBI->getNumDestinations(); I < E; ++I)
8354	NewBranchWeights [I] += TargetWeight.find(Val: IBI->getDestination(i: I))->second;
8355	setFittedBranchWeights(I&: IBI, Weights: NewBranchWeights, /IsExpected=/*false);
8356	}
8357	if (SelectInst *SI = dyn_cast<SelectInst>(Val: IBI->getAddress())) {
8358	if (simplifyIndirectBrOnSelect(IBI, SI))
8359	return requestResimplify();
8360	}
8361	return Changed;
8362	}
8363
8364	/// Given an block with only a single landing pad and a unconditional branch
8365	/// try to find another basic block which this one can be merged with. This
8366	/// handles cases where we have multiple invokes with unique landing pads, but
8367	/// a shared handler.
8368	///
8369	/// We specifically choose to not worry about merging non-empty blocks
8370	/// here. That is a PRE/scheduling problem and is best solved elsewhere. In
8371	/// practice, the optimizer produces empty landing pad blocks quite frequently
8372	/// when dealing with exception dense code. (see: instcombine, gvn, if-else
8373	/// sinking in this file)
8374	///
8375	/// This is primarily a code size optimization. We need to avoid performing
8376	/// any transform which might inhibit optimization (such as our ability to
8377	/// specialize a particular handler via tail commoning). We do this by not
8378	/// merging any blocks which require us to introduce a phi. Since the same
8379	/// values are flowing through both blocks, we don't lose any ability to
8380	/// specialize. If anything, we make such specialization more likely.
8381	///
8382	/// TODO - This transformation could remove entries from a phi in the target
8383	/// block when the inputs in the phi are the same for the two blocks being
8384	/// merged. In some cases, this could result in removal of the PHI entirely.
8385	static bool tryToMergeLandingPad(LandingPadInst LPad, UncondBrInst BI,
8386	BasicBlock BB, DomTreeUpdater DTU) {
8387	auto Succ = BB->getUniqueSuccessor();
8388	assert(Succ);
8389	// If there's a phi in the successor block, we'd likely have to introduce
8390	// a phi into the merged landing pad block.
8391	if (isa<PHINode>(Val: *Succ->begin()))
8392	return false;
8393
8394	for (BasicBlock *OtherPred : predecessors(BB: Succ)) {
8395	if (BB == OtherPred)
8396	continue;
8397	BasicBlock::iterator I = OtherPred->begin();
8398	LandingPadInst *LPad2 = dyn_cast<LandingPadInst>(Val&: I);
8399	if (!LPad2 \|\| !LPad2->isIdenticalTo(I: LPad))
8400	continue;
8401	++I;
8402	UncondBrInst *BI2 = dyn_cast<UncondBrInst>(Val&: I);
8403	if (!BI2 \|\| !BI2->isIdenticalTo(I: BI))
8404	continue;
8405
8406	std::vector<DominatorTree::UpdateType> Updates;
8407
8408	// We've found an identical block. Update our predecessors to take that
8409	// path instead and make ourselves dead.
8410	SmallSetVector<BasicBlock *, `16`> UniquePreds(pred_begin(BB), pred_end(BB));
8411	for (BasicBlock *Pred : UniquePreds) {
8412	InvokeInst *II = cast<InvokeInst>(Val: Pred->getTerminator());
8413	assert(II->getNormalDest() != BB && II->getUnwindDest() == BB &&
8414	"unexpected successor");
8415	II->setUnwindDest(OtherPred);
8416	if (DTU) {
8417	Updates.push_back(x: {DominatorTree::Insert, Pred, OtherPred});
8418	Updates.push_back(x: {DominatorTree::Delete, Pred, BB});
8419	}
8420	}
8421
8422	SmallSetVector<BasicBlock *, `16`> UniqueSuccs(succ_begin(BB), succ_end(BB));
8423	for (BasicBlock *Succ : UniqueSuccs) {
8424	Succ->removePredecessor(Pred: BB);
8425	if (DTU)
8426	Updates.push_back(x: {DominatorTree::Delete, BB, Succ});
8427	}
8428
8429	IRBuilder<> Builder(BI);
8430	Builder.CreateUnreachable();
8431	BI->eraseFromParent();
8432	if (DTU)
8433	DTU->applyUpdates(Updates);
8434	return true;
8435	}
8436	return false;
8437	}
8438
8439	bool SimplifyCFGOpt::simplifyUncondBranch(UncondBrInst *BI,
8440	IRBuilder<> &Builder) {
8441	BasicBlock *BB = BI->getParent();
8442	BasicBlock *Succ = BI->getSuccessor(i: `0`);
8443
8444	// If the Terminator is the only non-phi instruction, simplify the block.
8445	// If LoopHeader is provided, check if the block or its successor is a loop
8446	// header. (This is for early invocations before loop simplify and
8447	// vectorization to keep canonical loop forms for nested loops. These blocks
8448	// can be eliminated when the pass is invoked later in the back-end.)
8449	// Note that if BB has only one predecessor then we do not introduce new
8450	// backedge, so we can eliminate BB.
8451	bool NeedCanonicalLoop =
8452	Options.NeedCanonicalLoop &&
8453	(!LoopHeaders.empty() && BB->hasNPredecessorsOrMore(N: `2`) &&
8454	(is_contained(Range&: LoopHeaders, Element: BB) \|\| is_contained(Range&: LoopHeaders, Element: Succ)));
8455	BasicBlock::iterator I = BB->getFirstNonPHIOrDbg();
8456	if (I ->isTerminator() && BB != &BB->getParent()->getEntryBlock() &&
8457	!NeedCanonicalLoop && TryToSimplifyUncondBranchFromEmptyBlock(BB, DTU))
8458	return true;
8459
8460	// If the only instruction in the block is a seteq/setne comparison against a
8461	// constant, try to simplify the block.
8462	if (ICmpInst *ICI = dyn_cast<ICmpInst>(Val&: I)) {
8463	if (ICI->isEquality() && isa<ConstantInt>(Val: ICI->getOperand(i_nocapture: `1`))) {
8464	++I;
8465	if (I ->isTerminator() &&
8466	tryToSimplifyUncondBranchWithICmpInIt(ICI, Builder))
8467	return true;
8468	if (isa<SelectInst>(Val: I) && I ->getNextNode()->isTerminator() &&
8469	tryToSimplifyUncondBranchWithICmpSelectInIt(ICI, Select: cast<SelectInst>(Val&: I),
8470	Builder))
8471	return true;
8472	}
8473	}
8474
8475	// See if we can merge an empty landing pad block with another which is
8476	// equivalent.
8477	if (LandingPadInst *LPad = dyn_cast<LandingPadInst>(Val&: I)) {
8478	++I;
8479	if (I ->isTerminator() && tryToMergeLandingPad(LPad, BI, BB, DTU))
8480	return true;
8481	}
8482
8483	return false;
8484	}
8485
8486	static BasicBlock allPredecessorsComeFromSameSource(BasicBlock BB) {
8487	BasicBlock PredPred = nullptr*;
8488	for (auto *P : predecessors(BB)) {
8489	BasicBlock *PPred = P->getSinglePredecessor();
8490	if (!PPred \|\| (PredPred && PredPred != PPred))
8491	return nullptr;
8492	PredPred = PPred;
8493	}
8494	return PredPred;
8495	}
8496
8497	/// Fold the following pattern:
8498	/// bb0:
8499	/// br i1 %cond1, label %bb1, label %bb2
8500	/// bb1:
8501	/// br i1 %cond2, label %bb3, label %bb4
8502	/// bb2:
8503	/// br i1 %cond2, label %bb4, label %bb3
8504	/// bb3:
8505	/// ...
8506	/// bb4:
8507	/// ...
8508	/// into
8509	/// bb0:
8510	/// %cond = xor i1 %cond1, %cond2
8511	/// br i1 %cond, label %bb4, label %bb3
8512	/// bb3:
8513	/// ...
8514	/// bb4:
8515	/// ...
8516	/// NOTE: %cond2 always dominates the terminator of bb0.
8517	static bool mergeNestedCondBranch(CondBrInst BI, DomTreeUpdater DTU) {
8518	BasicBlock *BB = BI->getParent();
8519	BasicBlock *BB1 = BI->getSuccessor(i: `0`);
8520	BasicBlock *BB2 = BI->getSuccessor(i: `1`);
8521	auto IsSimpleSuccessor = [BB](BasicBlock Succ, CondBrInst &SuccBI) {
8522	if (Succ == BB)
8523	return false;
8524	if (&Succ->front() != Succ->getTerminator())
8525	return false;
8526	SuccBI = dyn_cast<CondBrInst>(Val: Succ->getTerminator());
8527	if (!SuccBI)
8528	return false;
8529	BasicBlock *Succ1 = SuccBI->getSuccessor(i: `0`);
8530	BasicBlock *Succ2 = SuccBI->getSuccessor(i: `1`);
8531	return Succ1 != Succ && Succ2 != Succ && Succ1 != BB && Succ2 != BB &&
8532	!isa<PHINode>(Val: Succ1->front()) && !isa<PHINode>(Val: Succ2->front());
8533	};
8534	CondBrInst BB1BI, BB2BI;
8535	if (!IsSimpleSuccessor (BB1, BB1BI) \|\| !IsSimpleSuccessor (BB2, BB2BI))
8536	return false;
8537
8538	if (BB1BI->getCondition() != BB2BI->getCondition() \|\|
8539	BB1BI->getSuccessor(i: `0`) != BB2BI->getSuccessor(i: `1`) \|\|
8540	BB1BI->getSuccessor(i: `1`) != BB2BI->getSuccessor(i: `0`))
8541	return false;
8542
8543	BasicBlock *BB3 = BB1BI->getSuccessor(i: `0`);
8544	BasicBlock *BB4 = BB1BI->getSuccessor(i: `1`);
8545	IRBuilder<> Builder(BI);
8546	BI->setCondition(
8547	Builder.CreateXor(LHS: BI->getCondition(), RHS: BB1BI->getCondition()));
8548	BB1->removePredecessor(Pred: BB);
8549	BI->setSuccessor(idx: `0`, NewSucc: BB4);
8550	BB2->removePredecessor(Pred: BB);
8551	BI->setSuccessor(idx: `1`, NewSucc: BB3);
8552	if (DTU) {
8553	SmallVector<DominatorTree::UpdateType, `4`> Updates;
8554	Updates.push_back(Elt: {DominatorTree::Delete, BB, BB1});
8555	Updates.push_back(Elt: {DominatorTree::Insert, BB, BB4});
8556	Updates.push_back(Elt: {DominatorTree::Delete, BB, BB2});
8557	Updates.push_back(Elt: {DominatorTree::Insert, BB, BB3});
8558
8559	DTU->applyUpdates(Updates);
8560	}
8561	bool HasWeight = false;
8562	uint64_t BBTWeight, BBFWeight;
8563	if (extractBranchWeights(I: *BI, TrueVal&: BBTWeight, FalseVal&: BBFWeight))
8564	HasWeight = true;
8565	else
8566	BBTWeight = BBFWeight = `1`;
8567	uint64_t BB1TWeight, BB1FWeight;
8568	if (extractBranchWeights(I: *BB1BI, TrueVal&: BB1TWeight, FalseVal&: BB1FWeight))
8569	HasWeight = true;
8570	else
8571	BB1TWeight = BB1FWeight = `1`;
8572	uint64_t BB2TWeight, BB2FWeight;
8573	if (extractBranchWeights(I: *BB2BI, TrueVal&: BB2TWeight, FalseVal&: BB2FWeight))
8574	HasWeight = true;
8575	else
8576	BB2TWeight = BB2FWeight = `1`;
8577	if (HasWeight) {
8578	uint64_t Weights[`2`] = {BBTWeight * BB1FWeight + BBFWeight * BB2TWeight,
8579	BBTWeight * BB1TWeight + BBFWeight * BB2FWeight};
8580	setFittedBranchWeights(I&: BI, Weights, /IsExpected=/*false,
8581	/ElideAllZero=/true);
8582	}
8583	return true;
8584	}
8585
8586	bool SimplifyCFGOpt::simplifyCondBranch(CondBrInst *BI, IRBuilder<> &Builder) {
8587	assert(
8588	!isa<ConstantInt>(BI->getCondition()) &&
8589	BI->getSuccessor(`0`) != BI->getSuccessor(`1`) &&
8590	"Tautological conditional branch should have been eliminated already.");
8591
8592	BasicBlock *BB = BI->getParent();
8593	if (!Options.SimplifyCondBranch \|\|
8594	BI->getFunction()->hasFnAttribute(Kind: Attribute::OptForFuzzing))
8595	return false;
8596
8597	// Conditional branch
8598	if (isValueEqualityComparison(TI: BI)) {
8599	// If we only have one predecessor, and if it is a branch on this value,
8600	// see if that predecessor totally determines the outcome of this
8601	// switch.
8602	if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
8603	if (simplifyEqualityComparisonWithOnlyPredecessor(TI: BI, Pred: OnlyPred, Builder))
8604	return requestResimplify();
8605
8606	// This block must be empty, except for the setcond inst, if it exists.
8607	// Ignore dbg and pseudo intrinsics.
8608	auto I = BB->instructionsWithoutDebug(SkipPseudoOp: true).begin();
8609	if (&*I == BI) {
8610	if (foldValueComparisonIntoPredecessors(TI: BI, Builder))
8611	return requestResimplify();
8612	} else if (&*I == cast<Instruction>(Val: BI->getCondition())) {
8613	++I;
8614	if (&*I == BI && foldValueComparisonIntoPredecessors(TI: BI, Builder))
8615	return requestResimplify();
8616	}
8617	}
8618
8619	// Try to turn "br (X == 0 \| X == 1), T, F" into a switch instruction.
8620	if (simplifyBranchOnICmpChain(BI, Builder, DL))
8621	return true;
8622
8623	// If this basic block has dominating predecessor blocks and the dominating
8624	// blocks' conditions imply BI's condition, we know the direction of BI.
8625	std::optional<bool> Imp = isImpliedByDomCondition(Cond: BI->getCondition(), ContextI: BI, DL);
8626	if (Imp) {
8627	// Turn this into a branch on constant.
8628	auto *OldCond = BI->getCondition();
8629	ConstantInt TorF = Imp ? ConstantInt::getTrue(Context&: BB->getContext())
8630	: ConstantInt::getFalse(Context&: BB->getContext());
8631	BI->setCondition(TorF);
8632	RecursivelyDeleteTriviallyDeadInstructions(V: OldCond);
8633	return requestResimplify();
8634	}
8635
8636	// If this basic block is ONLY a compare and a branch, and if a predecessor
8637	// branches to us and one of our successors, fold the comparison into the
8638	// predecessor and use logical operations to pick the right destination.
8639	if (Options.SpeculateBlocks &&
8640	foldBranchToCommonDest(BI, DTU, /MSSAU=/nullptr, TTI: &TTI,
8641	BonusInstThreshold: Options.BonusInstThreshold))
8642	return requestResimplify();
8643
8644	// We have a conditional branch to two blocks that are only reachable
8645	// from BI. We know that the condbr dominates the two blocks, so see if
8646	// there is any identical code in the "then" and "else" blocks. If so, we
8647	// can hoist it up to the branching block.
8648	if (BI->getSuccessor(i: `0`)->getSinglePredecessor()) {
8649	if (BI->getSuccessor(i: `1`)->getSinglePredecessor()) {
8650	if (HoistCommon &&
8651	hoistCommonCodeFromSuccessors(TI: BI, AllInstsEqOnly: !Options.HoistCommonInsts))
8652	return requestResimplify();
8653
8654	if (BI && Options.HoistLoadsStoresWithCondFaulting &&
8655	isProfitableToSpeculate(BI, Invert: std::nullopt, TTI)) {
8656	SmallVector<Instruction *, `2`> SpeculatedConditionalLoadsStores;
8657	auto CanSpeculateConditionalLoadsStores = [&]() {
8658	for (auto *Succ : successors(BB)) {
8659	for (Instruction &I : *Succ) {
8660	if (I.isTerminator()) {
8661	if (I.getNumSuccessors() > `1`)
8662	return false;
8663	continue;
8664	} else if (!isSafeCheapLoadStore(I: &I, TTI) \|\|
8665	SpeculatedConditionalLoadsStores.size() ==
8666	HoistLoadsStoresWithCondFaultingThreshold) {
8667	return false;
8668	}
8669	SpeculatedConditionalLoadsStores.push_back(Elt: &I);
8670	}
8671	}
8672	return !SpeculatedConditionalLoadsStores.empty();
8673	};
8674
8675	if (CanSpeculateConditionalLoadsStores ()) {
8676	hoistConditionalLoadsStores(BI, SpeculatedConditionalLoadsStores,
8677	Invert: std::nullopt, Sel: nullptr);
8678	return requestResimplify();
8679	}
8680	}
8681	} else {
8682	// If Successor #1 has multiple preds, we may be able to conditionally
8683	// execute Successor #0 if it branches to Successor #1.
8684	Instruction *Succ0TI = BI->getSuccessor(i: `0`)->getTerminator();
8685	if (Succ0TI->getNumSuccessors() == `1` &&
8686	Succ0TI->getSuccessor(Idx: `0`) == BI->getSuccessor(i: `1`))
8687	if (speculativelyExecuteBB(BI, ThenBB: BI->getSuccessor(i: `0`)))
8688	return requestResimplify();
8689	}
8690	} else if (BI->getSuccessor(i: `1`)->getSinglePredecessor()) {
8691	// If Successor #0 has multiple preds, we may be able to conditionally
8692	// execute Successor #1 if it branches to Successor #0.
8693	Instruction *Succ1TI = BI->getSuccessor(i: `1`)->getTerminator();
8694	if (Succ1TI->getNumSuccessors() == `1` &&
8695	Succ1TI->getSuccessor(Idx: `0`) == BI->getSuccessor(i: `0`))
8696	if (speculativelyExecuteBB(BI, ThenBB: BI->getSuccessor(i: `1`)))
8697	return requestResimplify();
8698	}
8699
8700	// If this is a branch on something for which we know the constant value in
8701	// predecessors (e.g. a phi node in the current block), thread control
8702	// through this block.
8703	if (foldCondBranchOnValueKnownInPredecessor(BI))
8704	return requestResimplify();
8705
8706	// Scan predecessor blocks for conditional branches.
8707	for (BasicBlock *Pred : predecessors(BB))
8708	if (CondBrInst *PBI = dyn_cast<CondBrInst>(Val: Pred->getTerminator()))
8709	if (PBI != BI)
8710	if (SimplifyCondBranchToCondBranch(PBI, BI, DTU, DL, TTI))
8711	return requestResimplify();
8712
8713	// Look for diamond patterns.
8714	if (MergeCondStores)
8715	if (BasicBlock *PrevBB = allPredecessorsComeFromSameSource(BB))
8716	if (CondBrInst *PBI = dyn_cast<CondBrInst>(Val: PrevBB->getTerminator()))
8717	if (PBI != BI)
8718	if (mergeConditionalStores(PBI, QBI: BI, DTU, DL, TTI))
8719	return requestResimplify();
8720
8721	// Look for nested conditional branches.
8722	if (mergeNestedCondBranch(BI, DTU))
8723	return requestResimplify();
8724
8725	return false;
8726	}
8727
8728	/// Check if passing a value to an instruction will cause undefined behavior.
8729	static bool passingValueIsAlwaysUndefined(Value V, Instruction I, bool PtrValueMayBeModified) {
8730	assert(V->getType() == I->getType() && "Mismatched types");
8731	Constant *C = dyn_cast<Constant>(Val: V);
8732	if (!C)
8733	return false;
8734
8735	if (I->use_empty())
8736	return false;
8737
8738	if (C->isNullValue() \|\| isa<UndefValue>(Val: C)) {
8739	// Only look at the first use we can handle, avoid hurting compile time with
8740	// long uselists
8741	auto FindUse = llvm::find_if(Range: I->uses(), P: [](auto &U) {
8742	auto *Use = cast<Instruction>(U.getUser());
8743	// Change this list when we want to add new instructions.
8744	switch (Use->getOpcode()) {
8745	default:
8746	return false;
8747	case Instruction::GetElementPtr:
8748	case Instruction::Ret:
8749	case Instruction::BitCast:
8750	case Instruction::Load:
8751	case Instruction::Store:
8752	case Instruction::Call:
8753	case Instruction::CallBr:
8754	case Instruction::Invoke:
8755	case Instruction::UDiv:
8756	case Instruction::URem:
8757	// Note: signed div/rem of INT_MIN / -1 is also immediate UB, not
8758	// implemented to avoid code complexity as it is unclear how useful such
8759	// logic is.
8760	case Instruction::SDiv:
8761	case Instruction::SRem:
8762	return true;
8763	}
8764	});
8765	if (FindUse == I->use_end())
8766	return false;
8767	auto &Use = *FindUse;
8768	auto *User = cast<Instruction>(Val: Use.getUser());
8769	// Bail out if User is not in the same BB as I or User == I or User comes
8770	// before I in the block. The latter two can be the case if User is a
8771	// PHI node.
8772	if (User->getParent() != I->getParent() \|\| User == I \|\|
8773	User->comesBefore(Other: I))
8774	return false;
8775
8776	// Now make sure that there are no instructions in between that can alter
8777	// control flow (eg. calls)
8778	auto InstrRange =
8779	make_range(x: std::next(x: I->getIterator()), y: User->getIterator());
8780	if (any_of(Range&: InstrRange, P: [](Instruction &I) {
8781	return !isGuaranteedToTransferExecutionToSuccessor(I: &I);
8782	}))
8783	return false;
8784
8785	// Look through GEPs. A load from a GEP derived from NULL is still undefined
8786	if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Val: User))
8787	if (GEP->getPointerOperand() == I) {
8788	// The type of GEP may differ from the type of base pointer.
8789	// Bail out on vector GEPs, as they are not handled by other checks.
8790	if (GEP->getType()->isVectorTy())
8791	return false;
8792	// The current base address is null, there are four cases to consider:
8793	// getelementptr (TY, null, 0) -> null
8794	// getelementptr (TY, null, not zero) -> may be modified
8795	// getelementptr inbounds (TY, null, 0) -> null
8796	// getelementptr inbounds (TY, null, not zero) -> poison iff null is
8797	// undefined?
8798	if (!GEP->hasAllZeroIndices() &&
8799	(!GEP->isInBounds() \|\|
8800	NullPointerIsDefined(F: GEP->getFunction(),
8801	AS: GEP->getPointerAddressSpace())))
8802	PtrValueMayBeModified = true;
8803	return passingValueIsAlwaysUndefined(V, I: GEP, PtrValueMayBeModified);
8804	}
8805
8806	// Look through return.
8807	if (ReturnInst *Ret = dyn_cast<ReturnInst>(Val: User)) {
8808	bool HasNoUndefAttr =
8809	Ret->getFunction()->hasRetAttribute(Kind: Attribute::NoUndef);
8810	// Return undefined to a noundef return value is undefined.
8811	if (isa<UndefValue>(Val: C) && HasNoUndefAttr)
8812	return true;
8813	// Return null to a nonnull+noundef return value is undefined.
8814	if (C->isNullValue() && HasNoUndefAttr &&
8815	Ret->getFunction()->hasRetAttribute(Kind: Attribute::NonNull)) {
8816	return !PtrValueMayBeModified;
8817	}
8818	}
8819
8820	// Load from null is undefined.
8821	if (LoadInst *LI = dyn_cast<LoadInst>(Val: User))
8822	if (!LI->isVolatile())
8823	return !NullPointerIsDefined(F: LI->getFunction(),
8824	AS: LI->getPointerAddressSpace());
8825
8826	// Store to null is undefined.
8827	if (StoreInst *SI = dyn_cast<StoreInst>(Val: User))
8828	if (!SI->isVolatile())
8829	return (!NullPointerIsDefined(F: SI->getFunction(),
8830	AS: SI->getPointerAddressSpace())) &&
8831	SI->getPointerOperand() == I;
8832
8833	// llvm.assume(false/undef) always triggers immediate UB.
8834	if (auto *Assume = dyn_cast<AssumeInst>(Val: User)) {
8835	// Ignore assume operand bundles.
8836	if (I == Assume->getArgOperand(i: `0`))
8837	return true;
8838	}
8839
8840	if (auto *CB = dyn_cast<CallBase>(Val: User)) {
8841	if (C->isNullValue() && NullPointerIsDefined(F: CB->getFunction()))
8842	return false;
8843	// A call to null is undefined.
8844	if (CB->getCalledOperand() == I)
8845	return true;
8846
8847	if (CB->isArgOperand(U: &Use)) {
8848	unsigned ArgIdx = CB->getArgOperandNo(U: &Use);
8849	// Passing null to a nonnnull+noundef argument is undefined.
8850	if (isa<ConstantPointerNull>(Val: C) &&
8851	CB->paramHasNonNullAttr(ArgNo: ArgIdx, /AllowUndefOrPoison=/false))
8852	return !PtrValueMayBeModified;
8853	// Passing undef to a noundef argument is undefined.
8854	if (isa<UndefValue>(Val: C) && CB->isPassingUndefUB(ArgNo: ArgIdx))
8855	return true;
8856	}
8857	}
8858	// Div/Rem by zero is immediate UB
8859	if (match(V: User, P: m_BinOp(L: m_Value(), R: m_Specific(V: I))) && User->isIntDivRem())
8860	return true;
8861	}
8862	return false;
8863	}
8864
8865	/// If BB has an incoming value that will always trigger undefined behavior
8866	/// (eg. null pointer dereference), remove the branch leading here.
8867	static bool removeUndefIntroducingPredecessor(BasicBlock *BB,
8868	DomTreeUpdater *DTU,
8869	AssumptionCache *AC) {
8870	for (PHINode &PHI : BB->phis())
8871	for (unsigned i = `0`, e = PHI.getNumIncomingValues(); i != e; ++i)
8872	if (passingValueIsAlwaysUndefined(V: PHI.getIncomingValue(i), I: &PHI)) {
8873	BasicBlock *Predecessor = PHI.getIncomingBlock(i);
8874	Instruction *T = Predecessor->getTerminator();
8875	IRBuilder<> Builder(T);
8876	if (isa<UncondBrInst>(Val: T)) {
8877	BB->removePredecessor(Pred: Predecessor);
8878	// Turn unconditional branches into unreachables.
8879	Builder.CreateUnreachable();
8880	T->eraseFromParent();
8881	if (DTU)
8882	DTU->applyUpdates(Updates: {{DominatorTree::Delete, Predecessor, BB}});
8883	return true;
8884	} else if (CondBrInst *BI = dyn_cast<CondBrInst>(Val: T)) {
8885	BB->removePredecessor(Pred: Predecessor);
8886	// Preserve guarding condition in assume, because it might not be
8887	// inferrable from any dominating condition.
8888	Value *Cond = BI->getCondition();
8889	CallInst *Assumption;
8890	if (BI->getSuccessor(i: `0`) == BB)
8891	Assumption = Builder.CreateAssumption(Cond: Builder.CreateNot(V: Cond));
8892	else
8893	Assumption = Builder.CreateAssumption(Cond);
8894	if (AC)
8895	AC->registerAssumption(CI: cast<AssumeInst>(Val: Assumption));
8896	Builder.CreateBr(Dest: BI->getSuccessor(i: `0`) == BB ? BI->getSuccessor(i: `1`)
8897	: BI->getSuccessor(i: `0`));
8898	BI->eraseFromParent();
8899	if (DTU)
8900	DTU->applyUpdates(Updates: {{DominatorTree::Delete, Predecessor, BB}});
8901	return true;
8902	} else if (SwitchInst *SI = dyn_cast<SwitchInst>(Val: T)) {
8903	// Redirect all branches leading to UB into
8904	// a newly created unreachable block.
8905	BasicBlock *Unreachable = BasicBlock::Create(
8906	Context&: Predecessor->getContext(), Name: "unreachable", Parent: BB->getParent(), InsertBefore: BB);
8907	Builder.SetInsertPoint(Unreachable);
8908	// The new block contains only one instruction: Unreachable
8909	Builder.CreateUnreachable();
8910	for (const auto &Case : SI->cases())
8911	if (Case.getCaseSuccessor() == BB) {
8912	BB->removePredecessor(Pred: Predecessor);
8913	Case.setSuccessor(Unreachable);
8914	}
8915	if (SI->getDefaultDest() == BB) {
8916	BB->removePredecessor(Pred: Predecessor);
8917	SI->setDefaultDest(Unreachable);
8918	}
8919
8920	if (DTU)
8921	DTU->applyUpdates(
8922	Updates: { { DominatorTree::Insert, Predecessor, Unreachable },
8923	{ DominatorTree::Delete, Predecessor, BB } });
8924	return true;
8925	}
8926	}
8927
8928	return false;
8929	}
8930
8931	bool SimplifyCFGOpt::simplifyOnce(BasicBlock *BB) {
8932	bool Changed = false;
8933
8934	assert(BB && BB->getParent() && "Block not embedded in function!");
8935	assert(BB->getTerminator() && "Degenerate basic block encountered!");
8936
8937	// Remove basic blocks that have no predecessors (except the entry block)...
8938	// or that just have themself as a predecessor. These are unreachable.
8939	if ((pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) \|\|
8940	BB->getSinglePredecessor() == BB) {
8941	LLVM_DEBUG(dbgs() << "Removing BB: \n" << *BB);
8942	DeleteDeadBlock(BB, DTU);
8943	return true;
8944	}
8945
8946	// Check to see if we can constant propagate this terminator instruction
8947	// away...
8948	Changed \|= ConstantFoldTerminator(BB, /DeleteDeadConditions=/true,
8949	/TLI=/nullptr, DTU);
8950
8951	// Check for and eliminate duplicate PHI nodes in this block.
8952	Changed \|= EliminateDuplicatePHINodes(BB);
8953
8954	// Check for and remove branches that will always cause undefined behavior.
8955	if (removeUndefIntroducingPredecessor(BB, DTU, AC: Options.AC))
8956	return requestResimplify();
8957
8958	// Merge basic blocks into their predecessor if there is only one distinct
8959	// pred, and if there is only one distinct successor of the predecessor, and
8960	// if there are no PHI nodes.
8961	if (MergeBlockIntoPredecessor(BB, DTU))
8962	return true;
8963
8964	if (SinkCommon && Options.SinkCommonInsts) {
8965	if (sinkCommonCodeFromPredecessors(BB, DTU) \|\|
8966	mergeCompatibleInvokes(BB, DTU)) {
8967	// sinkCommonCodeFromPredecessors() does not automatically CSE PHI's,
8968	// so we may now how duplicate PHI's.
8969	// Let's rerun EliminateDuplicatePHINodes() first,
8970	// before foldTwoEntryPHINode() potentially converts them into select's,
8971	// after which we'd need a whole EarlyCSE pass run to cleanup them.
8972	return true;
8973	}
8974	// Merge identical predecessors of this block.
8975	if (simplifyDuplicatePredecessors(BB, DTU))
8976	return true;
8977	}
8978
8979	if (Options.SpeculateBlocks &&
8980	!BB->getParent()->hasFnAttribute(Kind: Attribute::OptForFuzzing)) {
8981	// If there is a trivial two-entry PHI node in this basic block, and we can
8982	// eliminate it, do so now.
8983	if (auto *PN = dyn_cast<PHINode>(Val: BB->begin()))
8984	if (PN->getNumIncomingValues() == `2`)
8985	if (foldTwoEntryPHINode(PN, TTI, DTU, AC: Options.AC, DL,
8986	SpeculateUnpredictables: Options.SpeculateUnpredictables))
8987	return true;
8988	}
8989
8990	IRBuilder<> Builder(BB);
8991	Instruction *Terminator = BB->getTerminator();
8992	Builder.SetInsertPoint(Terminator);
8993	switch (Terminator->getOpcode()) {
8994	case Instruction::UncondBr:
8995	Changed \|= simplifyUncondBranch(BI: cast<UncondBrInst>(Val: Terminator), Builder);
8996	break;
8997	case Instruction::CondBr:
8998	Changed \|= simplifyCondBranch(BI: cast<CondBrInst>(Val: Terminator), Builder);
8999	break;
9000	case Instruction::Resume:
9001	Changed \|= simplifyResume(RI: cast<ResumeInst>(Val: Terminator), Builder);
9002	break;
9003	case Instruction::CleanupRet:
9004	Changed \|= simplifyCleanupReturn(RI: cast<CleanupReturnInst>(Val: Terminator));
9005	break;
9006	case Instruction::Switch:
9007	Changed \|= simplifySwitch(SI: cast<SwitchInst>(Val: Terminator), Builder);
9008	break;
9009	case Instruction::Unreachable:
9010	Changed \|= simplifyUnreachable(UI: cast<UnreachableInst>(Val: Terminator));
9011	break;
9012	case Instruction::IndirectBr:
9013	Changed \|= simplifyIndirectBr(IBI: cast<IndirectBrInst>(Val: Terminator));
9014	break;
9015	}
9016
9017	return Changed;
9018	}
9019
9020	bool SimplifyCFGOpt::run(BasicBlock *BB) {
9021	bool Changed = false;
9022
9023	// Repeated simplify BB as long as resimplification is requested.
9024	do {
9025	Resimplify = false;
9026
9027	// Perform one round of simplifcation. Resimplify flag will be set if
9028	// another iteration is requested.
9029	Changed \|= simplifyOnce(BB);
9030	} while (Resimplify);
9031
9032	return Changed;
9033	}
9034
9035	bool llvm::simplifyCFG(BasicBlock BB, const* TargetTransformInfo &TTI,
9036	DomTreeUpdater DTU, const* SimplifyCFGOptions &Options,
9037	ArrayRef<WeakVH> LoopHeaders) {
9038	return SimplifyCFGOpt (TTI, DTU, BB->getDataLayout(), LoopHeaders,
9039	Options)
9040	.run(BB);
9041	}
9042

Browse the source code of llvm_projects/llvm/lib/Transforms/Utils/SimplifyCFG.cpp