JumpThreading.cpp source code [llvm_projects/llvm/lib/Transforms/Scalar/JumpThreading.cpp]

1	//===- JumpThreading.cpp - Thread control through conditional blocks ------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This file implements the Jump Threading pass.
10	//
11	//===----------------------------------------------------------------------===//
12
13	#include "llvm/Transforms/Scalar/JumpThreading.h"
14	#include "llvm/ADT/DenseMap.h"
15	#include "llvm/ADT/MapVector.h"
16	#include "llvm/ADT/STLExtras.h"
17	#include "llvm/ADT/ScopeExit.h"
18	#include "llvm/ADT/SmallPtrSet.h"
19	#include "llvm/ADT/SmallVector.h"
20	#include "llvm/ADT/Statistic.h"
21	#include "llvm/Analysis/AliasAnalysis.h"
22	#include "llvm/Analysis/BlockFrequencyInfo.h"
23	#include "llvm/Analysis/BranchProbabilityInfo.h"
24	#include "llvm/Analysis/CFG.h"
25	#include "llvm/Analysis/ConstantFolding.h"
26	#include "llvm/Analysis/GlobalsModRef.h"
27	#include "llvm/Analysis/GuardUtils.h"
28	#include "llvm/Analysis/InstructionSimplify.h"
29	#include "llvm/Analysis/LazyValueInfo.h"
30	#include "llvm/Analysis/Loads.h"
31	#include "llvm/Analysis/LoopInfo.h"
32	#include "llvm/Analysis/MemoryLocation.h"
33	#include "llvm/Analysis/PostDominators.h"
34	#include "llvm/Analysis/TargetLibraryInfo.h"
35	#include "llvm/Analysis/TargetTransformInfo.h"
36	#include "llvm/Analysis/ValueTracking.h"
37	#include "llvm/IR/BasicBlock.h"
38	#include "llvm/IR/CFG.h"
39	#include "llvm/IR/Constant.h"
40	#include "llvm/IR/ConstantRange.h"
41	#include "llvm/IR/Constants.h"
42	#include "llvm/IR/DataLayout.h"
43	#include "llvm/IR/DebugInfo.h"
44	#include "llvm/IR/Dominators.h"
45	#include "llvm/IR/Function.h"
46	#include "llvm/IR/InstrTypes.h"
47	#include "llvm/IR/Instruction.h"
48	#include "llvm/IR/Instructions.h"
49	#include "llvm/IR/IntrinsicInst.h"
50	#include "llvm/IR/Intrinsics.h"
51	#include "llvm/IR/LLVMContext.h"
52	#include "llvm/IR/MDBuilder.h"
53	#include "llvm/IR/Metadata.h"
54	#include "llvm/IR/Module.h"
55	#include "llvm/IR/PassManager.h"
56	#include "llvm/IR/PatternMatch.h"
57	#include "llvm/IR/ProfDataUtils.h"
58	#include "llvm/IR/Type.h"
59	#include "llvm/IR/Use.h"
60	#include "llvm/IR/Value.h"
61	#include "llvm/Support/BlockFrequency.h"
62	#include "llvm/Support/BranchProbability.h"
63	#include "llvm/Support/Casting.h"
64	#include "llvm/Support/CommandLine.h"
65	#include "llvm/Support/Debug.h"
66	#include "llvm/Support/raw_ostream.h"
67	#include "llvm/Transforms/Utils/BasicBlockUtils.h"
68	#include "llvm/Transforms/Utils/Cloning.h"
69	#include "llvm/Transforms/Utils/Local.h"
70	#include "llvm/Transforms/Utils/SSAUpdater.h"
71	#include "llvm/Transforms/Utils/ValueMapper.h"
72	#include <cassert>
73	#include <cstdint>
74	#include <iterator>
75	#include <memory>
76	#include <utility>
77
78	using namespace llvm;
79	using namespace jumpthreading;
80
81	#define DEBUG_TYPE "jump-threading"
82
83	STATISTIC(NumThreads, "Number of jumps threaded");
84	STATISTIC(NumFolds, "Number of terminators folded");
85	STATISTIC(NumDupes, "Number of branch blocks duplicated to eliminate phi");
86
87	static cl::opt<unsigned>
88	BBDuplicateThreshold("jump-threading-threshold",
89	cl::desc ("Max block size to duplicate for jump threading"),
90	cl::init(Val: `6`), cl::Hidden);
91
92	static cl::opt<unsigned>
93	ImplicationSearchThreshold(
94	"jump-threading-implication-search-threshold",
95	cl::desc ("The number of predecessors to search for a stronger "
96	"condition to use to thread over a weaker condition"),
97	cl::init(Val: `3`), cl::Hidden);
98
99	static cl::opt<unsigned> PhiDuplicateThreshold(
100	"jump-threading-phi-threshold",
101	cl::desc ("Max PHIs in BB to duplicate for jump threading"), cl::init(Val: `76`),
102	cl::Hidden);
103
104	static cl::opt<bool> ThreadAcrossLoopHeaders(
105	"jump-threading-across-loop-headers",
106	cl::desc ("Allow JumpThreading to thread across loop headers, for testing"),
107	cl::init(Val: false), cl::Hidden);
108
109	JumpThreadingPass::JumpThreadingPass(int T) {
110	DefaultBBDupThreshold = (T == -`1`) ? BBDuplicateThreshold : unsigned(T);
111	}
112
113	// Update branch probability information according to conditional
114	// branch probability. This is usually made possible for cloned branches
115	// in inline instances by the context specific profile in the caller.
116	// For instance,
117	//
118	// [Block PredBB]
119	// [Branch PredBr]
120	// if (t) {
121	// Block A;
122	// } else {
123	// Block B;
124	// }
125	//
126	// [Block BB]
127	// cond = PN([true, %A], [..., %B]); // PHI node
128	// [Branch CondBr]
129	// if (cond) {
130	// ... // P(cond == true) = 1%
131	// }
132	//
133	// Here we know that when block A is taken, cond must be true, which means
134	// P(cond == true \| A) = 1
135	//
136	// Given that P(cond == true) = P(cond == true \| A) P(A) +*
137	// P(cond == true \| B) P(B)*
138	// we get:
139	// P(cond == true ) = P(A) + P(cond == true \| B) P(B)*
140	//
141	// which gives us:
142	// P(A) is less than P(cond == true), i.e.
143	// P(t == true) <= P(cond == true)
144	//
145	// In other words, if we know P(cond == true) is unlikely, we know
146	// that P(t == true) is also unlikely.
147	//
148	static void updatePredecessorProfileMetadata(PHINode PN, BasicBlock BB) {
149	CondBrInst *CondBr = dyn_cast<CondBrInst>(Val: BB->getTerminator());
150	if (!CondBr)
151	return;
152
153	uint64_t TrueWeight, FalseWeight;
154	if (!extractBranchWeights(I: *CondBr, TrueVal&: TrueWeight, FalseVal&: FalseWeight))
155	return;
156
157	if (TrueWeight + FalseWeight == `0`)
158	// Zero branch_weights do not give a hint for getting branch probabilities.
159	// Technically it would result in division by zero denominator, which is
160	// TrueWeight + FalseWeight.
161	return;
162
163	// Returns the outgoing edge of the dominating predecessor block
164	// that leads to the PhiNode's incoming block:
165	auto GetPredOutEdge =
166	[](BasicBlock *IncomingBB,
167	BasicBlock PhiBB) -> std::pair<BasicBlock , BasicBlock *> {
168	auto *PredBB = IncomingBB;
169	auto *SuccBB = PhiBB;
170	SmallPtrSet<BasicBlock *, `16`> Visited;
171	while (true) {
172	if (isa<CondBrInst>(Val: PredBB->getTerminator()))
173	return {PredBB, SuccBB};
174	Visited.insert(Ptr: PredBB);
175	auto *SinglePredBB = PredBB->getSinglePredecessor();
176	if (!SinglePredBB)
177	return {nullptr, nullptr};
178
179	// Stop searching when SinglePredBB has been visited. It means we see
180	// an unreachable loop.
181	if (Visited.count(Ptr: SinglePredBB))
182	return {nullptr, nullptr};
183
184	SuccBB = PredBB;
185	PredBB = SinglePredBB;
186	}
187	};
188
189	for (unsigned i = `0`, e = PN->getNumIncomingValues(); i != e; ++i) {
190	Value *PhiOpnd = PN->getIncomingValue(i);
191	ConstantInt *CI = dyn_cast<ConstantInt>(Val: PhiOpnd);
192
193	if (!CI \|\| !CI->getType()->isIntegerTy(Bitwidth: `1`))
194	continue;
195
196	BranchProbability BP =
197	(CI->isOne() ? BranchProbability::getBranchProbability(
198	Numerator: TrueWeight, Denominator: TrueWeight + FalseWeight)
199	: BranchProbability::getBranchProbability(
200	Numerator: FalseWeight, Denominator: TrueWeight + FalseWeight));
201
202	auto PredOutEdge = GetPredOutEdge (PN->getIncomingBlock(i), BB);
203	if (!PredOutEdge.first)
204	return;
205
206	BasicBlock *PredBB = PredOutEdge.first;
207	CondBrInst *PredBr = dyn_cast<CondBrInst>(Val: PredBB->getTerminator());
208	if (!PredBr)
209	return;
210
211	uint64_t PredTrueWeight, PredFalseWeight;
212	// FIXME: We currently only set the profile data when it is missing.
213	// With PGO, this can be used to refine even existing profile data with
214	// context information. This needs to be done after more performance
215	// testing.
216	if (extractBranchWeights(I: *PredBr, TrueVal&: PredTrueWeight, FalseVal&: PredFalseWeight))
217	continue;
218
219	// We can not infer anything useful when BP >= 50%, because BP is the
220	// upper bound probability value.
221	if (BP >= BranchProbability (`50`, `100`))
222	continue;
223
224	uint32_t Weights[`2`];
225	if (PredBr->getSuccessor(i: `0`) == PredOutEdge.second) {
226	Weights[`0`] = BP.getNumerator();
227	Weights[`1`] = BP.getCompl().getNumerator();
228	} else {
229	Weights[`0`] = BP.getCompl().getNumerator();
230	Weights[`1`] = BP.getNumerator();
231	}
232	setBranchWeights(I&: PredBr, Weights, IsExpected: hasBranchWeightOrigin(I: PredBr));
233	}
234	}
235
236	PreservedAnalyses JumpThreadingPass::run(Function &F,
237	FunctionAnalysisManager &AM) {
238	auto &TTI = AM.getResult<TargetIRAnalysis>(IR&: F);
239	// Jump Threading has no sense for the targets with divergent CF
240	if (TTI.hasBranchDivergence(F: &F))
241	return PreservedAnalyses::all();
242	auto &TLI = AM.getResult<TargetLibraryAnalysis>(IR&: F);
243	auto &LVI = AM.getResult<LazyValueAnalysis>(IR&: F);
244	auto &AA = AM.getResult<AAManager>(IR&: F);
245	auto &DT = AM.getResult<DominatorTreeAnalysis>(IR&: F);
246
247	bool Changed =
248	runImpl(F, FAM: &AM, TLI: &TLI, TTI: &TTI, LVI: &LVI, AA: &AA,
249	DTU: std::make_unique<DomTreeUpdater>(
250	args: &DT, args: nullptr, args: DomTreeUpdater::UpdateStrategy::Lazy),
251	BFI: nullptr, BPI: nullptr);
252
253	if (!Changed)
254	return PreservedAnalyses::all();
255
256
257	getDomTreeUpdater()->flush();
258
259	#if defined(EXPENSIVE_CHECKS)
260	assert(getDomTreeUpdater()->getDomTree().verify(
261	DominatorTree::VerificationLevel::Full) &&
262	"DT broken after JumpThreading");
263	assert((!getDomTreeUpdater()->hasPostDomTree() \|\|
264	getDomTreeUpdater()->getPostDomTree().verify(
265	PostDominatorTree::VerificationLevel::Full)) &&
266	"PDT broken after JumpThreading");
267	#else
268	assert(getDomTreeUpdater()->getDomTree().verify(
269	DominatorTree::VerificationLevel::Fast) &&
270	"DT broken after JumpThreading");
271	assert((!getDomTreeUpdater()->hasPostDomTree() \|\|
272	getDomTreeUpdater()->getPostDomTree().verify(
273	PostDominatorTree::VerificationLevel::Fast)) &&
274	"PDT broken after JumpThreading");
275	#endif
276
277	return getPreservedAnalysis();
278	}
279
280	bool JumpThreadingPass::runImpl(Function &F_, FunctionAnalysisManager *FAM_,
281	TargetLibraryInfo *TLI_,
282	TargetTransformInfo TTI_, LazyValueInfo LVI_,
283	AliasAnalysis *AA_,
284	std::unique_ptr<DomTreeUpdater> DTU_,
285	BlockFrequencyInfo *BFI_,
286	BranchProbabilityInfo *BPI_) {
287	LLVM_DEBUG(dbgs() << "Jump threading on function '" << F_.getName() << "'\n");
288	F = &F_;
289	FAM = FAM_;
290	TLI = TLI_;
291	TTI = TTI_;
292	LVI = LVI_;
293	AA = AA_;
294	DTU = std::move(DTU_);
295	BFI = BFI_;
296	BPI = BPI_;
297	auto *GuardDecl = Intrinsic::getDeclarationIfExists(
298	M: F->getParent(), id: Intrinsic::experimental_guard);
299	HasGuards = GuardDecl && !GuardDecl->use_empty();
300
301	// Reduce the number of instructions duplicated when optimizing strictly for
302	// size.
303	if (BBDuplicateThreshold.getNumOccurrences())
304	BBDupThreshold = BBDuplicateThreshold;
305	else if (F->hasMinSize())
306	BBDupThreshold = `3`;
307	else
308	BBDupThreshold = DefaultBBDupThreshold;
309
310	assert(DTU && "DTU isn't passed into JumpThreading before using it.");
311	assert(DTU->hasDomTree() && "JumpThreading relies on DomTree to proceed.");
312	DominatorTree &DT = DTU ->getDomTree();
313
314	Unreachable.clear();
315	for (auto &BB : *F)
316	if (!DT.isReachableFromEntry(A: &BB))
317	Unreachable.insert(Ptr: &BB);
318
319	if (!ThreadAcrossLoopHeaders)
320	findLoopHeaders(F&: *F);
321
322	bool EverChanged = false;
323	bool Changed;
324	do {
325	Changed = false;
326	for (auto &BB : *F) {
327	if (Unreachable.count(Ptr: &BB))
328	continue;
329	while (processBlock(BB: &BB)) // Thread all of the branches we can over BB.
330	Changed = ChangedSinceLastAnalysisUpdate = true;
331
332	// Stop processing BB if it's the entry or is now deleted. The following
333	// routines attempt to eliminate BB and locating a suitable replacement
334	// for the entry is non-trivial.
335	if (&BB == &F->getEntryBlock() \|\| DTU ->isBBPendingDeletion(DelBB: &BB))
336	continue;
337
338	if (pred_empty(BB: &BB)) {
339	// When processBlock makes BB unreachable it doesn't bother to fix up
340	// the instructions in it. We must remove BB to prevent invalid IR.
341	LLVM_DEBUG(dbgs() << " JT: Deleting dead block '" << BB.getName()
342	<< "' with terminator: " << *BB.getTerminator()
343	<< `'\n'`);
344	LoopHeaders.erase(Ptr: &BB);
345	LVI->eraseBlock(BB: &BB);
346	DeleteDeadBlock(BB: &BB, DTU: DTU.get());
347	Changed = ChangedSinceLastAnalysisUpdate = true;
348	continue;
349	}
350
351	// processBlock doesn't thread BBs with unconditional TIs. However, if BB
352	// is "almost empty", we attempt to merge BB with its sole successor.
353	if (auto *BI = dyn_cast<UncondBrInst>(Val: BB.getTerminator())) {
354	BasicBlock *Succ = BI->getSuccessor();
355	if (
356	// The terminator must be the only non-phi instruction in BB.
357	BB.getFirstNonPHIOrDbg(SkipPseudoOp: true)->isTerminator() &&
358	// Don't alter Loop headers and latches to ensure another pass can
359	// detect and transform nested loops later.
360	!LoopHeaders.count(Ptr: &BB) && !LoopHeaders.count(Ptr: Succ) &&
361	TryToSimplifyUncondBranchFromEmptyBlock(BB: &BB, DTU: DTU.get())) {
362	// BB is valid for cleanup here because we passed in DTU. F remains
363	// BB's parent until a DTU->getDomTree() event.
364	LVI->eraseBlock(BB: &BB);
365	Changed = ChangedSinceLastAnalysisUpdate = true;
366	}
367	}
368	}
369	EverChanged \|= Changed;
370	} while (Changed);
371
372	// Jump threading may have introduced redundant debug values into F which
373	// should be removed.
374	if (EverChanged)
375	for (auto &BB : *F) {
376	RemoveRedundantDbgInstrs(BB: &BB);
377	}
378
379	LoopHeaders.clear();
380	return EverChanged;
381	}
382
383	// Replace uses of Cond with ToVal when safe to do so. If all uses are
384	// replaced, we can remove Cond. We cannot blindly replace all uses of Cond
385	// because we may incorrectly replace uses when guards/assumes are uses of
386	// of `Cond` and we used the guards/assume to reason about the `Cond` value
387	// at the end of block. RAUW unconditionally replaces all uses
388	// including the guards/assumes themselves and the uses before the
389	// guard/assume.
390	static bool replaceFoldableUses(Instruction Cond, Value ToVal,
391	BasicBlock *KnownAtEndOfBB) {
392	bool Changed = false;
393	assert(Cond->getType() == ToVal->getType());
394	// We can unconditionally replace all uses in non-local blocks (i.e. uses
395	// strictly dominated by BB), since LVI information is true from the
396	// terminator of BB.
397	if (Cond->getParent() == KnownAtEndOfBB)
398	Changed \|= replaceNonLocalUsesWith(From: Cond, To: ToVal);
399	for (Instruction &I : reverse(C&: *KnownAtEndOfBB)) {
400	// Replace any debug-info record users of Cond with ToVal.
401	for (DbgVariableRecord &DVR : filterDbgVars(R: I.getDbgRecordRange()))
402	DVR.replaceVariableLocationOp(OldValue: Cond, NewValue: ToVal, AllowEmpty: true);
403
404	// Reached the Cond whose uses we are trying to replace, so there are no
405	// more uses.
406	if (&I == Cond)
407	break;
408	// We only replace uses in instructions that are guaranteed to reach the end
409	// of BB, where we know Cond is ToVal.
410	if (!isGuaranteedToTransferExecutionToSuccessor(I: &I))
411	break;
412	Changed \|= I.replaceUsesOfWith(From: Cond, To: ToVal);
413	}
414	if (Cond->use_empty() && !Cond->mayHaveSideEffects()) {
415	Cond->eraseFromParent();
416	Changed = true;
417	}
418	return Changed;
419	}
420
421	/// Return the cost of duplicating a piece of this block from first non-phi
422	/// and before StopAt instruction to thread across it. Stop scanning the block
423	/// when exceeding the threshold. If duplication is impossible, returns ~0U.
424	static unsigned getJumpThreadDuplicationCost(const TargetTransformInfo *TTI,
425	BasicBlock *BB,
426	Instruction *StopAt,
427	unsigned Threshold) {
428	assert(StopAt->getParent() == BB && "Not an instruction from proper BB?");
429
430	// Do not duplicate the BB if it has a lot of PHI nodes.
431	// If a threadable chain is too long then the number of PHI nodes can add up,
432	// leading to a substantial increase in compile time when rewriting the SSA.
433	unsigned PhiCount = `0`;
434	Instruction FirstNonPHI = nullptr*;
435	for (Instruction &I : *BB) {
436	if (!isa<PHINode>(Val: &I)) {
437	FirstNonPHI = &I;
438	break;
439	}
440	if (++PhiCount > PhiDuplicateThreshold)
441	return ~`0U`;
442	}
443
444	/// Ignore PHI nodes, these will be flattened when duplication happens.
445	BasicBlock::const_iterator I(FirstNonPHI);
446
447	// FIXME: THREADING will delete values that are just used to compute the
448	// branch, so they shouldn't count against the duplication cost.
449
450	unsigned Bonus = `0`;
451	if (BB->getTerminator() == StopAt) {
452	// Threading through a switch statement is particularly profitable. If this
453	// block ends in a switch, decrease its cost to make it more likely to
454	// happen.
455	if (isa<SwitchInst>(Val: StopAt))
456	Bonus = `6`;
457
458	// The same holds for indirect branches, but slightly more so.
459	if (isa<IndirectBrInst>(Val: StopAt))
460	Bonus = `8`;
461	}
462
463	// Bump the threshold up so the early exit from the loop doesn't skip the
464	// terminator-based Size adjustment at the end.
465	Threshold += Bonus;
466
467	// Sum up the cost of each instruction until we get to the terminator. Don't
468	// include the terminator because the copy won't include it.
469	unsigned Size = `0`;
470	for (; &*I != StopAt; ++I) {
471
472	// Stop scanning the block if we've reached the threshold.
473	if (Size > Threshold)
474	return Size;
475
476	// Bail out if this instruction gives back a token type, it is not possible
477	// to duplicate it if it is used outside this BB.
478	if (I ->getType()->isTokenTy() && I ->isUsedOutsideOfBlock(BB))
479	return ~`0U`;
480
481	// Blocks with NoDuplicate are modelled as having infinite cost, so they
482	// are never duplicated.
483	if (const CallInst *CI = dyn_cast<CallInst>(Val&: I))
484	if (CI->cannotDuplicate() \|\| CI->isConvergent())
485	return ~`0U`;
486
487	if (TTI->getInstructionCost(U: &*I, CostKind: TargetTransformInfo::TCK_SizeAndLatency) ==
488	TargetTransformInfo::TCC_Free)
489	continue;
490
491	// All other instructions count for at least one unit.
492	++Size;
493
494	// Calls are more expensive. If they are non-intrinsic calls, we model them
495	// as having cost of 4. If they are a non-vector intrinsic, we model them
496	// as having cost of 2 total, and if they are a vector intrinsic, we model
497	// them as having cost 1.
498	if (const CallInst *CI = dyn_cast<CallInst>(Val&: I)) {
499	if (!isa<IntrinsicInst>(Val: CI))
500	Size += `3`;
501	else if (!CI->getType()->isVectorTy())
502	Size += `1`;
503	}
504	}
505
506	return Size > Bonus ? Size - Bonus : `0`;
507	}
508
509	/// findLoopHeaders - We do not want jump threading to turn proper loop
510	/// structures into irreducible loops. Doing this breaks up the loop nesting
511	/// hierarchy and pessimizes later transformations. To prevent this from
512	/// happening, we first have to find the loop headers. Here we approximate this
513	/// by finding targets of backedges in the CFG.
514	///
515	/// Note that there definitely are cases when we want to allow threading of
516	/// edges across a loop header. For example, threading a jump from outside the
517	/// loop (the preheader) to an exit block of the loop is definitely profitable.
518	/// It is also almost always profitable to thread backedges from within the loop
519	/// to exit blocks, and is often profitable to thread backedges to other blocks
520	/// within the loop (forming a nested loop). This simple analysis is not rich
521	/// enough to track all of these properties and keep it up-to-date as the CFG
522	/// mutates, so we don't allow any of these transformations.
523	void JumpThreadingPass::findLoopHeaders(Function &F) {
524	SmallVector<std::pair<const BasicBlock,const* BasicBlock*>, `32`> Edges;
525	FindFunctionBackedges(F, Result&: Edges);
526	LoopHeaders.insert_range(R: llvm::make_second_range(c&: Edges));
527	}
528
529	/// getKnownConstant - Helper method to determine if we can thread over a
530	/// terminator with the given value as its condition, and if so what value to
531	/// use for that. What kind of value this is depends on whether we want an
532	/// integer or a block address, but an undef is always accepted.
533	/// Returns null if Val is null or not an appropriate constant.
534	static Constant getKnownConstant(Value Val, ConstantPreference Preference) {
535	if (!Val)
536	return nullptr;
537
538	// Undef is "known" enough.
539	if (UndefValue *U = dyn_cast<UndefValue>(Val))
540	return U;
541
542	if (Preference == WantBlockAddress)
543	return dyn_cast<BlockAddress>(Val: Val->stripPointerCasts());
544
545	return dyn_cast<ConstantInt>(Val);
546	}
547
548	/// computeValueKnownInPredecessors - Given a basic block BB and a value V, see
549	/// if we can infer that the value is a known ConstantInt/BlockAddress or undef
550	/// in any of our predecessors. If so, return the known list of value and pred
551	/// BB in the result vector.
552	///
553	/// This returns true if there were any known values.
554	bool JumpThreadingPass::computeValueKnownInPredecessorsImpl(
555	Value V, BasicBlock BB, PredValueInfo &Result,
556	ConstantPreference Preference, SmallPtrSet<Value *, `4`> &RecursionSet,
557	Instruction *CxtI) {
558	const DataLayout &DL = BB->getDataLayout();
559
560	// This method walks up use-def chains recursively. Because of this, we could
561	// get into an infinite loop going around loops in the use-def chain. To
562	// prevent this, keep track of what (value, block) pairs we've already visited
563	// and terminate the search if we loop back to them
564	if (!RecursionSet.insert(Ptr: V).second)
565	return false;
566
567	// If V is a constant, then it is known in all predecessors.
568	if (Constant *KC = getKnownConstant(Val: V, Preference)) {
569	for (BasicBlock *Pred : predecessors(BB))
570	Result.emplace_back(Args&: KC, Args&: Pred);
571
572	return !Result.empty();
573	}
574
575	// If V is a non-instruction value, or an instruction in a different block,
576	// then it can't be derived from a PHI.
577	Instruction *I = dyn_cast<Instruction>(Val: V);
578	if (!I \|\| I->getParent() != BB) {
579
580	// Okay, if this is a live-in value, see if it has a known value on any
581	// edge from our predecessors.
582	for (BasicBlock *P : predecessors(BB)) {
583	using namespace PatternMatch;
584	// If the value is known by LazyValueInfo to be a constant in a
585	// predecessor, use that information to try to thread this block.
586	Constant *PredCst = LVI->getConstantOnEdge(V, FromBB: P, ToBB: BB, CxtI);
587	// If I is a non-local compare-with-constant instruction, use more-rich
588	// 'getPredicateOnEdge' method. This would be able to handle value
589	// inequalities better, for example if the compare is "X < 4" and "X < 3"
590	// is known true but "X < 4" itself is not available.
591	CmpPredicate Pred;
592	Value *Val;
593	Constant *Cst;
594	if (!PredCst && match(V, P: m_Cmp(Pred, L: m_Value(V&: Val), R: m_Constant(C&: Cst))))
595	PredCst = LVI->getPredicateOnEdge(Pred, V: Val, C: Cst, FromBB: P, ToBB: BB, CxtI);
596	if (Constant *KC = getKnownConstant(Val: PredCst, Preference))
597	Result.emplace_back(Args&: KC, Args&: P);
598	}
599
600	return !Result.empty();
601	}
602
603	/// If I is a PHI node, then we know the incoming values for any constants.
604	if (PHINode *PN = dyn_cast<PHINode>(Val: I)) {
605	for (unsigned i = `0`, e = PN->getNumIncomingValues(); i != e; ++i) {
606	Value *InVal = PN->getIncomingValue(i);
607	if (Constant *KC = getKnownConstant(Val: InVal, Preference)) {
608	Result.emplace_back(Args&: KC, Args: PN->getIncomingBlock(i));
609	} else {
610	Constant *CI = LVI->getConstantOnEdge(V: InVal,
611	FromBB: PN->getIncomingBlock(i),
612	ToBB: BB, CxtI);
613	if (Constant *KC = getKnownConstant(Val: CI, Preference))
614	Result.emplace_back(Args&: KC, Args: PN->getIncomingBlock(i));
615	}
616	}
617
618	return !Result.empty();
619	}
620
621	// Handle Cast instructions.
622	if (CastInst *CI = dyn_cast<CastInst>(Val: I)) {
623	Value *Source = CI->getOperand(i_nocapture: `0`);
624	PredValueInfoTy Vals;
625	computeValueKnownInPredecessorsImpl(V: Source, BB, Result&: Vals, Preference,
626	RecursionSet, CxtI);
627	if (Vals.empty())
628	return false;
629
630	// Convert the known values.
631	for (auto &Val : Vals)
632	if (Constant *Folded = ConstantFoldCastOperand(Opcode: CI->getOpcode(), C: Val.first,
633	DestTy: CI->getType(), DL))
634	Result.emplace_back(Args&: Folded, Args&: Val.second);
635
636	return !Result.empty();
637	}
638
639	if (FreezeInst *FI = dyn_cast<FreezeInst>(Val: I)) {
640	Value *Source = FI->getOperand(i_nocapture: `0`);
641	computeValueKnownInPredecessorsImpl(V: Source, BB, Result, Preference,
642	RecursionSet, CxtI);
643
644	erase_if(C&: Result, P: [](auto &Pair) {
645	return !isGuaranteedNotToBeUndefOrPoison(Pair.first);
646	});
647
648	return !Result.empty();
649	}
650
651	// Handle some boolean conditions.
652	if (I->getType()->getPrimitiveSizeInBits() == `1`) {
653	using namespace PatternMatch;
654	if (Preference != WantInteger)
655	return false;
656	// X \| true -> true
657	// X & false -> false
658	Value Op0, Op1;
659	if (match(V: I, P: m_LogicalOr(L: m_Value(V&: Op0), R: m_Value(V&: Op1))) \|\|
660	match(V: I, P: m_LogicalAnd(L: m_Value(V&: Op0), R: m_Value(V&: Op1)))) {
661	PredValueInfoTy LHSVals, RHSVals;
662
663	computeValueKnownInPredecessorsImpl(V: Op0, BB, Result&: LHSVals, Preference: WantInteger,
664	RecursionSet, CxtI);
665	computeValueKnownInPredecessorsImpl(V: Op1, BB, Result&: RHSVals, Preference: WantInteger,
666	RecursionSet, CxtI);
667
668	if (LHSVals.empty() && RHSVals.empty())
669	return false;
670
671	ConstantInt *InterestingVal;
672	if (match(V: I, P: m_LogicalOr()))
673	InterestingVal = ConstantInt::getTrue(Context&: I->getContext());
674	else
675	InterestingVal = ConstantInt::getFalse(Context&: I->getContext());
676
677	SmallPtrSet<BasicBlock*, `4`> LHSKnownBBs;
678
679	// Scan for the sentinel. If we find an undef, force it to the
680	// interesting value: x\|undef -> true and x&undef -> false.
681	for (const auto &LHSVal : LHSVals)
682	if (LHSVal.first == InterestingVal \|\| isa<UndefValue>(Val: LHSVal.first)) {
683	Result.emplace_back(Args&: InterestingVal, Args: LHSVal.second);
684	LHSKnownBBs.insert(Ptr: LHSVal.second);
685	}
686	for (const auto &RHSVal : RHSVals)
687	if (RHSVal.first == InterestingVal \|\| isa<UndefValue>(Val: RHSVal.first)) {
688	// If we already inferred a value for this block on the LHS, don't
689	// re-add it.
690	if (!LHSKnownBBs.count(Ptr: RHSVal.second))
691	Result.emplace_back(Args&: InterestingVal, Args: RHSVal.second);
692	}
693
694	return !Result.empty();
695	}
696
697	// Handle the NOT form of XOR.
698	if (I->getOpcode() == Instruction::Xor &&
699	isa<ConstantInt>(Val: I->getOperand(i: `1`)) &&
700	cast<ConstantInt>(Val: I->getOperand(i: `1`))->isOne()) {
701	computeValueKnownInPredecessorsImpl(V: I->getOperand(i: `0`), BB, Result,
702	Preference: WantInteger, RecursionSet, CxtI);
703	if (Result.empty())
704	return false;
705
706	// Invert the known values.
707	for (auto &R : Result)
708	R.first = ConstantExpr::getNot(C: R.first);
709
710	return true;
711	}
712
713	// Try to simplify some other binary operator values.
714	} else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Val: I)) {
715	if (Preference != WantInteger)
716	return false;
717	if (ConstantInt *CI = dyn_cast<ConstantInt>(Val: BO->getOperand(i_nocapture: `1`))) {
718	PredValueInfoTy LHSVals;
719	computeValueKnownInPredecessorsImpl(V: BO->getOperand(i_nocapture: `0`), BB, Result&: LHSVals,
720	Preference: WantInteger, RecursionSet, CxtI);
721
722	// Try to use constant folding to simplify the binary operator.
723	for (const auto &LHSVal : LHSVals) {
724	Constant *V = LHSVal.first;
725	Constant *Folded =
726	ConstantFoldBinaryOpOperands(Opcode: BO->getOpcode(), LHS: V, RHS: CI, DL);
727
728	if (Constant *KC = getKnownConstant(Val: Folded, Preference: WantInteger))
729	Result.emplace_back(Args&: KC, Args: LHSVal.second);
730	}
731	}
732
733	return !Result.empty();
734	}
735
736	// Handle compare with phi operand, where the PHI is defined in this block.
737	if (CmpInst *Cmp = dyn_cast<CmpInst>(Val: I)) {
738	if (Preference != WantInteger)
739	return false;
740	Type *CmpType = Cmp->getType();
741	Value *CmpLHS = Cmp->getOperand(i_nocapture: `0`);
742	Value *CmpRHS = Cmp->getOperand(i_nocapture: `1`);
743	CmpInst::Predicate Pred = Cmp->getPredicate();
744
745	PHINode *PN = dyn_cast<PHINode>(Val: CmpLHS);
746	if (!PN)
747	PN = dyn_cast<PHINode>(Val: CmpRHS);
748	// Do not perform phi translation across a loop header phi, because this
749	// may result in comparison of values from two different loop iterations.
750	// FIXME: This check is broken if LoopHeaders is not populated.
751	if (PN && PN->getParent() == BB && !LoopHeaders.contains(Ptr: BB)) {
752	const DataLayout &DL = PN->getDataLayout();
753	// We can do this simplification if any comparisons fold to true or false.
754	// See if any do.
755	for (unsigned i = `0`, e = PN->getNumIncomingValues(); i != e; ++i) {
756	BasicBlock *PredBB = PN->getIncomingBlock(i);
757	Value LHS, RHS;
758	if (PN == CmpLHS) {
759	LHS = PN->getIncomingValue(i);
760	RHS = CmpRHS->DoPHITranslation(CurBB: BB, PredBB);
761	} else {
762	LHS = CmpLHS->DoPHITranslation(CurBB: BB, PredBB);
763	RHS = PN->getIncomingValue(i);
764	}
765	Value *Res = simplifyCmpInst(Predicate: Pred, LHS, RHS, Q: {DL});
766	if (!Res) {
767	if (!isa<Constant>(Val: RHS))
768	continue;
769
770	// getPredicateOnEdge call will make no sense if LHS is defined in BB.
771	auto LHSInst = dyn_cast<Instruction>(Val: LHS);
772	if (LHSInst && LHSInst->getParent() == BB)
773	continue;
774
775	Res = LVI->getPredicateOnEdge(Pred, V: LHS, C: cast<Constant>(Val: RHS), FromBB: PredBB,
776	ToBB: BB, CxtI: CxtI ? CxtI : Cmp);
777	}
778
779	if (Constant *KC = getKnownConstant(Val: Res, Preference: WantInteger))
780	Result.emplace_back(Args&: KC, Args&: PredBB);
781	}
782
783	return !Result.empty();
784	}
785
786	// If comparing a live-in value against a constant, see if we know the
787	// live-in value on any predecessors.
788	if (isa<Constant>(Val: CmpRHS) && !CmpType->isVectorTy()) {
789	Constant *CmpConst = cast<Constant>(Val: CmpRHS);
790
791	if (!isa<Instruction>(Val: CmpLHS) \|\|
792	cast<Instruction>(Val: CmpLHS)->getParent() != BB) {
793	for (BasicBlock *P : predecessors(BB)) {
794	// If the value is known by LazyValueInfo to be a constant in a
795	// predecessor, use that information to try to thread this block.
796	Constant *Res = LVI->getPredicateOnEdge(Pred, V: CmpLHS, C: CmpConst, FromBB: P, ToBB: BB,
797	CxtI: CxtI ? CxtI : Cmp);
798	if (Constant *KC = getKnownConstant(Val: Res, Preference: WantInteger))
799	Result.emplace_back(Args&: KC, Args&: P);
800	}
801
802	return !Result.empty();
803	}
804
805	// InstCombine can fold some forms of constant range checks into
806	// (icmp (add (x, C1)), C2). See if we have such a thing with
807	// x as a live-in.
808	{
809	using namespace PatternMatch;
810
811	Value *AddLHS;
812	ConstantInt *AddConst;
813	if (isa<ConstantInt>(Val: CmpConst) &&
814	match(V: CmpLHS, P: m_Add(L: m_Value(V&: AddLHS), R: m_ConstantInt(CI&: AddConst)))) {
815	if (!isa<Instruction>(Val: AddLHS) \|\|
816	cast<Instruction>(Val: AddLHS)->getParent() != BB) {
817	for (BasicBlock *P : predecessors(BB)) {
818	// If the value is known by LazyValueInfo to be a ConstantRange in
819	// a predecessor, use that information to try to thread this
820	// block.
821	ConstantRange CR = LVI->getConstantRangeOnEdge(
822	V: AddLHS, FromBB: P, ToBB: BB, CxtI: CxtI ? CxtI : cast<Instruction>(Val: CmpLHS));
823	// Propagate the range through the addition.
824	CR = CR.add(Other: AddConst->getValue());
825
826	// Get the range where the compare returns true.
827	ConstantRange CmpRange = ConstantRange::makeExactICmpRegion(
828	Pred, Other: cast<ConstantInt>(Val: CmpConst)->getValue());
829
830	Constant *ResC;
831	if (CmpRange.contains(CR))
832	ResC = ConstantInt::getTrue(Ty: CmpType);
833	else if (CmpRange.inverse().contains(CR))
834	ResC = ConstantInt::getFalse(Ty: CmpType);
835	else
836	continue;
837
838	Result.emplace_back(Args&: ResC, Args&: P);
839	}
840
841	return !Result.empty();
842	}
843	}
844	}
845
846	// Try to find a constant value for the LHS of a comparison,
847	// and evaluate it statically if we can.
848	PredValueInfoTy LHSVals;
849	computeValueKnownInPredecessorsImpl(V: I->getOperand(i: `0`), BB, Result&: LHSVals,
850	Preference: WantInteger, RecursionSet, CxtI);
851
852	for (const auto &LHSVal : LHSVals) {
853	Constant *V = LHSVal.first;
854	Constant *Folded =
855	ConstantFoldCompareInstOperands(Predicate: Pred, LHS: V, RHS: CmpConst, DL);
856	if (Constant *KC = getKnownConstant(Val: Folded, Preference: WantInteger))
857	Result.emplace_back(Args&: KC, Args: LHSVal.second);
858	}
859
860	return !Result.empty();
861	}
862	}
863
864	if (SelectInst *SI = dyn_cast<SelectInst>(Val: I)) {
865	// Handle select instructions where at least one operand is a known constant
866	// and we can figure out the condition value for any predecessor block.
867	Constant *TrueVal = getKnownConstant(Val: SI->getTrueValue(), Preference);
868	Constant *FalseVal = getKnownConstant(Val: SI->getFalseValue(), Preference);
869	PredValueInfoTy Conds;
870	if ((TrueVal \|\| FalseVal) &&
871	computeValueKnownInPredecessorsImpl(V: SI->getCondition(), BB, Result&: Conds,
872	Preference: WantInteger, RecursionSet, CxtI)) {
873	for (auto &C : Conds) {
874	Constant *Cond = C.first;
875
876	// Figure out what value to use for the condition.
877	bool KnownCond;
878	if (ConstantInt *CI = dyn_cast<ConstantInt>(Val: Cond)) {
879	// A known boolean.
880	KnownCond = CI->isOne();
881	} else {
882	assert(isa<UndefValue>(Cond) && "Unexpected condition value");
883	// Either operand will do, so be sure to pick the one that's a known
884	// constant.
885	// FIXME: Do this more cleverly if both values are known constants?
886	KnownCond = (TrueVal != nullptr);
887	}
888
889	// See if the select has a known constant value for this predecessor.
890	if (Constant *Val = KnownCond ? TrueVal : FalseVal)
891	Result.emplace_back(Args&: Val, Args&: C.second);
892	}
893
894	return !Result.empty();
895	}
896	}
897
898	// If all else fails, see if LVI can figure out a constant value for us.
899	assert(CxtI->getParent() == BB && "CxtI should be in BB");
900	Constant *CI = LVI->getConstant(V, CxtI);
901	if (Constant *KC = getKnownConstant(Val: CI, Preference)) {
902	for (BasicBlock *Pred : predecessors(BB))
903	Result.emplace_back(Args&: KC, Args&: Pred);
904	}
905
906	return !Result.empty();
907	}
908
909	/// GetBestDestForBranchOnUndef - If we determine that the specified block ends
910	/// in an undefined jump, decide which block is best to revector to.
911	///
912	/// Since we can pick an arbitrary destination, we pick the successor with the
913	/// fewest predecessors. This should reduce the in-degree of the others.
914	static unsigned getBestDestForJumpOnUndef(BasicBlock *BB) {
915	Instruction *BBTerm = BB->getTerminator();
916	unsigned MinSucc = `0`;
917	BasicBlock *TestBB = BBTerm->getSuccessor(Idx: MinSucc);
918	// Compute the successor with the minimum number of predecessors.
919	unsigned MinNumPreds = pred_size(BB: TestBB);
920	for (unsigned i = `1`, e = BBTerm->getNumSuccessors(); i != e; ++i) {
921	TestBB = BBTerm->getSuccessor(Idx: i);
922	unsigned NumPreds = pred_size(BB: TestBB);
923	if (NumPreds < MinNumPreds) {
924	MinSucc = i;
925	MinNumPreds = NumPreds;
926	}
927	}
928
929	return MinSucc;
930	}
931
932	static bool hasAddressTakenAndUsed(BasicBlock *BB) {
933	if (!BB->hasAddressTaken()) return false;
934
935	// If the block has its address taken, it may be a tree of dead constants
936	// hanging off of it. These shouldn't keep the block alive.
937	BlockAddress *BA = BlockAddress::get(BB);
938	BA->removeDeadConstantUsers();
939	return !BA->use_empty();
940	}
941
942	/// processBlock - If there are any predecessors whose control can be threaded
943	/// through to a successor, transform them now.
944	bool JumpThreadingPass::processBlock(BasicBlock *BB) {
945	// If the block is trivially dead, just return and let the caller nuke it.
946	// This simplifies other transformations.
947	if (DTU ->isBBPendingDeletion(DelBB: BB) \|\|
948	(pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()))
949	return false;
950
951	// If this block has a single predecessor, and if that pred has a single
952	// successor, merge the blocks. This encourages recursive jump threading
953	// because now the condition in this block can be threaded through
954	// predecessors of our predecessor block.
955	if (maybeMergeBasicBlockIntoOnlyPred(BB))
956	return true;
957
958	if (tryToUnfoldSelectInCurrBB(BB))
959	return true;
960
961	// Look if we can propagate guards to predecessors.
962	if (HasGuards && processGuards(BB))
963	return true;
964
965	// What kind of constant we're looking for.
966	ConstantPreference Preference = WantInteger;
967
968	// Look to see if the terminator is a conditional branch, switch or indirect
969	// branch, if not we can't thread it.
970	Value *Condition;
971	Instruction *Terminator = BB->getTerminator();
972	if (CondBrInst *BI = dyn_cast<CondBrInst>(Val: Terminator)) {
973	Condition = BI->getCondition();
974	} else if (SwitchInst *SI = dyn_cast<SwitchInst>(Val: Terminator)) {
975	Condition = SI->getCondition();
976	} else if (IndirectBrInst *IB = dyn_cast<IndirectBrInst>(Val: Terminator)) {
977	// Can't thread indirect branch with no successors.
978	if (IB->getNumSuccessors() == `0`) return false;
979	Condition = IB->getAddress()->stripPointerCasts();
980	Preference = WantBlockAddress;
981	} else {
982	return false; // Must be an invoke or callbr.
983	}
984
985	// Keep track if we constant folded the condition in this invocation.
986	bool ConstantFolded = false;
987
988	// Run constant folding to see if we can reduce the condition to a simple
989	// constant.
990	if (Instruction *I = dyn_cast<Instruction>(Val: Condition)) {
991	Value *SimpleVal =
992	ConstantFoldInstruction(I, DL: BB->getDataLayout(), TLI);
993	if (SimpleVal) {
994	I->replaceAllUsesWith(V: SimpleVal);
995	if (isInstructionTriviallyDead(I, TLI))
996	I->eraseFromParent();
997	Condition = SimpleVal;
998	ConstantFolded = true;
999	}
1000	}
1001
1002	// If the terminator is branching on an undef or freeze undef, we can pick any
1003	// of the successors to branch to. Let getBestDestForJumpOnUndef decide.
1004	auto *FI = dyn_cast<FreezeInst>(Val: Condition);
1005	if (isa<UndefValue>(Val: Condition) \|\|
1006	(FI && isa<UndefValue>(Val: FI->getOperand(i_nocapture: `0`)) && FI->hasOneUse())) {
1007	unsigned BestSucc = getBestDestForJumpOnUndef(BB);
1008	std::vector<DominatorTree::UpdateType> Updates;
1009
1010	// Fold the branch/switch.
1011	Instruction *BBTerm = BB->getTerminator();
1012	Updates.reserve(n: BBTerm->getNumSuccessors());
1013	for (unsigned i = `0`, e = BBTerm->getNumSuccessors(); i != e; ++i) {
1014	if (i == BestSucc) continue;
1015	BasicBlock *Succ = BBTerm->getSuccessor(Idx: i);
1016	Succ->removePredecessor(Pred: BB, KeepOneInputPHIs: true);
1017	Updates.push_back(x: {DominatorTree::Delete, BB, Succ});
1018	}
1019
1020	LLVM_DEBUG(dbgs() << " In block '" << BB->getName()
1021	<< "' folding undef terminator: " << *BBTerm << `'\n'`);
1022	Instruction *NewBI = UncondBrInst::Create(IfTrue: BBTerm->getSuccessor(Idx: BestSucc),
1023	InsertBefore: BBTerm->getIterator());
1024	NewBI->setDebugLoc(BBTerm->getDebugLoc());
1025	++NumFolds;
1026	BBTerm->eraseFromParent();
1027	DTU ->applyUpdatesPermissive(Updates);
1028	if (FI)
1029	FI->eraseFromParent();
1030	return true;
1031	}
1032
1033	// If the terminator of this block is branching on a constant, simplify the
1034	// terminator to an unconditional branch. This can occur due to threading in
1035	// other blocks.
1036	if (getKnownConstant(Val: Condition, Preference)) {
1037	LLVM_DEBUG(dbgs() << " In block '" << BB->getName()
1038	<< "' folding terminator: " << *BB->getTerminator()
1039	<< `'\n'`);
1040	++NumFolds;
1041	ConstantFoldTerminator(BB, DeleteDeadConditions: true, TLI: nullptr, DTU: DTU.get());
1042	if (auto *BPI = getBPI())
1043	BPI->eraseBlock(BB);
1044	return true;
1045	}
1046
1047	Instruction *CondInst = dyn_cast<Instruction>(Val: Condition);
1048
1049	// All the rest of our checks depend on the condition being an instruction.
1050	if (!CondInst) {
1051	// FIXME: Unify this with code below.
1052	if (processThreadableEdges(Cond: Condition, BB, Preference, CxtI: Terminator))
1053	return true;
1054	return ConstantFolded;
1055	}
1056
1057	// Some of the following optimization can safely work on the unfrozen cond.
1058	Value *CondWithoutFreeze = CondInst;
1059	if (auto *FI = dyn_cast<FreezeInst>(Val: CondInst))
1060	CondWithoutFreeze = FI->getOperand(i_nocapture: `0`);
1061
1062	if (CmpInst *CondCmp = dyn_cast<CmpInst>(Val: CondWithoutFreeze)) {
1063	// If we're branching on a conditional, LVI might be able to determine
1064	// it's value at the branch instruction. We only handle comparisons
1065	// against a constant at this time.
1066	if (Constant *CondConst = dyn_cast<Constant>(Val: CondCmp->getOperand(i_nocapture: `1`))) {
1067	Constant *Res =
1068	LVI->getPredicateAt(Pred: CondCmp->getPredicate(), V: CondCmp->getOperand(i_nocapture: `0`),
1069	C: CondConst, CxtI: BB->getTerminator(),
1070	/UseBlockValue=/false);
1071	if (Res) {
1072	// We can safely replace some* uses of the CondInst if it has*
1073	// exactly one value as returned by LVI. RAUW is incorrect in the
1074	// presence of guards and assumes, that have the `Cond` as the use. This
1075	// is because we use the guards/assume to reason about the `Cond` value
1076	// at the end of block, but RAUW unconditionally replaces all uses
1077	// including the guards/assumes themselves and the uses before the
1078	// guard/assume.
1079	if (replaceFoldableUses(Cond: CondCmp, ToVal: Res, KnownAtEndOfBB: BB))
1080	return true;
1081	}
1082
1083	// We did not manage to simplify this branch, try to see whether
1084	// CondCmp depends on a known phi-select pattern.
1085	if (tryToUnfoldSelect(CondCmp, BB))
1086	return true;
1087	}
1088	}
1089
1090	if (SwitchInst *SI = dyn_cast<SwitchInst>(Val: BB->getTerminator()))
1091	if (tryToUnfoldSelect(SI, BB))
1092	return true;
1093
1094	// Check for some cases that are worth simplifying. Right now we want to look
1095	// for loads that are used by a switch or by the condition for the branch. If
1096	// we see one, check to see if it's partially redundant. If so, insert a PHI
1097	// which can then be used to thread the values.
1098	Value *SimplifyValue = CondWithoutFreeze;
1099
1100	if (CmpInst *CondCmp = dyn_cast<CmpInst>(Val: SimplifyValue))
1101	if (isa<Constant>(Val: CondCmp->getOperand(i_nocapture: `1`)))
1102	SimplifyValue = CondCmp->getOperand(i_nocapture: `0`);
1103
1104	// TODO: There are other places where load PRE would be profitable, such as
1105	// more complex comparisons.
1106	if (LoadInst *LoadI = dyn_cast<LoadInst>(Val: SimplifyValue))
1107	if (simplifyPartiallyRedundantLoad(LI: LoadI))
1108	return true;
1109
1110	// Before threading, try to propagate profile data backwards:
1111	if (PHINode *PN = dyn_cast<PHINode>(Val: CondInst))
1112	if (PN->getParent() == BB && isa<CondBrInst>(Val: BB->getTerminator()))
1113	updatePredecessorProfileMetadata(PN, BB);
1114
1115	// Handle a variety of cases where we are branching on something derived from
1116	// a PHI node in the current block. If we can prove that any predecessors
1117	// compute a predictable value based on a PHI node, thread those predecessors.
1118	if (processThreadableEdges(Cond: CondInst, BB, Preference, CxtI: Terminator))
1119	return true;
1120
1121	// If this is an otherwise-unfoldable branch on a phi node or freeze(phi) in
1122	// the current block, see if we can simplify.
1123	PHINode *PN = dyn_cast<PHINode>(Val: CondWithoutFreeze);
1124	if (PN && PN->getParent() == BB && isa<CondBrInst>(Val: BB->getTerminator()))
1125	return processBranchOnPHI(PN);
1126
1127	// If this is an otherwise-unfoldable branch on a XOR, see if we can simplify.
1128	if (CondInst->getOpcode() == Instruction::Xor &&
1129	CondInst->getParent() == BB && isa<CondBrInst>(Val: BB->getTerminator()))
1130	return processBranchOnXOR(BO: cast<BinaryOperator>(Val: CondInst));
1131
1132	// Search for a stronger dominating condition that can be used to simplify a
1133	// conditional branch leaving BB.
1134	if (processImpliedCondition(BB))
1135	return true;
1136
1137	return false;
1138	}
1139
1140	bool JumpThreadingPass::processImpliedCondition(BasicBlock *BB) {
1141	auto *BI = dyn_cast<CondBrInst>(Val: BB->getTerminator());
1142	if (!BI)
1143	return false;
1144
1145	Value *Cond = BI->getCondition();
1146	// Assuming that predecessor's branch was taken, if pred's branch condition
1147	// (V) implies Cond, Cond can be either true, undef, or poison. In this case,
1148	// freeze(Cond) is either true or a nondeterministic value.
1149	// If freeze(Cond) has only one use, we can freely fold freeze(Cond) to true
1150	// without affecting other instructions.
1151	auto *FICond = dyn_cast<FreezeInst>(Val: Cond);
1152	if (FICond && FICond->hasOneUse())
1153	Cond = FICond->getOperand(i_nocapture: `0`);
1154	else
1155	FICond = nullptr;
1156
1157	BasicBlock *CurrentBB = BB;
1158	BasicBlock *CurrentPred = BB->getSinglePredecessor();
1159	unsigned Iter = `0`;
1160
1161	auto &DL = BB->getDataLayout();
1162
1163	while (CurrentPred && Iter++ < ImplicationSearchThreshold) {
1164	auto *PBI = dyn_cast<CondBrInst>(Val: CurrentPred->getTerminator());
1165	if (!PBI)
1166	return false;
1167	if (PBI->getSuccessor(i: `0`) != CurrentBB && PBI->getSuccessor(i: `1`) != CurrentBB)
1168	return false;
1169
1170	bool CondIsTrue = PBI->getSuccessor(i: `0`) == CurrentBB;
1171	std::optional<bool> Implication =
1172	isImpliedCondition(LHS: PBI->getCondition(), RHS: Cond, DL, LHSIsTrue: CondIsTrue);
1173
1174	// If the branch condition of BB (which is Cond) and CurrentPred are
1175	// exactly the same freeze instruction, Cond can be folded into CondIsTrue.
1176	if (!Implication && FICond && isa<FreezeInst>(Val: PBI->getCondition())) {
1177	if (cast<FreezeInst>(Val: PBI->getCondition())->getOperand(i_nocapture: `0`) ==
1178	FICond->getOperand(i_nocapture: `0`))
1179	Implication = CondIsTrue;
1180	}
1181
1182	if (Implication) {
1183	BasicBlock KeepSucc = BI->getSuccessor(i: Implication ? `0` : `1`);
1184	BasicBlock RemoveSucc = BI->getSuccessor(i: Implication ? `1` : `0`);
1185	RemoveSucc->removePredecessor(Pred: BB);
1186	UncondBrInst *UncondBI =
1187	UncondBrInst::Create(IfTrue: KeepSucc, InsertBefore: BI->getIterator());
1188	UncondBI->setDebugLoc(BI->getDebugLoc());
1189	++NumFolds;
1190	BI->eraseFromParent();
1191	if (FICond)
1192	FICond->eraseFromParent();
1193
1194	DTU ->applyUpdatesPermissive(Updates: {{DominatorTree::Delete, BB, RemoveSucc}});
1195	if (auto *BPI = getBPI())
1196	BPI->eraseBlock(BB);
1197	return true;
1198	}
1199	CurrentBB = CurrentPred;
1200	CurrentPred = CurrentBB->getSinglePredecessor();
1201	}
1202
1203	return false;
1204	}
1205
1206	/// Return true if Op is an instruction defined in the given block.
1207	static bool isOpDefinedInBlock(Value Op, BasicBlock BB) {
1208	if (Instruction *OpInst = dyn_cast<Instruction>(Val: Op))
1209	if (OpInst->getParent() == BB)
1210	return true;
1211	return false;
1212	}
1213
1214	/// simplifyPartiallyRedundantLoad - If LoadI is an obviously partially
1215	/// redundant load instruction, eliminate it by replacing it with a PHI node.
1216	/// This is an important optimization that encourages jump threading, and needs
1217	/// to be run interlaced with other jump threading tasks.
1218	bool JumpThreadingPass::simplifyPartiallyRedundantLoad(LoadInst *LoadI) {
1219	// Don't hack volatile and ordered loads.
1220	if (!LoadI->isUnordered()) return false;
1221
1222	// If the load is defined in a block with exactly one predecessor, it can't be
1223	// partially redundant.
1224	BasicBlock *LoadBB = LoadI->getParent();
1225	if (LoadBB->getSinglePredecessor())
1226	return false;
1227
1228	// If the load is defined in an EH pad, it can't be partially redundant,
1229	// because the edges between the invoke and the EH pad cannot have other
1230	// instructions between them.
1231	if (LoadBB->isEHPad())
1232	return false;
1233
1234	Value *LoadedPtr = LoadI->getOperand(i_nocapture: `0`);
1235
1236	// If the loaded operand is defined in the LoadBB and its not a phi,
1237	// it can't be available in predecessors.
1238	if (isOpDefinedInBlock(Op: LoadedPtr, BB: LoadBB) && !isa<PHINode>(Val: LoadedPtr))
1239	return false;
1240
1241	// Scan a few instructions up from the load, to see if it is obviously live at
1242	// the entry to its block.
1243	BasicBlock::iterator BBIt(LoadI);
1244	bool IsLoadCSE;
1245	BatchAAResults BatchAA(*AA);
1246	// The dominator tree is updated lazily and may not be valid at this point.
1247	BatchAA.disableDominatorTree();
1248	if (Value *AvailableVal = FindAvailableLoadedValue(
1249	Load: LoadI, ScanBB: LoadBB, ScanFrom&: BBIt, MaxInstsToScan: DefMaxInstsToScan, AA: &BatchAA, IsLoadCSE: &IsLoadCSE)) {
1250	// If the value of the load is locally available within the block, just use
1251	// it. This frequently occurs for reg2mem'd allocas.
1252
1253	if (IsLoadCSE) {
1254	LoadInst *NLoadI = cast<LoadInst>(Val: AvailableVal);
1255	combineMetadataForCSE(K: NLoadI, J: LoadI, DoesKMove: false);
1256	LVI->forgetValue(V: NLoadI);
1257	};
1258
1259	// If the returned value is the load itself, replace with poison. This can
1260	// only happen in dead loops.
1261	if (AvailableVal == LoadI)
1262	AvailableVal = PoisonValue::get(T: LoadI->getType());
1263	if (AvailableVal->getType() != LoadI->getType()) {
1264	AvailableVal = CastInst::CreateBitOrPointerCast(
1265	S: AvailableVal, Ty: LoadI->getType(), Name: "", InsertBefore: LoadI->getIterator());
1266	cast<Instruction>(Val: AvailableVal)->setDebugLoc(LoadI->getDebugLoc());
1267	}
1268	LoadI->replaceAllUsesWith(V: AvailableVal);
1269	LoadI->eraseFromParent();
1270	return true;
1271	}
1272
1273	// Otherwise, if we scanned the whole block and got to the top of the block,
1274	// we know the block is locally transparent to the load. If not, something
1275	// might clobber its value.
1276	if (BBIt != LoadBB->begin())
1277	return false;
1278
1279	// If all of the loads and stores that feed the value have the same AA tags,
1280	// then we can propagate them onto any newly inserted loads.
1281	AAMDNodes AATags = LoadI->getAAMetadata();
1282
1283	SmallPtrSet<BasicBlock*, `8`> PredsScanned;
1284
1285	using AvailablePredsTy = SmallVector<std::pair<BasicBlock , Value >, `8`>;
1286
1287	AvailablePredsTy AvailablePreds;
1288	BasicBlock OneUnavailablePred = nullptr*;
1289	SmallVector<LoadInst*, `8`> CSELoads;
1290
1291	// If we got here, the loaded value is transparent through to the start of the
1292	// block. Check to see if it is available in any of the predecessor blocks.
1293	for (BasicBlock *PredBB : predecessors(BB: LoadBB)) {
1294	// If we already scanned this predecessor, skip it.
1295	if (!PredsScanned.insert(Ptr: PredBB).second)
1296	continue;
1297
1298	BBIt = PredBB->end();
1299	unsigned NumScanedInst = `0`;
1300	Value PredAvailable = nullptr*;
1301	// NOTE: We don't CSE load that is volatile or anything stronger than
1302	// unordered, that should have been checked when we entered the function.
1303	assert(LoadI->isUnordered() &&
1304	"Attempting to CSE volatile or atomic loads");
1305	// If this is a load on a phi pointer, phi-translate it and search
1306	// for available load/store to the pointer in predecessors.
1307	Type *AccessTy = LoadI->getType();
1308	const auto &DL = LoadI->getDataLayout();
1309	MemoryLocation Loc(LoadedPtr->DoPHITranslation(CurBB: LoadBB, PredBB),
1310	LocationSize::precise(Value: DL.getTypeStoreSize(Ty: AccessTy)),
1311	AATags);
1312	PredAvailable = findAvailablePtrLoadStore(
1313	Loc, AccessTy, AtLeastAtomic: LoadI->isAtomic(), ScanBB: PredBB, ScanFrom&: BBIt, MaxInstsToScan: DefMaxInstsToScan,
1314	AA: &BatchAA, IsLoadCSE: &IsLoadCSE, NumScanedInst: &NumScanedInst);
1315
1316	// If PredBB has a single predecessor, continue scanning through the
1317	// single predecessor.
1318	BasicBlock *SinglePredBB = PredBB;
1319	while (!PredAvailable && SinglePredBB && BBIt == SinglePredBB->begin() &&
1320	NumScanedInst < DefMaxInstsToScan) {
1321	SinglePredBB = SinglePredBB->getSinglePredecessor();
1322	if (SinglePredBB) {
1323	BBIt = SinglePredBB->end();
1324	PredAvailable = findAvailablePtrLoadStore(
1325	Loc, AccessTy, AtLeastAtomic: LoadI->isAtomic(), ScanBB: SinglePredBB, ScanFrom&: BBIt,
1326	MaxInstsToScan: (DefMaxInstsToScan - NumScanedInst), AA: &BatchAA, IsLoadCSE: &IsLoadCSE,
1327	NumScanedInst: &NumScanedInst);
1328	}
1329	}
1330
1331	if (!PredAvailable) {
1332	OneUnavailablePred = PredBB;
1333	continue;
1334	}
1335
1336	if (IsLoadCSE)
1337	CSELoads.push_back(Elt: cast<LoadInst>(Val: PredAvailable));
1338
1339	// If so, this load is partially redundant. Remember this info so that we
1340	// can create a PHI node.
1341	AvailablePreds.emplace_back(Args&: PredBB, Args&: PredAvailable);
1342	}
1343
1344	// If the loaded value isn't available in any predecessor, it isn't partially
1345	// redundant.
1346	if (AvailablePreds.empty()) return false;
1347
1348	// Okay, the loaded value is available in at least one (and maybe all!)
1349	// predecessors. If the value is unavailable in more than one unique
1350	// predecessor, we want to insert a merge block for those common predecessors.
1351	// This ensures that we only have to insert one reload, thus not increasing
1352	// code size.
1353	BasicBlock UnavailablePred = nullptr*;
1354
1355	// If the value is unavailable in one of predecessors, we will end up
1356	// inserting a new instruction into them. It is only valid if all the
1357	// instructions before LoadI are guaranteed to pass execution to its
1358	// successor, or if LoadI is safe to speculate.
1359	// TODO: If this logic becomes more complex, and we will perform PRE insertion
1360	// farther than to a predecessor, we need to reuse the code from GVN's PRE.
1361	// It requires domination tree analysis, so for this simple case it is an
1362	// overkill.
1363	if (PredsScanned.size() != AvailablePreds.size() &&
1364	!isSafeToSpeculativelyExecute(I: LoadI))
1365	for (auto I = LoadBB->begin(); &*I != LoadI; ++I)
1366	if (!isGuaranteedToTransferExecutionToSuccessor(I: &*I))
1367	return false;
1368
1369	// If there is exactly one predecessor where the value is unavailable, the
1370	// already computed 'OneUnavailablePred' block is it. If it ends in an
1371	// unconditional branch, we know that it isn't a critical edge.
1372	if (PredsScanned.size() == AvailablePreds.size()+`1` &&
1373	OneUnavailablePred->getTerminator()->getNumSuccessors() == `1`) {
1374	UnavailablePred = OneUnavailablePred;
1375	} else if (PredsScanned.size() != AvailablePreds.size()) {
1376	// Otherwise, we had multiple unavailable predecessors or we had a critical
1377	// edge from the one.
1378	SmallVector<BasicBlock*, `8`> PredsToSplit;
1379	SmallPtrSet<BasicBlock *, `8`> AvailablePredSet(
1380	llvm::from_range, llvm::make_first_range(c&: AvailablePreds));
1381
1382	// Add all the unavailable predecessors to the PredsToSplit list.
1383	for (BasicBlock *P : predecessors(BB: LoadBB)) {
1384	// If the predecessor is an indirect goto, we can't split the edge.
1385	if (isa<IndirectBrInst>(Val: P->getTerminator()))
1386	return false;
1387
1388	if (!AvailablePredSet.count(Ptr: P))
1389	PredsToSplit.push_back(Elt: P);
1390	}
1391
1392	// Split them out to their own block.
1393	UnavailablePred = splitBlockPreds(BB: LoadBB, Preds: PredsToSplit, Suffix: "thread-pre-split");
1394	}
1395
1396	// If the value isn't available in all predecessors, then there will be
1397	// exactly one where it isn't available. Insert a load on that edge and add
1398	// it to the AvailablePreds list.
1399	if (UnavailablePred) {
1400	assert(UnavailablePred->getTerminator()->getNumSuccessors() == `1` &&
1401	"Can't handle critical edge here!");
1402	LoadInst NewVal = new* LoadInst (
1403	LoadI->getType(), LoadedPtr->DoPHITranslation(CurBB: LoadBB, PredBB: UnavailablePred),
1404	LoadI->getName() + ".pr", false, LoadI->getAlign(),
1405	LoadI->getOrdering(), LoadI->getSyncScopeID(),
1406	UnavailablePred->getTerminator()->getIterator());
1407	NewVal->setDebugLoc(LoadI->getDebugLoc());
1408	if (AATags)
1409	NewVal->setAAMetadata(AATags);
1410
1411	AvailablePreds.emplace_back(Args&: UnavailablePred, Args&: NewVal);
1412	}
1413
1414	// Now we know that each predecessor of this block has a value in
1415	// AvailablePreds, sort them for efficient access as we're walking the preds.
1416	array_pod_sort(Start: AvailablePreds.begin(), End: AvailablePreds.end());
1417
1418	// Create a PHI node at the start of the block for the PRE'd load value.
1419	PHINode *PN = PHINode::Create(Ty: LoadI->getType(), NumReservedValues: pred_size(BB: LoadBB), NameStr: "");
1420	PN->insertBefore(InsertPos: LoadBB->begin());
1421	PN->takeName(V: LoadI);
1422	PN->setDebugLoc(LoadI->getDebugLoc());
1423
1424	// Insert new entries into the PHI for each predecessor. A single block may
1425	// have multiple entries here.
1426	for (BasicBlock *P : predecessors(BB: LoadBB)) {
1427	AvailablePredsTy::iterator I =
1428	llvm::lower_bound(Range&: AvailablePreds, Value: std::make_pair(x&: P, y: (Value )nullptr*));
1429
1430	assert(I != AvailablePreds.end() && I->first == P &&
1431	"Didn't find entry for predecessor!");
1432
1433	// If we have an available predecessor but it requires casting, insert the
1434	// cast in the predecessor and use the cast. Note that we have to update the
1435	// AvailablePreds vector as we go so that all of the PHI entries for this
1436	// predecessor use the same bitcast.
1437	Value *&PredV = I->second;
1438	if (PredV->getType() != LoadI->getType()) {
1439	PredV = CastInst::CreateBitOrPointerCast(
1440	S: PredV, Ty: LoadI->getType(), Name: "", InsertBefore: P->getTerminator()->getIterator());
1441	// The new cast is producing the value used to replace the load
1442	// instruction, so uses the load's debug location. If P does not always
1443	// branch to the load BB however then the debug location must be dropped,
1444	// as it is hoisted past a conditional branch.
1445	DebugLoc DL = P->getTerminator()->getNumSuccessors() == `1`
1446	? LoadI->getDebugLoc()
1447	: DebugLoc::getDropped();
1448	cast<CastInst>(Val: PredV)->setDebugLoc(DL);
1449	}
1450
1451	PN->addIncoming(V: PredV, BB: I->first);
1452	}
1453
1454	for (LoadInst *PredLoadI : CSELoads) {
1455	combineMetadataForCSE(K: PredLoadI, J: LoadI, DoesKMove: true);
1456	LVI->forgetValue(V: PredLoadI);
1457	}
1458
1459	LoadI->replaceAllUsesWith(V: PN);
1460	LoadI->eraseFromParent();
1461
1462	return true;
1463	}
1464
1465	/// findMostPopularDest - The specified list contains multiple possible
1466	/// threadable destinations. Pick the one that occurs the most frequently in
1467	/// the list.
1468	static BasicBlock *
1469	findMostPopularDest(BasicBlock *BB,
1470	const SmallVectorImpl<std::pair<BasicBlock *,
1471	BasicBlock *>> &PredToDestList) {
1472	assert(!PredToDestList.empty());
1473
1474	// Determine popularity. If there are multiple possible destinations, we
1475	// explicitly choose to ignore 'undef' destinations. We prefer to thread
1476	// blocks with known and real destinations to threading undef. We'll handle
1477	// them later if interesting.
1478	MapVector<BasicBlock , unsigned*> DestPopularity;
1479
1480	// Populate DestPopularity with the successors in the order they appear in the
1481	// successor list. This way, we ensure determinism by iterating it in the
1482	// same order in llvm::max_element below. We map nullptr to 0 so that we can
1483	// return nullptr when PredToDestList contains nullptr only.
1484	DestPopularity [nullptr] = `0`;
1485	for (auto *SuccBB : successors(BB))
1486	DestPopularity [SuccBB] = `0`;
1487
1488	for (const auto &PredToDest : PredToDestList)
1489	if (PredToDest.second)
1490	DestPopularity [PredToDest.second]++;
1491
1492	// Find the most popular dest.
1493	auto MostPopular = llvm::max_element(Range&: DestPopularity, C: llvm::less_second ());
1494
1495	// Okay, we have finally picked the most popular destination.
1496	return MostPopular->first;
1497	}
1498
1499	// Try to evaluate the value of V when the control flows from PredPredBB to
1500	// BB->getSinglePredecessor() and then on to BB.
1501	Constant JumpThreadingPass::evaluateOnPredecessorEdge(BasicBlock BB,
1502	BasicBlock *PredPredBB,
1503	Value *V,
1504	const DataLayout &DL) {
1505	SmallPtrSet<Value *, `8`> Visited;
1506	return evaluateOnPredecessorEdge(BB, PredPredBB, cond: V, DL, Visited);
1507	}
1508
1509	Constant *JumpThreadingPass::evaluateOnPredecessorEdge(
1510	BasicBlock BB, BasicBlock PredPredBB, Value V, const* DataLayout &DL,
1511	SmallPtrSet<Value *, `8`> &Visited) {
1512	if (!Visited.insert(Ptr: V).second)
1513	return nullptr;
1514	llvm::scope_exit _([&Visited, V]() { Visited.erase(Ptr: V); });
1515
1516	BasicBlock *PredBB = BB->getSinglePredecessor();
1517	assert(PredBB && "Expected a single predecessor");
1518
1519	if (Constant *Cst = dyn_cast<Constant>(Val: V)) {
1520	return Cst;
1521	}
1522
1523	// Consult LVI if V is not an instruction in BB or PredBB.
1524	Instruction *I = dyn_cast<Instruction>(Val: V);
1525	if (!I \|\| (I->getParent() != BB && I->getParent() != PredBB)) {
1526	return LVI->getConstantOnEdge(V, FromBB: PredPredBB, ToBB: PredBB, CxtI: nullptr);
1527	}
1528
1529	// Look into a PHI argument.
1530	if (PHINode *PHI = dyn_cast<PHINode>(Val: V)) {
1531	if (PHI->getParent() == PredBB)
1532	return dyn_cast<Constant>(Val: PHI->getIncomingValueForBlock(BB: PredPredBB));
1533	return nullptr;
1534	}
1535
1536	// If we have a CmpInst, try to fold it for each incoming edge into PredBB.
1537	// Note that during the execution of the pass, phi nodes may become constant
1538	// and may be removed, which can lead to self-referencing instructions in
1539	// code that becomes unreachable. Consequently, we need to handle those
1540	// instructions in unreachable code and check before going into recursion.
1541	if (CmpInst *CondCmp = dyn_cast<CmpInst>(Val: V)) {
1542	if (CondCmp->getParent() == BB) {
1543	Constant *Op0 = evaluateOnPredecessorEdge(
1544	BB, PredPredBB, V: CondCmp->getOperand(i_nocapture: `0`), DL, Visited);
1545	Constant *Op1 = evaluateOnPredecessorEdge(
1546	BB, PredPredBB, V: CondCmp->getOperand(i_nocapture: `1`), DL, Visited);
1547	if (Op0 && Op1) {
1548	return ConstantFoldCompareInstOperands(Predicate: CondCmp->getPredicate(), LHS: Op0,
1549	RHS: Op1, DL);
1550	}
1551	}
1552	return nullptr;
1553	}
1554
1555	return nullptr;
1556	}
1557
1558	bool JumpThreadingPass::processThreadableEdges(Value Cond, BasicBlock BB,
1559	ConstantPreference Preference,
1560	Instruction *CxtI) {
1561	// If threading this would thread across a loop header, don't even try to
1562	// thread the edge.
1563	if (LoopHeaders.count(Ptr: BB))
1564	return false;
1565
1566	PredValueInfoTy PredValues;
1567	if (!computeValueKnownInPredecessors(V: Cond, BB, Result&: PredValues, Preference,
1568	CxtI)) {
1569	// We don't have known values in predecessors. See if we can thread through
1570	// BB and its sole predecessor.
1571	return maybethreadThroughTwoBasicBlocks(BB, Cond);
1572	}
1573
1574	assert(!PredValues.empty() &&
1575	"computeValueKnownInPredecessors returned true with no values");
1576
1577	LLVM_DEBUG(dbgs() << "IN BB: " << *BB;
1578	for (const auto &PredValue : PredValues) {
1579	dbgs() << " BB '" << BB->getName()
1580	<< "': FOUND condition = " << *PredValue.first
1581	<< " for pred '" << PredValue.second->getName() << "'.\n";
1582	});
1583
1584	// Decide what we want to thread through. Convert our list of known values to
1585	// a list of known destinations for each pred. This also discards duplicate
1586	// predecessors and keeps track of the undefined inputs (which are represented
1587	// as a null dest in the PredToDestList).
1588	SmallPtrSet<BasicBlock*, `16`> SeenPreds;
1589	SmallVector<std::pair<BasicBlock, BasicBlock>, `16`> PredToDestList;
1590
1591	BasicBlock OnlyDest = nullptr*;
1592	BasicBlock MultipleDestSentinel = (BasicBlock)(intptr_t)~`0ULL`;
1593	Constant OnlyVal = nullptr*;
1594	Constant MultipleVal = (Constant )(intptr_t)~`0ULL`;
1595
1596	for (const auto &PredValue : PredValues) {
1597	BasicBlock *Pred = PredValue.second;
1598	if (!SeenPreds.insert(Ptr: Pred).second)
1599	continue; // Duplicate predecessor entry.
1600
1601	Constant *Val = PredValue.first;
1602
1603	BasicBlock *DestBB;
1604	if (isa<UndefValue>(Val))
1605	DestBB = nullptr;
1606	else if (CondBrInst *BI = dyn_cast<CondBrInst>(Val: BB->getTerminator())) {
1607	assert(isa<ConstantInt>(Val) && "Expecting a constant integer");
1608	DestBB = BI->getSuccessor(i: cast<ConstantInt>(Val)->isZero());
1609	} else if (SwitchInst *SI = dyn_cast<SwitchInst>(Val: BB->getTerminator())) {
1610	assert(isa<ConstantInt>(Val) && "Expecting a constant integer");
1611	DestBB = SI->findCaseValue(C: cast<ConstantInt>(Val))->getCaseSuccessor();
1612	} else {
1613	assert(isa<IndirectBrInst>(BB->getTerminator())
1614	&& "Unexpected terminator");
1615	assert(isa<BlockAddress>(Val) && "Expecting a constant blockaddress");
1616	DestBB = cast<BlockAddress>(Val)->getBasicBlock();
1617	}
1618
1619	// If we have exactly one destination, remember it for efficiency below.
1620	if (PredToDestList.empty()) {
1621	OnlyDest = DestBB;
1622	OnlyVal = Val;
1623	} else {
1624	if (OnlyDest != DestBB)
1625	OnlyDest = MultipleDestSentinel;
1626	// It possible we have same destination, but different value, e.g. default
1627	// case in switchinst.
1628	if (Val != OnlyVal)
1629	OnlyVal = MultipleVal;
1630	}
1631
1632	// If the predecessor ends with an indirect goto, we can't change its
1633	// destination.
1634	if (isa<IndirectBrInst>(Val: Pred->getTerminator()))
1635	continue;
1636
1637	PredToDestList.emplace_back(Args&: Pred, Args&: DestBB);
1638	}
1639
1640	// If all edges were unthreadable, we fail.
1641	if (PredToDestList.empty())
1642	return false;
1643
1644	// If all the predecessors go to a single known successor, we want to fold,
1645	// not thread. By doing so, we do not need to duplicate the current block and
1646	// also miss potential opportunities in case we dont/cant duplicate.
1647	if (OnlyDest && OnlyDest != MultipleDestSentinel) {
1648	if (BB->hasNPredecessors(N: PredToDestList.size())) {
1649	bool SeenFirstBranchToOnlyDest = false;
1650	std::vector <DominatorTree::UpdateType> Updates;
1651	Updates.reserve(n: BB->getTerminator()->getNumSuccessors() - `1`);
1652	for (BasicBlock *SuccBB : successors(BB)) {
1653	if (SuccBB == OnlyDest && !SeenFirstBranchToOnlyDest) {
1654	SeenFirstBranchToOnlyDest = true; // Don't modify the first branch.
1655	} else {
1656	SuccBB->removePredecessor(Pred: BB, KeepOneInputPHIs: true); // This is unreachable successor.
1657	Updates.push_back(x: {DominatorTree::Delete, BB, SuccBB});
1658	}
1659	}
1660
1661	// Finally update the terminator.
1662	Instruction *Term = BB->getTerminator();
1663	Instruction *NewBI = UncondBrInst::Create(IfTrue: OnlyDest, InsertBefore: Term->getIterator());
1664	NewBI->setDebugLoc(Term->getDebugLoc());
1665	++NumFolds;
1666	Term->eraseFromParent();
1667	DTU ->applyUpdatesPermissive(Updates);
1668	if (auto *BPI = getBPI())
1669	BPI->eraseBlock(BB);
1670
1671	// If the condition is now dead due to the removal of the old terminator,
1672	// erase it.
1673	if (auto *CondInst = dyn_cast<Instruction>(Val: Cond)) {
1674	if (CondInst->use_empty() && !CondInst->mayHaveSideEffects())
1675	CondInst->eraseFromParent();
1676	// We can safely replace some* uses of the CondInst if it has*
1677	// exactly one value as returned by LVI. RAUW is incorrect in the
1678	// presence of guards and assumes, that have the `Cond` as the use. This
1679	// is because we use the guards/assume to reason about the `Cond` value
1680	// at the end of block, but RAUW unconditionally replaces all uses
1681	// including the guards/assumes themselves and the uses before the
1682	// guard/assume.
1683	else if (OnlyVal && OnlyVal != MultipleVal)
1684	replaceFoldableUses(Cond: CondInst, ToVal: OnlyVal, KnownAtEndOfBB: BB);
1685	}
1686	return true;
1687	}
1688	}
1689
1690	// Determine which is the most common successor. If we have many inputs and
1691	// this block is a switch, we want to start by threading the batch that goes
1692	// to the most popular destination first. If we only know about one
1693	// threadable destination (the common case) we can avoid this.
1694	BasicBlock *MostPopularDest = OnlyDest;
1695
1696	if (MostPopularDest == MultipleDestSentinel) {
1697	// Remove any loop headers from the Dest list, threadEdge conservatively
1698	// won't process them, but we might have other destination that are eligible
1699	// and we still want to process.
1700	erase_if(C&: PredToDestList,
1701	P: [&](const std::pair<BasicBlock , BasicBlock > &PredToDest) {
1702	return LoopHeaders.contains(Ptr: PredToDest.second);
1703	});
1704
1705	if (PredToDestList.empty())
1706	return false;
1707
1708	MostPopularDest = findMostPopularDest(BB, PredToDestList);
1709	}
1710
1711	// Now that we know what the most popular destination is, factor all
1712	// predecessors that will jump to it into a single predecessor.
1713	SmallVector<BasicBlock*, `16`> PredsToFactor;
1714	for (const auto &PredToDest : PredToDestList)
1715	if (PredToDest.second == MostPopularDest) {
1716	BasicBlock *Pred = PredToDest.first;
1717
1718	// This predecessor may be a switch or something else that has multiple
1719	// edges to the block. Factor each of these edges by listing them
1720	// according to # occurrences in PredsToFactor.
1721	for (BasicBlock *Succ : successors(BB: Pred))
1722	if (Succ == BB)
1723	PredsToFactor.push_back(Elt: Pred);
1724	}
1725
1726	// If the threadable edges are branching on an undefined value, we get to pick
1727	// the destination that these predecessors should get to.
1728	if (!MostPopularDest)
1729	MostPopularDest = BB->getTerminator()->
1730	getSuccessor(Idx: getBestDestForJumpOnUndef(BB));
1731
1732	// Ok, try to thread it!
1733	return tryThreadEdge(BB, PredBBs: PredsToFactor, SuccBB: MostPopularDest);
1734	}
1735
1736	/// processBranchOnPHI - We have an otherwise unthreadable conditional branch on
1737	/// a PHI node (or freeze PHI) in the current block. See if there are any
1738	/// simplifications we can do based on inputs to the phi node.
1739	bool JumpThreadingPass::processBranchOnPHI(PHINode *PN) {
1740	BasicBlock *BB = PN->getParent();
1741
1742	// TODO: We could make use of this to do it once for blocks with common PHI
1743	// values.
1744	SmallVector<BasicBlock*, `1`> PredBBs;
1745	PredBBs.resize(N: `1`);
1746
1747	// If any of the predecessor blocks end in an unconditional branch, we can
1748	// duplicate* the conditional branch into that block in order to further*
1749	// encourage jump threading and to eliminate cases where we have branch on a
1750	// phi of an icmp (branch on icmp is much better).
1751	// This is still beneficial when a frozen phi is used as the branch condition
1752	// because it allows CodeGenPrepare to further canonicalize br(freeze(icmp))
1753	// to br(icmp(freeze ...)).
1754	for (unsigned i = `0`, e = PN->getNumIncomingValues(); i != e; ++i) {
1755	BasicBlock *PredBB = PN->getIncomingBlock(i);
1756	if (isa<UncondBrInst>(Val: PredBB->getTerminator())) {
1757	PredBBs [`0`] = PredBB;
1758	// Try to duplicate BB into PredBB.
1759	if (duplicateCondBranchOnPHIIntoPred(BB, PredBBs))
1760	return true;
1761	}
1762	}
1763
1764	return false;
1765	}
1766
1767	/// processBranchOnXOR - We have an otherwise unthreadable conditional branch on
1768	/// a xor instruction in the current block. See if there are any
1769	/// simplifications we can do based on inputs to the xor.
1770	bool JumpThreadingPass::processBranchOnXOR(BinaryOperator *BO) {
1771	BasicBlock *BB = BO->getParent();
1772
1773	// If either the LHS or RHS of the xor is a constant, don't do this
1774	// optimization.
1775	if (isa<ConstantInt>(Val: BO->getOperand(i_nocapture: `0`)) \|\|
1776	isa<ConstantInt>(Val: BO->getOperand(i_nocapture: `1`)))
1777	return false;
1778
1779	// If the first instruction in BB isn't a phi, we won't be able to infer
1780	// anything special about any particular predecessor.
1781	if (!isa<PHINode>(Val: BB->front()))
1782	return false;
1783
1784	// If this BB is a landing pad, we won't be able to split the edge into it.
1785	if (BB->isEHPad())
1786	return false;
1787
1788	// If we have a xor as the branch input to this block, and we know that the
1789	// LHS or RHS of the xor in any predecessor is true/false, then we can clone
1790	// the condition into the predecessor and fix that value to true, saving some
1791	// logical ops on that path and encouraging other paths to simplify.
1792	//
1793	// This copies something like this:
1794	//
1795	// BB:
1796	// %X = phi i1 [1], [%X']
1797	// %Y = icmp eq i32 %A, %B
1798	// %Z = xor i1 %X, %Y
1799	// br i1 %Z, ...
1800	//
1801	// Into:
1802	// BB':
1803	// %Y = icmp ne i32 %A, %B
1804	// br i1 %Y, ...
1805
1806	PredValueInfoTy XorOpValues;
1807	bool isLHS = true;
1808	if (!computeValueKnownInPredecessors(V: BO->getOperand(i_nocapture: `0`), BB, Result&: XorOpValues,
1809	Preference: WantInteger, CxtI: BO)) {
1810	assert(XorOpValues.empty());
1811	if (!computeValueKnownInPredecessors(V: BO->getOperand(i_nocapture: `1`), BB, Result&: XorOpValues,
1812	Preference: WantInteger, CxtI: BO))
1813	return false;
1814	isLHS = false;
1815	}
1816
1817	assert(!XorOpValues.empty() &&
1818	"computeValueKnownInPredecessors returned true with no values");
1819
1820	// Scan the information to see which is most popular: true or false. The
1821	// predecessors can be of the set true, false, or undef.
1822	unsigned NumTrue = `0`, NumFalse = `0`;
1823	for (const auto &XorOpValue : XorOpValues) {
1824	if (isa<UndefValue>(Val: XorOpValue.first))
1825	// Ignore undefs for the count.
1826	continue;
1827	if (cast<ConstantInt>(Val: XorOpValue.first)->isZero())
1828	++NumFalse;
1829	else
1830	++NumTrue;
1831	}
1832
1833	// Determine which value to split on, true, false, or undef if neither.
1834	ConstantInt SplitVal = nullptr*;
1835	if (NumTrue > NumFalse)
1836	SplitVal = ConstantInt::getTrue(Context&: BB->getContext());
1837	else if (NumTrue != `0` \|\| NumFalse != `0`)
1838	SplitVal = ConstantInt::getFalse(Context&: BB->getContext());
1839
1840	// Collect all of the blocks that this can be folded into so that we can
1841	// factor this once and clone it once.
1842	SmallVector<BasicBlock*, `8`> BlocksToFoldInto;
1843	for (const auto &XorOpValue : XorOpValues) {
1844	if (XorOpValue.first != SplitVal && !isa<UndefValue>(Val: XorOpValue.first))
1845	continue;
1846
1847	BlocksToFoldInto.push_back(Elt: XorOpValue.second);
1848	}
1849
1850	// If we inferred a value for all of the predecessors, then duplication won't
1851	// help us. However, we can just replace the LHS or RHS with the constant.
1852	if (BlocksToFoldInto.size() ==
1853	cast<PHINode>(Val&: BB->front()).getNumIncomingValues()) {
1854	if (!SplitVal) {
1855	// If all preds provide undef, just nuke the xor, because it is undef too.
1856	BO->replaceAllUsesWith(V: UndefValue::get(T: BO->getType()));
1857	BO->eraseFromParent();
1858	} else if (SplitVal->isZero() && BO != BO->getOperand(i_nocapture: isLHS)) {
1859	// If all preds provide 0, replace the xor with the other input.
1860	BO->replaceAllUsesWith(V: BO->getOperand(i_nocapture: isLHS));
1861	BO->eraseFromParent();
1862	} else {
1863	// If all preds provide 1, set the computed value to 1.
1864	BO->setOperand(i_nocapture: !isLHS, Val_nocapture: SplitVal);
1865	}
1866
1867	return true;
1868	}
1869
1870	// If any of predecessors end with an indirect goto, we can't change its
1871	// destination.
1872	if (any_of(Range&: BlocksToFoldInto, P: [](BasicBlock *Pred) {
1873	return isa<IndirectBrInst>(Val: Pred->getTerminator());
1874	}))
1875	return false;
1876
1877	// Try to duplicate BB into PredBB.
1878	return duplicateCondBranchOnPHIIntoPred(BB, PredBBs: BlocksToFoldInto);
1879	}
1880
1881	/// addPHINodeEntriesForMappedBlock - We're adding 'NewPred' as a new
1882	/// predecessor to the PHIBB block. If it has PHI nodes, add entries for
1883	/// NewPred using the entries from OldPred (suitably mapped).
1884	static void addPHINodeEntriesForMappedBlock(BasicBlock *PHIBB,
1885	BasicBlock *OldPred,
1886	BasicBlock *NewPred,
1887	ValueToValueMapTy &ValueMap) {
1888	for (PHINode &PN : PHIBB->phis()) {
1889	// Ok, we have a PHI node. Figure out what the incoming value was for the
1890	// DestBlock.
1891	Value *IV = PN.getIncomingValueForBlock(BB: OldPred);
1892
1893	// Remap the value if necessary.
1894	if (Instruction *Inst = dyn_cast<Instruction>(Val: IV)) {
1895	ValueToValueMapTy::iterator I = ValueMap.find(Val: Inst);
1896	if (I != ValueMap.end())
1897	IV = I ->second;
1898	}
1899
1900	PN.addIncoming(V: IV, BB: NewPred);
1901	}
1902	}
1903
1904	/// Merge basic block BB into its sole predecessor if possible.
1905	bool JumpThreadingPass::maybeMergeBasicBlockIntoOnlyPred(BasicBlock *BB) {
1906	BasicBlock *SinglePred = BB->getSinglePredecessor();
1907	if (!SinglePred)
1908	return false;
1909
1910	const Instruction *TI = SinglePred->getTerminator();
1911	if (TI->isSpecialTerminator() \|\| TI->getNumSuccessors() != `1` \|\|
1912	SinglePred == BB \|\| hasAddressTakenAndUsed(BB))
1913	return false;
1914
1915	// MergeBasicBlockIntoOnlyPred may delete SinglePred, we need to avoid
1916	// deleting a BB pointer from Unreachable.
1917	if (Unreachable.count(Ptr: SinglePred))
1918	return false;
1919
1920	// Don't merge if both the basic block and the predecessor contain loop or
1921	// entry convergent intrinsics, since there may only be one convergence token
1922	// per block.
1923	if (HasLoopOrEntryConvergenceToken(BB) &&
1924	HasLoopOrEntryConvergenceToken(BB: SinglePred))
1925	return false;
1926
1927	// If SinglePred was a loop header, BB becomes one.
1928	if (LoopHeaders.erase(Ptr: SinglePred))
1929	LoopHeaders.insert(Ptr: BB);
1930
1931	LVI->eraseBlock(BB: SinglePred);
1932	MergeBasicBlockIntoOnlyPred(BB, DTU: DTU.get());
1933
1934	// Now that BB is merged into SinglePred (i.e. SinglePred code followed by
1935	// BB code within one basic block `BB`), we need to invalidate the LVI
1936	// information associated with BB, because the LVI information need not be
1937	// true for all of BB after the merge. For example,
1938	// Before the merge, LVI info and code is as follows:
1939	// SinglePred: <LVI info1 for %p val>
1940	// %y = use of %p
1941	// call @exit() // need not transfer execution to successor.
1942	// assume(%p) // from this point on %p is true
1943	// br label %BB
1944	// BB: <LVI info2 for %p val, i.e. %p is true>
1945	// %x = use of %p
1946	// br label exit
1947	//
1948	// Note that this LVI info for blocks BB and SinglPred is correct for %p
1949	// (info2 and info1 respectively). After the merge and the deletion of the
1950	// LVI info1 for SinglePred. We have the following code:
1951	// BB: <LVI info2 for %p val>
1952	// %y = use of %p
1953	// call @exit()
1954	// assume(%p)
1955	// %x = use of %p <-- LVI info2 is correct from here onwards.
1956	// br label exit
1957	// LVI info2 for BB is incorrect at the beginning of BB.
1958
1959	// Invalidate LVI information for BB if the LVI is not provably true for
1960	// all of BB.
1961	if (!isGuaranteedToTransferExecutionToSuccessor(BB))
1962	LVI->eraseBlock(BB);
1963	return true;
1964	}
1965
1966	/// Update the SSA form. NewBB contains instructions that are copied from BB.
1967	/// ValueMapping maps old values in BB to new ones in NewBB.
1968	void JumpThreadingPass::updateSSA(BasicBlock BB, BasicBlock NewBB,
1969	ValueToValueMapTy &ValueMapping) {
1970	// If there were values defined in BB that are used outside the block, then we
1971	// now have to update all uses of the value to use either the original value,
1972	// the cloned value, or some PHI derived value. This can require arbitrary
1973	// PHI insertion, of which we are prepared to do, clean these up now.
1974	SSAUpdater SSAUpdate;
1975	SmallVector<Use *, `16`> UsesToRename;
1976	SmallVector<DbgVariableRecord *, `4`> DbgVariableRecords;
1977
1978	for (Instruction &I : *BB) {
1979	// Scan all uses of this instruction to see if it is used outside of its
1980	// block, and if so, record them in UsesToRename.
1981	for (Use &U : I.uses()) {
1982	Instruction *User = cast<Instruction>(Val: U.getUser());
1983	if (PHINode *UserPN = dyn_cast<PHINode>(Val: User)) {
1984	if (UserPN->getIncomingBlock(U) == BB)
1985	continue;
1986	} else if (User->getParent() == BB)
1987	continue;
1988
1989	UsesToRename.push_back(Elt: &U);
1990	}
1991
1992	// Find debug values outside of the block
1993	findDbgValues(V: &I, DbgVariableRecords);
1994	llvm::erase_if(C&: DbgVariableRecords, P: [&](const DbgVariableRecord *DbgVarRec) {
1995	return DbgVarRec->getParent() == BB;
1996	});
1997
1998	// If there are no uses outside the block, we're done with this instruction.
1999	if (UsesToRename.empty() && DbgVariableRecords.empty())
2000	continue;
2001	LLVM_DEBUG(dbgs() << "JT: Renaming non-local uses of: " << I << "\n");
2002
2003	// We found a use of I outside of BB. Rename all uses of I that are outside
2004	// its block to be uses of the appropriate PHI node etc. See ValuesInBlocks
2005	// with the two values we know.
2006	SSAUpdate.Initialize(Ty: I.getType(), Name: I.getName());
2007	SSAUpdate.AddAvailableValue(BB, V: &I);
2008	SSAUpdate.AddAvailableValue(BB: NewBB, V: ValueMapping [&I]);
2009
2010	while (!UsesToRename.empty())
2011	SSAUpdate.RewriteUse(U&: *UsesToRename.pop_back_val());
2012	if (!DbgVariableRecords.empty()) {
2013	SSAUpdate.UpdateDebugValues(I: &I, DbgValues&: DbgVariableRecords);
2014	DbgVariableRecords.clear();
2015	}
2016
2017	LLVM_DEBUG(dbgs() << "\n");
2018	}
2019	}
2020
2021	static void remapSourceAtoms(ValueToValueMapTy &VM, BasicBlock::iterator Begin,
2022	BasicBlock::iterator End) {
2023	if (VM.AtomMap.empty())
2024	return;
2025	for (auto It = Begin; It != End; ++It)
2026	RemapSourceAtom(I: &*It, VM);
2027	}
2028
2029	/// Clone instructions in range [BI, BE) to NewBB. For PHI nodes, we only clone
2030	/// arguments that come from PredBB. Return the map from the variables in the
2031	/// source basic block to the variables in the newly created basic block.
2032
2033	void JumpThreadingPass::cloneInstructions(ValueToValueMapTy &ValueMapping,
2034	BasicBlock::iterator BI,
2035	BasicBlock::iterator BE,
2036	BasicBlock *NewBB,
2037	BasicBlock *PredBB) {
2038	// We are going to have to map operands from the source basic block to the new
2039	// copy of the block 'NewBB'. If there are PHI nodes in the source basic
2040	// block, evaluate them to account for entry from PredBB.
2041
2042	// Retargets dbg.value to any renamed variables.
2043	auto RetargetDbgVariableRecordIfPossible = [&](DbgVariableRecord *DVR) {
2044	SmallSet<std::pair<Value , Value >, `16`> OperandsToRemap;
2045	for (auto *Op : DVR->location_ops()) {
2046	Instruction *OpInst = dyn_cast<Instruction>(Val: Op);
2047	if (!OpInst)
2048	continue;
2049
2050	auto I = ValueMapping.find(Val: OpInst);
2051	if (I != ValueMapping.end())
2052	OperandsToRemap.insert(V: {OpInst, I ->second});
2053	}
2054
2055	for (auto &[OldOp, MappedOp] : OperandsToRemap)
2056	DVR->replaceVariableLocationOp(OldValue: OldOp, NewValue: MappedOp);
2057	};
2058
2059	BasicBlock *RangeBB = BI ->getParent();
2060
2061	// Clone the phi nodes of the source basic block into NewBB. The resulting
2062	// phi nodes are trivial since NewBB only has one predecessor, but SSAUpdater
2063	// might need to rewrite the operand of the cloned phi.
2064	for (; PHINode *PN = dyn_cast<PHINode>(Val&: BI); ++BI) {
2065	PHINode *NewPN = PHINode::Create(Ty: PN->getType(), NumReservedValues: `1`, NameStr: PN->getName(), InsertBefore: NewBB);
2066	NewPN->addIncoming(V: PN->getIncomingValueForBlock(BB: PredBB), BB: PredBB);
2067	ValueMapping [PN] = NewPN;
2068	if (const DebugLoc &DL = PN->getDebugLoc())
2069	mapAtomInstance(DL, VMap&: ValueMapping);
2070	}
2071
2072	// Clone noalias scope declarations in the threaded block. When threading a
2073	// loop exit, we would otherwise end up with two idential scope declarations
2074	// visible at the same time.
2075	SmallVector<MDNode *> NoAliasScopes;
2076	DenseMap<MDNode , MDNode > ClonedScopes;
2077	LLVMContext &Context = PredBB->getContext();
2078	identifyNoAliasScopesToClone(Start: BI, End: BE, NoAliasDeclScopes&: NoAliasScopes);
2079	cloneNoAliasScopes(NoAliasDeclScopes: NoAliasScopes, ClonedScopes, Ext: "thread", Context);
2080
2081	auto CloneAndRemapDbgInfo = [&](Instruction NewInst, Instruction From) {
2082	auto DVRRange = NewInst->cloneDebugInfoFrom(From);
2083	for (DbgVariableRecord &DVR : filterDbgVars(R: DVRRange))
2084	RetargetDbgVariableRecordIfPossible (&DVR);
2085	};
2086
2087	// Clone the non-phi instructions of the source basic block into NewBB,
2088	// keeping track of the mapping and using it to remap operands in the cloned
2089	// instructions.
2090	for (; BI != BE; ++BI) {
2091	Instruction *New = BI ->clone();
2092	New->setName(BI ->getName());
2093	New->insertInto(ParentBB: NewBB, It: NewBB->end());
2094	ValueMapping [&*BI] = New;
2095	adaptNoAliasScopes(I: New, ClonedScopes, Context);
2096
2097	CloneAndRemapDbgInfo (New, &*BI);
2098	if (const DebugLoc &DL = New->getDebugLoc())
2099	mapAtomInstance(DL, VMap&: ValueMapping);
2100
2101	// Remap operands to patch up intra-block references.
2102	for (unsigned i = `0`, e = New->getNumOperands(); i != e; ++i)
2103	if (Instruction *Inst = dyn_cast<Instruction>(Val: New->getOperand(i))) {
2104	ValueToValueMapTy::iterator I = ValueMapping.find(Val: Inst);
2105	if (I != ValueMapping.end())
2106	New->setOperand(i, Val: I ->second);
2107	}
2108	}
2109
2110	// There may be DbgVariableRecords on the terminator, clone directly from
2111	// marker to marker as there isn't an instruction there.
2112	if (BE != RangeBB->end() && BE ->hasDbgRecords()) {
2113	// Dump them at the end.
2114	DbgMarker *Marker = RangeBB->getMarker(It: BE);
2115	DbgMarker *EndMarker = NewBB->createMarker(It: NewBB->end());
2116	auto DVRRange = EndMarker->cloneDebugInfoFrom(From: Marker, FromHere: std::nullopt);
2117	for (DbgVariableRecord &DVR : filterDbgVars(R: DVRRange))
2118	RetargetDbgVariableRecordIfPossible (&DVR);
2119	}
2120	}
2121
2122	/// Attempt to thread through two successive basic blocks.
2123	bool JumpThreadingPass::maybethreadThroughTwoBasicBlocks(BasicBlock *BB,
2124	Value *Cond) {
2125	// Consider:
2126	//
2127	// PredBB:
2128	// %var = phi i32 [ null, %bb1 ], [ @a, %bb2 ]*
2129	// %tobool = icmp eq i32 %cond, 0
2130	// br i1 %tobool, label %BB, label ...
2131	//
2132	// BB:
2133	// %cmp = icmp eq i32 %var, null*
2134	// br i1 %cmp, label ..., label ...
2135	//
2136	// We don't know the value of %var at BB even if we know which incoming edge
2137	// we take to BB. However, once we duplicate PredBB for each of its incoming
2138	// edges (say, PredBB1 and PredBB2), we know the value of %var in each copy of
2139	// PredBB. Then we can thread edges PredBB1->BB and PredBB2->BB through BB.
2140
2141	// Require that BB end with a Branch for simplicity.
2142	CondBrInst *CondBr = dyn_cast<CondBrInst>(Val: BB->getTerminator());
2143	if (!CondBr)
2144	return false;
2145
2146	// BB must have exactly one predecessor.
2147	BasicBlock *PredBB = BB->getSinglePredecessor();
2148	if (!PredBB)
2149	return false;
2150
2151	// Require that PredBB end with a conditional Branch. If PredBB ends with an
2152	// unconditional branch, we should be merging PredBB and BB instead. For
2153	// simplicity, we don't deal with a switch.
2154	CondBrInst *PredBBBranch = dyn_cast<CondBrInst>(Val: PredBB->getTerminator());
2155	if (!PredBBBranch)
2156	return false;
2157
2158	// If PredBB has exactly one incoming edge, we don't gain anything by copying
2159	// PredBB.
2160	if (PredBB->getSinglePredecessor())
2161	return false;
2162
2163	// Don't thread through PredBB if it contains a successor edge to itself, in
2164	// which case we would infinite loop. Suppose we are threading an edge from
2165	// PredPredBB through PredBB and BB to SuccBB with PredBB containing a
2166	// successor edge to itself. If we allowed jump threading in this case, we
2167	// could duplicate PredBB and BB as, say, PredBB.thread and BB.thread. Since
2168	// PredBB.thread has a successor edge to PredBB, we would immediately come up
2169	// with another jump threading opportunity from PredBB.thread through PredBB
2170	// and BB to SuccBB. This jump threading would repeatedly occur. That is, we
2171	// would keep peeling one iteration from PredBB.
2172	if (llvm::is_contained(Range: successors(BB: PredBB), Element: PredBB))
2173	return false;
2174
2175	// Don't thread across a loop header.
2176	if (LoopHeaders.count(Ptr: PredBB))
2177	return false;
2178
2179	// Avoid complication with duplicating EH pads.
2180	if (PredBB->isEHPad())
2181	return false;
2182
2183	// Find a predecessor that we can thread. For simplicity, we only consider a
2184	// successor edge out of BB to which we thread exactly one incoming edge into
2185	// PredBB.
2186	unsigned ZeroCount = `0`;
2187	unsigned OneCount = `0`;
2188	BasicBlock ZeroPred = nullptr*;
2189	BasicBlock OnePred = nullptr*;
2190	const DataLayout &DL = BB->getDataLayout();
2191	for (BasicBlock *P : predecessors(BB: PredBB)) {
2192	// If PredPred ends with IndirectBrInst, we can't handle it.
2193	if (isa<IndirectBrInst>(Val: P->getTerminator()))
2194	continue;
2195	if (ConstantInt *CI = dyn_cast_or_null<ConstantInt>(
2196	Val: evaluateOnPredecessorEdge(BB, PredPredBB: P, V: Cond, DL))) {
2197	if (CI->isZero()) {
2198	ZeroCount++;
2199	ZeroPred = P;
2200	} else if (CI->isOne()) {
2201	OneCount++;
2202	OnePred = P;
2203	}
2204	}
2205	}
2206
2207	// Disregard complicated cases where we have to thread multiple edges.
2208	BasicBlock *PredPredBB;
2209	if (ZeroCount == `1`) {
2210	PredPredBB = ZeroPred;
2211	} else if (OneCount == `1`) {
2212	PredPredBB = OnePred;
2213	} else {
2214	return false;
2215	}
2216
2217	BasicBlock *SuccBB = CondBr->getSuccessor(i: PredPredBB == ZeroPred);
2218
2219	// If threading to the same block as we come from, we would infinite loop.
2220	if (SuccBB == BB) {
2221	LLVM_DEBUG(dbgs() << " Not threading across BB '" << BB->getName()
2222	<< "' - would thread to self!\n");
2223	return false;
2224	}
2225
2226	// If threading this would thread across a loop header, don't thread the edge.
2227	// See the comments above findLoopHeaders for justifications and caveats.
2228	if (LoopHeaders.count(Ptr: BB) \|\| LoopHeaders.count(Ptr: SuccBB)) {
2229	LLVM_DEBUG({
2230	bool BBIsHeader = LoopHeaders.count(BB);
2231	bool SuccIsHeader = LoopHeaders.count(SuccBB);
2232	dbgs() << " Not threading across "
2233	<< (BBIsHeader ? "loop header BB '" : "block BB '")
2234	<< BB->getName() << "' to dest "
2235	<< (SuccIsHeader ? "loop header BB '" : "block BB '")
2236	<< SuccBB->getName()
2237	<< "' - it might create an irreducible loop!\n";
2238	});
2239	return false;
2240	}
2241
2242	// Compute the cost of duplicating BB and PredBB.
2243	unsigned BBCost = getJumpThreadDuplicationCost(
2244	TTI, BB, StopAt: BB->getTerminator(), Threshold: BBDupThreshold);
2245	unsigned PredBBCost = getJumpThreadDuplicationCost(
2246	TTI, BB: PredBB, StopAt: PredBB->getTerminator(), Threshold: BBDupThreshold);
2247
2248	// Give up if costs are too high. We need to check BBCost and PredBBCost
2249	// individually before checking their sum because getJumpThreadDuplicationCost
2250	// return (unsigned)~0 for those basic blocks that cannot be duplicated.
2251	if (BBCost > BBDupThreshold \|\| PredBBCost > BBDupThreshold \|\|
2252	BBCost + PredBBCost > BBDupThreshold) {
2253	LLVM_DEBUG(dbgs() << " Not threading BB '" << BB->getName()
2254	<< "' - Cost is too high: " << PredBBCost
2255	<< " for PredBB, " << BBCost << "for BB\n");
2256	return false;
2257	}
2258
2259	// Now we are ready to duplicate PredBB.
2260	threadThroughTwoBasicBlocks(PredPredBB, PredBB, BB, SuccBB);
2261	return true;
2262	}
2263
2264	void JumpThreadingPass::threadThroughTwoBasicBlocks(BasicBlock *PredPredBB,
2265	BasicBlock *PredBB,
2266	BasicBlock *BB,
2267	BasicBlock *SuccBB) {
2268	LLVM_DEBUG(dbgs() << " Threading through '" << PredBB->getName() << "' and '"
2269	<< BB->getName() << "'\n");
2270
2271	// Build BPI/BFI before any changes are made to IR.
2272	bool HasProfile = doesBlockHaveProfileData(BB);
2273	auto *BFI = getOrCreateBFI(Force: HasProfile);
2274	auto BPI = getOrCreateBPI(Force: BFI != nullptr*);
2275
2276	CondBrInst *CondBr = cast<CondBrInst>(Val: BB->getTerminator());
2277	CondBrInst *PredBBBranch = cast<CondBrInst>(Val: PredBB->getTerminator());
2278
2279	BasicBlock *NewBB =
2280	BasicBlock::Create(Context&: PredBB->getContext(), Name: PredBB->getName() + ".thread",
2281	Parent: PredBB->getParent(), InsertBefore: PredBB);
2282	NewBB->moveAfter(MovePos: PredBB);
2283
2284	// Set the block frequency of NewBB.
2285	if (BFI) {
2286	assert(BPI && "It's expected BPI to exist along with BFI");
2287	auto NewBBFreq = BFI->getBlockFreq(BB: PredPredBB) *
2288	BPI->getEdgeProbability(Src: PredPredBB, Dst: PredBB);
2289	BFI->setBlockFreq(BB: NewBB, Freq: NewBBFreq);
2290	}
2291
2292	// We are going to have to map operands from the original BB block to the new
2293	// copy of the block 'NewBB'. If there are PHI nodes in PredBB, evaluate them
2294	// to account for entry from PredPredBB.
2295	ValueToValueMapTy ValueMapping;
2296	cloneInstructions(ValueMapping, BI: PredBB->begin(), BE: PredBB->end(), NewBB,
2297	PredBB: PredPredBB);
2298
2299	// Copy the edge probabilities from PredBB to NewBB.
2300	if (BPI)
2301	BPI->copyEdgeProbabilities(Src: PredBB, Dst: NewBB);
2302
2303	// Update the terminator of PredPredBB to jump to NewBB instead of PredBB.
2304	// This eliminates predecessors from PredPredBB, which requires us to simplify
2305	// any PHI nodes in PredBB.
2306	Instruction *PredPredTerm = PredPredBB->getTerminator();
2307	for (unsigned i = `0`, e = PredPredTerm->getNumSuccessors(); i != e; ++i)
2308	if (PredPredTerm->getSuccessor(Idx: i) == PredBB) {
2309	PredBB->removePredecessor(Pred: PredPredBB, KeepOneInputPHIs: true);
2310	PredPredTerm->setSuccessor(Idx: i, BB: NewBB);
2311	}
2312
2313	addPHINodeEntriesForMappedBlock(PHIBB: PredBBBranch->getSuccessor(i: `0`), OldPred: PredBB, NewPred: NewBB,
2314	ValueMap&: ValueMapping);
2315	addPHINodeEntriesForMappedBlock(PHIBB: PredBBBranch->getSuccessor(i: `1`), OldPred: PredBB, NewPred: NewBB,
2316	ValueMap&: ValueMapping);
2317
2318	DTU ->applyUpdatesPermissive(
2319	Updates: {{DominatorTree::Insert, NewBB, CondBr->getSuccessor(i: `0`)},
2320	{DominatorTree::Insert, NewBB, CondBr->getSuccessor(i: `1`)},
2321	{DominatorTree::Insert, PredPredBB, NewBB},
2322	{DominatorTree::Delete, PredPredBB, PredBB}});
2323
2324	// Remap source location atoms beacuse we're duplicating control flow.
2325	remapSourceAtoms(VM&: ValueMapping, Begin: NewBB->begin(), End: NewBB->end());
2326
2327	updateSSA(BB: PredBB, NewBB, ValueMapping);
2328
2329	// Clean up things like PHI nodes with single operands, dead instructions,
2330	// etc.
2331	SimplifyInstructionsInBlock(BB: NewBB, TLI);
2332	SimplifyInstructionsInBlock(BB: PredBB, TLI);
2333
2334	SmallVector<BasicBlock *, `1`> PredsToFactor;
2335	PredsToFactor.push_back(Elt: NewBB);
2336	threadEdge(BB, PredBBs: PredsToFactor, SuccBB);
2337	}
2338
2339	/// tryThreadEdge - Thread an edge if it's safe and profitable to do so.
2340	bool JumpThreadingPass::tryThreadEdge(
2341	BasicBlock BB, const* SmallVectorImpl<BasicBlock *> &PredBBs,
2342	BasicBlock *SuccBB) {
2343	// If threading to the same block as we come from, we would infinite loop.
2344	if (SuccBB == BB) {
2345	LLVM_DEBUG(dbgs() << " Not threading across BB '" << BB->getName()
2346	<< "' - would thread to self!\n");
2347	return false;
2348	}
2349
2350	// If threading this would thread across a loop header, don't thread the edge.
2351	// See the comments above findLoopHeaders for justifications and caveats.
2352	if (LoopHeaders.count(Ptr: BB) \|\| LoopHeaders.count(Ptr: SuccBB)) {
2353	LLVM_DEBUG({
2354	bool BBIsHeader = LoopHeaders.count(BB);
2355	bool SuccIsHeader = LoopHeaders.count(SuccBB);
2356	dbgs() << " Not threading across "
2357	<< (BBIsHeader ? "loop header BB '" : "block BB '") << BB->getName()
2358	<< "' to dest " << (SuccIsHeader ? "loop header BB '" : "block BB '")
2359	<< SuccBB->getName() << "' - it might create an irreducible loop!\n";
2360	});
2361	return false;
2362	}
2363
2364	unsigned JumpThreadCost = getJumpThreadDuplicationCost(
2365	TTI, BB, StopAt: BB->getTerminator(), Threshold: BBDupThreshold);
2366	if (JumpThreadCost > BBDupThreshold) {
2367	LLVM_DEBUG(dbgs() << " Not threading BB '" << BB->getName()
2368	<< "' - Cost is too high: " << JumpThreadCost << "\n");
2369	return false;
2370	}
2371
2372	threadEdge(BB, PredBBs, SuccBB);
2373	return true;
2374	}
2375
2376	/// threadEdge - We have decided that it is safe and profitable to factor the
2377	/// blocks in PredBBs to one predecessor, then thread an edge from it to SuccBB
2378	/// across BB. Transform the IR to reflect this change.
2379	void JumpThreadingPass::threadEdge(BasicBlock *BB,
2380	const SmallVectorImpl<BasicBlock *> &PredBBs,
2381	BasicBlock *SuccBB) {
2382	assert(SuccBB != BB && "Don't create an infinite loop");
2383
2384	assert(!LoopHeaders.count(BB) && !LoopHeaders.count(SuccBB) &&
2385	"Don't thread across loop headers");
2386
2387	// Build BPI/BFI before any changes are made to IR.
2388	bool HasProfile = doesBlockHaveProfileData(BB);
2389	auto *BFI = getOrCreateBFI(Force: HasProfile);
2390	auto BPI = getOrCreateBPI(Force: BFI != nullptr*);
2391
2392	// And finally, do it! Start by factoring the predecessors if needed.
2393	BasicBlock *PredBB;
2394	if (PredBBs.size() == `1`)
2395	PredBB = PredBBs [`0`];
2396	else {
2397	LLVM_DEBUG(dbgs() << " Factoring out " << PredBBs.size()
2398	<< " common predecessors.\n");
2399	PredBB = splitBlockPreds(BB, Preds: PredBBs, Suffix: ".thr_comm");
2400	}
2401
2402	// And finally, do it!
2403	LLVM_DEBUG(dbgs() << " Threading edge from '" << PredBB->getName()
2404	<< "' to '" << SuccBB->getName()
2405	<< ", across block:\n " << *BB << "\n");
2406
2407	LVI->threadEdge(PredBB, OldSucc: BB, NewSucc: SuccBB);
2408
2409	BasicBlock *NewBB = BasicBlock::Create(Context&: BB->getContext(),
2410	Name: BB->getName()+".thread",
2411	Parent: BB->getParent(), InsertBefore: BB);
2412	NewBB->moveAfter(MovePos: PredBB);
2413
2414	// Set the block frequency of NewBB.
2415	if (BFI) {
2416	assert(BPI && "It's expected BPI to exist along with BFI");
2417	auto NewBBFreq =
2418	BFI->getBlockFreq(BB: PredBB) * BPI->getEdgeProbability(Src: PredBB, Dst: BB);
2419	BFI->setBlockFreq(BB: NewBB, Freq: NewBBFreq);
2420	}
2421
2422	// Copy all the instructions from BB to NewBB except the terminator.
2423	ValueToValueMapTy ValueMapping;
2424	cloneInstructions(ValueMapping, BI: BB->begin(), BE: std::prev(x: BB->end()), NewBB,
2425	PredBB);
2426
2427	// We didn't copy the terminator from BB over to NewBB, because there is now
2428	// an unconditional jump to SuccBB. Insert the unconditional jump.
2429	UncondBrInst *NewBI = UncondBrInst::Create(IfTrue: SuccBB, InsertBefore: NewBB);
2430	NewBI->setDebugLoc(BB->getTerminator()->getDebugLoc());
2431
2432	// Check to see if SuccBB has PHI nodes. If so, we need to add entries to the
2433	// PHI nodes for NewBB now.
2434	addPHINodeEntriesForMappedBlock(PHIBB: SuccBB, OldPred: BB, NewPred: NewBB, ValueMap&: ValueMapping);
2435
2436	// Update the terminator of PredBB to jump to NewBB instead of BB. This
2437	// eliminates predecessors from BB, which requires us to simplify any PHI
2438	// nodes in BB.
2439	Instruction *PredTerm = PredBB->getTerminator();
2440	for (unsigned i = `0`, e = PredTerm->getNumSuccessors(); i != e; ++i)
2441	if (PredTerm->getSuccessor(Idx: i) == BB) {
2442	BB->removePredecessor(Pred: PredBB, KeepOneInputPHIs: true);
2443	PredTerm->setSuccessor(Idx: i, BB: NewBB);
2444	}
2445
2446	// Enqueue required DT updates.
2447	DTU ->applyUpdatesPermissive(Updates: {{DominatorTree::Insert, NewBB, SuccBB},
2448	{DominatorTree::Insert, PredBB, NewBB},
2449	{DominatorTree::Delete, PredBB, BB}});
2450
2451	remapSourceAtoms(VM&: ValueMapping, Begin: NewBB->begin(), End: NewBB->end());
2452	updateSSA(BB, NewBB, ValueMapping);
2453
2454	// At this point, the IR is fully up to date and consistent. Do a quick scan
2455	// over the new instructions and zap any that are constants or dead. This
2456	// frequently happens because of phi translation.
2457	SimplifyInstructionsInBlock(BB: NewBB, TLI);
2458
2459	// Update the edge weight from BB to SuccBB, which should be less than before.
2460	updateBlockFreqAndEdgeWeight(PredBB, BB, NewBB, SuccBB, BFI, BPI, HasProfile);
2461
2462	// Threaded an edge!
2463	++NumThreads;
2464	}
2465
2466	/// Create a new basic block that will be the predecessor of BB and successor of
2467	/// all blocks in Preds. When profile data is available, update the frequency of
2468	/// this new block.
2469	BasicBlock JumpThreadingPass::splitBlockPreds(BasicBlock BB,
2470	ArrayRef<BasicBlock *> Preds,
2471	const char *Suffix) {
2472	SmallVector<BasicBlock *, `2`> NewBBs;
2473
2474	// Collect the frequencies of all predecessors of BB, which will be used to
2475	// update the edge weight of the result of splitting predecessors.
2476	DenseMap<BasicBlock *, BlockFrequency> FreqMap;
2477	auto *BFI = getBFI();
2478	if (BFI) {
2479	auto BPI = getOrCreateBPI(Force: true*);
2480	for (auto *Pred : Preds)
2481	FreqMap.insert(KV: std::make_pair(
2482	x&: Pred, y: BFI->getBlockFreq(BB: Pred) * BPI->getEdgeProbability(Src: Pred, Dst: BB)));
2483	}
2484
2485	// In the case when BB is a LandingPad block we create 2 new predecessors
2486	// instead of just one.
2487	if (BB->isLandingPad()) {
2488	std::string NewName = std::string (Suffix) + ".split-lp";
2489	SplitLandingPadPredecessors(OrigBB: BB, Preds, Suffix, Suffix2: NewName.c_str(), NewBBs);
2490	} else {
2491	NewBBs.push_back(Elt: SplitBlockPredecessors(BB, Preds, Suffix));
2492	}
2493
2494	std::vector<DominatorTree::UpdateType> Updates;
2495	Updates.reserve(n: (`2` * Preds.size()) + NewBBs.size());
2496	for (auto *NewBB : NewBBs) {
2497	BlockFrequency NewBBFreq(`0`);
2498	Updates.push_back(x: {DominatorTree::Insert, NewBB, BB});
2499	for (auto *Pred : predecessors(BB: NewBB)) {
2500	Updates.push_back(x: {DominatorTree::Delete, Pred, BB});
2501	Updates.push_back(x: {DominatorTree::Insert, Pred, NewBB});
2502	if (BFI) // Update frequencies between Pred -> NewBB.
2503	NewBBFreq += FreqMap.lookup(Val: Pred);
2504	}
2505	if (BFI) // Apply the summed frequency to NewBB.
2506	BFI->setBlockFreq(BB: NewBB, Freq: NewBBFreq);
2507	}
2508
2509	DTU ->applyUpdatesPermissive(Updates);
2510	return NewBBs [`0`];
2511	}
2512
2513	bool JumpThreadingPass::doesBlockHaveProfileData(BasicBlock *BB) {
2514	const Instruction *TI = BB->getTerminator();
2515	if (!TI \|\| TI->getNumSuccessors() < `2`)
2516	return false;
2517
2518	return hasValidBranchWeightMD(I: *TI);
2519	}
2520
2521	/// Update the block frequency of BB and branch weight and the metadata on the
2522	/// edge BB->SuccBB. This is done by scaling the weight of BB->SuccBB by 1 -
2523	/// Freq(PredBB->BB) / Freq(BB->SuccBB).
2524	void JumpThreadingPass::updateBlockFreqAndEdgeWeight(BasicBlock *PredBB,
2525	BasicBlock *BB,
2526	BasicBlock *NewBB,
2527	BasicBlock *SuccBB,
2528	BlockFrequencyInfo *BFI,
2529	BranchProbabilityInfo *BPI,
2530	bool HasProfile) {
2531	assert(((BFI && BPI) \|\| (!BFI && !BFI)) &&
2532	"Both BFI & BPI should either be set or unset");
2533
2534	if (!BFI) {
2535	assert(!HasProfile &&
2536	"It's expected to have BFI/BPI when profile info exists");
2537	return;
2538	}
2539
2540	// As the edge from PredBB to BB is deleted, we have to update the block
2541	// frequency of BB.
2542	auto BBOrigFreq = BFI->getBlockFreq(BB);
2543	auto NewBBFreq = BFI->getBlockFreq(BB: NewBB);
2544	auto BBNewFreq = BBOrigFreq - NewBBFreq;
2545	BFI->setBlockFreq(BB, Freq: BBNewFreq);
2546
2547	// Collect updated outgoing edges' frequencies from BB and use them to update
2548	// edge probabilities.
2549	SmallVector<uint64_t, `4`> BBSuccFreq;
2550	for (auto It : enumerate(First: successors(BB))) {
2551	auto BB2SuccBBFreq = BBOrigFreq * BPI->getEdgeProbability(Src: BB, IndexInSuccessors: It.index());
2552	auto SuccFreq =
2553	(It.value() == SuccBB) ? BB2SuccBBFreq - NewBBFreq : BB2SuccBBFreq;
2554	BBSuccFreq.push_back(Elt: SuccFreq.getFrequency());
2555	}
2556
2557	uint64_t MaxBBSuccFreq = *llvm::max_element(Range&: BBSuccFreq);
2558
2559	SmallVector<BranchProbability, `4`> BBSuccProbs;
2560	if (MaxBBSuccFreq == `0`)
2561	BBSuccProbs.assign(NumElts: BBSuccFreq.size(),
2562	Elt: {`1`, static_cast<unsigned>(BBSuccFreq.size())});
2563	else {
2564	for (uint64_t Freq : BBSuccFreq)
2565	BBSuccProbs.push_back(
2566	Elt: BranchProbability::getBranchProbability(Numerator: Freq, Denominator: MaxBBSuccFreq));
2567	// Normalize edge probabilities so that they sum up to one.
2568	BranchProbability::normalizeProbabilities(Begin: BBSuccProbs.begin(),
2569	End: BBSuccProbs.end());
2570	}
2571
2572	// Update edge probabilities in BPI.
2573	BPI->setEdgeProbability(Src: BB, Probs: BBSuccProbs);
2574
2575	// Update the profile metadata as well.
2576	//
2577	// Don't do this if the profile of the transformed blocks was statically
2578	// estimated. (This could occur despite the function having an entry
2579	// frequency in completely cold parts of the CFG.)
2580	//
2581	// In this case we don't want to suggest to subsequent passes that the
2582	// calculated weights are fully consistent. Consider this graph:
2583	//
2584	// check_1
2585	// 50% / \|
2586	// eq_1 \| 50%
2587	// \ \|
2588	// check_2
2589	// 50% / \|
2590	// eq_2 \| 50%
2591	// \ \|
2592	// check_3
2593	// 50% / \|
2594	// eq_3 \| 50%
2595	// \ \|
2596	//
2597	// Assuming the blocks check_ all compare the same value against 1, 2 and 3,*
2598	// the overall probabilities are inconsistent; the total probability that the
2599	// value is either 1, 2 or 3 is 150%.
2600	//
2601	// As a consequence if we thread eq_1 -> check_2 to check_3, check_2->check_3
2602	// becomes 0%. This is even worse if the edge whose probability becomes 0% is
2603	// the loop exit edge. Then based solely on static estimation we would assume
2604	// the loop was extremely hot.
2605	//
2606	// FIXME this locally as well so that BPI and BFI are consistent as well. We
2607	// shouldn't make edges extremely likely or unlikely based solely on static
2608	// estimation.
2609	if (BBSuccProbs.size() >= `2` && HasProfile) {
2610	SmallVector<uint32_t, `4`> Weights;
2611	for (auto Prob : BBSuccProbs)
2612	Weights.push_back(Elt: Prob.getNumerator());
2613
2614	auto TI = BB->getTerminator();
2615	setBranchWeights(I&: TI, Weights, IsExpected: hasBranchWeightOrigin(I: TI));
2616	}
2617	}
2618
2619	/// duplicateCondBranchOnPHIIntoPred - PredBB contains an unconditional branch
2620	/// to BB which contains an i1 PHI node and a conditional branch on that PHI.
2621	/// If we can duplicate the contents of BB up into PredBB do so now, this
2622	/// improves the odds that the branch will be on an analyzable instruction like
2623	/// a compare.
2624	bool JumpThreadingPass::duplicateCondBranchOnPHIIntoPred(
2625	BasicBlock BB, const* SmallVectorImpl<BasicBlock *> &PredBBs) {
2626	assert(!PredBBs.empty() && "Can't handle an empty set");
2627
2628	// If BB is a loop header, then duplicating this block outside the loop would
2629	// cause us to transform this into an irreducible loop, don't do this.
2630	// See the comments above findLoopHeaders for justifications and caveats.
2631	if (LoopHeaders.count(Ptr: BB)) {
2632	LLVM_DEBUG(dbgs() << " Not duplicating loop header '" << BB->getName()
2633	<< "' into predecessor block '" << PredBBs[`0`]->getName()
2634	<< "' - it might create an irreducible loop!\n");
2635	return false;
2636	}
2637
2638	unsigned DuplicationCost = getJumpThreadDuplicationCost(
2639	TTI, BB, StopAt: BB->getTerminator(), Threshold: BBDupThreshold);
2640	if (DuplicationCost > BBDupThreshold) {
2641	LLVM_DEBUG(dbgs() << " Not duplicating BB '" << BB->getName()
2642	<< "' - Cost is too high: " << DuplicationCost << "\n");
2643	return false;
2644	}
2645
2646	// And finally, do it! Start by factoring the predecessors if needed.
2647	std::vector<DominatorTree::UpdateType> Updates;
2648	BasicBlock *PredBB;
2649	if (PredBBs.size() == `1`)
2650	PredBB = PredBBs [`0`];
2651	else {
2652	LLVM_DEBUG(dbgs() << " Factoring out " << PredBBs.size()
2653	<< " common predecessors.\n");
2654	PredBB = splitBlockPreds(BB, Preds: PredBBs, Suffix: ".thr_comm");
2655	}
2656	Updates.push_back(x: {DominatorTree::Delete, PredBB, BB});
2657
2658	// Okay, we decided to do this! Clone all the instructions in BB onto the end
2659	// of PredBB.
2660	LLVM_DEBUG(dbgs() << " Duplicating block '" << BB->getName()
2661	<< "' into end of '" << PredBB->getName()
2662	<< "' to eliminate branch on phi. Cost: "
2663	<< DuplicationCost << " block is:" << *BB << "\n");
2664
2665	// Unless PredBB ends with an unconditional branch, split the edge so that we
2666	// can just clone the bits from BB into the end of the new PredBB.
2667	UncondBrInst *OldPredBranch = dyn_cast<UncondBrInst>(Val: PredBB->getTerminator());
2668
2669	if (!OldPredBranch) {
2670	BasicBlock *OldPredBB = PredBB;
2671	PredBB = SplitEdge(From: OldPredBB, To: BB);
2672	Updates.push_back(x: {DominatorTree::Insert, OldPredBB, PredBB});
2673	Updates.push_back(x: {DominatorTree::Insert, PredBB, BB});
2674	Updates.push_back(x: {DominatorTree::Delete, OldPredBB, BB});
2675	OldPredBranch = cast<UncondBrInst>(Val: PredBB->getTerminator());
2676	}
2677
2678	// We are going to have to map operands from the original BB block into the
2679	// PredBB block. Evaluate PHI nodes in BB.
2680	ValueToValueMapTy ValueMapping;
2681
2682	// Remember the position before the inserted instructions.
2683	auto RItBeforeInsertPt = std::next(x: OldPredBranch->getReverseIterator());
2684
2685	BasicBlock::iterator BI = BB->begin();
2686	for (; PHINode *PN = dyn_cast<PHINode>(Val&: BI); ++BI)
2687	ValueMapping [PN] = PN->getIncomingValueForBlock(BB: PredBB);
2688	// Clone the non-phi instructions of BB into PredBB, keeping track of the
2689	// mapping and using it to remap operands in the cloned instructions.
2690	for (; BI != BB->end(); ++BI) {
2691	Instruction *New = BI ->clone();
2692	New->insertInto(ParentBB: PredBB, It: OldPredBranch->getIterator());
2693
2694	// Remap operands to patch up intra-block references.
2695	for (unsigned i = `0`, e = New->getNumOperands(); i != e; ++i)
2696	if (Instruction *Inst = dyn_cast<Instruction>(Val: New->getOperand(i))) {
2697	ValueToValueMapTy::iterator I = ValueMapping.find(Val: Inst);
2698	if (I != ValueMapping.end())
2699	New->setOperand(i, Val: I ->second);
2700	}
2701
2702	// Remap debug variable operands.
2703	remapDebugVariable(Mapping&: ValueMapping, Inst: New);
2704	if (const DebugLoc &DL = New->getDebugLoc())
2705	mapAtomInstance(DL, VMap&: ValueMapping);
2706
2707	// If this instruction can be simplified after the operands are updated,
2708	// just use the simplified value instead. This frequently happens due to
2709	// phi translation.
2710	if (Value *IV = simplifyInstruction(
2711	I: New,
2712	Q: {BB->getDataLayout(), TLI, nullptr, nullptr, New})) {
2713	ValueMapping [&*BI] = IV;
2714	if (!New->mayHaveSideEffects()) {
2715	New->eraseFromParent();
2716	New = nullptr;
2717	// Clone debug-info on the elided instruction to the destination
2718	// position.
2719	OldPredBranch->cloneDebugInfoFrom(From: &BI, FromHere: std::nullopt, InsertAtHead: true*);
2720	}
2721	} else {
2722	ValueMapping [&*BI] = New;
2723	}
2724	if (New) {
2725	// Otherwise, insert the new instruction into the block.
2726	New->setName(BI ->getName());
2727	// Clone across any debug-info attached to the old instruction.
2728	New->cloneDebugInfoFrom(From: &*BI);
2729	// Update Dominance from simplified New instruction operands.
2730	for (unsigned i = `0`, e = New->getNumOperands(); i != e; ++i)
2731	if (BasicBlock *SuccBB = dyn_cast<BasicBlock>(Val: New->getOperand(i)))
2732	Updates.push_back(x: {DominatorTree::Insert, PredBB, SuccBB});
2733	}
2734	}
2735
2736	// Check to see if the targets of the branch had PHI nodes. If so, we need to
2737	// add entries to the PHI nodes for branch from PredBB now.
2738	CondBrInst *BBBranch = cast<CondBrInst>(Val: BB->getTerminator());
2739	addPHINodeEntriesForMappedBlock(PHIBB: BBBranch->getSuccessor(i: `0`), OldPred: BB, NewPred: PredBB,
2740	ValueMap&: ValueMapping);
2741	addPHINodeEntriesForMappedBlock(PHIBB: BBBranch->getSuccessor(i: `1`), OldPred: BB, NewPred: PredBB,
2742	ValueMap&: ValueMapping);
2743
2744	// KeyInstructions: Remap the cloned instructions' atoms only.
2745	remapSourceAtoms(VM&: ValueMapping, Begin: std::prev(x: RItBeforeInsertPt)->getIterator(),
2746	End: OldPredBranch->getIterator());
2747
2748	updateSSA(BB, NewBB: PredBB, ValueMapping);
2749
2750	// PredBB no longer jumps to BB, remove entries in the PHI node for the edge
2751	// that we nuked.
2752	BB->removePredecessor(Pred: PredBB, KeepOneInputPHIs: true);
2753
2754	// Remove the unconditional branch at the end of the PredBB block.
2755	OldPredBranch->eraseFromParent();
2756	if (auto *BPI = getBPI())
2757	BPI->copyEdgeProbabilities(Src: BB, Dst: PredBB);
2758	DTU ->applyUpdatesPermissive(Updates);
2759
2760	++NumDupes;
2761	return true;
2762	}
2763
2764	// Pred is a predecessor of BB with an unconditional branch to BB. SI is
2765	// a Select instruction in Pred. BB has other predecessors and SI is used in
2766	// a PHI node in BB. SI has no other use.
2767	// A new basic block, NewBB, is created and SI is converted to compare and
2768	// conditional branch. SI is erased from parent.
2769	void JumpThreadingPass::unfoldSelectInstr(BasicBlock Pred, BasicBlock BB,
2770	SelectInst SI, PHINode SIUse,
2771	unsigned Idx) {
2772	// Expand the select.
2773	//
2774	// Pred --
2775	// \| v
2776	// \| NewBB
2777	// \| \|
2778	// \|-----
2779	// v
2780	// BB
2781	UncondBrInst *PredTerm = cast<UncondBrInst>(Val: Pred->getTerminator());
2782	BasicBlock *NewBB = BasicBlock::Create(Context&: BB->getContext(), Name: "select.unfold",
2783	Parent: BB->getParent(), InsertBefore: BB);
2784	// Move the unconditional branch to NewBB.
2785	PredTerm->removeFromParent();
2786	PredTerm->insertInto(ParentBB: NewBB, It: NewBB->end());
2787	// Create a conditional branch and update PHI nodes.
2788	auto *BI = CondBrInst::Create(Cond: SI->getCondition(), IfTrue: NewBB, IfFalse: BB, InsertBefore: Pred);
2789	BI->applyMergedLocation(LocA: PredTerm->getDebugLoc(), LocB: SI->getDebugLoc());
2790	BI->copyMetadata(SrcInst: *SI, WL: {LLVMContext::MD_prof});
2791	SIUse->setIncomingValue(i: Idx, V: SI->getFalseValue());
2792	SIUse->addIncoming(V: SI->getTrueValue(), BB: NewBB);
2793
2794	uint64_t TrueWeight = `1`;
2795	uint64_t FalseWeight = `1`;
2796	// Copy probabilities from 'SI' to created conditional branch in 'Pred'.
2797	if (extractBranchWeights(I: *SI, TrueVal&: TrueWeight, FalseVal&: FalseWeight) &&
2798	(TrueWeight + FalseWeight) != `0`) {
2799	SmallVector<BranchProbability, `2`> BP;
2800	BP.emplace_back(Args: BranchProbability::getBranchProbability(
2801	Numerator: TrueWeight, Denominator: TrueWeight + FalseWeight));
2802	BP.emplace_back(Args: BranchProbability::getBranchProbability(
2803	Numerator: FalseWeight, Denominator: TrueWeight + FalseWeight));
2804	// Update BPI if exists.
2805	if (auto *BPI = getBPI())
2806	BPI->setEdgeProbability(Src: Pred, Probs: BP);
2807	}
2808	// Set the block frequency of NewBB.
2809	if (auto *BFI = getBFI()) {
2810	if ((TrueWeight + FalseWeight) == `0`) {
2811	TrueWeight = `1`;
2812	FalseWeight = `1`;
2813	}
2814	BranchProbability PredToNewBBProb = BranchProbability::getBranchProbability(
2815	Numerator: TrueWeight, Denominator: TrueWeight + FalseWeight);
2816	auto NewBBFreq = BFI->getBlockFreq(BB: Pred) * PredToNewBBProb;
2817	BFI->setBlockFreq(BB: NewBB, Freq: NewBBFreq);
2818	}
2819
2820	// The select is now dead.
2821	SI->eraseFromParent();
2822	DTU ->applyUpdatesPermissive(Updates: {{DominatorTree::Insert, NewBB, BB},
2823	{DominatorTree::Insert, Pred, NewBB}});
2824
2825	// Update any other PHI nodes in BB.
2826	for (BasicBlock::iterator BI = BB->begin();
2827	PHINode *Phi = dyn_cast<PHINode>(Val&: BI); ++BI)
2828	if (Phi != SIUse)
2829	Phi->addIncoming(V: Phi->getIncomingValueForBlock(BB: Pred), BB: NewBB);
2830	}
2831
2832	bool JumpThreadingPass::tryToUnfoldSelect(SwitchInst SI, BasicBlock BB) {
2833	PHINode *CondPHI = dyn_cast<PHINode>(Val: SI->getCondition());
2834
2835	if (!CondPHI \|\| CondPHI->getParent() != BB)
2836	return false;
2837
2838	for (unsigned I = `0`, E = CondPHI->getNumIncomingValues(); I != E; ++I) {
2839	BasicBlock *Pred = CondPHI->getIncomingBlock(i: I);
2840	SelectInst *PredSI = dyn_cast<SelectInst>(Val: CondPHI->getIncomingValue(i: I));
2841
2842	// The second and third condition can be potentially relaxed. Currently
2843	// the conditions help to simplify the code and allow us to reuse existing
2844	// code, developed for tryToUnfoldSelect(CmpInst , BasicBlock )
2845	if (!PredSI \|\| PredSI->getParent() != Pred \|\| !PredSI->hasOneUse())
2846	continue;
2847
2848	UncondBrInst *PredTerm = dyn_cast<UncondBrInst>(Val: Pred->getTerminator());
2849	if (!PredTerm)
2850	continue;
2851
2852	unfoldSelectInstr(Pred, BB, SI: PredSI, SIUse: CondPHI, Idx: I);
2853	return true;
2854	}
2855	return false;
2856	}
2857
2858	/// tryToUnfoldSelect - Look for blocks of the form
2859	/// bb1:
2860	/// %a = select
2861	/// br bb2
2862	///
2863	/// bb2:
2864	/// %p = phi [%a, %bb1] ...
2865	/// %c = icmp %p
2866	/// br i1 %c
2867	///
2868	/// And expand the select into a branch structure if one of its arms allows %c
2869	/// to be folded. This later enables threading from bb1 over bb2.
2870	bool JumpThreadingPass::tryToUnfoldSelect(CmpInst CondCmp, BasicBlock BB) {
2871	CondBrInst *CondBr = dyn_cast<CondBrInst>(Val: BB->getTerminator());
2872	PHINode *CondLHS = dyn_cast<PHINode>(Val: CondCmp->getOperand(i_nocapture: `0`));
2873	Constant *CondRHS = cast<Constant>(Val: CondCmp->getOperand(i_nocapture: `1`));
2874
2875	if (!CondBr \|\| !CondLHS \|\| CondLHS->getParent() != BB)
2876	return false;
2877
2878	for (unsigned I = `0`, E = CondLHS->getNumIncomingValues(); I != E; ++I) {
2879	BasicBlock *Pred = CondLHS->getIncomingBlock(i: I);
2880	SelectInst *SI = dyn_cast<SelectInst>(Val: CondLHS->getIncomingValue(i: I));
2881
2882	// Look if one of the incoming values is a select in the corresponding
2883	// predecessor.
2884	if (!SI \|\| SI->getParent() != Pred \|\| !SI->hasOneUse())
2885	continue;
2886
2887	UncondBrInst *PredTerm = dyn_cast<UncondBrInst>(Val: Pred->getTerminator());
2888	if (!PredTerm)
2889	continue;
2890
2891	// Now check if one of the select values would allow us to constant fold the
2892	// terminator in BB. We don't do the transform if both sides fold, those
2893	// cases will be threaded in any case.
2894	Constant *LHSRes =
2895	LVI->getPredicateOnEdge(Pred: CondCmp->getPredicate(), V: SI->getOperand(i_nocapture: `1`),
2896	C: CondRHS, FromBB: Pred, ToBB: BB, CxtI: CondCmp);
2897	Constant *RHSRes =
2898	LVI->getPredicateOnEdge(Pred: CondCmp->getPredicate(), V: SI->getOperand(i_nocapture: `2`),
2899	C: CondRHS, FromBB: Pred, ToBB: BB, CxtI: CondCmp);
2900	if ((LHSRes \|\| RHSRes) && LHSRes != RHSRes) {
2901	unfoldSelectInstr(Pred, BB, SI, SIUse: CondLHS, Idx: I);
2902	return true;
2903	}
2904	}
2905	return false;
2906	}
2907
2908	/// tryToUnfoldSelectInCurrBB - Look for PHI/Select or PHI/CMP/Select in the
2909	/// same BB in the form
2910	/// bb:
2911	/// %p = phi [false, %bb1], [true, %bb2], [false, %bb3], [true, %bb4], ...
2912	/// %s = select %p, trueval, falseval
2913	///
2914	/// or
2915	///
2916	/// bb:
2917	/// %p = phi [0, %bb1], [1, %bb2], [0, %bb3], [1, %bb4], ...
2918	/// %c = cmp %p, 0
2919	/// %s = select %c, trueval, falseval
2920	///
2921	/// And expand the select into a branch structure. This later enables
2922	/// jump-threading over bb in this pass.
2923	///
2924	/// Using the similar approach of SimplifyCFG::FoldCondBranchOnPHI(), unfold
2925	/// select if the associated PHI has at least one constant. If the unfolded
2926	/// select is not jump-threaded, it will be folded again in the later
2927	/// optimizations.
2928	bool JumpThreadingPass::tryToUnfoldSelectInCurrBB(BasicBlock *BB) {
2929	// This transform would reduce the quality of msan diagnostics.
2930	// Disable this transform under MemorySanitizer.
2931	if (BB->getParent()->hasFnAttribute(Kind: Attribute::SanitizeMemory))
2932	return false;
2933
2934	// If threading this would thread across a loop header, don't thread the edge.
2935	// See the comments above findLoopHeaders for justifications and caveats.
2936	if (LoopHeaders.count(Ptr: BB))
2937	return false;
2938
2939	for (BasicBlock::iterator BI = BB->begin();
2940	PHINode *PN = dyn_cast<PHINode>(Val&: BI); ++BI) {
2941	// Look for a Phi having at least one constant incoming value.
2942	if (llvm::all_of(Range: PN->incoming_values(),
2943	P: [](Value V) { return* !isa<ConstantInt>(Val: V); }))
2944	continue;
2945
2946	auto isUnfoldCandidate = [BB](SelectInst SI, Value V) {
2947	using namespace PatternMatch;
2948
2949	// Check if SI is in BB and use V as condition.
2950	if (SI->getParent() != BB)
2951	return false;
2952	Value *Cond = SI->getCondition();
2953	bool IsAndOr = match(V: SI, P: m_CombineOr(L: m_LogicalAnd(), R: m_LogicalOr()));
2954	return Cond && Cond == V && Cond->getType()->isIntegerTy(Bitwidth: `1`) && !IsAndOr;
2955	};
2956
2957	SelectInst SI = nullptr*;
2958	for (Use &U : PN->uses()) {
2959	if (ICmpInst *Cmp = dyn_cast<ICmpInst>(Val: U.getUser())) {
2960	// Look for a ICmp in BB that compares PN with a constant and is the
2961	// condition of a Select.
2962	if (Cmp->getParent() == BB && Cmp->hasOneUse() &&
2963	isa<ConstantInt>(Val: Cmp->getOperand(i_nocapture: `1` - U.getOperandNo())))
2964	if (SelectInst *SelectI = dyn_cast<SelectInst>(Val: Cmp->user_back()))
2965	if (isUnfoldCandidate (SelectI, Cmp->use_begin()->get())) {
2966	SI = SelectI;
2967	break;
2968	}
2969	} else if (SelectInst *SelectI = dyn_cast<SelectInst>(Val: U.getUser())) {
2970	// Look for a Select in BB that uses PN as condition.
2971	if (isUnfoldCandidate (SelectI, U.get())) {
2972	SI = SelectI;
2973	break;
2974	}
2975	}
2976	}
2977
2978	if (!SI)
2979	continue;
2980	// Expand the select.
2981	Value *Cond = SI->getCondition();
2982	if (!isGuaranteedNotToBeUndefOrPoison(V: Cond, AC: nullptr, CtxI: SI)) {
2983	Cond = new FreezeInst (Cond, "cond.fr", SI->getIterator());
2984	cast<FreezeInst>(Val: Cond)->setDebugLoc(DebugLoc::getTemporary());
2985	}
2986	MDNode BranchWeights = getBranchWeightMDNode(I: SI);
2987	Instruction *Term =
2988	SplitBlockAndInsertIfThen(Cond, SplitBefore: SI, Unreachable: false, BranchWeights);
2989	BasicBlock *SplitBB = SI->getParent();
2990	BasicBlock *NewBB = Term->getParent();
2991	PHINode *NewPN = PHINode::Create(Ty: SI->getType(), NumReservedValues: `2`, NameStr: "", InsertBefore: SI->getIterator());
2992	NewPN->addIncoming(V: SI->getTrueValue(), BB: Term->getParent());
2993	NewPN->addIncoming(V: SI->getFalseValue(), BB);
2994	NewPN->setDebugLoc(SI->getDebugLoc());
2995	SI->replaceAllUsesWith(V: NewPN);
2996	SI->eraseFromParent();
2997	// NewBB and SplitBB are newly created blocks which require insertion.
2998	std::vector<DominatorTree::UpdateType> Updates;
2999	Updates.reserve(n: (`2` * SplitBB->getTerminator()->getNumSuccessors()) + `3`);
3000	Updates.push_back(x: {DominatorTree::Insert, BB, SplitBB});
3001	Updates.push_back(x: {DominatorTree::Insert, BB, NewBB});
3002	Updates.push_back(x: {DominatorTree::Insert, NewBB, SplitBB});
3003	// BB's successors were moved to SplitBB, update DTU accordingly.
3004	for (auto *Succ : successors(BB: SplitBB)) {
3005	Updates.push_back(x: {DominatorTree::Delete, BB, Succ});
3006	Updates.push_back(x: {DominatorTree::Insert, SplitBB, Succ});
3007	}
3008	DTU ->applyUpdatesPermissive(Updates);
3009	return true;
3010	}
3011	return false;
3012	}
3013
3014	/// Try to propagate a guard from the current BB into one of its predecessors
3015	/// in case if another branch of execution implies that the condition of this
3016	/// guard is always true. Currently we only process the simplest case that
3017	/// looks like:
3018	///
3019	/// Start:
3020	/// %cond = ...
3021	/// br i1 %cond, label %T1, label %F1
3022	/// T1:
3023	/// br label %Merge
3024	/// F1:
3025	/// br label %Merge
3026	/// Merge:
3027	/// %condGuard = ...
3028	/// call void(i1, ...) @llvm.experimental.guard( i1 %condGuard )[ "deopt"() ]
3029	///
3030	/// And cond either implies condGuard or !condGuard. In this case all the
3031	/// instructions before the guard can be duplicated in both branches, and the
3032	/// guard is then threaded to one of them.
3033	bool JumpThreadingPass::processGuards(BasicBlock *BB) {
3034	using namespace PatternMatch;
3035
3036	// We only want to deal with two predecessors.
3037	BasicBlock Pred1, Pred2;
3038	auto PI = pred_begin(BB), PE = pred_end(BB);
3039	if (PI == PE)
3040	return false;
3041	Pred1 = *PI ++;
3042	if (PI == PE)
3043	return false;
3044	Pred2 = *PI ++;
3045	if (PI != PE)
3046	return false;
3047	if (Pred1 == Pred2)
3048	return false;
3049
3050	// Try to thread one of the guards of the block.
3051	// TODO: Look up deeper than to immediate predecessor?
3052	auto *Parent = Pred1->getSinglePredecessor();
3053	if (!Parent \|\| Parent != Pred2->getSinglePredecessor())
3054	return false;
3055
3056	if (auto *BI = dyn_cast<CondBrInst>(Val: Parent->getTerminator()))
3057	for (auto &I : *BB)
3058	if (isGuard(U: &I) && threadGuard(BB, Guard: cast<IntrinsicInst>(Val: &I), BI))
3059	return true;
3060
3061	return false;
3062	}
3063
3064	/// Try to propagate the guard from BB which is the lower block of a diamond
3065	/// to one of its branches, in case if diamond's condition implies guard's
3066	/// condition.
3067	bool JumpThreadingPass::threadGuard(BasicBlock BB, IntrinsicInst Guard,
3068	CondBrInst *BI) {
3069	Value *GuardCond = Guard->getArgOperand(i: `0`);
3070	Value *BranchCond = BI->getCondition();
3071	BasicBlock *TrueDest = BI->getSuccessor(i: `0`);
3072	BasicBlock *FalseDest = BI->getSuccessor(i: `1`);
3073
3074	auto &DL = BB->getDataLayout();
3075	bool TrueDestIsSafe = false;
3076	bool FalseDestIsSafe = false;
3077
3078	// True dest is safe if BranchCond => GuardCond.
3079	auto Impl = isImpliedCondition(LHS: BranchCond, RHS: GuardCond, DL);
3080	if (Impl && *Impl)
3081	TrueDestIsSafe = true;
3082	else {
3083	// False dest is safe if !BranchCond => GuardCond.
3084	Impl = isImpliedCondition(LHS: BranchCond, RHS: GuardCond, DL, / LHSIsTrue / false);
3085	if (Impl && *Impl)
3086	FalseDestIsSafe = true;
3087	}
3088
3089	if (!TrueDestIsSafe && !FalseDestIsSafe)
3090	return false;
3091
3092	BasicBlock *PredUnguardedBlock = TrueDestIsSafe ? TrueDest : FalseDest;
3093	BasicBlock *PredGuardedBlock = FalseDestIsSafe ? TrueDest : FalseDest;
3094
3095	ValueToValueMapTy UnguardedMapping, GuardedMapping;
3096	Instruction *AfterGuard = Guard->getNextNode();
3097	unsigned Cost =
3098	getJumpThreadDuplicationCost(TTI, BB, StopAt: AfterGuard, Threshold: BBDupThreshold);
3099	if (Cost > BBDupThreshold)
3100	return false;
3101	// Duplicate all instructions before the guard and the guard itself to the
3102	// branch where implication is not proved.
3103	BasicBlock *GuardedBlock = DuplicateInstructionsInSplitBetween(
3104	BB, PredBB: PredGuardedBlock, StopAt: AfterGuard, ValueMapping&: GuardedMapping, DTU&: *DTU);
3105	assert(GuardedBlock && "Could not create the guarded block?");
3106	// Duplicate all instructions before the guard in the unguarded branch.
3107	// Since we have successfully duplicated the guarded block and this block
3108	// has fewer instructions, we expect it to succeed.
3109	BasicBlock *UnguardedBlock = DuplicateInstructionsInSplitBetween(
3110	BB, PredBB: PredUnguardedBlock, StopAt: Guard, ValueMapping&: UnguardedMapping, DTU&: *DTU);
3111	assert(UnguardedBlock && "Could not create the unguarded block?");
3112	LLVM_DEBUG(dbgs() << "Moved guard " << *Guard << " to block "
3113	<< GuardedBlock->getName() << "\n");
3114	// Some instructions before the guard may still have uses. For them, we need
3115	// to create Phi nodes merging their copies in both guarded and unguarded
3116	// branches. Those instructions that have no uses can be just removed.
3117	SmallVector<Instruction *, `4`> ToRemove;
3118	for (auto BI = BB->begin(); &*BI != AfterGuard; ++BI)
3119	if (!isa<PHINode>(Val: &*BI))
3120	ToRemove.push_back(Elt: &*BI);
3121
3122	BasicBlock::iterator InsertionPoint = BB->getFirstInsertionPt();
3123	assert(InsertionPoint != BB->end() && "Empty block?");
3124	// Substitute with Phis & remove.
3125	for (auto *Inst : reverse(C&: ToRemove)) {
3126	if (!Inst->use_empty()) {
3127	PHINode *NewPN = PHINode::Create(Ty: Inst->getType(), NumReservedValues: `2`);
3128	NewPN->addIncoming(V: UnguardedMapping [Inst], BB: UnguardedBlock);
3129	NewPN->addIncoming(V: GuardedMapping [Inst], BB: GuardedBlock);
3130	NewPN->setDebugLoc(Inst->getDebugLoc());
3131	NewPN->insertBefore(InsertPos: InsertionPoint);
3132	Inst->replaceAllUsesWith(V: NewPN);
3133	}
3134	Inst->dropDbgRecords();
3135	Inst->eraseFromParent();
3136	}
3137	return true;
3138	}
3139
3140	PreservedAnalyses JumpThreadingPass::getPreservedAnalysis() const {
3141	PreservedAnalyses PA;
3142	PA.preserve<LazyValueAnalysis>();
3143	PA.preserve<DominatorTreeAnalysis>();
3144
3145	// TODO: We would like to preserve BPI/BFI. Enable once all paths update them.
3146	// TODO: Would be nice to verify BPI/BFI consistency as well.
3147	return PA;
3148	}
3149
3150	template <typename AnalysisT>
3151	typename AnalysisT::Result *JumpThreadingPass::runExternalAnalysis() {
3152	assert(FAM && "Can't run external analysis without FunctionAnalysisManager");
3153
3154	// If there were no changes since last call to 'runExternalAnalysis' then all
3155	// analysis is either up to date or explicitly invalidated. Just go ahead and
3156	// run the "external" analysis.
3157	if (!ChangedSinceLastAnalysisUpdate) {
3158	assert(!DTU->hasPendingUpdates() &&
3159	"Lost update of 'ChangedSinceLastAnalysisUpdate'?");
3160	// Run the "external" analysis.
3161	return &FAM->getResult<AnalysisT>(*F);
3162	}
3163	ChangedSinceLastAnalysisUpdate = false;
3164
3165	auto PA = getPreservedAnalysis();
3166	// TODO: This shouldn't be needed once 'getPreservedAnalysis' reports BPI/BFI
3167	// as preserved.
3168	PA.preserve<BranchProbabilityAnalysis>();
3169	PA.preserve<BlockFrequencyAnalysis>();
3170	// Report everything except explicitly preserved as invalid.
3171	FAM->invalidate(IR&: *F, PA);
3172	// Update DT/PDT.
3173	DTU ->flush();
3174	// Make sure DT/PDT are valid before running "external" analysis.
3175	assert(DTU->getDomTree().verify(DominatorTree::VerificationLevel::Fast));
3176	assert((!DTU->hasPostDomTree() \|\|
3177	DTU->getPostDomTree().verify(
3178	PostDominatorTree::VerificationLevel::Fast)));
3179	// Run the "external" analysis.
3180	auto Result = &FAM->getResult<AnalysisT>(F);
3181	// Update analysis JumpThreading depends on and not explicitly preserved.
3182	TTI = &FAM->getResult<TargetIRAnalysis>(IR&: *F);
3183	TLI = &FAM->getResult<TargetLibraryAnalysis>(IR&: *F);
3184	AA = &FAM->getResult<AAManager>(IR&: *F);
3185
3186	return Result;
3187	}
3188
3189	BranchProbabilityInfo *JumpThreadingPass::getBPI() {
3190	if (!BPI) {
3191	assert(FAM && "Can't create BPI without FunctionAnalysisManager");
3192	BPI = FAM->getCachedResult<BranchProbabilityAnalysis>(IR&: *F);
3193	}
3194	return BPI;
3195	}
3196
3197	BlockFrequencyInfo *JumpThreadingPass::getBFI() {
3198	if (!BFI) {
3199	assert(FAM && "Can't create BFI without FunctionAnalysisManager");
3200	BFI = FAM->getCachedResult<BlockFrequencyAnalysis>(IR&: *F);
3201	}
3202	return BFI;
3203	}
3204
3205	// Important note on validity of BPI/BFI. JumpThreading tries to preserve
3206	// BPI/BFI as it goes. Thus if cached instance exists it will be updated.
3207	// Otherwise, new instance of BPI/BFI is created (up to date by definition).
3208	BranchProbabilityInfo JumpThreadingPass::getOrCreateBPI(bool* Force) {
3209	auto *Res = getBPI();
3210	if (Res)
3211	return Res;
3212
3213	if (Force)
3214	BPI = runExternalAnalysis<BranchProbabilityAnalysis>();
3215
3216	return BPI;
3217	}
3218
3219	BlockFrequencyInfo JumpThreadingPass::getOrCreateBFI(bool* Force) {
3220	auto *Res = getBFI();
3221	if (Res)
3222	return Res;
3223
3224	if (Force)
3225	BFI = runExternalAnalysis<BlockFrequencyAnalysis>();
3226
3227	return BFI;
3228	}
3229

Browse the source code of llvm_projects/llvm/lib/Transforms/Scalar/JumpThreading.cpp