1 | //===- InstructionCombining.cpp - Combine multiple instructions -----------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // InstructionCombining - Combine instructions to form fewer, simple |
10 | // instructions. This pass does not modify the CFG. This pass is where |
11 | // algebraic simplification happens. |
12 | // |
13 | // This pass combines things like: |
14 | // %Y = add i32 %X, 1 |
15 | // %Z = add i32 %Y, 1 |
16 | // into: |
17 | // %Z = add i32 %X, 2 |
18 | // |
19 | // This is a simple worklist driven algorithm. |
20 | // |
21 | // This pass guarantees that the following canonicalizations are performed on |
22 | // the program: |
23 | // 1. If a binary operator has a constant operand, it is moved to the RHS |
24 | // 2. Bitwise operators with constant operands are always grouped so that |
25 | // shifts are performed first, then or's, then and's, then xor's. |
26 | // 3. Compare instructions are converted from <,>,<=,>= to ==,!= if possible |
27 | // 4. All cmp instructions on boolean values are replaced with logical ops |
28 | // 5. add X, X is represented as (X*2) => (X << 1) |
29 | // 6. Multiplies with a power-of-two constant argument are transformed into |
30 | // shifts. |
31 | // ... etc. |
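//    For example, canonicalization 6 rewrites:
//        %R = mul i32 %X, 8
//    into:
//        %R = shl i32 %X, 3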
32 | // |
33 | //===----------------------------------------------------------------------===// |
34 | |
35 | #include "InstCombineInternal.h" |
36 | #include "llvm/ADT/APInt.h" |
37 | #include "llvm/ADT/ArrayRef.h" |
38 | #include "llvm/ADT/DenseMap.h" |
39 | #include "llvm/ADT/SmallPtrSet.h" |
40 | #include "llvm/ADT/SmallVector.h" |
41 | #include "llvm/ADT/Statistic.h" |
42 | #include "llvm/Analysis/AliasAnalysis.h" |
43 | #include "llvm/Analysis/AssumptionCache.h" |
44 | #include "llvm/Analysis/BasicAliasAnalysis.h" |
45 | #include "llvm/Analysis/BlockFrequencyInfo.h" |
46 | #include "llvm/Analysis/CFG.h" |
47 | #include "llvm/Analysis/ConstantFolding.h" |
48 | #include "llvm/Analysis/GlobalsModRef.h" |
49 | #include "llvm/Analysis/InstructionSimplify.h" |
50 | #include "llvm/Analysis/LazyBlockFrequencyInfo.h" |
51 | #include "llvm/Analysis/LoopInfo.h" |
52 | #include "llvm/Analysis/MemoryBuiltins.h" |
53 | #include "llvm/Analysis/OptimizationRemarkEmitter.h" |
54 | #include "llvm/Analysis/ProfileSummaryInfo.h" |
55 | #include "llvm/Analysis/TargetFolder.h" |
56 | #include "llvm/Analysis/TargetLibraryInfo.h" |
57 | #include "llvm/Analysis/TargetTransformInfo.h" |
58 | #include "llvm/Analysis/Utils/Local.h" |
59 | #include "llvm/Analysis/ValueTracking.h" |
60 | #include "llvm/Analysis/VectorUtils.h" |
61 | #include "llvm/IR/BasicBlock.h" |
62 | #include "llvm/IR/CFG.h" |
63 | #include "llvm/IR/Constant.h" |
64 | #include "llvm/IR/Constants.h" |
65 | #include "llvm/IR/DIBuilder.h" |
66 | #include "llvm/IR/DataLayout.h" |
67 | #include "llvm/IR/DebugInfo.h" |
68 | #include "llvm/IR/DerivedTypes.h" |
69 | #include "llvm/IR/Dominators.h" |
70 | #include "llvm/IR/EHPersonalities.h" |
71 | #include "llvm/IR/Function.h" |
72 | #include "llvm/IR/GetElementPtrTypeIterator.h" |
73 | #include "llvm/IR/IRBuilder.h" |
74 | #include "llvm/IR/InstrTypes.h" |
75 | #include "llvm/IR/Instruction.h" |
76 | #include "llvm/IR/Instructions.h" |
77 | #include "llvm/IR/IntrinsicInst.h" |
78 | #include "llvm/IR/Intrinsics.h" |
79 | #include "llvm/IR/Metadata.h" |
80 | #include "llvm/IR/Operator.h" |
81 | #include "llvm/IR/PassManager.h" |
82 | #include "llvm/IR/PatternMatch.h" |
83 | #include "llvm/IR/Type.h" |
84 | #include "llvm/IR/Use.h" |
85 | #include "llvm/IR/User.h" |
86 | #include "llvm/IR/Value.h" |
87 | #include "llvm/IR/ValueHandle.h" |
88 | #include "llvm/InitializePasses.h" |
89 | #include "llvm/Support/Casting.h" |
90 | #include "llvm/Support/CommandLine.h" |
91 | #include "llvm/Support/Compiler.h" |
92 | #include "llvm/Support/Debug.h" |
93 | #include "llvm/Support/DebugCounter.h" |
94 | #include "llvm/Support/ErrorHandling.h" |
95 | #include "llvm/Support/KnownBits.h" |
96 | #include "llvm/Support/raw_ostream.h" |
97 | #include "llvm/Transforms/InstCombine/InstCombine.h" |
98 | #include "llvm/Transforms/Utils/BasicBlockUtils.h" |
99 | #include "llvm/Transforms/Utils/Local.h" |
100 | #include <algorithm> |
101 | #include <cassert> |
102 | #include <cstdint> |
103 | #include <memory> |
104 | #include <optional> |
105 | #include <string> |
106 | #include <utility> |
107 | |
108 | #define DEBUG_TYPE "instcombine" |
109 | #include "llvm/Transforms/Utils/InstructionWorklist.h" |
110 | #include <optional> |
111 | |
112 | using namespace llvm; |
113 | using namespace llvm::PatternMatch; |
114 | |
115 | STATISTIC(NumWorklistIterations, |
116 | "Number of instruction combining iterations performed" ); |
117 | STATISTIC(NumOneIteration, "Number of functions with one iteration" ); |
118 | STATISTIC(NumTwoIterations, "Number of functions with two iterations" ); |
119 | STATISTIC(NumThreeIterations, "Number of functions with three iterations" ); |
120 | STATISTIC(NumFourOrMoreIterations, |
121 | "Number of functions with four or more iterations" ); |
122 | |
123 | STATISTIC(NumCombined , "Number of insts combined" ); |
124 | STATISTIC(NumConstProp, "Number of constant folds" ); |
125 | STATISTIC(NumDeadInst , "Number of dead inst eliminated" ); |
126 | STATISTIC(NumSunkInst , "Number of instructions sunk" ); |
127 | STATISTIC(NumExpand, "Number of expansions" ); |
128 | STATISTIC(NumFactor , "Number of factorizations" ); |
129 | STATISTIC(NumReassoc , "Number of reassociations" ); |
130 | DEBUG_COUNTER(VisitCounter, "instcombine-visit" , |
131 | "Controls which instructions are visited" ); |
132 | |
133 | static cl::opt<bool> |
134 | EnableCodeSinking("instcombine-code-sinking" , cl::desc("Enable code sinking" ), |
135 | cl::init(Val: true)); |
136 | |
137 | static cl::opt<unsigned> MaxSinkNumUsers( |
138 | "instcombine-max-sink-users" , cl::init(Val: 32), |
139 | cl::desc("Maximum number of undroppable users for instruction sinking" )); |
140 | |
141 | static cl::opt<unsigned> |
142 | MaxArraySize("instcombine-maxarray-size" , cl::init(Val: 1024), |
143 | cl::desc("Maximum array size considered when doing a combine" )); |
144 | |
145 | // FIXME: Remove this flag when it is no longer necessary to convert |
146 | // llvm.dbg.declare to avoid inaccurate debug info. Setting this to false |
147 | // increases variable availability at the cost of accuracy. Variables that |
148 | // cannot be promoted by mem2reg or SROA will be described as living in memory |
149 | // for their entire lifetime. However, passes like DSE and instcombine can |
150 | // delete stores to the alloca, leading to misleading and inaccurate debug |
151 | // information. This flag can be removed when those passes are fixed. |
152 | static cl::opt<unsigned> ShouldLowerDbgDeclare("instcombine-lower-dbg-declare" , |
153 | cl::Hidden, cl::init(Val: true)); |
154 | |
155 | std::optional<Instruction *> |
156 | InstCombiner::targetInstCombineIntrinsic(IntrinsicInst &II) { |
157 | // Handle target specific intrinsics |
158 | if (II.getCalledFunction()->isTargetIntrinsic()) { |
159 | return TTI.instCombineIntrinsic(IC&: *this, II); |
160 | } |
161 | return std::nullopt; |
162 | } |
163 | |
164 | std::optional<Value *> InstCombiner::targetSimplifyDemandedUseBitsIntrinsic( |
165 | IntrinsicInst &II, APInt DemandedMask, KnownBits &Known, |
166 | bool &KnownBitsComputed) { |
167 | // Handle target specific intrinsics |
168 | if (II.getCalledFunction()->isTargetIntrinsic()) { |
169 | return TTI.simplifyDemandedUseBitsIntrinsic(IC&: *this, II, DemandedMask, Known, |
170 | KnownBitsComputed); |
171 | } |
172 | return std::nullopt; |
173 | } |
174 | |
175 | std::optional<Value *> InstCombiner::targetSimplifyDemandedVectorEltsIntrinsic( |
176 | IntrinsicInst &II, APInt DemandedElts, APInt &PoisonElts, |
177 | APInt &PoisonElts2, APInt &PoisonElts3, |
178 | std::function<void(Instruction *, unsigned, APInt, APInt &)> |
179 | SimplifyAndSetOp) { |
180 | // Handle target specific intrinsics |
181 | if (II.getCalledFunction()->isTargetIntrinsic()) { |
182 | return TTI.simplifyDemandedVectorEltsIntrinsic( |
183 | IC&: *this, II, DemandedElts, UndefElts&: PoisonElts, UndefElts2&: PoisonElts2, UndefElts3&: PoisonElts3, |
184 | SimplifyAndSetOp); |
185 | } |
186 | return std::nullopt; |
187 | } |
188 | |
189 | bool InstCombiner::isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const { |
190 | return TTI.isValidAddrSpaceCast(FromAS, ToAS); |
191 | } |
192 | |
193 | Value *InstCombinerImpl::EmitGEPOffset(GEPOperator *GEP, bool RewriteGEP) { |
194 | if (!RewriteGEP) |
195 | return llvm::emitGEPOffset(Builder: &Builder, DL, GEP); |
196 | |
197 | IRBuilderBase::InsertPointGuard Guard(Builder); |
198 | auto *Inst = dyn_cast<Instruction>(Val: GEP); |
199 | if (Inst) |
200 | Builder.SetInsertPoint(Inst); |
201 | |
202 | Value *Offset = EmitGEPOffset(GEP); |
203 | // If a non-trivial GEP has other uses, rewrite it to avoid duplicating |
204 | // the offset arithmetic. |
205 | if (Inst && !GEP->hasOneUse() && !GEP->hasAllConstantIndices() && |
206 | !GEP->getSourceElementType()->isIntegerTy(Bitwidth: 8)) { |
207 | replaceInstUsesWith( |
208 | I&: *Inst, V: Builder.CreateGEP(Ty: Builder.getInt8Ty(), Ptr: GEP->getPointerOperand(), |
209 | IdxList: Offset, Name: "" , NW: GEP->getNoWrapFlags())); |
210 | eraseInstFromFunction(I&: *Inst); |
211 | } |
212 | return Offset; |
213 | } |
214 | |
215 | /// Legal integers and common types are considered desirable. This is used to |
216 | /// avoid creating instructions with types that may not be supported well by the |
/// backend.
218 | /// NOTE: This treats i8, i16 and i32 specially because they are common |
219 | /// types in frontend languages. |
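/// Illustrative example: with native integer widths "n32:64" in the
/// datalayout, a width of 16 is still desirable here (common frontend type),
/// while a width of 48 is not (neither common nor a legal integer width).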
220 | bool InstCombinerImpl::isDesirableIntType(unsigned BitWidth) const { |
221 | switch (BitWidth) { |
222 | case 8: |
223 | case 16: |
224 | case 32: |
225 | return true; |
226 | default: |
227 | return DL.isLegalInteger(Width: BitWidth); |
228 | } |
229 | } |
230 | |
231 | /// Return true if it is desirable to convert an integer computation from a |
232 | /// given bit width to a new bit width. |
233 | /// We don't want to convert from a legal or desirable type (like i8) to an |
234 | /// illegal type or from a smaller to a larger illegal type. A width of '1' |
235 | /// is always treated as a desirable type because i1 is a fundamental type in |
236 | /// IR, and there are many specialized optimizations for i1 types. |
237 | /// Common/desirable widths are equally treated as legal to convert to, in |
238 | /// order to open up more combining opportunities. |
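/// For example, on a typical datalayout shrinking i33 -> i32 is allowed
/// because i32 is a desirable width even if it is not legal, while widening
/// i32 -> i33 is rejected because it would create an illegal type.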
239 | bool InstCombinerImpl::shouldChangeType(unsigned FromWidth, |
240 | unsigned ToWidth) const { |
241 | bool FromLegal = FromWidth == 1 || DL.isLegalInteger(Width: FromWidth); |
242 | bool ToLegal = ToWidth == 1 || DL.isLegalInteger(Width: ToWidth); |
243 | |
244 | // Convert to desirable widths even if they are not legal types. |
245 | // Only shrink types, to prevent infinite loops. |
246 | if (ToWidth < FromWidth && isDesirableIntType(BitWidth: ToWidth)) |
247 | return true; |
248 | |
// If the source is a legal or desirable integer type and the result would be
250 | // an illegal type, don't do the transformation. |
251 | if ((FromLegal || isDesirableIntType(BitWidth: FromWidth)) && !ToLegal) |
252 | return false; |
253 | |
254 | // Otherwise, if both are illegal, do not increase the size of the result. We |
255 | // do allow things like i160 -> i64, but not i64 -> i160. |
256 | if (!FromLegal && !ToLegal && ToWidth > FromWidth) |
257 | return false; |
258 | |
259 | return true; |
260 | } |
261 | |
262 | /// Return true if it is desirable to convert a computation from 'From' to 'To'. |
263 | /// We don't want to convert from a legal to an illegal type or from a smaller |
264 | /// to a larger illegal type. i1 is always treated as a legal type because it is |
265 | /// a fundamental type in IR, and there are many specialized optimizations for |
266 | /// i1 types. |
267 | bool InstCombinerImpl::shouldChangeType(Type *From, Type *To) const { |
268 | // TODO: This could be extended to allow vectors. Datalayout changes might be |
269 | // needed to properly support that. |
270 | if (!From->isIntegerTy() || !To->isIntegerTy()) |
271 | return false; |
272 | |
273 | unsigned FromWidth = From->getPrimitiveSizeInBits(); |
274 | unsigned ToWidth = To->getPrimitiveSizeInBits(); |
275 | return shouldChangeType(FromWidth, ToWidth); |
276 | } |
277 | |
278 | // Return true, if No Signed Wrap should be maintained for I. |
279 | // The No Signed Wrap flag can be kept if the operation "B (I.getOpcode) C", |
280 | // where both B and C should be ConstantInts, results in a constant that does |
281 | // not overflow. This function only handles the Add and Sub opcodes. For |
282 | // all other opcodes, the function conservatively returns false. |
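// For example, for an i8 add with B == 100 and C == 20 the nsw flag can be
// kept (120 fits in i8), but with B == 100 and C == 30 it cannot (130
// overflows the signed i8 range).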
283 | static bool maintainNoSignedWrap(BinaryOperator &I, Value *B, Value *C) { |
284 | auto *OBO = dyn_cast<OverflowingBinaryOperator>(Val: &I); |
285 | if (!OBO || !OBO->hasNoSignedWrap()) |
286 | return false; |
287 | |
288 | // We reason about Add and Sub Only. |
289 | Instruction::BinaryOps Opcode = I.getOpcode(); |
290 | if (Opcode != Instruction::Add && Opcode != Instruction::Sub) |
291 | return false; |
292 | |
293 | const APInt *BVal, *CVal; |
294 | if (!match(V: B, P: m_APInt(Res&: BVal)) || !match(V: C, P: m_APInt(Res&: CVal))) |
295 | return false; |
296 | |
297 | bool Overflow = false; |
298 | if (Opcode == Instruction::Add) |
299 | (void)BVal->sadd_ov(RHS: *CVal, Overflow); |
300 | else |
301 | (void)BVal->ssub_ov(RHS: *CVal, Overflow); |
302 | |
303 | return !Overflow; |
304 | } |
305 | |
306 | static bool hasNoUnsignedWrap(BinaryOperator &I) { |
307 | auto *OBO = dyn_cast<OverflowingBinaryOperator>(Val: &I); |
308 | return OBO && OBO->hasNoUnsignedWrap(); |
309 | } |
310 | |
311 | static bool hasNoSignedWrap(BinaryOperator &I) { |
312 | auto *OBO = dyn_cast<OverflowingBinaryOperator>(Val: &I); |
313 | return OBO && OBO->hasNoSignedWrap(); |
314 | } |
315 | |
/// Conservatively clears subclassOptionalData after a reassociation or
/// commutation. Fast-math flags are the exception: they are preserved when
/// applicable.
319 | static void ClearSubclassDataAfterReassociation(BinaryOperator &I) { |
320 | FPMathOperator *FPMO = dyn_cast<FPMathOperator>(Val: &I); |
321 | if (!FPMO) { |
322 | I.clearSubclassOptionalData(); |
323 | return; |
324 | } |
325 | |
326 | FastMathFlags FMF = I.getFastMathFlags(); |
327 | I.clearSubclassOptionalData(); |
328 | I.setFastMathFlags(FMF); |
329 | } |
330 | |
331 | /// Combine constant operands of associative operations either before or after a |
332 | /// cast to eliminate one of the associative operations: |
333 | /// (op (cast (op X, C2)), C1) --> (cast (op X, op (C1, C2))) |
334 | /// (op (cast (op X, C2)), C1) --> (op (cast X), op (C1, C2)) |
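/// Illustrative instance of the second form with a zext and bitwise 'and':
///   (and (zext i8 (and i8 %x, 12) to i32), 7)
///     --> (and (zext i8 %x to i32), 4)   ; 7 & zext(12) == 4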
335 | static bool simplifyAssocCastAssoc(BinaryOperator *BinOp1, |
336 | InstCombinerImpl &IC) { |
337 | auto *Cast = dyn_cast<CastInst>(Val: BinOp1->getOperand(i_nocapture: 0)); |
338 | if (!Cast || !Cast->hasOneUse()) |
339 | return false; |
340 | |
341 | // TODO: Enhance logic for other casts and remove this check. |
342 | auto CastOpcode = Cast->getOpcode(); |
343 | if (CastOpcode != Instruction::ZExt) |
344 | return false; |
345 | |
346 | // TODO: Enhance logic for other BinOps and remove this check. |
347 | if (!BinOp1->isBitwiseLogicOp()) |
348 | return false; |
349 | |
350 | auto AssocOpcode = BinOp1->getOpcode(); |
351 | auto *BinOp2 = dyn_cast<BinaryOperator>(Val: Cast->getOperand(i_nocapture: 0)); |
352 | if (!BinOp2 || !BinOp2->hasOneUse() || BinOp2->getOpcode() != AssocOpcode) |
353 | return false; |
354 | |
355 | Constant *C1, *C2; |
356 | if (!match(V: BinOp1->getOperand(i_nocapture: 1), P: m_Constant(C&: C1)) || |
357 | !match(V: BinOp2->getOperand(i_nocapture: 1), P: m_Constant(C&: C2))) |
358 | return false; |
359 | |
360 | // TODO: This assumes a zext cast. |
361 | // Eg, if it was a trunc, we'd cast C1 to the source type because casting C2 |
362 | // to the destination type might lose bits. |
363 | |
364 | // Fold the constants together in the destination type: |
365 | // (op (cast (op X, C2)), C1) --> (op (cast X), FoldedC) |
366 | const DataLayout &DL = IC.getDataLayout(); |
367 | Type *DestTy = C1->getType(); |
368 | Constant *CastC2 = ConstantFoldCastOperand(Opcode: CastOpcode, C: C2, DestTy, DL); |
369 | if (!CastC2) |
370 | return false; |
371 | Constant *FoldedC = ConstantFoldBinaryOpOperands(Opcode: AssocOpcode, LHS: C1, RHS: CastC2, DL); |
372 | if (!FoldedC) |
373 | return false; |
374 | |
375 | IC.replaceOperand(I&: *Cast, OpNum: 0, V: BinOp2->getOperand(i_nocapture: 0)); |
376 | IC.replaceOperand(I&: *BinOp1, OpNum: 1, V: FoldedC); |
377 | BinOp1->dropPoisonGeneratingFlags(); |
378 | Cast->dropPoisonGeneratingFlags(); |
379 | return true; |
380 | } |
381 | |
382 | // Simplifies IntToPtr/PtrToInt RoundTrip Cast. |
383 | // inttoptr ( ptrtoint (x) ) --> x |
384 | Value *InstCombinerImpl::simplifyIntToPtrRoundTripCast(Value *Val) { |
385 | auto *IntToPtr = dyn_cast<IntToPtrInst>(Val); |
386 | if (IntToPtr && DL.getTypeSizeInBits(Ty: IntToPtr->getDestTy()) == |
387 | DL.getTypeSizeInBits(Ty: IntToPtr->getSrcTy())) { |
388 | auto *PtrToInt = dyn_cast<PtrToIntInst>(Val: IntToPtr->getOperand(i_nocapture: 0)); |
389 | Type *CastTy = IntToPtr->getDestTy(); |
390 | if (PtrToInt && |
391 | CastTy->getPointerAddressSpace() == |
392 | PtrToInt->getSrcTy()->getPointerAddressSpace() && |
393 | DL.getTypeSizeInBits(Ty: PtrToInt->getSrcTy()) == |
394 | DL.getTypeSizeInBits(Ty: PtrToInt->getDestTy())) |
395 | return PtrToInt->getOperand(i_nocapture: 0); |
396 | } |
397 | return nullptr; |
398 | } |
399 | |
400 | /// This performs a few simplifications for operators that are associative or |
401 | /// commutative: |
402 | /// |
403 | /// Commutative operators: |
404 | /// |
405 | /// 1. Order operands such that they are listed from right (least complex) to |
406 | /// left (most complex). This puts constants before unary operators before |
407 | /// binary operators. |
408 | /// |
409 | /// Associative operators: |
410 | /// |
411 | /// 2. Transform: "(A op B) op C" ==> "A op (B op C)" if "B op C" simplifies. |
412 | /// 3. Transform: "A op (B op C)" ==> "(A op B) op C" if "A op B" simplifies. |
413 | /// |
414 | /// Associative and commutative operators: |
415 | /// |
416 | /// 4. Transform: "(A op B) op C" ==> "(C op A) op B" if "C op A" simplifies. |
417 | /// 5. Transform: "A op (B op C)" ==> "B op (C op A)" if "C op A" simplifies. |
418 | /// 6. Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)" |
419 | /// if C1 and C2 are constants. |
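/// For example, transform 2 turns "(X + 7) + -7" into "X + (7 + -7)", which
/// then folds to X, and transform 6 turns "(X + 3) + (Y + 5)" into
/// "(X + Y) + 8".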
420 | bool InstCombinerImpl::SimplifyAssociativeOrCommutative(BinaryOperator &I) { |
421 | Instruction::BinaryOps Opcode = I.getOpcode(); |
422 | bool Changed = false; |
423 | |
424 | do { |
425 | // Order operands such that they are listed from right (least complex) to |
426 | // left (most complex). This puts constants before unary operators before |
427 | // binary operators. |
428 | if (I.isCommutative() && getComplexity(V: I.getOperand(i_nocapture: 0)) < |
429 | getComplexity(V: I.getOperand(i_nocapture: 1))) |
430 | Changed = !I.swapOperands(); |
431 | |
432 | if (I.isCommutative()) { |
433 | if (auto Pair = matchSymmetricPair(LHS: I.getOperand(i_nocapture: 0), RHS: I.getOperand(i_nocapture: 1))) { |
434 | replaceOperand(I, OpNum: 0, V: Pair->first); |
435 | replaceOperand(I, OpNum: 1, V: Pair->second); |
436 | Changed = true; |
437 | } |
438 | } |
439 | |
440 | BinaryOperator *Op0 = dyn_cast<BinaryOperator>(Val: I.getOperand(i_nocapture: 0)); |
441 | BinaryOperator *Op1 = dyn_cast<BinaryOperator>(Val: I.getOperand(i_nocapture: 1)); |
442 | |
443 | if (I.isAssociative()) { |
444 | // Transform: "(A op B) op C" ==> "A op (B op C)" if "B op C" simplifies. |
445 | if (Op0 && Op0->getOpcode() == Opcode) { |
446 | Value *A = Op0->getOperand(i_nocapture: 0); |
447 | Value *B = Op0->getOperand(i_nocapture: 1); |
448 | Value *C = I.getOperand(i_nocapture: 1); |
449 | |
450 | // Does "B op C" simplify? |
451 | if (Value *V = simplifyBinOp(Opcode, LHS: B, RHS: C, Q: SQ.getWithInstruction(I: &I))) { |
452 | // It simplifies to V. Form "A op V". |
453 | replaceOperand(I, OpNum: 0, V: A); |
454 | replaceOperand(I, OpNum: 1, V); |
455 | bool IsNUW = hasNoUnsignedWrap(I) && hasNoUnsignedWrap(I&: *Op0); |
456 | bool IsNSW = maintainNoSignedWrap(I, B, C) && hasNoSignedWrap(I&: *Op0); |
457 | |
458 | // Conservatively clear all optional flags since they may not be |
459 | // preserved by the reassociation. Reset nsw/nuw based on the above |
460 | // analysis. |
461 | ClearSubclassDataAfterReassociation(I); |
462 | |
463 | // Note: this is only valid because SimplifyBinOp doesn't look at |
464 | // the operands to Op0. |
465 | if (IsNUW) |
466 | I.setHasNoUnsignedWrap(true); |
467 | |
468 | if (IsNSW) |
469 | I.setHasNoSignedWrap(true); |
470 | |
471 | Changed = true; |
472 | ++NumReassoc; |
473 | continue; |
474 | } |
475 | } |
476 | |
477 | // Transform: "A op (B op C)" ==> "(A op B) op C" if "A op B" simplifies. |
478 | if (Op1 && Op1->getOpcode() == Opcode) { |
479 | Value *A = I.getOperand(i_nocapture: 0); |
480 | Value *B = Op1->getOperand(i_nocapture: 0); |
481 | Value *C = Op1->getOperand(i_nocapture: 1); |
482 | |
483 | // Does "A op B" simplify? |
484 | if (Value *V = simplifyBinOp(Opcode, LHS: A, RHS: B, Q: SQ.getWithInstruction(I: &I))) { |
485 | // It simplifies to V. Form "V op C". |
486 | replaceOperand(I, OpNum: 0, V); |
487 | replaceOperand(I, OpNum: 1, V: C); |
488 | // Conservatively clear the optional flags, since they may not be |
489 | // preserved by the reassociation. |
490 | ClearSubclassDataAfterReassociation(I); |
491 | Changed = true; |
492 | ++NumReassoc; |
493 | continue; |
494 | } |
495 | } |
496 | } |
497 | |
498 | if (I.isAssociative() && I.isCommutative()) { |
499 | if (simplifyAssocCastAssoc(BinOp1: &I, IC&: *this)) { |
500 | Changed = true; |
501 | ++NumReassoc; |
502 | continue; |
503 | } |
504 | |
505 | // Transform: "(A op B) op C" ==> "(C op A) op B" if "C op A" simplifies. |
506 | if (Op0 && Op0->getOpcode() == Opcode) { |
507 | Value *A = Op0->getOperand(i_nocapture: 0); |
508 | Value *B = Op0->getOperand(i_nocapture: 1); |
509 | Value *C = I.getOperand(i_nocapture: 1); |
510 | |
511 | // Does "C op A" simplify? |
512 | if (Value *V = simplifyBinOp(Opcode, LHS: C, RHS: A, Q: SQ.getWithInstruction(I: &I))) { |
513 | // It simplifies to V. Form "V op B". |
514 | replaceOperand(I, OpNum: 0, V); |
515 | replaceOperand(I, OpNum: 1, V: B); |
516 | // Conservatively clear the optional flags, since they may not be |
517 | // preserved by the reassociation. |
518 | ClearSubclassDataAfterReassociation(I); |
519 | Changed = true; |
520 | ++NumReassoc; |
521 | continue; |
522 | } |
523 | } |
524 | |
525 | // Transform: "A op (B op C)" ==> "B op (C op A)" if "C op A" simplifies. |
526 | if (Op1 && Op1->getOpcode() == Opcode) { |
527 | Value *A = I.getOperand(i_nocapture: 0); |
528 | Value *B = Op1->getOperand(i_nocapture: 0); |
529 | Value *C = Op1->getOperand(i_nocapture: 1); |
530 | |
531 | // Does "C op A" simplify? |
532 | if (Value *V = simplifyBinOp(Opcode, LHS: C, RHS: A, Q: SQ.getWithInstruction(I: &I))) { |
533 | // It simplifies to V. Form "B op V". |
534 | replaceOperand(I, OpNum: 0, V: B); |
535 | replaceOperand(I, OpNum: 1, V); |
536 | // Conservatively clear the optional flags, since they may not be |
537 | // preserved by the reassociation. |
538 | ClearSubclassDataAfterReassociation(I); |
539 | Changed = true; |
540 | ++NumReassoc; |
541 | continue; |
542 | } |
543 | } |
544 | |
545 | // Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)" |
546 | // if C1 and C2 are constants. |
547 | Value *A, *B; |
548 | Constant *C1, *C2, *CRes; |
549 | if (Op0 && Op1 && |
550 | Op0->getOpcode() == Opcode && Op1->getOpcode() == Opcode && |
551 | match(V: Op0, P: m_OneUse(SubPattern: m_BinOp(L: m_Value(V&: A), R: m_Constant(C&: C1)))) && |
552 | match(V: Op1, P: m_OneUse(SubPattern: m_BinOp(L: m_Value(V&: B), R: m_Constant(C&: C2)))) && |
553 | (CRes = ConstantFoldBinaryOpOperands(Opcode, LHS: C1, RHS: C2, DL))) { |
554 | bool IsNUW = hasNoUnsignedWrap(I) && |
555 | hasNoUnsignedWrap(I&: *Op0) && |
556 | hasNoUnsignedWrap(I&: *Op1); |
557 | BinaryOperator *NewBO = (IsNUW && Opcode == Instruction::Add) ? |
558 | BinaryOperator::CreateNUW(Opc: Opcode, V1: A, V2: B) : |
559 | BinaryOperator::Create(Op: Opcode, S1: A, S2: B); |
560 | |
561 | if (isa<FPMathOperator>(Val: NewBO)) { |
562 | FastMathFlags Flags = I.getFastMathFlags() & |
563 | Op0->getFastMathFlags() & |
564 | Op1->getFastMathFlags(); |
565 | NewBO->setFastMathFlags(Flags); |
566 | } |
567 | InsertNewInstWith(New: NewBO, Old: I.getIterator()); |
568 | NewBO->takeName(V: Op1); |
569 | replaceOperand(I, OpNum: 0, V: NewBO); |
570 | replaceOperand(I, OpNum: 1, V: CRes); |
571 | // Conservatively clear the optional flags, since they may not be |
572 | // preserved by the reassociation. |
573 | ClearSubclassDataAfterReassociation(I); |
574 | if (IsNUW) |
575 | I.setHasNoUnsignedWrap(true); |
576 | |
577 | Changed = true; |
578 | continue; |
579 | } |
580 | } |
581 | |
582 | // No further simplifications. |
583 | return Changed; |
584 | } while (true); |
585 | } |
586 | |
587 | /// Return whether "X LOp (Y ROp Z)" is always equal to |
588 | /// "(X LOp Y) ROp (X LOp Z)". |
589 | static bool leftDistributesOverRight(Instruction::BinaryOps LOp, |
590 | Instruction::BinaryOps ROp) { |
591 | // X & (Y | Z) <--> (X & Y) | (X & Z) |
592 | // X & (Y ^ Z) <--> (X & Y) ^ (X & Z) |
593 | if (LOp == Instruction::And) |
594 | return ROp == Instruction::Or || ROp == Instruction::Xor; |
595 | |
596 | // X | (Y & Z) <--> (X | Y) & (X | Z) |
597 | if (LOp == Instruction::Or) |
598 | return ROp == Instruction::And; |
599 | |
600 | // X * (Y + Z) <--> (X * Y) + (X * Z) |
601 | // X * (Y - Z) <--> (X * Y) - (X * Z) |
602 | if (LOp == Instruction::Mul) |
603 | return ROp == Instruction::Add || ROp == Instruction::Sub; |
604 | |
605 | return false; |
606 | } |
607 | |
608 | /// Return whether "(X LOp Y) ROp Z" is always equal to |
609 | /// "(X ROp Z) LOp (Y ROp Z)". |
610 | static bool rightDistributesOverLeft(Instruction::BinaryOps LOp, |
611 | Instruction::BinaryOps ROp) { |
612 | if (Instruction::isCommutative(Opcode: ROp)) |
613 | return leftDistributesOverRight(LOp: ROp, ROp: LOp); |
614 | |
615 | // (X {&|^} Y) >> Z <--> (X >> Z) {&|^} (Y >> Z) for all shifts. |
616 | return Instruction::isBitwiseLogicOp(Opcode: LOp) && Instruction::isShift(Opcode: ROp); |
617 | |
618 | // TODO: It would be nice to handle division, aka "(X + Y)/Z = X/Z + Y/Z", |
619 | // but this requires knowing that the addition does not overflow and other |
620 | // such subtleties. |
621 | } |
622 | |
/// This function returns the identity value for the given opcode, which can be
/// used to factor patterns like (X * 2) + X ==> (X * 2) + (X * 1) ==> X * (2 + 1).
625 | static Value *getIdentityValue(Instruction::BinaryOps Opcode, Value *V) { |
626 | if (isa<Constant>(Val: V)) |
627 | return nullptr; |
628 | |
629 | return ConstantExpr::getBinOpIdentity(Opcode, Ty: V->getType()); |
630 | } |
631 | |
632 | /// This function predicates factorization using distributive laws. By default, |
633 | /// it just returns the 'Op' inputs. But for special-cases like |
634 | /// 'add(shl(X, 5), ...)', this function will have TopOpcode == Instruction::Add |
635 | /// and Op = shl(X, 5). The 'shl' is treated as the more general 'mul X, 32' to |
636 | /// allow more factorization opportunities. |
637 | static Instruction::BinaryOps |
638 | getBinOpsForFactorization(Instruction::BinaryOps TopOpcode, BinaryOperator *Op, |
639 | Value *&LHS, Value *&RHS, BinaryOperator *OtherOp) { |
640 | assert(Op && "Expected a binary operator" ); |
641 | LHS = Op->getOperand(i_nocapture: 0); |
642 | RHS = Op->getOperand(i_nocapture: 1); |
643 | if (TopOpcode == Instruction::Add || TopOpcode == Instruction::Sub) { |
644 | Constant *C; |
645 | if (match(V: Op, P: m_Shl(L: m_Value(), R: m_ImmConstant(C)))) { |
646 | // X << C --> X * (1 << C) |
647 | RHS = ConstantFoldBinaryInstruction( |
648 | Opcode: Instruction::Shl, V1: ConstantInt::get(Ty: Op->getType(), V: 1), V2: C); |
649 | assert(RHS && "Constant folding of immediate constants failed" ); |
650 | return Instruction::Mul; |
651 | } |
652 | // TODO: We can add other conversions e.g. shr => div etc. |
653 | } |
654 | if (Instruction::isBitwiseLogicOp(Opcode: TopOpcode)) { |
655 | if (OtherOp && OtherOp->getOpcode() == Instruction::AShr && |
656 | match(V: Op, P: m_LShr(L: m_NonNegative(), R: m_Value()))) { |
657 | // lshr nneg C, X --> ashr nneg C, X |
658 | return Instruction::AShr; |
659 | } |
660 | } |
661 | return Op->getOpcode(); |
662 | } |
663 | |
664 | /// This tries to simplify binary operations by factorizing out common terms |
/// (e.g. "(A*B)+(A*C)" -> "A*(B+C)").
666 | static Value *tryFactorization(BinaryOperator &I, const SimplifyQuery &SQ, |
667 | InstCombiner::BuilderTy &Builder, |
668 | Instruction::BinaryOps InnerOpcode, Value *A, |
669 | Value *B, Value *C, Value *D) { |
670 | assert(A && B && C && D && "All values must be provided" ); |
671 | |
672 | Value *V = nullptr; |
673 | Value *RetVal = nullptr; |
674 | Value *LHS = I.getOperand(i_nocapture: 0), *RHS = I.getOperand(i_nocapture: 1); |
675 | Instruction::BinaryOps TopLevelOpcode = I.getOpcode(); |
676 | |
677 | // Does "X op' Y" always equal "Y op' X"? |
678 | bool InnerCommutative = Instruction::isCommutative(Opcode: InnerOpcode); |
679 | |
680 | // Does "X op' (Y op Z)" always equal "(X op' Y) op (X op' Z)"? |
681 | if (leftDistributesOverRight(LOp: InnerOpcode, ROp: TopLevelOpcode)) { |
682 | // Does the instruction have the form "(A op' B) op (A op' D)" or, in the |
683 | // commutative case, "(A op' B) op (C op' A)"? |
684 | if (A == C || (InnerCommutative && A == D)) { |
685 | if (A != C) |
686 | std::swap(a&: C, b&: D); |
687 | // Consider forming "A op' (B op D)". |
688 | // If "B op D" simplifies then it can be formed with no cost. |
689 | V = simplifyBinOp(Opcode: TopLevelOpcode, LHS: B, RHS: D, Q: SQ.getWithInstruction(I: &I)); |
690 | |
691 | // If "B op D" doesn't simplify then only go on if one of the existing |
692 | // operations "A op' B" and "C op' D" will be zapped as no longer used. |
693 | if (!V && (LHS->hasOneUse() || RHS->hasOneUse())) |
694 | V = Builder.CreateBinOp(Opc: TopLevelOpcode, LHS: B, RHS: D, Name: RHS->getName()); |
695 | if (V) |
696 | RetVal = Builder.CreateBinOp(Opc: InnerOpcode, LHS: A, RHS: V); |
697 | } |
698 | } |
699 | |
700 | // Does "(X op Y) op' Z" always equal "(X op' Z) op (Y op' Z)"? |
701 | if (!RetVal && rightDistributesOverLeft(LOp: TopLevelOpcode, ROp: InnerOpcode)) { |
702 | // Does the instruction have the form "(A op' B) op (C op' B)" or, in the |
703 | // commutative case, "(A op' B) op (B op' D)"? |
704 | if (B == D || (InnerCommutative && B == C)) { |
705 | if (B != D) |
706 | std::swap(a&: C, b&: D); |
707 | // Consider forming "(A op C) op' B". |
708 | // If "A op C" simplifies then it can be formed with no cost. |
709 | V = simplifyBinOp(Opcode: TopLevelOpcode, LHS: A, RHS: C, Q: SQ.getWithInstruction(I: &I)); |
710 | |
711 | // If "A op C" doesn't simplify then only go on if one of the existing |
712 | // operations "A op' B" and "C op' D" will be zapped as no longer used. |
713 | if (!V && (LHS->hasOneUse() || RHS->hasOneUse())) |
714 | V = Builder.CreateBinOp(Opc: TopLevelOpcode, LHS: A, RHS: C, Name: LHS->getName()); |
715 | if (V) |
716 | RetVal = Builder.CreateBinOp(Opc: InnerOpcode, LHS: V, RHS: B); |
717 | } |
718 | } |
719 | |
720 | if (!RetVal) |
721 | return nullptr; |
722 | |
723 | ++NumFactor; |
724 | RetVal->takeName(V: &I); |
725 | |
726 | // Try to add no-overflow flags to the final value. |
727 | if (isa<OverflowingBinaryOperator>(Val: RetVal)) { |
728 | bool HasNSW = false; |
729 | bool HasNUW = false; |
730 | if (isa<OverflowingBinaryOperator>(Val: &I)) { |
731 | HasNSW = I.hasNoSignedWrap(); |
732 | HasNUW = I.hasNoUnsignedWrap(); |
733 | } |
734 | if (auto *LOBO = dyn_cast<OverflowingBinaryOperator>(Val: LHS)) { |
735 | HasNSW &= LOBO->hasNoSignedWrap(); |
736 | HasNUW &= LOBO->hasNoUnsignedWrap(); |
737 | } |
738 | |
739 | if (auto *ROBO = dyn_cast<OverflowingBinaryOperator>(Val: RHS)) { |
740 | HasNSW &= ROBO->hasNoSignedWrap(); |
741 | HasNUW &= ROBO->hasNoUnsignedWrap(); |
742 | } |
743 | |
744 | if (TopLevelOpcode == Instruction::Add && InnerOpcode == Instruction::Mul) { |
745 | // We can propagate 'nsw' if we know that |
746 | // %Y = mul nsw i16 %X, C |
747 | // %Z = add nsw i16 %Y, %X |
748 | // => |
749 | // %Z = mul nsw i16 %X, C+1 |
750 | // |
751 | // iff C+1 isn't INT_MIN |
752 | const APInt *CInt; |
753 | if (match(V, P: m_APInt(Res&: CInt)) && !CInt->isMinSignedValue()) |
754 | cast<Instruction>(Val: RetVal)->setHasNoSignedWrap(HasNSW); |
755 | |
756 | // nuw can be propagated with any constant or nuw value. |
757 | cast<Instruction>(Val: RetVal)->setHasNoUnsignedWrap(HasNUW); |
758 | } |
759 | } |
760 | return RetVal; |
761 | } |
762 | |
763 | // If `I` has one Const operand and the other matches `(ctpop (not x))`, |
764 | // replace `(ctpop (not x))` with `(sub nuw nsw BitWidth(x), (ctpop x))`. |
// This is only useful if the new subtract can fold, so we only handle the
// following cases:
//    1) (add/sub/disjoint_or C, (ctpop (not x)))
//        -> (add/sub/disjoint_or C', (ctpop x))
//    2) (cmp pred C, (ctpop (not x)))
//        -> (cmp pred C', (ctpop x))
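// Illustrative example for the add case with i8 (constants chosen
// arbitrarily): since (ctpop (not x)) == (sub 8, (ctpop x)),
//   (add i8 10, (ctpop (not %x)))  -->  (sub i8 18, (ctpop %x))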
771 | Instruction *InstCombinerImpl::tryFoldInstWithCtpopWithNot(Instruction *I) { |
772 | unsigned Opc = I->getOpcode(); |
773 | unsigned ConstIdx = 1; |
774 | switch (Opc) { |
775 | default: |
776 | return nullptr; |
// (ctpop (not x)) <-> (sub nuw nsw BitWidth(x), (ctpop x))
// We can fold the BitWidth(x) with add/sub/icmp as long as the other operand
// is constant.
780 | case Instruction::Sub: |
781 | ConstIdx = 0; |
782 | break; |
783 | case Instruction::ICmp: |
// Signed predicates aren't correct in some edge cases (e.g. for i2 types).
// Also, since (ctpop x) is known to lie in [0, BitWidth(x)], almost all signed
// comparisons against it are simplified to unsigned ones.
787 | if (cast<ICmpInst>(Val: I)->isSigned()) |
788 | return nullptr; |
789 | break; |
790 | case Instruction::Or: |
791 | if (!match(V: I, P: m_DisjointOr(L: m_Value(), R: m_Value()))) |
792 | return nullptr; |
793 | [[fallthrough]]; |
794 | case Instruction::Add: |
795 | break; |
796 | } |
797 | |
798 | Value *Op; |
799 | // Find ctpop. |
800 | if (!match(V: I->getOperand(i: 1 - ConstIdx), |
801 | P: m_OneUse(SubPattern: m_Intrinsic<Intrinsic::ctpop>(Op0: m_Value(V&: Op))))) |
802 | return nullptr; |
803 | |
804 | Constant *C; |
805 | // Check other operand is ImmConstant. |
806 | if (!match(V: I->getOperand(i: ConstIdx), P: m_ImmConstant(C))) |
807 | return nullptr; |
808 | |
809 | Type *Ty = Op->getType(); |
810 | Constant *BitWidthC = ConstantInt::get(Ty, V: Ty->getScalarSizeInBits()); |
811 | // Need extra check for icmp. Note if this check is true, it generally means |
812 | // the icmp will simplify to true/false. |
813 | if (Opc == Instruction::ICmp && !cast<ICmpInst>(Val: I)->isEquality()) { |
814 | Constant *Cmp = |
815 | ConstantFoldCompareInstOperands(Predicate: ICmpInst::ICMP_UGT, LHS: C, RHS: BitWidthC, DL); |
816 | if (!Cmp || !Cmp->isZeroValue()) |
817 | return nullptr; |
818 | } |
819 | |
820 | // Check we can invert `(not x)` for free. |
821 | bool Consumes = false; |
822 | if (!isFreeToInvert(V: Op, WillInvertAllUses: Op->hasOneUse(), DoesConsume&: Consumes) || !Consumes) |
823 | return nullptr; |
824 | Value *NotOp = getFreelyInverted(V: Op, WillInvertAllUses: Op->hasOneUse(), Builder: &Builder); |
825 | assert(NotOp != nullptr && |
826 | "Desync between isFreeToInvert and getFreelyInverted" ); |
827 | |
828 | Value *CtpopOfNotOp = Builder.CreateIntrinsic(RetTy: Ty, ID: Intrinsic::ctpop, Args: NotOp); |
829 | |
830 | Value *R = nullptr; |
831 | |
832 | // Do the transformation here to avoid potentially introducing an infinite |
833 | // loop. |
834 | switch (Opc) { |
835 | case Instruction::Sub: |
836 | R = Builder.CreateAdd(LHS: CtpopOfNotOp, RHS: ConstantExpr::getSub(C1: C, C2: BitWidthC)); |
837 | break; |
838 | case Instruction::Or: |
839 | case Instruction::Add: |
840 | R = Builder.CreateSub(LHS: ConstantExpr::getAdd(C1: C, C2: BitWidthC), RHS: CtpopOfNotOp); |
841 | break; |
842 | case Instruction::ICmp: |
843 | R = Builder.CreateICmp(P: cast<ICmpInst>(Val: I)->getSwappedPredicate(), |
844 | LHS: CtpopOfNotOp, RHS: ConstantExpr::getSub(C1: BitWidthC, C2: C)); |
845 | break; |
846 | default: |
847 | llvm_unreachable("Unhandled Opcode" ); |
848 | } |
849 | assert(R != nullptr); |
850 | return replaceInstUsesWith(I&: *I, V: R); |
851 | } |
852 | |
853 | // (Binop1 (Binop2 (logic_shift X, C), C1), (logic_shift Y, C)) |
854 | // IFF |
855 | // 1) the logic_shifts match |
//    2) either BinOp1 is `and`, or the binop/shift pair distributes
//       completely and either BinOp2 is `and` or
//       (logic_shift (inv_logic_shift C1, C), C) == C1
859 | // |
860 | // -> (logic_shift (Binop1 (Binop2 X, inv_logic_shift(C1, C)), Y), C) |
861 | // |
862 | // (Binop1 (Binop2 (logic_shift X, Amt), Mask), (logic_shift Y, Amt)) |
863 | // IFF |
864 | // 1) the logic_shifts match |
865 | // 2) BinOp1 == BinOp2 (if BinOp == `add`, then also requires `shl`). |
866 | // |
867 | // -> (BinOp (logic_shift (BinOp X, Y)), Mask) |
868 | // |
869 | // (Binop1 (Binop2 (arithmetic_shift X, Amt), Mask), (arithmetic_shift Y, Amt)) |
870 | // IFF |
871 | // 1) Binop1 is bitwise logical operator `and`, `or` or `xor` |
872 | // 2) Binop2 is `not` |
873 | // |
874 | // -> (arithmetic_shift Binop1((not X), Y), Amt) |
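// Illustrative instance of the second pattern, assuming the shifts have no
// other uses:
//   (or (or (lshr X, 4), Mask), (lshr Y, 4))
//     -> (or (lshr (or X, Y), 4), Mask)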
875 | |
876 | Instruction *InstCombinerImpl::foldBinOpShiftWithShift(BinaryOperator &I) { |
877 | const DataLayout &DL = I.getDataLayout(); |
878 | auto IsValidBinOpc = [](unsigned Opc) { |
879 | switch (Opc) { |
880 | default: |
881 | return false; |
882 | case Instruction::And: |
883 | case Instruction::Or: |
884 | case Instruction::Xor: |
885 | case Instruction::Add: |
886 | // Skip Sub as we only match constant masks which will canonicalize to use |
887 | // add. |
888 | return true; |
889 | } |
890 | }; |
891 | |
892 | // Check if we can distribute binop arbitrarily. `add` + `lshr` has extra |
893 | // constraints. |
894 | auto IsCompletelyDistributable = [](unsigned BinOpc1, unsigned BinOpc2, |
895 | unsigned ShOpc) { |
896 | assert(ShOpc != Instruction::AShr); |
897 | return (BinOpc1 != Instruction::Add && BinOpc2 != Instruction::Add) || |
898 | ShOpc == Instruction::Shl; |
899 | }; |
900 | |
901 | auto GetInvShift = [](unsigned ShOpc) { |
902 | assert(ShOpc != Instruction::AShr); |
903 | return ShOpc == Instruction::LShr ? Instruction::Shl : Instruction::LShr; |
904 | }; |
905 | |
906 | auto CanDistributeBinops = [&](unsigned BinOpc1, unsigned BinOpc2, |
907 | unsigned ShOpc, Constant *CMask, |
908 | Constant *CShift) { |
909 | // If the BinOp1 is `and` we don't need to check the mask. |
910 | if (BinOpc1 == Instruction::And) |
911 | return true; |
912 | |
// For all other possible transforms we need a completely distributable
// binop/shift (anything but `add` + `lshr`).
915 | if (!IsCompletelyDistributable(BinOpc1, BinOpc2, ShOpc)) |
916 | return false; |
917 | |
918 | // If BinOp2 is `and`, any mask works (this only really helps for non-splat |
919 | // vecs, otherwise the mask will be simplified and the following check will |
920 | // handle it). |
921 | if (BinOpc2 == Instruction::And) |
922 | return true; |
923 | |
924 | // Otherwise, need mask that meets the below requirement. |
925 | // (logic_shift (inv_logic_shift Mask, ShAmt), ShAmt) == Mask |
926 | Constant *MaskInvShift = |
927 | ConstantFoldBinaryOpOperands(Opcode: GetInvShift(ShOpc), LHS: CMask, RHS: CShift, DL); |
928 | return ConstantFoldBinaryOpOperands(Opcode: ShOpc, LHS: MaskInvShift, RHS: CShift, DL) == |
929 | CMask; |
930 | }; |
931 | |
932 | auto MatchBinOp = [&](unsigned ShOpnum) -> Instruction * { |
933 | Constant *CMask, *CShift; |
934 | Value *X, *Y, *ShiftedX, *Mask, *Shift; |
935 | if (!match(V: I.getOperand(i_nocapture: ShOpnum), |
936 | P: m_OneUse(SubPattern: m_Shift(L: m_Value(V&: Y), R: m_Value(V&: Shift))))) |
937 | return nullptr; |
938 | if (!match(V: I.getOperand(i_nocapture: 1 - ShOpnum), |
939 | P: m_BinOp(L: m_Value(V&: ShiftedX), R: m_Value(V&: Mask)))) |
940 | return nullptr; |
941 | |
942 | if (!match(V: ShiftedX, P: m_OneUse(SubPattern: m_Shift(L: m_Value(V&: X), R: m_Specific(V: Shift))))) |
943 | return nullptr; |
944 | |
945 | // Make sure we are matching instruction shifts and not ConstantExpr |
946 | auto *IY = dyn_cast<Instruction>(Val: I.getOperand(i_nocapture: ShOpnum)); |
947 | auto *IX = dyn_cast<Instruction>(Val: ShiftedX); |
948 | if (!IY || !IX) |
949 | return nullptr; |
950 | |
951 | // LHS and RHS need same shift opcode |
952 | unsigned ShOpc = IY->getOpcode(); |
953 | if (ShOpc != IX->getOpcode()) |
954 | return nullptr; |
955 | |
956 | // Make sure binop is real instruction and not ConstantExpr |
957 | auto *BO2 = dyn_cast<Instruction>(Val: I.getOperand(i_nocapture: 1 - ShOpnum)); |
958 | if (!BO2) |
959 | return nullptr; |
960 | |
961 | unsigned BinOpc = BO2->getOpcode(); |
962 | // Make sure we have valid binops. |
963 | if (!IsValidBinOpc(I.getOpcode()) || !IsValidBinOpc(BinOpc)) |
964 | return nullptr; |
965 | |
966 | if (ShOpc == Instruction::AShr) { |
967 | if (Instruction::isBitwiseLogicOp(Opcode: I.getOpcode()) && |
968 | BinOpc == Instruction::Xor && match(V: Mask, P: m_AllOnes())) { |
969 | Value *NotX = Builder.CreateNot(V: X); |
970 | Value *NewBinOp = Builder.CreateBinOp(Opc: I.getOpcode(), LHS: Y, RHS: NotX); |
971 | return BinaryOperator::Create( |
972 | Op: static_cast<Instruction::BinaryOps>(ShOpc), S1: NewBinOp, S2: Shift); |
973 | } |
974 | |
975 | return nullptr; |
976 | } |
977 | |
// If BinOp1 == BinOp2 and it is bitwise, or it is shl with add, then just
// distribute to drop the shift regardless of the constants.
980 | if (BinOpc == I.getOpcode() && |
981 | IsCompletelyDistributable(I.getOpcode(), BinOpc, ShOpc)) { |
982 | Value *NewBinOp2 = Builder.CreateBinOp(Opc: I.getOpcode(), LHS: X, RHS: Y); |
983 | Value *NewBinOp1 = Builder.CreateBinOp( |
984 | Opc: static_cast<Instruction::BinaryOps>(ShOpc), LHS: NewBinOp2, RHS: Shift); |
985 | return BinaryOperator::Create(Op: I.getOpcode(), S1: NewBinOp1, S2: Mask); |
986 | } |
987 | |
988 | // Otherwise we can only distribute by constant shifting the mask, so |
989 | // ensure we have constants. |
990 | if (!match(V: Shift, P: m_ImmConstant(C&: CShift))) |
991 | return nullptr; |
992 | if (!match(V: Mask, P: m_ImmConstant(C&: CMask))) |
993 | return nullptr; |
994 | |
995 | // Check if we can distribute the binops. |
996 | if (!CanDistributeBinops(I.getOpcode(), BinOpc, ShOpc, CMask, CShift)) |
997 | return nullptr; |
998 | |
999 | Constant *NewCMask = |
1000 | ConstantFoldBinaryOpOperands(Opcode: GetInvShift(ShOpc), LHS: CMask, RHS: CShift, DL); |
1001 | Value *NewBinOp2 = Builder.CreateBinOp( |
1002 | Opc: static_cast<Instruction::BinaryOps>(BinOpc), LHS: X, RHS: NewCMask); |
1003 | Value *NewBinOp1 = Builder.CreateBinOp(Opc: I.getOpcode(), LHS: Y, RHS: NewBinOp2); |
1004 | return BinaryOperator::Create(Op: static_cast<Instruction::BinaryOps>(ShOpc), |
1005 | S1: NewBinOp1, S2: CShift); |
1006 | }; |
1007 | |
1008 | if (Instruction *R = MatchBinOp(0)) |
1009 | return R; |
1010 | return MatchBinOp(1); |
1011 | } |
1012 | |
1013 | // (Binop (zext C), (select C, T, F)) |
1014 | // -> (select C, (binop 1, T), (binop 0, F)) |
1015 | // |
1016 | // (Binop (sext C), (select C, T, F)) |
1017 | // -> (select C, (binop -1, T), (binop 0, F)) |
1018 | // |
// Attempt to simplify binary operations into a select with folded args, when
// one operand of the binop is a select instruction and the other operand is a
// zext/sext of the select condition.
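// For example, using the zext form with an i32 add:
//   (add (zext i1 %c), (select %c, i32 %t, i32 %f))
//     -> (select %c, (add 1, %t), (add 0, %f))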
1022 | Instruction * |
1023 | InstCombinerImpl::foldBinOpOfSelectAndCastOfSelectCondition(BinaryOperator &I) { |
1024 | // TODO: this simplification may be extended to any speculatable instruction, |
1025 | // not just binops, and would possibly be handled better in FoldOpIntoSelect. |
1026 | Instruction::BinaryOps Opc = I.getOpcode(); |
1027 | Value *LHS = I.getOperand(i_nocapture: 0), *RHS = I.getOperand(i_nocapture: 1); |
1028 | Value *A, *CondVal, *TrueVal, *FalseVal; |
1029 | Value *CastOp; |
1030 | |
1031 | auto MatchSelectAndCast = [&](Value *CastOp, Value *SelectOp) { |
1032 | return match(V: CastOp, P: m_ZExtOrSExt(Op: m_Value(V&: A))) && |
1033 | A->getType()->getScalarSizeInBits() == 1 && |
1034 | match(V: SelectOp, P: m_Select(C: m_Value(V&: CondVal), L: m_Value(V&: TrueVal), |
1035 | R: m_Value(V&: FalseVal))); |
1036 | }; |
1037 | |
1038 | // Make sure one side of the binop is a select instruction, and the other is a |
// zero/sign extension operating on an i1.
1040 | if (MatchSelectAndCast(LHS, RHS)) |
1041 | CastOp = LHS; |
1042 | else if (MatchSelectAndCast(RHS, LHS)) |
1043 | CastOp = RHS; |
1044 | else |
1045 | return nullptr; |
1046 | |
1047 | auto NewFoldedConst = [&](bool IsTrueArm, Value *V) { |
1048 | bool IsCastOpRHS = (CastOp == RHS); |
1049 | bool IsZExt = isa<ZExtInst>(Val: CastOp); |
1050 | Constant *C; |
1051 | |
1052 | if (IsTrueArm) { |
1053 | C = Constant::getNullValue(Ty: V->getType()); |
1054 | } else if (IsZExt) { |
1055 | unsigned BitWidth = V->getType()->getScalarSizeInBits(); |
1056 | C = Constant::getIntegerValue(Ty: V->getType(), V: APInt(BitWidth, 1)); |
1057 | } else { |
1058 | C = Constant::getAllOnesValue(Ty: V->getType()); |
1059 | } |
1060 | |
1061 | return IsCastOpRHS ? Builder.CreateBinOp(Opc, LHS: V, RHS: C) |
1062 | : Builder.CreateBinOp(Opc, LHS: C, RHS: V); |
1063 | }; |
1064 | |
// If the value used in the zext/sext is the select condition, or the negation
1066 | // of the select condition, the binop can be simplified. |
1067 | if (CondVal == A) { |
1068 | Value *NewTrueVal = NewFoldedConst(false, TrueVal); |
1069 | return SelectInst::Create(C: CondVal, S1: NewTrueVal, |
1070 | S2: NewFoldedConst(true, FalseVal)); |
1071 | } |
1072 | |
1073 | if (match(V: A, P: m_Not(V: m_Specific(V: CondVal)))) { |
1074 | Value *NewTrueVal = NewFoldedConst(true, TrueVal); |
1075 | return SelectInst::Create(C: CondVal, S1: NewTrueVal, |
1076 | S2: NewFoldedConst(false, FalseVal)); |
1077 | } |
1078 | |
1079 | return nullptr; |
1080 | } |
1081 | |
1082 | Value *InstCombinerImpl::tryFactorizationFolds(BinaryOperator &I) { |
1083 | Value *LHS = I.getOperand(i_nocapture: 0), *RHS = I.getOperand(i_nocapture: 1); |
1084 | BinaryOperator *Op0 = dyn_cast<BinaryOperator>(Val: LHS); |
1085 | BinaryOperator *Op1 = dyn_cast<BinaryOperator>(Val: RHS); |
1086 | Instruction::BinaryOps TopLevelOpcode = I.getOpcode(); |
1087 | Value *A, *B, *C, *D; |
1088 | Instruction::BinaryOps LHSOpcode, RHSOpcode; |
1089 | |
1090 | if (Op0) |
1091 | LHSOpcode = getBinOpsForFactorization(TopOpcode: TopLevelOpcode, Op: Op0, LHS&: A, RHS&: B, OtherOp: Op1); |
1092 | if (Op1) |
1093 | RHSOpcode = getBinOpsForFactorization(TopOpcode: TopLevelOpcode, Op: Op1, LHS&: C, RHS&: D, OtherOp: Op0); |
1094 | |
1095 | // The instruction has the form "(A op' B) op (C op' D)". Try to factorize |
1096 | // a common term. |
1097 | if (Op0 && Op1 && LHSOpcode == RHSOpcode) |
1098 | if (Value *V = tryFactorization(I, SQ, Builder, InnerOpcode: LHSOpcode, A, B, C, D)) |
1099 | return V; |
1100 | |
1101 | // The instruction has the form "(A op' B) op (C)". Try to factorize common |
1102 | // term. |
1103 | if (Op0) |
1104 | if (Value *Ident = getIdentityValue(Opcode: LHSOpcode, V: RHS)) |
1105 | if (Value *V = |
1106 | tryFactorization(I, SQ, Builder, InnerOpcode: LHSOpcode, A, B, C: RHS, D: Ident)) |
1107 | return V; |
1108 | |
1109 | // The instruction has the form "(B) op (C op' D)". Try to factorize common |
1110 | // term. |
1111 | if (Op1) |
1112 | if (Value *Ident = getIdentityValue(Opcode: RHSOpcode, V: LHS)) |
1113 | if (Value *V = |
1114 | tryFactorization(I, SQ, Builder, InnerOpcode: RHSOpcode, A: LHS, B: Ident, C, D)) |
1115 | return V; |
1116 | |
1117 | return nullptr; |
1118 | } |
1119 | |
1120 | /// This tries to simplify binary operations which some other binary operation |
1121 | /// distributes over either by factorizing out common terms |
1122 | /// (eg "(A*B)+(A*C)" -> "A*(B+C)") or expanding out if this results in |
1123 | /// simplifications (eg: "A & (B | C) -> (A&B) | (A&C)" if this is a win). |
1124 | /// Returns the simplified value, or null if it didn't simplify. |
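/// For example, expansion turns "(X | 4) & 3" into "(X & 3) | (4 & 3)"; the
/// right half folds to 0, the identity of 'or', so the result is simply
/// "X & 3".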
1125 | Value *InstCombinerImpl::foldUsingDistributiveLaws(BinaryOperator &I) { |
1126 | Value *LHS = I.getOperand(i_nocapture: 0), *RHS = I.getOperand(i_nocapture: 1); |
1127 | BinaryOperator *Op0 = dyn_cast<BinaryOperator>(Val: LHS); |
1128 | BinaryOperator *Op1 = dyn_cast<BinaryOperator>(Val: RHS); |
1129 | Instruction::BinaryOps TopLevelOpcode = I.getOpcode(); |
1130 | |
1131 | // Factorization. |
1132 | if (Value *R = tryFactorizationFolds(I)) |
1133 | return R; |
1134 | |
1135 | // Expansion. |
1136 | if (Op0 && rightDistributesOverLeft(LOp: Op0->getOpcode(), ROp: TopLevelOpcode)) { |
1137 | // The instruction has the form "(A op' B) op C". See if expanding it out |
1138 | // to "(A op C) op' (B op C)" results in simplifications. |
1139 | Value *A = Op0->getOperand(i_nocapture: 0), *B = Op0->getOperand(i_nocapture: 1), *C = RHS; |
1140 | Instruction::BinaryOps InnerOpcode = Op0->getOpcode(); // op' |
1141 | |
1142 | // Disable the use of undef because it's not safe to distribute undef. |
1143 | auto SQDistributive = SQ.getWithInstruction(I: &I).getWithoutUndef(); |
1144 | Value *L = simplifyBinOp(Opcode: TopLevelOpcode, LHS: A, RHS: C, Q: SQDistributive); |
1145 | Value *R = simplifyBinOp(Opcode: TopLevelOpcode, LHS: B, RHS: C, Q: SQDistributive); |
1146 | |
1147 | // Do "A op C" and "B op C" both simplify? |
1148 | if (L && R) { |
1149 | // They do! Return "L op' R". |
1150 | ++NumExpand; |
1151 | C = Builder.CreateBinOp(Opc: InnerOpcode, LHS: L, RHS: R); |
1152 | C->takeName(V: &I); |
1153 | return C; |
1154 | } |
1155 | |
1156 | // Does "A op C" simplify to the identity value for the inner opcode? |
1157 | if (L && L == ConstantExpr::getBinOpIdentity(Opcode: InnerOpcode, Ty: L->getType())) { |
1158 | // They do! Return "B op C". |
1159 | ++NumExpand; |
1160 | C = Builder.CreateBinOp(Opc: TopLevelOpcode, LHS: B, RHS: C); |
1161 | C->takeName(V: &I); |
1162 | return C; |
1163 | } |
1164 | |
1165 | // Does "B op C" simplify to the identity value for the inner opcode? |
1166 | if (R && R == ConstantExpr::getBinOpIdentity(Opcode: InnerOpcode, Ty: R->getType())) { |
// It does! Return "A op C".
1168 | ++NumExpand; |
1169 | C = Builder.CreateBinOp(Opc: TopLevelOpcode, LHS: A, RHS: C); |
1170 | C->takeName(V: &I); |
1171 | return C; |
1172 | } |
1173 | } |
1174 | |
1175 | if (Op1 && leftDistributesOverRight(LOp: TopLevelOpcode, ROp: Op1->getOpcode())) { |
1176 | // The instruction has the form "A op (B op' C)". See if expanding it out |
1177 | // to "(A op B) op' (A op C)" results in simplifications. |
1178 | Value *A = LHS, *B = Op1->getOperand(i_nocapture: 0), *C = Op1->getOperand(i_nocapture: 1); |
1179 | Instruction::BinaryOps InnerOpcode = Op1->getOpcode(); // op' |
1180 | |
1181 | // Disable the use of undef because it's not safe to distribute undef. |
1182 | auto SQDistributive = SQ.getWithInstruction(I: &I).getWithoutUndef(); |
1183 | Value *L = simplifyBinOp(Opcode: TopLevelOpcode, LHS: A, RHS: B, Q: SQDistributive); |
1184 | Value *R = simplifyBinOp(Opcode: TopLevelOpcode, LHS: A, RHS: C, Q: SQDistributive); |
1185 | |
1186 | // Do "A op B" and "A op C" both simplify? |
1187 | if (L && R) { |
1188 | // They do! Return "L op' R". |
1189 | ++NumExpand; |
1190 | A = Builder.CreateBinOp(Opc: InnerOpcode, LHS: L, RHS: R); |
1191 | A->takeName(V: &I); |
1192 | return A; |
1193 | } |
1194 | |
1195 | // Does "A op B" simplify to the identity value for the inner opcode? |
1196 | if (L && L == ConstantExpr::getBinOpIdentity(Opcode: InnerOpcode, Ty: L->getType())) { |
// It does! Return "A op C".
1198 | ++NumExpand; |
1199 | A = Builder.CreateBinOp(Opc: TopLevelOpcode, LHS: A, RHS: C); |
1200 | A->takeName(V: &I); |
1201 | return A; |
1202 | } |
1203 | |
1204 | // Does "A op C" simplify to the identity value for the inner opcode? |
1205 | if (R && R == ConstantExpr::getBinOpIdentity(Opcode: InnerOpcode, Ty: R->getType())) { |
// It does! Return "A op B".
1207 | ++NumExpand; |
1208 | A = Builder.CreateBinOp(Opc: TopLevelOpcode, LHS: A, RHS: B); |
1209 | A->takeName(V: &I); |
1210 | return A; |
1211 | } |
1212 | } |
1213 | |
1214 | return SimplifySelectsFeedingBinaryOp(I, LHS, RHS); |
1215 | } |
1216 | |
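/// Match two phi nodes from the same block whose incoming values are, for
/// every predecessor, either the same pair (L, R) or the swapped pair (R, L).
/// On success, return the (L, R) pair taken from the first incoming values.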
1217 | static std::optional<std::pair<Value *, Value *>> |
1218 | matchSymmetricPhiNodesPair(PHINode *LHS, PHINode *RHS) { |
1219 | if (LHS->getParent() != RHS->getParent()) |
1220 | return std::nullopt; |
1221 | |
1222 | if (LHS->getNumIncomingValues() < 2) |
1223 | return std::nullopt; |
1224 | |
1225 | if (!equal(LRange: LHS->blocks(), RRange: RHS->blocks())) |
1226 | return std::nullopt; |
1227 | |
1228 | Value *L0 = LHS->getIncomingValue(i: 0); |
1229 | Value *R0 = RHS->getIncomingValue(i: 0); |
1230 | |
1231 | for (unsigned I = 1, E = LHS->getNumIncomingValues(); I != E; ++I) { |
1232 | Value *L1 = LHS->getIncomingValue(i: I); |
1233 | Value *R1 = RHS->getIncomingValue(i: I); |
1234 | |
1235 | if ((L0 == L1 && R0 == R1) || (L0 == R1 && R0 == L1)) |
1236 | continue; |
1237 | |
1238 | return std::nullopt; |
1239 | } |
1240 | |
1241 | return std::optional(std::pair(L0, R0)); |
1242 | } |
1243 | |
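/// Try to view LHS and RHS as two "symmetric" results built from one
/// underlying pair of values: matching phis (see above), a select and the
/// same select with its arms swapped, or a min/max and the corresponding
/// max/min of the same operands. Returns that underlying value pair if found.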
1244 | std::optional<std::pair<Value *, Value *>> |
1245 | InstCombinerImpl::matchSymmetricPair(Value *LHS, Value *RHS) { |
1246 | Instruction *LHSInst = dyn_cast<Instruction>(Val: LHS); |
1247 | Instruction *RHSInst = dyn_cast<Instruction>(Val: RHS); |
1248 | if (!LHSInst || !RHSInst || LHSInst->getOpcode() != RHSInst->getOpcode()) |
1249 | return std::nullopt; |
1250 | switch (LHSInst->getOpcode()) { |
1251 | case Instruction::PHI: |
1252 | return matchSymmetricPhiNodesPair(LHS: cast<PHINode>(Val: LHS), RHS: cast<PHINode>(Val: RHS)); |
1253 | case Instruction::Select: { |
1254 | Value *Cond = LHSInst->getOperand(i: 0); |
1255 | Value *TrueVal = LHSInst->getOperand(i: 1); |
1256 | Value *FalseVal = LHSInst->getOperand(i: 2); |
1257 | if (Cond == RHSInst->getOperand(i: 0) && TrueVal == RHSInst->getOperand(i: 2) && |
1258 | FalseVal == RHSInst->getOperand(i: 1)) |
1259 | return std::pair(TrueVal, FalseVal); |
1260 | return std::nullopt; |
1261 | } |
1262 | case Instruction::Call: { |
1263 | // Match min(a, b) and max(a, b) |
1264 | MinMaxIntrinsic *LHSMinMax = dyn_cast<MinMaxIntrinsic>(Val: LHSInst); |
1265 | MinMaxIntrinsic *RHSMinMax = dyn_cast<MinMaxIntrinsic>(Val: RHSInst); |
1266 | if (LHSMinMax && RHSMinMax && |
1267 | LHSMinMax->getPredicate() == |
1268 | ICmpInst::getSwappedPredicate(pred: RHSMinMax->getPredicate()) && |
1269 | ((LHSMinMax->getLHS() == RHSMinMax->getLHS() && |
1270 | LHSMinMax->getRHS() == RHSMinMax->getRHS()) || |
1271 | (LHSMinMax->getLHS() == RHSMinMax->getRHS() && |
1272 | LHSMinMax->getRHS() == RHSMinMax->getLHS()))) |
1273 | return std::pair(LHSMinMax->getLHS(), LHSMinMax->getRHS()); |
1274 | return std::nullopt; |
1275 | } |
1276 | default: |
1277 | return std::nullopt; |
1278 | } |
1279 | } |
1280 | |
1281 | Value *InstCombinerImpl::SimplifySelectsFeedingBinaryOp(BinaryOperator &I, |
1282 | Value *LHS, |
1283 | Value *RHS) { |
1284 | Value *A, *B, *C, *D, *E, *F; |
1285 | bool LHSIsSelect = match(V: LHS, P: m_Select(C: m_Value(V&: A), L: m_Value(V&: B), R: m_Value(V&: C))); |
1286 | bool RHSIsSelect = match(V: RHS, P: m_Select(C: m_Value(V&: D), L: m_Value(V&: E), R: m_Value(V&: F))); |
1287 | if (!LHSIsSelect && !RHSIsSelect) |
1288 | return nullptr; |
1289 | |
1290 | FastMathFlags FMF; |
1291 | BuilderTy::FastMathFlagGuard Guard(Builder); |
1292 | if (isa<FPMathOperator>(Val: &I)) { |
1293 | FMF = I.getFastMathFlags(); |
1294 | Builder.setFastMathFlags(FMF); |
1295 | } |
1296 | |
1297 | Instruction::BinaryOps Opcode = I.getOpcode(); |
1298 | SimplifyQuery Q = SQ.getWithInstruction(I: &I); |
1299 | |
1300 | Value *Cond, *True = nullptr, *False = nullptr; |
1301 | |
1302 | // Special-case for add/negate combination. Replace the zero in the negation |
1303 | // with the trailing add operand: |
1304 | // (Cond ? TVal : -N) + Z --> Cond ? True : (Z - N) |
1305 | // (Cond ? -N : FVal) + Z --> Cond ? (Z - N) : False |
1306 | auto foldAddNegate = [&](Value *TVal, Value *FVal, Value *Z) -> Value * { |
1307 | // We need an 'add' and exactly 1 arm of the select to have been simplified. |
1308 | if (Opcode != Instruction::Add || (!True && !False) || (True && False)) |
1309 | return nullptr; |
1310 | |
1311 | Value *N; |
1312 | if (True && match(V: FVal, P: m_Neg(V: m_Value(V&: N)))) { |
1313 | Value *Sub = Builder.CreateSub(LHS: Z, RHS: N); |
1314 | return Builder.CreateSelect(C: Cond, True, False: Sub, Name: I.getName()); |
1315 | } |
1316 | if (False && match(V: TVal, P: m_Neg(V: m_Value(V&: N)))) { |
1317 | Value *Sub = Builder.CreateSub(LHS: Z, RHS: N); |
1318 | return Builder.CreateSelect(C: Cond, True: Sub, False, Name: I.getName()); |
1319 | } |
1320 | return nullptr; |
1321 | }; |
1322 | |
1323 | if (LHSIsSelect && RHSIsSelect && A == D) { |
1324 | // (A ? B : C) op (A ? E : F) -> A ? (B op E) : (C op F) |
1325 | Cond = A; |
1326 | True = simplifyBinOp(Opcode, LHS: B, RHS: E, FMF, Q); |
1327 | False = simplifyBinOp(Opcode, LHS: C, RHS: F, FMF, Q); |
1328 | |
1329 | if (LHS->hasOneUse() && RHS->hasOneUse()) { |
1330 | if (False && !True) |
1331 | True = Builder.CreateBinOp(Opc: Opcode, LHS: B, RHS: E); |
1332 | else if (True && !False) |
1333 | False = Builder.CreateBinOp(Opc: Opcode, LHS: C, RHS: F); |
1334 | } |
1335 | } else if (LHSIsSelect && LHS->hasOneUse()) { |
1336 | // (A ? B : C) op Y -> A ? (B op Y) : (C op Y) |
1337 | Cond = A; |
1338 | True = simplifyBinOp(Opcode, LHS: B, RHS, FMF, Q); |
1339 | False = simplifyBinOp(Opcode, LHS: C, RHS, FMF, Q); |
1340 | if (Value *NewSel = foldAddNegate(B, C, RHS)) |
1341 | return NewSel; |
1342 | } else if (RHSIsSelect && RHS->hasOneUse()) { |
1343 | // X op (D ? E : F) -> D ? (X op E) : (X op F) |
1344 | Cond = D; |
1345 | True = simplifyBinOp(Opcode, LHS, RHS: E, FMF, Q); |
1346 | False = simplifyBinOp(Opcode, LHS, RHS: F, FMF, Q); |
1347 | if (Value *NewSel = foldAddNegate(E, F, LHS)) |
1348 | return NewSel; |
1349 | } |
1350 | |
1351 | if (!True || !False) |
1352 | return nullptr; |
1353 | |
1354 | Value *SI = Builder.CreateSelect(C: Cond, True, False); |
1355 | SI->takeName(V: &I); |
1356 | return SI; |
1357 | } |
1358 | |
1359 | /// Freely adapt every user of V as if V were changed to !V.
1360 | /// WARNING: only call this if canFreelyInvertAllUsersOf() said it can be done.
1361 | void InstCombinerImpl::freelyInvertAllUsersOf(Value *I, Value *IgnoredUser) { |
1362 | assert(!isa<Constant>(I) && "Shouldn't invert users of constant");
1363 | for (User *U : make_early_inc_range(Range: I->users())) { |
1364 | if (U == IgnoredUser) |
1365 | continue; // Don't consider this user. |
1366 | switch (cast<Instruction>(Val: U)->getOpcode()) { |
1367 | case Instruction::Select: { |
1368 | auto *SI = cast<SelectInst>(Val: U); |
1369 | SI->swapValues(); |
1370 | SI->swapProfMetadata(); |
1371 | break; |
1372 | } |
1373 | case Instruction::Br: { |
1374 | BranchInst *BI = cast<BranchInst>(Val: U); |
1375 | BI->swapSuccessors(); // swaps prof metadata too |
1376 | if (BPI) |
1377 | BPI->swapSuccEdgesProbabilities(Src: BI->getParent()); |
1378 | break; |
1379 | } |
1380 | case Instruction::Xor: |
1381 | replaceInstUsesWith(I&: cast<Instruction>(Val&: *U), V: I); |
1382 | // Add to worklist for DCE. |
1383 | addToWorklist(I: cast<Instruction>(Val: U)); |
1384 | break; |
1385 | default: |
1386 | llvm_unreachable("Got unexpected user - out of sync with " |
1387 | "canFreelyInvertAllUsersOf() ?" ); |
1388 | } |
1389 | } |
1390 | } |
1391 | |
1392 | /// Given a 'sub' instruction, return the RHS of the instruction if the LHS is a |
1393 | /// constant zero (which is the 'negate' form). |
1394 | Value *InstCombinerImpl::dyn_castNegVal(Value *V) const { |
1395 | Value *NegV; |
1396 | if (match(V, P: m_Neg(V: m_Value(V&: NegV)))) |
1397 | return NegV; |
1398 | |
1399 | // Constants can be considered to be negated values if they can be folded. |
1400 | if (ConstantInt *C = dyn_cast<ConstantInt>(Val: V)) |
1401 | return ConstantExpr::getNeg(C); |
1402 | |
1403 | if (ConstantDataVector *C = dyn_cast<ConstantDataVector>(Val: V)) |
1404 | if (C->getType()->getElementType()->isIntegerTy()) |
1405 | return ConstantExpr::getNeg(C); |
1406 | |
1407 | if (ConstantVector *CV = dyn_cast<ConstantVector>(Val: V)) { |
1408 | for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) { |
1409 | Constant *Elt = CV->getAggregateElement(Elt: i); |
1410 | if (!Elt) |
1411 | return nullptr; |
1412 | |
1413 | if (isa<UndefValue>(Val: Elt)) |
1414 | continue; |
1415 | |
1416 | if (!isa<ConstantInt>(Val: Elt)) |
1417 | return nullptr; |
1418 | } |
1419 | return ConstantExpr::getNeg(C: CV); |
1420 | } |
1421 | |
1422 | // Negate integer vector splats. |
1423 | if (auto *CV = dyn_cast<Constant>(Val: V)) |
1424 | if (CV->getType()->isVectorTy() && |
1425 | CV->getType()->getScalarType()->isIntegerTy() && CV->getSplatValue()) |
1426 | return ConstantExpr::getNeg(C: CV); |
1427 | |
1428 | return nullptr; |
1429 | } |
1430 | |
1431 | // Try to fold: |
1432 | // 1) (fp_binop ({s|u}itofp x), ({s|u}itofp y)) |
1433 | // -> ({s|u}itofp (int_binop x, y)) |
1434 | // 2) (fp_binop ({s|u}itofp x), FpC) |
1435 | // -> ({s|u}itofp (int_binop x, (fpto{s|u}i FpC))) |
1436 | // |
1437 | // Assuming the sign of the cast for x/y is `OpsFromSigned`. |
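//
// For example (illustrative; the transform only fires when the exactness and
// overflow checks below succeed):
//   fadd (sitofp i16 %x), (sitofp i16 %y) --> sitofp (add nsw i16 %x, %y)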
1438 | Instruction *InstCombinerImpl::foldFBinOpOfIntCastsFromSign( |
1439 | BinaryOperator &BO, bool OpsFromSigned, std::array<Value *, 2> IntOps, |
1440 | Constant *Op1FpC, SmallVectorImpl<WithCache<const Value *>> &OpsKnown) { |
1441 | |
1442 | Type *FPTy = BO.getType(); |
1443 | Type *IntTy = IntOps[0]->getType(); |
1444 | |
1445 | unsigned IntSz = IntTy->getScalarSizeInBits(); |
1446 | // This is the maximum number of bits the integer can use for the int -> fp
1447 | // casts to be exact.
1448 | unsigned MaxRepresentableBits = |
1449 | APFloat::semanticsPrecision(FPTy->getScalarType()->getFltSemantics()); |
1450 | |
1451 | // Preserve the known number of leading bits. This can allow us to trivially
1452 | // pass the nsw/nuw checks later on.
1453 | unsigned NumUsedLeadingBits[2] = {IntSz, IntSz}; |
1454 | |
1455 | // NB: This only comes up if OpsFromSigned is true, so there is no need to |
1456 | // cache it between calls to `foldFBinOpOfIntCastsFromSign`.
1457 | auto IsNonZero = [&](unsigned OpNo) -> bool { |
1458 | if (OpsKnown[OpNo].hasKnownBits() && |
1459 | OpsKnown[OpNo].getKnownBits(Q: SQ).isNonZero()) |
1460 | return true; |
1461 | return isKnownNonZero(V: IntOps[OpNo], Q: SQ); |
1462 | }; |
1463 | |
1464 | auto IsNonNeg = [&](unsigned OpNo) -> bool { |
1465 | // NB: This matches the impl in ValueTracking, we just try to use cached |
1466 | // knownbits here. If we ever start supporting WithCache for |
1467 | // `isKnownNonNegative`, change this to an explicit call. |
1468 | return OpsKnown[OpNo].getKnownBits(Q: SQ).isNonNegative(); |
1469 | }; |
1470 | |
1471 | // Check if we know for certain that ({s|u}itofp op) is exact. |
1472 | auto IsValidPromotion = [&](unsigned OpNo) -> bool { |
1473 | // Can we treat this operand as the desired sign? |
1474 | if (OpsFromSigned != isa<SIToFPInst>(Val: BO.getOperand(i_nocapture: OpNo)) && |
1475 | !IsNonNeg(OpNo)) |
1476 | return false; |
1477 | |
1478 | // If fp precision >= bitwidth(op) then it's exact.
1479 | // NB: This is slightly conservative for `sitofp`. For signed conversion, we |
1480 | // can handle `MaxRepresentableBits == IntSz - 1` as the sign bit will be |
1481 | // handled specially. We can't, however, increase the bound arbitrarily for |
1482 | // `sitofp` as for larger sizes, it won't sign extend. |
1483 | if (MaxRepresentableBits < IntSz) { |
1484 | // Otherwise, if it is a signed cast, check that fp precision >= bitwidth(op) -
1485 | // numSignBits(op). |
1486 | // TODO: If we add support for `WithCache` in `ComputeNumSignBits`, change |
1487 | // `IntOps[OpNo]` arguments to `KnownOps[OpNo]`. |
1488 | if (OpsFromSigned) |
1489 | NumUsedLeadingBits[OpNo] = IntSz - ComputeNumSignBits(Op: IntOps[OpNo]); |
1490 | // Finally, for unsigned, check that fp precision >= bitwidth(op) -
1491 | // numLeadingZeros(op). |
1492 | else { |
1493 | NumUsedLeadingBits[OpNo] = |
1494 | IntSz - OpsKnown[OpNo].getKnownBits(Q: SQ).countMinLeadingZeros(); |
1495 | } |
1496 | } |
1497 | // NB: We could also check if op is known to be a power of 2 or zero (which |
1498 | // will always be representable). It's unlikely, however, that if we are
1499 | // unable to bound op in any way, we will be able to pass the overflow checks
1500 | // later on. |
1501 | |
1502 | if (MaxRepresentableBits < NumUsedLeadingBits[OpNo]) |
1503 | return false; |
1504 | // Signed + Mul also requires that op is non-zero to avoid -0 cases. |
1505 | return !OpsFromSigned || BO.getOpcode() != Instruction::FMul || |
1506 | IsNonZero(OpNo); |
1507 | }; |
1508 | |
1509 | // If we have a constant rhs, see if we can losslessly convert it to an int. |
1510 | if (Op1FpC != nullptr) { |
1511 | // Signed + Mul requires the operand to be non-zero.
1512 | if (OpsFromSigned && BO.getOpcode() == Instruction::FMul && |
1513 | !match(V: Op1FpC, P: m_NonZeroFP())) |
1514 | return nullptr; |
1515 | |
1516 | Constant *Op1IntC = ConstantFoldCastOperand( |
1517 | Opcode: OpsFromSigned ? Instruction::FPToSI : Instruction::FPToUI, C: Op1FpC, |
1518 | DestTy: IntTy, DL); |
1519 | if (Op1IntC == nullptr) |
1520 | return nullptr; |
1521 | if (ConstantFoldCastOperand(Opcode: OpsFromSigned ? Instruction::SIToFP |
1522 | : Instruction::UIToFP, |
1523 | C: Op1IntC, DestTy: FPTy, DL) != Op1FpC) |
1524 | return nullptr; |
1525 | |
1526 | // First try to keep sign of cast the same. |
1527 | IntOps[1] = Op1IntC; |
1528 | } |
1529 | |
1530 | // Ensure lhs/rhs integer types match. |
1531 | if (IntTy != IntOps[1]->getType()) |
1532 | return nullptr; |
1533 | |
1534 | if (Op1FpC == nullptr) { |
1535 | if (!IsValidPromotion(1)) |
1536 | return nullptr; |
1537 | } |
1538 | if (!IsValidPromotion(0)) |
1539 | return nullptr; |
1540 | |
1541 | // Finally, we check that the integer version of the binop will not overflow.
1542 | BinaryOperator::BinaryOps IntOpc; |
1543 | // Because of the precision check, we can often rule out overflows. |
1544 | bool NeedsOverflowCheck = true; |
1545 | // Try to conservatively rule out overflow based on the precision checks
1546 | // already done.
1547 | unsigned OverflowMaxOutputBits = OpsFromSigned ? 2 : 1; |
1548 | unsigned OverflowMaxCurBits = |
1549 | std::max(a: NumUsedLeadingBits[0], b: NumUsedLeadingBits[1]); |
1550 | bool OutputSigned = OpsFromSigned; |
1551 | switch (BO.getOpcode()) { |
1552 | case Instruction::FAdd: |
1553 | IntOpc = Instruction::Add; |
1554 | OverflowMaxOutputBits += OverflowMaxCurBits; |
1555 | break; |
1556 | case Instruction::FSub: |
1557 | IntOpc = Instruction::Sub; |
1558 | OverflowMaxOutputBits += OverflowMaxCurBits; |
1559 | break; |
1560 | case Instruction::FMul: |
1561 | IntOpc = Instruction::Mul; |
1562 | OverflowMaxOutputBits += OverflowMaxCurBits * 2; |
1563 | break; |
1564 | default: |
1565 | llvm_unreachable("Unsupported binop" ); |
1566 | } |
1567 | // The precision check may have already ruled out overflow. |
1568 | if (OverflowMaxOutputBits < IntSz) { |
1569 | NeedsOverflowCheck = false; |
1570 | // An unsigned sub may wrap below zero, but its result still fits in the
1571 | // signed range (this is what allows us to avoid the overflow check for sub).
1572 | if (IntOpc == Instruction::Sub) |
1573 | OutputSigned = true; |
1574 | } |
1575 | |
1576 | // The precision check did not rule out overflow, so we need to check explicitly.
1577 | // TODO: If we add support for `WithCache` in `willNotOverflow`, change |
1578 | // `IntOps[...]` arguments to `KnownOps[...]`. |
1579 | if (NeedsOverflowCheck && |
1580 | !willNotOverflow(Opcode: IntOpc, LHS: IntOps[0], RHS: IntOps[1], CxtI: BO, IsSigned: OutputSigned)) |
1581 | return nullptr; |
1582 | |
1583 | Value *IntBinOp = Builder.CreateBinOp(Opc: IntOpc, LHS: IntOps[0], RHS: IntOps[1]); |
1584 | if (auto *IntBO = dyn_cast<BinaryOperator>(Val: IntBinOp)) { |
1585 | IntBO->setHasNoSignedWrap(OutputSigned); |
1586 | IntBO->setHasNoUnsignedWrap(!OutputSigned); |
1587 | } |
1588 | if (OutputSigned) |
1589 | return new SIToFPInst(IntBinOp, FPTy); |
1590 | return new UIToFPInst(IntBinOp, FPTy); |
1591 | } |
1592 | |
1593 | // Try to fold: |
1594 | // 1) (fp_binop ({s|u}itofp x), ({s|u}itofp y)) |
1595 | // -> ({s|u}itofp (int_binop x, y)) |
1596 | // 2) (fp_binop ({s|u}itofp x), FpC) |
1597 | // -> ({s|u}itofp (int_binop x, (fpto{s|u}i FpC))) |
1598 | Instruction *InstCombinerImpl::foldFBinOpOfIntCasts(BinaryOperator &BO) { |
1599 | std::array<Value *, 2> IntOps = {nullptr, nullptr}; |
1600 | Constant *Op1FpC = nullptr; |
1601 | // Check for: |
1602 | // 1) (binop ({s|u}itofp x), ({s|u}itofp y)) |
1603 | // 2) (binop ({s|u}itofp x), FpC) |
1604 | if (!match(V: BO.getOperand(i_nocapture: 0), P: m_SIToFP(Op: m_Value(V&: IntOps[0]))) && |
1605 | !match(V: BO.getOperand(i_nocapture: 0), P: m_UIToFP(Op: m_Value(V&: IntOps[0])))) |
1606 | return nullptr; |
1607 | |
1608 | if (!match(V: BO.getOperand(i_nocapture: 1), P: m_Constant(C&: Op1FpC)) && |
1609 | !match(V: BO.getOperand(i_nocapture: 1), P: m_SIToFP(Op: m_Value(V&: IntOps[1]))) && |
1610 | !match(V: BO.getOperand(i_nocapture: 1), P: m_UIToFP(Op: m_Value(V&: IntOps[1])))) |
1611 | return nullptr; |
1612 | |
1613 | // Cache the KnownBits to potentially save some repeated analysis.
1614 | SmallVector<WithCache<const Value *>, 2> OpsKnown = {IntOps[0], IntOps[1]}; |
1615 | |
1616 | // Try treating x/y as coming from both `uitofp` and `sitofp`. There are |
1617 | // different constraints depending on the sign of the cast. |
1618 | // NB: `(uitofp nneg X)` == `(sitofp nneg X)`. |
1619 | if (Instruction *R = foldFBinOpOfIntCastsFromSign(BO, /*OpsFromSigned=*/false, |
1620 | IntOps, Op1FpC, OpsKnown)) |
1621 | return R; |
1622 | return foldFBinOpOfIntCastsFromSign(BO, /*OpsFromSigned=*/true, IntOps, |
1623 | Op1FpC, OpsKnown); |
1624 | } |
1625 | |
1626 | /// A binop with a constant operand and a sign-extended boolean operand may be |
1627 | /// converted into a select of constants by applying the binary operation to |
1628 | /// the constant with the two possible values of the extended boolean (0 or -1). |
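/// For example (illustrative):
///   %s = sext i1 %b to i32
///   %r = and i32 %s, 42
/// -->
///   %r = select i1 %b, i32 42, i32 0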
1629 | Instruction *InstCombinerImpl::foldBinopOfSextBoolToSelect(BinaryOperator &BO) { |
1630 | // TODO: Handle non-commutative binop (constant is operand 0). |
1631 | // TODO: Handle zext. |
1632 | // TODO: Peek through 'not' of cast. |
1633 | Value *BO0 = BO.getOperand(i_nocapture: 0); |
1634 | Value *BO1 = BO.getOperand(i_nocapture: 1); |
1635 | Value *X; |
1636 | Constant *C; |
1637 | if (!match(V: BO0, P: m_SExt(Op: m_Value(V&: X))) || !match(V: BO1, P: m_ImmConstant(C)) || |
1638 | !X->getType()->isIntOrIntVectorTy(BitWidth: 1)) |
1639 | return nullptr; |
1640 | |
1641 | // bo (sext i1 X), C --> select X, (bo -1, C), (bo 0, C) |
1642 | Constant *Ones = ConstantInt::getAllOnesValue(Ty: BO.getType()); |
1643 | Constant *Zero = ConstantInt::getNullValue(Ty: BO.getType()); |
1644 | Value *TVal = Builder.CreateBinOp(Opc: BO.getOpcode(), LHS: Ones, RHS: C); |
1645 | Value *FVal = Builder.CreateBinOp(Opc: BO.getOpcode(), LHS: Zero, RHS: C); |
1646 | return SelectInst::Create(C: X, S1: TVal, S2: FVal); |
1647 | } |
1648 | |
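// Try to constant fold instruction I with its select operand SI replaced by
// the constant on the chosen arm (IsTrueArm). Any other operand must either
// already be a constant, or be pinned to a constant by the select condition
// (an icmp eq/ne of that operand against a constant). Returns null otherwise.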
1649 | static Constant *constantFoldOperationIntoSelectOperand(Instruction &I, |
1650 | SelectInst *SI, |
1651 | bool IsTrueArm) { |
1652 | SmallVector<Constant *> ConstOps; |
1653 | for (Value *Op : I.operands()) { |
1654 | CmpInst::Predicate Pred; |
1655 | Constant *C = nullptr; |
1656 | if (Op == SI) { |
1657 | C = dyn_cast<Constant>(Val: IsTrueArm ? SI->getTrueValue() |
1658 | : SI->getFalseValue()); |
1659 | } else if (match(V: SI->getCondition(), |
1660 | P: m_ICmp(Pred, L: m_Specific(V: Op), R: m_Constant(C))) && |
1661 | Pred == (IsTrueArm ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE) && |
1662 | isGuaranteedNotToBeUndefOrPoison(V: C)) { |
1663 | // Pass |
1664 | } else { |
1665 | C = dyn_cast<Constant>(Val: Op); |
1666 | } |
1667 | if (C == nullptr) |
1668 | return nullptr; |
1669 | |
1670 | ConstOps.push_back(Elt: C); |
1671 | } |
1672 | |
1673 | return ConstantFoldInstOperands(I: &I, Ops: ConstOps, DL: I.getDataLayout()); |
1674 | } |
1675 | |
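// Create a copy of instruction I with its select operand SI replaced by NewOp,
// dropping UB-implying attributes/metadata that may no longer hold, and insert
// the copy right before the select.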
1676 | static Value *foldOperationIntoSelectOperand(Instruction &I, SelectInst *SI, |
1677 | Value *NewOp, InstCombiner &IC) { |
1678 | Instruction *Clone = I.clone(); |
1679 | Clone->replaceUsesOfWith(From: SI, To: NewOp); |
1680 | Clone->dropUBImplyingAttrsAndMetadata(); |
1681 | IC.InsertNewInstBefore(New: Clone, Old: SI->getIterator()); |
1682 | return Clone; |
1683 | } |
1684 | |
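// Fold an operation into a select by applying it to both arms of the select,
// e.g. (illustrative):
//   %s = select i1 %c, i32 4, i32 8
//   %r = shl i32 1, %s
// -->
//   %r = select i1 %c, i32 16, i32 256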
1685 | Instruction *InstCombinerImpl::FoldOpIntoSelect(Instruction &Op, SelectInst *SI, |
1686 | bool FoldWithMultiUse) { |
1687 | // Don't modify shared select instructions unless FoldWithMultiUse is set.
1688 | if (!SI->hasOneUse() && !FoldWithMultiUse) |
1689 | return nullptr; |
1690 | |
1691 | Value *TV = SI->getTrueValue(); |
1692 | Value *FV = SI->getFalseValue(); |
1693 | if (!(isa<Constant>(Val: TV) || isa<Constant>(Val: FV))) |
1694 | return nullptr; |
1695 | |
1696 | // Bool selects with constant operands can be folded to logical ops. |
1697 | if (SI->getType()->isIntOrIntVectorTy(BitWidth: 1)) |
1698 | return nullptr; |
1699 | |
1700 | // Test if an FCmpInst instruction is used exclusively by a select as
1701 | // part of a minimum or maximum operation. If so, refrain from doing |
1702 | // any other folding. This helps out other analyses which understand |
1703 | // non-obfuscated minimum and maximum idioms. And in this case, at |
1704 | // least one of the comparison operands has at least one user besides |
1705 | // the compare (the select), which would often largely negate the |
1706 | // benefit of folding anyway. |
1707 | if (auto *CI = dyn_cast<FCmpInst>(Val: SI->getCondition())) { |
1708 | if (CI->hasOneUse()) { |
1709 | Value *Op0 = CI->getOperand(i_nocapture: 0), *Op1 = CI->getOperand(i_nocapture: 1); |
1710 | if ((TV == Op0 && FV == Op1) || (FV == Op0 && TV == Op1)) |
1711 | return nullptr; |
1712 | } |
1713 | } |
1714 | |
1715 | // Make sure that one of the select arms constant folds successfully. |
1716 | Value *NewTV = constantFoldOperationIntoSelectOperand(I&: Op, SI, /*IsTrueArm*/ true); |
1717 | Value *NewFV = constantFoldOperationIntoSelectOperand(I&: Op, SI, /*IsTrueArm*/ false); |
1718 | if (!NewTV && !NewFV) |
1719 | return nullptr; |
1720 | |
1721 | // Create an instruction for the arm that did not fold. |
1722 | if (!NewTV) |
1723 | NewTV = foldOperationIntoSelectOperand(I&: Op, SI, NewOp: TV, IC&: *this); |
1724 | if (!NewFV) |
1725 | NewFV = foldOperationIntoSelectOperand(I&: Op, SI, NewOp: FV, IC&: *this); |
1726 | return SelectInst::Create(C: SI->getCondition(), S1: NewTV, S2: NewFV, NameStr: "" , InsertBefore: nullptr, MDFrom: SI); |
1727 | } |
1728 | |
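// Try to simplify instruction I under the assumption that its PHI operand PN
// takes the value InValue incoming from block InBB (with all other operands
// phi-translated into InBB). May also return a constant implied by InBB's
// conditional branch. Returns null if no simplification is found.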
1729 | static Value *simplifyInstructionWithPHI(Instruction &I, PHINode *PN, |
1730 | Value *InValue, BasicBlock *InBB, |
1731 | const DataLayout &DL, |
1732 | const SimplifyQuery SQ) { |
1733 | // NB: It is a precondition of this transform that the operands be |
1734 | // phi translatable! This is usually trivially satisfied by limiting it |
1735 | // to constant ops, and for selects we do a more sophisticated check. |
1736 | SmallVector<Value *> Ops; |
1737 | for (Value *Op : I.operands()) { |
1738 | if (Op == PN) |
1739 | Ops.push_back(Elt: InValue); |
1740 | else |
1741 | Ops.push_back(Elt: Op->DoPHITranslation(CurBB: PN->getParent(), PredBB: InBB)); |
1742 | } |
1743 | |
1744 | // Don't consider the simplification successful if we get back a constant |
1745 | // expression. That's just an instruction in hiding. |
1746 | // Also reject the case where we simplify back to the phi node. We wouldn't |
1747 | // be able to remove it in that case. |
1748 | Value *NewVal = simplifyInstructionWithOperands( |
1749 | I: &I, NewOps: Ops, Q: SQ.getWithInstruction(I: InBB->getTerminator())); |
1750 | if (NewVal && NewVal != PN && !match(V: NewVal, P: m_ConstantExpr())) |
1751 | return NewVal; |
1752 | |
1753 | // Check if incoming PHI value can be replaced with constant |
1754 | // based on implied condition. |
1755 | BranchInst *TerminatorBI = dyn_cast<BranchInst>(Val: InBB->getTerminator()); |
1756 | const ICmpInst *ICmp = dyn_cast<ICmpInst>(Val: &I); |
1757 | if (TerminatorBI && TerminatorBI->isConditional() && |
1758 | TerminatorBI->getSuccessor(i: 0) != TerminatorBI->getSuccessor(i: 1) && ICmp) { |
1759 | bool LHSIsTrue = TerminatorBI->getSuccessor(i: 0) == PN->getParent(); |
1760 | std::optional<bool> ImpliedCond = |
1761 | isImpliedCondition(LHS: TerminatorBI->getCondition(), RHSPred: ICmp->getPredicate(), |
1762 | RHSOp0: Ops[0], RHSOp1: Ops[1], DL, LHSIsTrue); |
1763 | if (ImpliedCond) |
1764 | return ConstantInt::getBool(Ty: I.getType(), V: ImpliedCond.value()); |
1765 | } |
1766 | |
1767 | return nullptr; |
1768 | } |
1769 | |
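// Fold an operation into its PHI operand by applying it to each incoming
// value, e.g. (illustrative; subject to the legality checks below):
//   %p = phi i32 [1, %bb0], [%x, %bb1]
//   %r = add i32 %p, 4
// -->
//   %p.add = add i32 %x, 4   ; emitted at the end of %bb1
//   %r = phi i32 [5, %bb0], [%p.add, %bb1]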
1770 | Instruction *InstCombinerImpl::foldOpIntoPhi(Instruction &I, PHINode *PN) { |
1771 | unsigned NumPHIValues = PN->getNumIncomingValues(); |
1772 | if (NumPHIValues == 0) |
1773 | return nullptr; |
1774 | |
1775 | // We normally only transform phis with a single use. However, if a PHI has |
1776 | // multiple uses and they are all the same operation, we can fold *all* of the |
1777 | // uses into the PHI. |
1778 | if (!PN->hasOneUse()) { |
1779 | // Walk the use list for the instruction, comparing them to I. |
1780 | for (User *U : PN->users()) { |
1781 | Instruction *UI = cast<Instruction>(Val: U); |
1782 | if (UI != &I && !I.isIdenticalTo(I: UI)) |
1783 | return nullptr; |
1784 | } |
1785 | // Otherwise, we can replace *all* users with the new PHI we form. |
1786 | } |
1787 | |
1788 | // Check to see whether the instruction can be folded into each phi operand. |
1789 | // If there is one operand that does not fold, remember the BB it is in. |
1790 | // If there is more than one or if *it* is a PHI, bail out. |
1791 | SmallVector<Value *> NewPhiValues; |
1792 | BasicBlock *NonSimplifiedBB = nullptr; |
1793 | Value *NonSimplifiedInVal = nullptr; |
1794 | for (unsigned i = 0; i != NumPHIValues; ++i) { |
1795 | Value *InVal = PN->getIncomingValue(i); |
1796 | BasicBlock *InBB = PN->getIncomingBlock(i); |
1797 | |
1798 | if (auto *NewVal = simplifyInstructionWithPHI(I, PN, InValue: InVal, InBB, DL, SQ)) { |
1799 | NewPhiValues.push_back(Elt: NewVal); |
1800 | continue; |
1801 | } |
1802 | |
1803 | if (NonSimplifiedBB) return nullptr; // More than one non-simplified value. |
1804 | |
1805 | NonSimplifiedBB = InBB; |
1806 | NonSimplifiedInVal = InVal; |
1807 | NewPhiValues.push_back(Elt: nullptr); |
1808 | |
1809 | // If the InVal is an invoke at the end of the pred block, then we can't |
1810 | // insert a computation after it without breaking the edge. |
1811 | if (isa<InvokeInst>(Val: InVal)) |
1812 | if (cast<Instruction>(Val: InVal)->getParent() == NonSimplifiedBB) |
1813 | return nullptr; |
1814 | |
1815 | // If the incoming non-constant value is reachable from the phis block, |
1816 | // we'll push the operation across a loop backedge. This could result in |
1817 | // an infinite combine loop, and is generally non-profitable (especially |
1818 | // if the operation was originally outside the loop). |
1819 | if (isPotentiallyReachable(From: PN->getParent(), To: NonSimplifiedBB, ExclusionSet: nullptr, DT: &DT, |
1820 | LI)) |
1821 | return nullptr; |
1822 | } |
1823 | |
1824 | // If there is exactly one non-simplified value, we can insert a copy of the |
1825 | // operation in that block. However, if this is a critical edge, we would be |
1826 | // inserting the computation on some other paths (e.g. inside a loop). Only |
1827 | // do this if the pred block is unconditionally branching into the phi block. |
1828 | // Also, make sure that the pred block is not dead code. |
1829 | if (NonSimplifiedBB != nullptr) { |
1830 | BranchInst *BI = dyn_cast<BranchInst>(Val: NonSimplifiedBB->getTerminator()); |
1831 | if (!BI || !BI->isUnconditional() || |
1832 | !DT.isReachableFromEntry(A: NonSimplifiedBB)) |
1833 | return nullptr; |
1834 | } |
1835 | |
1836 | // Okay, we can do the transformation: create the new PHI node. |
1837 | PHINode *NewPN = PHINode::Create(Ty: I.getType(), NumReservedValues: PN->getNumIncomingValues()); |
1838 | InsertNewInstBefore(New: NewPN, Old: PN->getIterator()); |
1839 | NewPN->takeName(V: PN); |
1840 | NewPN->setDebugLoc(PN->getDebugLoc()); |
1841 | |
1842 | // If we are going to have to insert a new computation, do so right before the |
1843 | // predecessor's terminator. |
1844 | Instruction *Clone = nullptr; |
1845 | if (NonSimplifiedBB) { |
1846 | Clone = I.clone(); |
1847 | for (Use &U : Clone->operands()) { |
1848 | if (U == PN) |
1849 | U = NonSimplifiedInVal; |
1850 | else |
1851 | U = U->DoPHITranslation(CurBB: PN->getParent(), PredBB: NonSimplifiedBB); |
1852 | } |
1853 | InsertNewInstBefore(New: Clone, Old: NonSimplifiedBB->getTerminator()->getIterator()); |
1854 | } |
1855 | |
1856 | for (unsigned i = 0; i != NumPHIValues; ++i) { |
1857 | if (NewPhiValues[i]) |
1858 | NewPN->addIncoming(V: NewPhiValues[i], BB: PN->getIncomingBlock(i)); |
1859 | else |
1860 | NewPN->addIncoming(V: Clone, BB: PN->getIncomingBlock(i)); |
1861 | } |
1862 | |
1863 | for (User *U : make_early_inc_range(Range: PN->users())) { |
1864 | Instruction *User = cast<Instruction>(Val: U); |
1865 | if (User == &I) continue; |
1866 | replaceInstUsesWith(I&: *User, V: NewPN); |
1867 | eraseInstFromFunction(I&: *User); |
1868 | } |
1869 | |
1870 | replaceAllDbgUsesWith(From&: const_cast<PHINode &>(*PN), |
1871 | To&: const_cast<PHINode &>(*NewPN), |
1872 | DomPoint&: const_cast<PHINode &>(*PN), DT); |
1873 | return replaceInstUsesWith(I, V: NewPN); |
1874 | } |
1875 | |
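// Fold a binop whose operands are two single-use phis that live in the binop's
// block: either every predecessor supplies the binop's identity constant
// through one of the phis (see the example below), or one predecessor supplies
// a pair of constants and the binop can be hoisted into the other predecessor.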
1876 | Instruction *InstCombinerImpl::foldBinopWithPhiOperands(BinaryOperator &BO) { |
1877 | // TODO: This should be similar to the incoming values check in foldOpIntoPhi: |
1878 | // we are guarding against replicating the binop in >1 predecessor. |
1879 | // This could miss matching a phi with 2 constant incoming values. |
1880 | auto *Phi0 = dyn_cast<PHINode>(Val: BO.getOperand(i_nocapture: 0)); |
1881 | auto *Phi1 = dyn_cast<PHINode>(Val: BO.getOperand(i_nocapture: 1)); |
1882 | if (!Phi0 || !Phi1 || !Phi0->hasOneUse() || !Phi1->hasOneUse() || |
1883 | Phi0->getNumOperands() != Phi1->getNumOperands()) |
1884 | return nullptr; |
1885 | |
1886 | // TODO: Remove the restriction for binop being in the same block as the phis. |
1887 | if (BO.getParent() != Phi0->getParent() || |
1888 | BO.getParent() != Phi1->getParent()) |
1889 | return nullptr; |
1890 | |
1891 | // Fold if, for every predecessor block, one of phi0's or phi1's incoming
1892 | // values from that block is the identity constant of the binary operator;
1893 | // the binop then reduces to the other phi's incoming value for that block.
1894 | // For example: |
1895 | // %phi0 = phi i32 [0, %bb0], [%i, %bb1] |
1896 | // %phi1 = phi i32 [%j, %bb0], [0, %bb1] |
1897 | // %add = add i32 %phi0, %phi1 |
1898 | // ==> |
1899 | // %add = phi i32 [%j, %bb0], [%i, %bb1] |
1900 | Constant *C = ConstantExpr::getBinOpIdentity(Opcode: BO.getOpcode(), Ty: BO.getType(), |
1901 | /*AllowRHSConstant*/ false); |
1902 | if (C) { |
1903 | SmallVector<Value *, 4> NewIncomingValues; |
1904 | auto CanFoldIncomingValuePair = [&](std::tuple<Use &, Use &> T) { |
1905 | auto &Phi0Use = std::get<0>(t&: T); |
1906 | auto &Phi1Use = std::get<1>(t&: T); |
1907 | if (Phi0->getIncomingBlock(U: Phi0Use) != Phi1->getIncomingBlock(U: Phi1Use)) |
1908 | return false; |
1909 | Value *Phi0UseV = Phi0Use.get(); |
1910 | Value *Phi1UseV = Phi1Use.get(); |
1911 | if (Phi0UseV == C) |
1912 | NewIncomingValues.push_back(Elt: Phi1UseV); |
1913 | else if (Phi1UseV == C) |
1914 | NewIncomingValues.push_back(Elt: Phi0UseV); |
1915 | else |
1916 | return false; |
1917 | return true; |
1918 | }; |
1919 | |
1920 | if (all_of(Range: zip(t: Phi0->operands(), u: Phi1->operands()), |
1921 | P: CanFoldIncomingValuePair)) { |
1922 | PHINode *NewPhi = |
1923 | PHINode::Create(Ty: Phi0->getType(), NumReservedValues: Phi0->getNumOperands()); |
1924 | assert(NewIncomingValues.size() == Phi0->getNumOperands() && |
1925 | "The number of collected incoming values should equal the number " |
1926 | "of the original PHINode operands!" ); |
1927 | for (unsigned I = 0; I < Phi0->getNumOperands(); I++) |
1928 | NewPhi->addIncoming(V: NewIncomingValues[I], BB: Phi0->getIncomingBlock(i: I)); |
1929 | return NewPhi; |
1930 | } |
1931 | } |
1932 | |
1933 | if (Phi0->getNumOperands() != 2 || Phi1->getNumOperands() != 2) |
1934 | return nullptr; |
1935 | |
1936 | // Match a pair of incoming constants for one of the predecessor blocks. |
1937 | BasicBlock *ConstBB, *OtherBB; |
1938 | Constant *C0, *C1; |
1939 | if (match(V: Phi0->getIncomingValue(i: 0), P: m_ImmConstant(C&: C0))) { |
1940 | ConstBB = Phi0->getIncomingBlock(i: 0); |
1941 | OtherBB = Phi0->getIncomingBlock(i: 1); |
1942 | } else if (match(V: Phi0->getIncomingValue(i: 1), P: m_ImmConstant(C&: C0))) { |
1943 | ConstBB = Phi0->getIncomingBlock(i: 1); |
1944 | OtherBB = Phi0->getIncomingBlock(i: 0); |
1945 | } else { |
1946 | return nullptr; |
1947 | } |
1948 | if (!match(V: Phi1->getIncomingValueForBlock(BB: ConstBB), P: m_ImmConstant(C&: C1))) |
1949 | return nullptr; |
1950 | |
1951 | // The block that we are hoisting to must reach here unconditionally. |
1952 | // Otherwise, we could be speculatively executing an expensive or |
1953 | // non-speculative op. |
1954 | auto *PredBlockBranch = dyn_cast<BranchInst>(Val: OtherBB->getTerminator()); |
1955 | if (!PredBlockBranch || PredBlockBranch->isConditional() || |
1956 | !DT.isReachableFromEntry(A: OtherBB)) |
1957 | return nullptr; |
1958 | |
1959 | // TODO: This check could be tightened to only apply to binops (div/rem) that |
1960 | // are not safe to speculatively execute. But that could allow hoisting |
1961 | // potentially expensive instructions (fdiv for example). |
1962 | for (auto BBIter = BO.getParent()->begin(); &*BBIter != &BO; ++BBIter) |
1963 | if (!isGuaranteedToTransferExecutionToSuccessor(I: &*BBIter)) |
1964 | return nullptr; |
1965 | |
1966 | // Fold constants for the predecessor block with constant incoming values. |
1967 | Constant *NewC = ConstantFoldBinaryOpOperands(Opcode: BO.getOpcode(), LHS: C0, RHS: C1, DL); |
1968 | if (!NewC) |
1969 | return nullptr; |
1970 | |
1971 | // Make a new binop in the predecessor block with the non-constant incoming |
1972 | // values. |
1973 | Builder.SetInsertPoint(PredBlockBranch); |
1974 | Value *NewBO = Builder.CreateBinOp(Opc: BO.getOpcode(), |
1975 | LHS: Phi0->getIncomingValueForBlock(BB: OtherBB), |
1976 | RHS: Phi1->getIncomingValueForBlock(BB: OtherBB)); |
1977 | if (auto *NotFoldedNewBO = dyn_cast<BinaryOperator>(Val: NewBO)) |
1978 | NotFoldedNewBO->copyIRFlags(V: &BO); |
1979 | |
1980 | // Replace the binop with a phi of the new values. The old phis are dead. |
1981 | PHINode *NewPhi = PHINode::Create(Ty: BO.getType(), NumReservedValues: 2); |
1982 | NewPhi->addIncoming(V: NewBO, BB: OtherBB); |
1983 | NewPhi->addIncoming(V: NewC, BB: ConstBB); |
1984 | return NewPhi; |
1985 | } |
1986 | |
1987 | Instruction *InstCombinerImpl::foldBinOpIntoSelectOrPhi(BinaryOperator &I) { |
1988 | if (!isa<Constant>(Val: I.getOperand(i_nocapture: 1))) |
1989 | return nullptr; |
1990 | |
1991 | if (auto *Sel = dyn_cast<SelectInst>(Val: I.getOperand(i_nocapture: 0))) { |
1992 | if (Instruction *NewSel = FoldOpIntoSelect(Op&: I, SI: Sel)) |
1993 | return NewSel; |
1994 | } else if (auto *PN = dyn_cast<PHINode>(Val: I.getOperand(i_nocapture: 0))) { |
1995 | if (Instruction *NewPhi = foldOpIntoPhi(I, PN)) |
1996 | return NewPhi; |
1997 | } |
1998 | return nullptr; |
1999 | } |
2000 | |
2001 | static bool shouldMergeGEPs(GEPOperator &GEP, GEPOperator &Src) { |
2002 | // If this GEP has only 0 indices, it is the same pointer as |
2003 | // Src. If Src is not a trivial GEP too, don't combine |
2004 | // the indices. |
2005 | if (GEP.hasAllZeroIndices() && !Src.hasAllZeroIndices() && |
2006 | !Src.hasOneUse()) |
2007 | return false; |
2008 | return true; |
2009 | } |
2010 | |
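// Canonicalize a binary operator whose vector operands are shuffles: push
// concatenations, vector.reverse calls, identical-mask shuffles, and splat
// shuffles through the binop so that shuffles end up next to other shuffles
// and binops next to other binops, enabling further folds.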
2011 | Instruction *InstCombinerImpl::foldVectorBinop(BinaryOperator &Inst) { |
2012 | if (!isa<VectorType>(Val: Inst.getType())) |
2013 | return nullptr; |
2014 | |
2015 | BinaryOperator::BinaryOps Opcode = Inst.getOpcode(); |
2016 | Value *LHS = Inst.getOperand(i_nocapture: 0), *RHS = Inst.getOperand(i_nocapture: 1); |
2017 | assert(cast<VectorType>(LHS->getType())->getElementCount() == |
2018 | cast<VectorType>(Inst.getType())->getElementCount()); |
2019 | assert(cast<VectorType>(RHS->getType())->getElementCount() == |
2020 | cast<VectorType>(Inst.getType())->getElementCount()); |
2021 | |
2022 | // If both operands of the binop are vector concatenations, then perform the |
2023 | // narrow binop on each pair of the source operands followed by concatenation |
2024 | // of the results. |
2025 | Value *L0, *L1, *R0, *R1; |
2026 | ArrayRef<int> Mask; |
2027 | if (match(V: LHS, P: m_Shuffle(v1: m_Value(V&: L0), v2: m_Value(V&: L1), mask: m_Mask(Mask))) && |
2028 | match(V: RHS, P: m_Shuffle(v1: m_Value(V&: R0), v2: m_Value(V&: R1), mask: m_SpecificMask(Mask))) && |
2029 | LHS->hasOneUse() && RHS->hasOneUse() && |
2030 | cast<ShuffleVectorInst>(Val: LHS)->isConcat() && |
2031 | cast<ShuffleVectorInst>(Val: RHS)->isConcat()) { |
2032 | // This transform does not have the speculative execution constraint as |
2033 | // below because the shuffle is a concatenation. The new binops are |
2034 | // operating on exactly the same elements as the existing binop. |
2035 | // TODO: We could ease the mask requirement to allow different undef lanes, |
2036 | // but that requires an analysis of the binop-with-undef output value. |
2037 | Value *NewBO0 = Builder.CreateBinOp(Opc: Opcode, LHS: L0, RHS: R0); |
2038 | if (auto *BO = dyn_cast<BinaryOperator>(Val: NewBO0)) |
2039 | BO->copyIRFlags(V: &Inst); |
2040 | Value *NewBO1 = Builder.CreateBinOp(Opc: Opcode, LHS: L1, RHS: R1); |
2041 | if (auto *BO = dyn_cast<BinaryOperator>(Val: NewBO1)) |
2042 | BO->copyIRFlags(V: &Inst); |
2043 | return new ShuffleVectorInst(NewBO0, NewBO1, Mask); |
2044 | } |
2045 | |
2046 | auto createBinOpReverse = [&](Value *X, Value *Y) { |
2047 | Value *V = Builder.CreateBinOp(Opc: Opcode, LHS: X, RHS: Y, Name: Inst.getName()); |
2048 | if (auto *BO = dyn_cast<BinaryOperator>(Val: V)) |
2049 | BO->copyIRFlags(V: &Inst); |
2050 | Module *M = Inst.getModule(); |
2051 | Function *F = |
2052 | Intrinsic::getDeclaration(M, id: Intrinsic::vector_reverse, Tys: V->getType()); |
2053 | return CallInst::Create(Func: F, Args: V); |
2054 | }; |
2055 | |
2056 | // NOTE: Reverse shuffles don't require the speculative execution protection |
2057 | // below because they don't affect which lanes take part in the computation. |
2058 | |
2059 | Value *V1, *V2; |
2060 | if (match(V: LHS, P: m_VecReverse(Op0: m_Value(V&: V1)))) { |
2061 | // Op(rev(V1), rev(V2)) -> rev(Op(V1, V2)) |
2062 | if (match(V: RHS, P: m_VecReverse(Op0: m_Value(V&: V2))) && |
2063 | (LHS->hasOneUse() || RHS->hasOneUse() || |
2064 | (LHS == RHS && LHS->hasNUses(N: 2)))) |
2065 | return createBinOpReverse(V1, V2); |
2066 | |
2067 | // Op(rev(V1), RHSSplat)) -> rev(Op(V1, RHSSplat)) |
2068 | if (LHS->hasOneUse() && isSplatValue(V: RHS)) |
2069 | return createBinOpReverse(V1, RHS); |
2070 | } |
2071 | // Op(LHSSplat, rev(V2)) -> rev(Op(LHSSplat, V2)) |
2072 | else if (isSplatValue(V: LHS) && match(V: RHS, P: m_OneUse(SubPattern: m_VecReverse(Op0: m_Value(V&: V2))))) |
2073 | return createBinOpReverse(LHS, V2); |
2074 | |
2075 | // It may not be safe to reorder shuffles and things like div, urem, etc. |
2076 | // because we may trap when executing those ops on unknown vector elements. |
2077 | // See PR20059. |
2078 | if (!isSafeToSpeculativelyExecute(I: &Inst)) |
2079 | return nullptr; |
2080 | |
2081 | auto createBinOpShuffle = [&](Value *X, Value *Y, ArrayRef<int> M) { |
2082 | Value *XY = Builder.CreateBinOp(Opc: Opcode, LHS: X, RHS: Y); |
2083 | if (auto *BO = dyn_cast<BinaryOperator>(Val: XY)) |
2084 | BO->copyIRFlags(V: &Inst); |
2085 | return new ShuffleVectorInst(XY, M); |
2086 | }; |
2087 | |
2088 | // If both arguments of the binary operation are shuffles that use the same |
2089 | // mask and shuffle within a single vector, move the shuffle after the binop. |
2090 | if (match(V: LHS, P: m_Shuffle(v1: m_Value(V&: V1), v2: m_Poison(), mask: m_Mask(Mask))) && |
2091 | match(V: RHS, P: m_Shuffle(v1: m_Value(V&: V2), v2: m_Poison(), mask: m_SpecificMask(Mask))) && |
2092 | V1->getType() == V2->getType() && |
2093 | (LHS->hasOneUse() || RHS->hasOneUse() || LHS == RHS)) { |
2094 | // Op(shuffle(V1, Mask), shuffle(V2, Mask)) -> shuffle(Op(V1, V2), Mask) |
2095 | return createBinOpShuffle(V1, V2, Mask); |
2096 | } |
2097 | |
2098 | // If both arguments of a commutative binop are select-shuffles that use the |
2099 | // same mask with commuted operands, the shuffles are unnecessary. |
2100 | if (Inst.isCommutative() && |
2101 | match(V: LHS, P: m_Shuffle(v1: m_Value(V&: V1), v2: m_Value(V&: V2), mask: m_Mask(Mask))) && |
2102 | match(V: RHS, |
2103 | P: m_Shuffle(v1: m_Specific(V: V2), v2: m_Specific(V: V1), mask: m_SpecificMask(Mask)))) { |
2104 | auto *LShuf = cast<ShuffleVectorInst>(Val: LHS); |
2105 | auto *RShuf = cast<ShuffleVectorInst>(Val: RHS); |
2106 | // TODO: Allow shuffles that contain undefs in the mask? |
2107 | // That is legal, but it reduces undef knowledge. |
2108 | // TODO: Allow arbitrary shuffles by shuffling after binop? |
2109 | // That might be legal, but we have to deal with poison. |
2110 | if (LShuf->isSelect() && |
2111 | !is_contained(Range: LShuf->getShuffleMask(), Element: PoisonMaskElem) && |
2112 | RShuf->isSelect() && |
2113 | !is_contained(Range: RShuf->getShuffleMask(), Element: PoisonMaskElem)) { |
2114 | // Example: |
2115 | // LHS = shuffle V1, V2, <0, 5, 6, 3> |
2116 | // RHS = shuffle V2, V1, <0, 5, 6, 3> |
2117 | // LHS + RHS --> (V10+V20, V21+V11, V22+V12, V13+V23) --> V1 + V2 |
2118 | Instruction *NewBO = BinaryOperator::Create(Op: Opcode, S1: V1, S2: V2); |
2119 | NewBO->copyIRFlags(V: &Inst); |
2120 | return NewBO; |
2121 | } |
2122 | } |
2123 | |
2124 | // If one argument is a shuffle within one vector and the other is a constant, |
2125 | // try moving the shuffle after the binary operation. This canonicalization |
2126 | // intends to move shuffles closer to other shuffles and binops closer to |
2127 | // other binops, so they can be folded. It may also enable demanded elements |
2128 | // transforms. |
2129 | Constant *C; |
2130 | auto *InstVTy = dyn_cast<FixedVectorType>(Val: Inst.getType()); |
2131 | if (InstVTy && |
2132 | match(V: &Inst, P: m_c_BinOp(L: m_OneUse(SubPattern: m_Shuffle(v1: m_Value(V&: V1), v2: m_Poison(), |
2133 | mask: m_Mask(Mask))), |
2134 | R: m_ImmConstant(C))) && |
2135 | cast<FixedVectorType>(Val: V1->getType())->getNumElements() <= |
2136 | InstVTy->getNumElements()) { |
2137 | assert(InstVTy->getScalarType() == V1->getType()->getScalarType() && |
2138 | "Shuffle should not change scalar type" ); |
2139 | |
2140 | // Find constant NewC that has property: |
2141 | // shuffle(NewC, ShMask) = C |
2142 | // If such constant does not exist (example: ShMask=<0,0> and C=<1,2>) |
2143 | // reorder is not possible. A 1-to-1 mapping is not required. Example: |
2144 | // ShMask = <1,1,2,2> and C = <5,5,6,6> --> NewC = <undef,5,6,undef> |
2145 | bool ConstOp1 = isa<Constant>(Val: RHS); |
2146 | ArrayRef<int> ShMask = Mask; |
2147 | unsigned SrcVecNumElts = |
2148 | cast<FixedVectorType>(Val: V1->getType())->getNumElements(); |
2149 | PoisonValue *PoisonScalar = PoisonValue::get(T: C->getType()->getScalarType()); |
2150 | SmallVector<Constant *, 16> NewVecC(SrcVecNumElts, PoisonScalar); |
2151 | bool MayChange = true; |
2152 | unsigned NumElts = InstVTy->getNumElements(); |
2153 | for (unsigned I = 0; I < NumElts; ++I) { |
2154 | Constant *CElt = C->getAggregateElement(Elt: I); |
2155 | if (ShMask[I] >= 0) { |
2156 | assert(ShMask[I] < (int)NumElts && "Not expecting narrowing shuffle");
2157 | Constant *NewCElt = NewVecC[ShMask[I]]; |
2158 | // Bail out if: |
2159 | // 1. The constant vector contains a constant expression. |
2160 | // 2. The shuffle needs an element of the constant vector that can't |
2161 | // be mapped to a new constant vector. |
2162 | // 3. This is a widening shuffle that copies elements of V1 into the |
2163 | // extended elements (extending with poison is allowed). |
2164 | if (!CElt || (!isa<PoisonValue>(Val: NewCElt) && NewCElt != CElt) || |
2165 | I >= SrcVecNumElts) { |
2166 | MayChange = false; |
2167 | break; |
2168 | } |
2169 | NewVecC[ShMask[I]] = CElt; |
2170 | } |
2171 | // If this is a widening shuffle, we must be able to extend with poison |
2172 | // elements. If the original binop does not produce a poison in the high |
2173 | // lanes, then this transform is not safe. |
2174 | // Similarly for poison lanes due to the shuffle mask, we can only |
2175 | // transform binops that preserve poison. |
2176 | // TODO: We could shuffle those non-poison constant values into the |
2177 | //       result by using a constant vector (rather than a poison vector)
2178 | // as operand 1 of the new binop, but that might be too aggressive |
2179 | // for target-independent shuffle creation. |
2180 | if (I >= SrcVecNumElts || ShMask[I] < 0) { |
2181 | Constant *MaybePoison = |
2182 | ConstOp1 |
2183 | ? ConstantFoldBinaryOpOperands(Opcode, LHS: PoisonScalar, RHS: CElt, DL) |
2184 | : ConstantFoldBinaryOpOperands(Opcode, LHS: CElt, RHS: PoisonScalar, DL); |
2185 | if (!MaybePoison || !isa<PoisonValue>(Val: MaybePoison)) { |
2186 | MayChange = false; |
2187 | break; |
2188 | } |
2189 | } |
2190 | } |
2191 | if (MayChange) { |
2192 | Constant *NewC = ConstantVector::get(V: NewVecC); |
2193 | // It may not be safe to execute a binop on a vector with poison elements |
2194 | // because the entire instruction can be folded to undef or create poison |
2195 | // that did not exist in the original code. |
2196 | // TODO: The shift case should not be necessary. |
2197 | if (Inst.isIntDivRem() || (Inst.isShift() && ConstOp1)) |
2198 | NewC = getSafeVectorConstantForBinop(Opcode, In: NewC, IsRHSConstant: ConstOp1); |
2199 | |
2200 | // Op(shuffle(V1, Mask), C) -> shuffle(Op(V1, NewC), Mask) |
2201 | // Op(C, shuffle(V1, Mask)) -> shuffle(Op(NewC, V1), Mask) |
2202 | Value *NewLHS = ConstOp1 ? V1 : NewC; |
2203 | Value *NewRHS = ConstOp1 ? NewC : V1; |
2204 | return createBinOpShuffle(NewLHS, NewRHS, Mask); |
2205 | } |
2206 | } |
2207 | |
2208 | // Try to reassociate to sink a splat shuffle after a binary operation. |
2209 | if (Inst.isAssociative() && Inst.isCommutative()) { |
2210 | // Canonicalize shuffle operand as LHS. |
2211 | if (isa<ShuffleVectorInst>(Val: RHS)) |
2212 | std::swap(a&: LHS, b&: RHS); |
2213 | |
2214 | Value *X; |
2215 | ArrayRef<int> MaskC; |
2216 | int SplatIndex; |
2217 | Value *Y, *OtherOp; |
2218 | if (!match(V: LHS, |
2219 | P: m_OneUse(SubPattern: m_Shuffle(v1: m_Value(V&: X), v2: m_Undef(), mask: m_Mask(MaskC)))) || |
2220 | !match(Mask: MaskC, P: m_SplatOrPoisonMask(SplatIndex)) || |
2221 | X->getType() != Inst.getType() || |
2222 | !match(V: RHS, P: m_OneUse(SubPattern: m_BinOp(Opcode, L: m_Value(V&: Y), R: m_Value(V&: OtherOp))))) |
2223 | return nullptr; |
2224 | |
2225 | // FIXME: This may not be safe if the analysis allows undef elements. By |
2226 | // moving 'Y' before the splat shuffle, we are implicitly assuming |
2227 | // that it is not undef/poison at the splat index. |
2228 | if (isSplatValue(V: OtherOp, Index: SplatIndex)) { |
2229 | std::swap(a&: Y, b&: OtherOp); |
2230 | } else if (!isSplatValue(V: Y, Index: SplatIndex)) { |
2231 | return nullptr; |
2232 | } |
2233 | |
2234 | // X and Y are splatted values, so perform the binary operation on those |
2235 | // values followed by a splat followed by the 2nd binary operation: |
2236 | // bo (splat X), (bo Y, OtherOp) --> bo (splat (bo X, Y)), OtherOp |
2237 | Value *NewBO = Builder.CreateBinOp(Opc: Opcode, LHS: X, RHS: Y); |
2238 | SmallVector<int, 8> NewMask(MaskC.size(), SplatIndex); |
2239 | Value *NewSplat = Builder.CreateShuffleVector(V: NewBO, Mask: NewMask); |
2240 | Instruction *R = BinaryOperator::Create(Op: Opcode, S1: NewSplat, S2: OtherOp); |
2241 | |
2242 | // Intersect FMF on both new binops. Other (poison-generating) flags are |
2243 | // dropped to be safe. |
2244 | if (isa<FPMathOperator>(Val: R)) { |
2245 | R->copyFastMathFlags(I: &Inst); |
2246 | R->andIRFlags(V: RHS); |
2247 | } |
2248 | if (auto *NewInstBO = dyn_cast<BinaryOperator>(Val: NewBO)) |
2249 | NewInstBO->copyIRFlags(V: R); |
2250 | return R; |
2251 | } |
2252 | |
2253 | return nullptr; |
2254 | } |
2255 | |
2256 | /// Try to narrow the width of a binop if at least 1 operand is an extend of |
2257 | /// a value. This requires a potentially expensive known bits check to make
2258 | /// sure the narrow op does not overflow. |
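/// For example (illustrative, when the narrow add is proven not to overflow):
///   add (zext i8 %x to i32), (zext i8 %y to i32)
///     --> zext (add nuw i8 %x, %y) to i32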
2259 | Instruction *InstCombinerImpl::narrowMathIfNoOverflow(BinaryOperator &BO) { |
2260 | // We need at least one extended operand. |
2261 | Value *Op0 = BO.getOperand(i_nocapture: 0), *Op1 = BO.getOperand(i_nocapture: 1); |
2262 | |
2263 | // If this is a sub, we swap the operands since we always want an extension |
2264 | // on the RHS. The LHS can be an extension or a constant. |
2265 | if (BO.getOpcode() == Instruction::Sub) |
2266 | std::swap(a&: Op0, b&: Op1); |
2267 | |
2268 | Value *X; |
2269 | bool IsSext = match(V: Op0, P: m_SExt(Op: m_Value(V&: X))); |
2270 | if (!IsSext && !match(V: Op0, P: m_ZExt(Op: m_Value(V&: X)))) |
2271 | return nullptr; |
2272 | |
2273 | // If both operands are the same extension from the same source type and we |
2274 | // can eliminate at least one (hasOneUse), this might work. |
2275 | CastInst::CastOps CastOpc = IsSext ? Instruction::SExt : Instruction::ZExt; |
2276 | Value *Y; |
2277 | if (!(match(V: Op1, P: m_ZExtOrSExt(Op: m_Value(V&: Y))) && X->getType() == Y->getType() && |
2278 | cast<Operator>(Val: Op1)->getOpcode() == CastOpc && |
2279 | (Op0->hasOneUse() || Op1->hasOneUse()))) { |
2280 | // If that did not match, see if we have a suitable constant operand. |
2281 | // Truncating and extending must produce the same constant. |
2282 | Constant *WideC; |
2283 | if (!Op0->hasOneUse() || !match(V: Op1, P: m_Constant(C&: WideC))) |
2284 | return nullptr; |
2285 | Constant *NarrowC = getLosslessTrunc(C: WideC, TruncTy: X->getType(), ExtOp: CastOpc); |
2286 | if (!NarrowC) |
2287 | return nullptr; |
2288 | Y = NarrowC; |
2289 | } |
2290 | |
2291 | // Swap back now that we found our operands. |
2292 | if (BO.getOpcode() == Instruction::Sub) |
2293 | std::swap(a&: X, b&: Y); |
2294 | |
2295 | // Both operands have narrow versions. Last step: the math must not overflow |
2296 | // in the narrow width. |
2297 | if (!willNotOverflow(Opcode: BO.getOpcode(), LHS: X, RHS: Y, CxtI: BO, IsSigned: IsSext)) |
2298 | return nullptr; |
2299 | |
2300 | // bo (ext X), (ext Y) --> ext (bo X, Y) |
2301 | // bo (ext X), C --> ext (bo X, C') |
2302 | Value *NarrowBO = Builder.CreateBinOp(Opc: BO.getOpcode(), LHS: X, RHS: Y, Name: "narrow" ); |
2303 | if (auto *NewBinOp = dyn_cast<BinaryOperator>(Val: NarrowBO)) { |
2304 | if (IsSext) |
2305 | NewBinOp->setHasNoSignedWrap(); |
2306 | else |
2307 | NewBinOp->setHasNoUnsignedWrap(); |
2308 | } |
2309 | return CastInst::Create(CastOpc, S: NarrowBO, Ty: BO.getType()); |
2310 | } |
2311 | |
2312 | static bool isMergedGEPInBounds(GEPOperator &GEP1, GEPOperator &GEP2) { |
2313 | return GEP1.isInBounds() && GEP2.isInBounds(); |
2314 | } |
2315 | |
2316 | /// Thread a GEP operation with constant indices through the constant true/false |
2317 | /// arms of a select. |
2318 | static Instruction *foldSelectGEP(GetElementPtrInst &GEP, |
2319 | InstCombiner::BuilderTy &Builder) { |
2320 | if (!GEP.hasAllConstantIndices()) |
2321 | return nullptr; |
2322 | |
2323 | Instruction *Sel; |
2324 | Value *Cond; |
2325 | Constant *TrueC, *FalseC; |
2326 | if (!match(V: GEP.getPointerOperand(), P: m_Instruction(I&: Sel)) || |
2327 | !match(V: Sel, |
2328 | P: m_Select(C: m_Value(V&: Cond), L: m_Constant(C&: TrueC), R: m_Constant(C&: FalseC)))) |
2329 | return nullptr; |
2330 | |
2331 | // gep (select Cond, TrueC, FalseC), IndexC --> select Cond, TrueC', FalseC' |
2332 | // Propagate 'inbounds' and metadata from existing instructions. |
2333 | // Note: using IRBuilder to create the constants for efficiency. |
2334 | SmallVector<Value *, 4> IndexC(GEP.indices()); |
2335 | GEPNoWrapFlags NW = GEP.getNoWrapFlags(); |
2336 | Type *Ty = GEP.getSourceElementType(); |
2337 | Value *NewTrueC = Builder.CreateGEP(Ty, Ptr: TrueC, IdxList: IndexC, Name: "" , NW); |
2338 | Value *NewFalseC = Builder.CreateGEP(Ty, Ptr: FalseC, IdxList: IndexC, Name: "" , NW); |
2339 | return SelectInst::Create(C: Cond, S1: NewTrueC, S2: NewFalseC, NameStr: "" , InsertBefore: nullptr, MDFrom: Sel); |
2340 | } |
2341 | |
2342 | // Canonicalization: |
2343 | // gep T, (gep i8, base, C1), (Index + C2) into |
2344 | // gep T, (gep i8, base, C1 + C2 * sizeof(T)), Index |
2345 | static Instruction *canonicalizeGEPOfConstGEPI8(GetElementPtrInst &GEP, |
2346 | GEPOperator *Src, |
2347 | InstCombinerImpl &IC) { |
2348 | if (GEP.getNumIndices() != 1) |
2349 | return nullptr; |
2350 | auto &DL = IC.getDataLayout(); |
2351 | Value *Base; |
2352 | const APInt *C1; |
2353 | if (!match(V: Src, P: m_PtrAdd(PointerOp: m_Value(V&: Base), OffsetOp: m_APInt(Res&: C1)))) |
2354 | return nullptr; |
2355 | Value *VarIndex; |
2356 | const APInt *C2; |
2357 | Type *PtrTy = Src->getType()->getScalarType(); |
2358 | unsigned IndexSizeInBits = DL.getIndexTypeSizeInBits(Ty: PtrTy); |
2359 | if (!match(V: GEP.getOperand(i_nocapture: 1), P: m_AddLike(L: m_Value(V&: VarIndex), R: m_APInt(Res&: C2)))) |
2360 | return nullptr; |
2361 | if (C1->getBitWidth() != IndexSizeInBits || |
2362 | C2->getBitWidth() != IndexSizeInBits) |
2363 | return nullptr; |
2364 | Type *BaseType = GEP.getSourceElementType(); |
2365 | if (isa<ScalableVectorType>(Val: BaseType)) |
2366 | return nullptr; |
2367 | APInt TypeSize(IndexSizeInBits, DL.getTypeAllocSize(Ty: BaseType)); |
2368 | APInt NewOffset = TypeSize * *C2 + *C1; |
2369 | if (NewOffset.isZero() || |
2370 | (Src->hasOneUse() && GEP.getOperand(i_nocapture: 1)->hasOneUse())) { |
2371 | Value *GEPConst = |
2372 | IC.Builder.CreatePtrAdd(Ptr: Base, Offset: IC.Builder.getInt(AI: NewOffset)); |
2373 | return GetElementPtrInst::Create(PointeeType: BaseType, Ptr: GEPConst, IdxList: VarIndex); |
2374 | } |
2375 | |
2376 | return nullptr; |
2377 | } |
2378 | |
2379 | Instruction *InstCombinerImpl::visitGEPOfGEP(GetElementPtrInst &GEP, |
2380 | GEPOperator *Src) { |
2381 | // Combine Indices - If the source pointer to this getelementptr instruction |
2382 | // is a getelementptr instruction with matching element type, combine the |
2383 | // indices of the two getelementptr instructions into a single instruction. |
2384 | if (!shouldMergeGEPs(GEP&: *cast<GEPOperator>(Val: &GEP), Src&: *Src)) |
2385 | return nullptr; |
2386 | |
2387 | if (auto *I = canonicalizeGEPOfConstGEPI8(GEP, Src, IC&: *this)) |
2388 | return I; |
2389 | |
2390 | // For constant GEPs, use a more general offset-based folding approach. |
2391 | Type *PtrTy = Src->getType()->getScalarType(); |
2392 | if (GEP.hasAllConstantIndices() && |
2393 | (Src->hasOneUse() || Src->hasAllConstantIndices())) { |
2394 | // Split Src into a variable part and a constant suffix. |
2395 | gep_type_iterator GTI = gep_type_begin(GEP: *Src); |
2396 | Type *BaseType = GTI.getIndexedType(); |
2397 | bool IsFirstType = true; |
2398 | unsigned NumVarIndices = 0; |
2399 | for (auto Pair : enumerate(First: Src->indices())) { |
2400 | if (!isa<ConstantInt>(Val: Pair.value())) { |
2401 | BaseType = GTI.getIndexedType(); |
2402 | IsFirstType = false; |
2403 | NumVarIndices = Pair.index() + 1; |
2404 | } |
2405 | ++GTI; |
2406 | } |
2407 | |
2408 | // Determine the offset for the constant suffix of Src. |
2409 | APInt Offset(DL.getIndexTypeSizeInBits(Ty: PtrTy), 0); |
2410 | if (NumVarIndices != Src->getNumIndices()) { |
2411 | // FIXME: getIndexedOffsetInType() does not handle scalable vectors.
2412 | if (BaseType->isScalableTy()) |
2413 | return nullptr; |
2414 | |
2415 | SmallVector<Value *> ConstantIndices; |
2416 | if (!IsFirstType) |
2417 | ConstantIndices.push_back( |
2418 | Elt: Constant::getNullValue(Ty: Type::getInt32Ty(C&: GEP.getContext()))); |
2419 | append_range(C&: ConstantIndices, R: drop_begin(RangeOrContainer: Src->indices(), N: NumVarIndices)); |
2420 | Offset += DL.getIndexedOffsetInType(ElemTy: BaseType, Indices: ConstantIndices); |
2421 | } |
2422 | |
2423 | // Add the offset for GEP (which is fully constant). |
2424 | if (!GEP.accumulateConstantOffset(DL, Offset)) |
2425 | return nullptr; |
2426 | |
2427 | APInt OffsetOld = Offset; |
2428 | // Convert the total offset back into indices. |
2429 | SmallVector<APInt> ConstIndices = |
2430 | DL.getGEPIndicesForOffset(ElemTy&: BaseType, Offset); |
2431 | if (!Offset.isZero() || (!IsFirstType && !ConstIndices[0].isZero())) { |
2432 | // If both GEP are constant-indexed, and cannot be merged in either way, |
2433 | // convert them to a GEP of i8. |
2434 | if (Src->hasAllConstantIndices()) |
2435 | return replaceInstUsesWith( |
2436 | I&: GEP, V: Builder.CreateGEP( |
2437 | Ty: Builder.getInt8Ty(), Ptr: Src->getOperand(i_nocapture: 0), |
2438 | IdxList: Builder.getInt(AI: OffsetOld), Name: "" , |
2439 | NW: isMergedGEPInBounds(GEP1&: *Src, GEP2&: *cast<GEPOperator>(Val: &GEP)))); |
2440 | return nullptr; |
2441 | } |
2442 | |
2443 | bool IsInBounds = isMergedGEPInBounds(GEP1&: *Src, GEP2&: *cast<GEPOperator>(Val: &GEP)); |
2444 | SmallVector<Value *> Indices; |
2445 | append_range(C&: Indices, R: drop_end(RangeOrContainer: Src->indices(), |
2446 | N: Src->getNumIndices() - NumVarIndices)); |
2447 | for (const APInt &Idx : drop_begin(RangeOrContainer&: ConstIndices, N: !IsFirstType)) { |
2448 | Indices.push_back(Elt: ConstantInt::get(Context&: GEP.getContext(), V: Idx)); |
2449 | // Even if the total offset is inbounds, we may end up representing it |
2450 | // by first performing a larger negative offset, and then a smaller |
2451 | // positive one. The large negative offset might go out of bounds. Only |
2452 | // preserve inbounds if all signs are the same. |
2453 | IsInBounds &= Idx.isNonNegative() == ConstIndices[0].isNonNegative(); |
2454 | } |
2455 | |
2456 | return replaceInstUsesWith( |
2457 | I&: GEP, V: Builder.CreateGEP(Ty: Src->getSourceElementType(), Ptr: Src->getOperand(i_nocapture: 0), |
2458 | IdxList: Indices, Name: "" , NW: IsInBounds)); |
2459 | } |
2460 | |
2461 | if (Src->getResultElementType() != GEP.getSourceElementType()) |
2462 | return nullptr; |
2463 | |
2464 | SmallVector<Value*, 8> Indices; |
2465 | |
2466 | // Find out whether the last index in the source GEP is a sequential idx. |
2467 | bool EndsWithSequential = false; |
2468 | for (gep_type_iterator I = gep_type_begin(GEP: *Src), E = gep_type_end(GEP: *Src); |
2469 | I != E; ++I) |
2470 | EndsWithSequential = I.isSequential(); |
2471 | |
2472 | // Can we combine the two pointer arithmetic offsets?
2473 | if (EndsWithSequential) { |
2474 | // Replace: gep (gep %P, long B), long A, ... |
2475 | // With: T = long A+B; gep %P, T, ... |
2476 | Value *SO1 = Src->getOperand(i_nocapture: Src->getNumOperands()-1); |
2477 | Value *GO1 = GEP.getOperand(i_nocapture: 1); |
2478 | |
2479 | // If they aren't the same type, then the input hasn't been processed |
2480 | // by the loop above yet (which canonicalizes sequential index types to |
2481 | // intptr_t). Just avoid transforming this until the input has been |
2482 | // normalized. |
2483 | if (SO1->getType() != GO1->getType()) |
2484 | return nullptr; |
2485 | |
2486 | Value *Sum = |
2487 | simplifyAddInst(LHS: GO1, RHS: SO1, IsNSW: false, IsNUW: false, Q: SQ.getWithInstruction(I: &GEP)); |
2488 | // Only do the combine when we are sure the cost after the |
2489 | // merge is never more than that before the merge. |
2490 | if (Sum == nullptr) |
2491 | return nullptr; |
2492 | |
2493 | // Update the GEP in place if possible. |
2494 | if (Src->getNumOperands() == 2) { |
2495 | GEP.setIsInBounds(isMergedGEPInBounds(GEP1&: *Src, GEP2&: *cast<GEPOperator>(Val: &GEP))); |
2496 | replaceOperand(I&: GEP, OpNum: 0, V: Src->getOperand(i_nocapture: 0)); |
2497 | replaceOperand(I&: GEP, OpNum: 1, V: Sum); |
2498 | return &GEP; |
2499 | } |
2500 | Indices.append(in_start: Src->op_begin()+1, in_end: Src->op_end()-1); |
2501 | Indices.push_back(Elt: Sum); |
2502 | Indices.append(in_start: GEP.op_begin()+2, in_end: GEP.op_end()); |
2503 | } else if (isa<Constant>(Val: *GEP.idx_begin()) && |
2504 | cast<Constant>(Val&: *GEP.idx_begin())->isNullValue() && |
2505 | Src->getNumOperands() != 1) { |
2506 | // Otherwise we can do the fold if the first index of the GEP is a zero |
2507 | Indices.append(in_start: Src->op_begin()+1, in_end: Src->op_end()); |
2508 | Indices.append(in_start: GEP.idx_begin()+1, in_end: GEP.idx_end()); |
2509 | } |
2510 | |
2511 | if (!Indices.empty()) |
2512 | return replaceInstUsesWith( |
2513 | I&: GEP, V: Builder.CreateGEP( |
2514 | Ty: Src->getSourceElementType(), Ptr: Src->getOperand(i_nocapture: 0), IdxList: Indices, Name: "" , |
2515 | NW: isMergedGEPInBounds(GEP1&: *Src, GEP2&: *cast<GEPOperator>(Val: &GEP)))); |
2516 | |
2517 | return nullptr; |
2518 | } |
2519 | |
2520 | Value *InstCombiner::getFreelyInvertedImpl(Value *V, bool WillInvertAllUses, |
2521 | BuilderTy *Builder, |
2522 | bool &DoesConsume, unsigned Depth) { |
2523 | static Value *const NonNull = reinterpret_cast<Value *>(uintptr_t(1)); |
2524 | // ~(~(X)) -> X. |
2525 | Value *A, *B; |
2526 | if (match(V, P: m_Not(V: m_Value(V&: A)))) { |
2527 | DoesConsume = true; |
2528 | return A; |
2529 | } |
2530 | |
2531 | Constant *C; |
2532 | // Constants can be considered to be not'ed values. |
2533 | if (match(V, P: m_ImmConstant(C))) |
2534 | return ConstantExpr::getNot(C); |
2535 | |
2536 | if (Depth++ >= MaxAnalysisRecursionDepth) |
2537 | return nullptr; |
2538 | |
// The rest of the cases require that we invert all uses, so don't bother
// doing the analysis if we know we can't use the result.
2541 | if (!WillInvertAllUses) |
2542 | return nullptr; |
2543 | |
2544 | // Compares can be inverted if all of their uses are being modified to use |
2545 | // the ~V. |
2546 | if (auto *I = dyn_cast<CmpInst>(Val: V)) { |
2547 | if (Builder != nullptr) |
2548 | return Builder->CreateCmp(Pred: I->getInversePredicate(), LHS: I->getOperand(i_nocapture: 0), |
2549 | RHS: I->getOperand(i_nocapture: 1)); |
2550 | return NonNull; |
2551 | } |
2552 | |
2553 | // If `V` is of the form `A + B` then `-1 - V` can be folded into |
2554 | // `(-1 - B) - A` if we are willing to invert all of the uses. |
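// (To see this: ~(A + B) == -1 - (A + B) == (-1 - B) - A == (~B) - A.)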
2555 | if (match(V, P: m_Add(L: m_Value(V&: A), R: m_Value(V&: B)))) { |
2556 | if (auto *BV = getFreelyInvertedImpl(V: B, WillInvertAllUses: B->hasOneUse(), Builder, |
2557 | DoesConsume, Depth)) |
2558 | return Builder ? Builder->CreateSub(LHS: BV, RHS: A) : NonNull; |
2559 | if (auto *AV = getFreelyInvertedImpl(V: A, WillInvertAllUses: A->hasOneUse(), Builder, |
2560 | DoesConsume, Depth)) |
2561 | return Builder ? Builder->CreateSub(LHS: AV, RHS: B) : NonNull; |
2562 | return nullptr; |
2563 | } |
2564 | |
2565 | // If `V` is of the form `A ^ ~B` then `~(A ^ ~B)` can be folded |
2566 | // into `A ^ B` if we are willing to invert all of the uses. |
2567 | if (match(V, P: m_Xor(L: m_Value(V&: A), R: m_Value(V&: B)))) { |
2568 | if (auto *BV = getFreelyInvertedImpl(V: B, WillInvertAllUses: B->hasOneUse(), Builder, |
2569 | DoesConsume, Depth)) |
2570 | return Builder ? Builder->CreateXor(LHS: A, RHS: BV) : NonNull; |
2571 | if (auto *AV = getFreelyInvertedImpl(V: A, WillInvertAllUses: A->hasOneUse(), Builder, |
2572 | DoesConsume, Depth)) |
2573 | return Builder ? Builder->CreateXor(LHS: AV, RHS: B) : NonNull; |
2574 | return nullptr; |
2575 | } |
2576 | |
2577 | // If `V` is of the form `B - A` then `-1 - V` can be folded into |
2578 | // `A + (-1 - B)` if we are willing to invert all of the uses. |
2579 | if (match(V, P: m_Sub(L: m_Value(V&: A), R: m_Value(V&: B)))) { |
2580 | if (auto *AV = getFreelyInvertedImpl(V: A, WillInvertAllUses: A->hasOneUse(), Builder, |
2581 | DoesConsume, Depth)) |
2582 | return Builder ? Builder->CreateAdd(LHS: AV, RHS: B) : NonNull; |
2583 | return nullptr; |
2584 | } |
2585 | |
2586 | // If `V` is of the form `(~A) s>> B` then `~((~A) s>> B)` can be folded |
2587 | // into `A s>> B` if we are willing to invert all of the uses. |
2588 | if (match(V, P: m_AShr(L: m_Value(V&: A), R: m_Value(V&: B)))) { |
2589 | if (auto *AV = getFreelyInvertedImpl(V: A, WillInvertAllUses: A->hasOneUse(), Builder, |
2590 | DoesConsume, Depth)) |
2591 | return Builder ? Builder->CreateAShr(LHS: AV, RHS: B) : NonNull; |
2592 | return nullptr; |
2593 | } |
2594 | |
2595 | Value *Cond; |
2596 | // LogicOps are special in that we canonicalize them at the cost of an |
2597 | // instruction. |
2598 | bool IsSelect = match(V, P: m_Select(C: m_Value(V&: Cond), L: m_Value(V&: A), R: m_Value(V&: B))) && |
2599 | !shouldAvoidAbsorbingNotIntoSelect(SI: *cast<SelectInst>(Val: V)); |
2600 | // Selects/min/max with invertible operands are freely invertible |
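// (For min/max this relies on the identity ~smax(A, B) == smin(~A, ~B), and
// likewise for the unsigned variants, because x -> ~x reverses the order.)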
2601 | if (IsSelect || match(V, P: m_MaxOrMin(L: m_Value(V&: A), R: m_Value(V&: B)))) { |
2602 | bool LocalDoesConsume = DoesConsume; |
2603 | if (!getFreelyInvertedImpl(V: B, WillInvertAllUses: B->hasOneUse(), /*Builder*/ nullptr, |
2604 | DoesConsume&: LocalDoesConsume, Depth)) |
2605 | return nullptr; |
2606 | if (Value *NotA = getFreelyInvertedImpl(V: A, WillInvertAllUses: A->hasOneUse(), Builder, |
2607 | DoesConsume&: LocalDoesConsume, Depth)) { |
2608 | DoesConsume = LocalDoesConsume; |
2609 | if (Builder != nullptr) { |
2610 | Value *NotB = getFreelyInvertedImpl(V: B, WillInvertAllUses: B->hasOneUse(), Builder, |
2611 | DoesConsume, Depth); |
assert(NotB != nullptr &&
       "Unable to build inverted value for known freely invertible op");
2614 | if (auto *II = dyn_cast<IntrinsicInst>(Val: V)) |
2615 | return Builder->CreateBinaryIntrinsic( |
2616 | ID: getInverseMinMaxIntrinsic(MinMaxID: II->getIntrinsicID()), LHS: NotA, RHS: NotB); |
2617 | return Builder->CreateSelect(C: Cond, True: NotA, False: NotB); |
2618 | } |
2619 | return NonNull; |
2620 | } |
2621 | } |
2622 | |
2623 | if (PHINode *PN = dyn_cast<PHINode>(Val: V)) { |
2624 | bool LocalDoesConsume = DoesConsume; |
2625 | SmallVector<std::pair<Value *, BasicBlock *>, 8> IncomingValues; |
2626 | for (Use &U : PN->operands()) { |
2627 | BasicBlock *IncomingBlock = PN->getIncomingBlock(U); |
2628 | Value *NewIncomingVal = getFreelyInvertedImpl( |
2629 | V: U.get(), /*WillInvertAllUses=*/false, |
2630 | /*Builder=*/nullptr, DoesConsume&: LocalDoesConsume, Depth: MaxAnalysisRecursionDepth - 1); |
2631 | if (NewIncomingVal == nullptr) |
2632 | return nullptr; |
2633 | // Make sure that we can safely erase the original PHI node. |
2634 | if (NewIncomingVal == V) |
2635 | return nullptr; |
2636 | if (Builder != nullptr) |
2637 | IncomingValues.emplace_back(Args&: NewIncomingVal, Args&: IncomingBlock); |
2638 | } |
2639 | |
2640 | DoesConsume = LocalDoesConsume; |
2641 | if (Builder != nullptr) { |
2642 | IRBuilderBase::InsertPointGuard Guard(*Builder); |
2643 | Builder->SetInsertPoint(PN); |
2644 | PHINode *NewPN = |
2645 | Builder->CreatePHI(Ty: PN->getType(), NumReservedValues: PN->getNumIncomingValues()); |
2646 | for (auto [Val, Pred] : IncomingValues) |
2647 | NewPN->addIncoming(V: Val, BB: Pred); |
2648 | return NewPN; |
2649 | } |
2650 | return NonNull; |
2651 | } |
2652 | |
2653 | if (match(V, P: m_SExtLike(Op: m_Value(V&: A)))) { |
2654 | if (auto *AV = getFreelyInvertedImpl(V: A, WillInvertAllUses: A->hasOneUse(), Builder, |
2655 | DoesConsume, Depth)) |
2656 | return Builder ? Builder->CreateSExt(V: AV, DestTy: V->getType()) : NonNull; |
2657 | return nullptr; |
2658 | } |
2659 | |
2660 | if (match(V, P: m_Trunc(Op: m_Value(V&: A)))) { |
2661 | if (auto *AV = getFreelyInvertedImpl(V: A, WillInvertAllUses: A->hasOneUse(), Builder, |
2662 | DoesConsume, Depth)) |
2663 | return Builder ? Builder->CreateTrunc(V: AV, DestTy: V->getType()) : NonNull; |
2664 | return nullptr; |
2665 | } |
2666 | |
2667 | // De Morgan's Laws: |
2668 | // (~(A | B)) -> (~A & ~B) |
2669 | // (~(A & B)) -> (~A | ~B) |
2670 | auto TryInvertAndOrUsingDeMorgan = [&](Instruction::BinaryOps Opcode, |
2671 | bool IsLogical, Value *A, |
2672 | Value *B) -> Value * { |
2673 | bool LocalDoesConsume = DoesConsume; |
2674 | if (!getFreelyInvertedImpl(V: B, WillInvertAllUses: B->hasOneUse(), /*Builder=*/nullptr, |
2675 | DoesConsume&: LocalDoesConsume, Depth)) |
2676 | return nullptr; |
2677 | if (auto *NotA = getFreelyInvertedImpl(V: A, WillInvertAllUses: A->hasOneUse(), Builder, |
2678 | DoesConsume&: LocalDoesConsume, Depth)) { |
2679 | auto *NotB = getFreelyInvertedImpl(V: B, WillInvertAllUses: B->hasOneUse(), Builder, |
2680 | DoesConsume&: LocalDoesConsume, Depth); |
2681 | DoesConsume = LocalDoesConsume; |
2682 | if (IsLogical) |
2683 | return Builder ? Builder->CreateLogicalOp(Opc: Opcode, Cond1: NotA, Cond2: NotB) : NonNull; |
2684 | return Builder ? Builder->CreateBinOp(Opc: Opcode, LHS: NotA, RHS: NotB) : NonNull; |
2685 | } |
2686 | |
2687 | return nullptr; |
2688 | }; |
2689 | |
2690 | if (match(V, P: m_Or(L: m_Value(V&: A), R: m_Value(V&: B)))) |
2691 | return TryInvertAndOrUsingDeMorgan(Instruction::And, /*IsLogical=*/false, A, |
2692 | B); |
2693 | |
2694 | if (match(V, P: m_And(L: m_Value(V&: A), R: m_Value(V&: B)))) |
2695 | return TryInvertAndOrUsingDeMorgan(Instruction::Or, /*IsLogical=*/false, A, |
2696 | B); |
2697 | |
2698 | if (match(V, P: m_LogicalOr(L: m_Value(V&: A), R: m_Value(V&: B)))) |
2699 | return TryInvertAndOrUsingDeMorgan(Instruction::And, /*IsLogical=*/true, A, |
2700 | B); |
2701 | |
2702 | if (match(V, P: m_LogicalAnd(L: m_Value(V&: A), R: m_Value(V&: B)))) |
2703 | return TryInvertAndOrUsingDeMorgan(Instruction::Or, /*IsLogical=*/true, A, |
2704 | B); |
2705 | |
2706 | return nullptr; |
2707 | } |
2708 | |
2709 | Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) { |
2710 | Value *PtrOp = GEP.getOperand(i_nocapture: 0); |
2711 | SmallVector<Value *, 8> Indices(GEP.indices()); |
2712 | Type *GEPType = GEP.getType(); |
2713 | Type *GEPEltType = GEP.getSourceElementType(); |
2714 | if (Value *V = |
2715 | simplifyGEPInst(SrcTy: GEPEltType, Ptr: PtrOp, Indices, NW: GEP.getNoWrapFlags(), |
2716 | Q: SQ.getWithInstruction(I: &GEP))) |
2717 | return replaceInstUsesWith(I&: GEP, V); |
2718 | |
// For vector GEPs, use the generic demanded vector support.
// Skip if the GEP return type is scalable, since the number of elements is
// unknown at compile time.
2722 | if (auto *GEPFVTy = dyn_cast<FixedVectorType>(Val: GEPType)) { |
2723 | auto VWidth = GEPFVTy->getNumElements(); |
2724 | APInt PoisonElts(VWidth, 0); |
2725 | APInt AllOnesEltMask(APInt::getAllOnes(numBits: VWidth)); |
2726 | if (Value *V = SimplifyDemandedVectorElts(V: &GEP, DemandedElts: AllOnesEltMask, |
2727 | PoisonElts)) { |
2728 | if (V != &GEP) |
2729 | return replaceInstUsesWith(I&: GEP, V); |
2730 | return &GEP; |
2731 | } |
2732 | |
2733 | // TODO: 1) Scalarize splat operands, 2) scalarize entire instruction if |
2734 | // possible (decide on canonical form for pointer broadcast), 3) exploit |
2735 | // undef elements to decrease demanded bits |
2736 | } |
2737 | |
// Eliminate unneeded casts for indices, and replace any index over a
// zero-size type with zero, since such an index always displaces by zero.
2740 | bool MadeChange = false; |
2741 | |
2742 | // Index width may not be the same width as pointer width. |
2743 | // Data layout chooses the right type based on supported integer types. |
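// For example (illustrative, assuming a 64-bit index type):
//   getelementptr i32, ptr %p, i16 %i
// becomes:
//   %i.ext = sext i16 %i to i64
//   getelementptr i32, ptr %p, i64 %i.ext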
2744 | Type *NewScalarIndexTy = |
2745 | DL.getIndexType(PtrTy: GEP.getPointerOperandType()->getScalarType()); |
2746 | |
2747 | gep_type_iterator GTI = gep_type_begin(GEP); |
2748 | for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end(); I != E; |
2749 | ++I, ++GTI) { |
2750 | // Skip indices into struct types. |
2751 | if (GTI.isStruct()) |
2752 | continue; |
2753 | |
2754 | Type *IndexTy = (*I)->getType(); |
2755 | Type *NewIndexType = |
2756 | IndexTy->isVectorTy() |
2757 | ? VectorType::get(ElementType: NewScalarIndexTy, |
2758 | EC: cast<VectorType>(Val: IndexTy)->getElementCount()) |
2759 | : NewScalarIndexTy; |
2760 | |
2761 | // If the element type has zero size then any index over it is equivalent |
2762 | // to an index of zero, so replace it with zero if it is not zero already. |
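// For example (illustrative), in
//   getelementptr [0 x i32], ptr %p, i64 %i, i64 %j
// the first index steps over the zero-size type [0 x i32] and always
// displaces by zero bytes, so %i is replaced with 0.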
2763 | Type *EltTy = GTI.getIndexedType(); |
2764 | if (EltTy->isSized() && DL.getTypeAllocSize(Ty: EltTy).isZero()) |
2765 | if (!isa<Constant>(Val: *I) || !match(V: I->get(), P: m_Zero())) { |
2766 | *I = Constant::getNullValue(Ty: NewIndexType); |
2767 | MadeChange = true; |
2768 | } |
2769 | |
2770 | if (IndexTy != NewIndexType) { |
2771 | // If we are using a wider index than needed for this platform, shrink |
2772 | // it to what we need. If narrower, sign-extend it to what we need. |
2773 | // This explicit cast can make subsequent optimizations more obvious. |
2774 | *I = Builder.CreateIntCast(V: *I, DestTy: NewIndexType, isSigned: true); |
2775 | MadeChange = true; |
2776 | } |
2777 | } |
2778 | if (MadeChange) |
2779 | return &GEP; |
2780 | |
2781 | // Canonicalize constant GEPs to i8 type. |
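// For example (illustrative, assuming 4-byte i32 and a 64-bit index type):
//   getelementptr inbounds i32, ptr %p, i64 3
// becomes:
//   getelementptr inbounds i8, ptr %p, i64 12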
2782 | if (!GEPEltType->isIntegerTy(Bitwidth: 8) && GEP.hasAllConstantIndices()) { |
2783 | APInt Offset(DL.getIndexTypeSizeInBits(Ty: GEPType), 0); |
2784 | if (GEP.accumulateConstantOffset(DL, Offset)) |
2785 | return replaceInstUsesWith( |
2786 | I&: GEP, V: Builder.CreatePtrAdd(Ptr: PtrOp, Offset: Builder.getInt(AI: Offset), Name: "" , |
2787 | NW: GEP.getNoWrapFlags())); |
2788 | } |
2789 | |
2790 | // Canonicalize |
2791 | // - scalable GEPs to an explicit offset using the llvm.vscale intrinsic. |
2792 | // This has better support in BasicAA. |
2793 | // - gep i32 p, mul(O, C) -> gep i8, p, mul(O, C*4) to fold the two |
2794 | // multiplies together. |
2795 | if (GEPEltType->isScalableTy() || |
2796 | (!GEPEltType->isIntegerTy(Bitwidth: 8) && GEP.getNumIndices() == 1 && |
2797 | match(V: GEP.getOperand(i_nocapture: 1), |
2798 | P: m_OneUse(SubPattern: m_CombineOr(L: m_Mul(L: m_Value(), R: m_ConstantInt()), |
2799 | R: m_Shl(L: m_Value(), R: m_ConstantInt())))))) { |
2800 | Value *Offset = EmitGEPOffset(GEP: cast<GEPOperator>(Val: &GEP)); |
2801 | return replaceInstUsesWith( |
2802 | I&: GEP, V: Builder.CreatePtrAdd(Ptr: PtrOp, Offset, Name: "" , NW: GEP.getNoWrapFlags())); |
2803 | } |
2804 | |
2805 | // Check to see if the inputs to the PHI node are getelementptr instructions. |
2806 | if (auto *PN = dyn_cast<PHINode>(Val: PtrOp)) { |
2807 | auto *Op1 = dyn_cast<GetElementPtrInst>(Val: PN->getOperand(i_nocapture: 0)); |
2808 | if (!Op1) |
2809 | return nullptr; |
2810 | |
2811 | // Don't fold a GEP into itself through a PHI node. This can only happen |
2812 | // through the back-edge of a loop. Folding a GEP into itself means that |
2813 | // the value of the previous iteration needs to be stored in the meantime, |
2814 | // thus requiring an additional register variable to be live, but not |
2815 | // actually achieving anything (the GEP still needs to be executed once per |
2816 | // loop iteration). |
2817 | if (Op1 == &GEP) |
2818 | return nullptr; |
2819 | |
2820 | int DI = -1; |
2821 | |
2822 | for (auto I = PN->op_begin()+1, E = PN->op_end(); I !=E; ++I) { |
2823 | auto *Op2 = dyn_cast<GetElementPtrInst>(Val&: *I); |
2824 | if (!Op2 || Op1->getNumOperands() != Op2->getNumOperands() || |
2825 | Op1->getSourceElementType() != Op2->getSourceElementType()) |
2826 | return nullptr; |
2827 | |
2828 | // As for Op1 above, don't try to fold a GEP into itself. |
2829 | if (Op2 == &GEP) |
2830 | return nullptr; |
2831 | |
2832 | // Keep track of the type as we walk the GEP. |
2833 | Type *CurTy = nullptr; |
2834 | |
2835 | for (unsigned J = 0, F = Op1->getNumOperands(); J != F; ++J) { |
2836 | if (Op1->getOperand(i_nocapture: J)->getType() != Op2->getOperand(i_nocapture: J)->getType()) |
2837 | return nullptr; |
2838 | |
2839 | if (Op1->getOperand(i_nocapture: J) != Op2->getOperand(i_nocapture: J)) { |
2840 | if (DI == -1) { |
// We have not seen any differences in the GEPs feeding the
// PHI yet, so we record this one if it is allowed to be a
// variable.
2844 | |
// The first two arguments can vary for any GEP; the rest have to be
// static for struct slots.
2847 | if (J > 1) { |
2848 | assert(CurTy && "No current type?" ); |
2849 | if (CurTy->isStructTy()) |
2850 | return nullptr; |
2851 | } |
2852 | |
2853 | DI = J; |
2854 | } else { |
// The GEPs differ in more than one operand. While this could be
// extended to support GEPs that vary in more than one variable, it
// doesn't make sense: it greatly increases the complexity and
// would result in an R+R+R addressing mode, which no backend
// supports directly and which would have to be broken into several
// simpler instructions anyway.
2861 | return nullptr; |
2862 | } |
2863 | } |
2864 | |
2865 | // Sink down a layer of the type for the next iteration. |
2866 | if (J > 0) { |
2867 | if (J == 1) { |
2868 | CurTy = Op1->getSourceElementType(); |
2869 | } else { |
2870 | CurTy = |
2871 | GetElementPtrInst::getTypeAtIndex(Ty: CurTy, Idx: Op1->getOperand(i_nocapture: J)); |
2872 | } |
2873 | } |
2874 | } |
2875 | } |
2876 | |
2877 | // If not all GEPs are identical we'll have to create a new PHI node. |
2878 | // Check that the old PHI node has only one use so that it will get |
2879 | // removed. |
2880 | if (DI != -1 && !PN->hasOneUse()) |
2881 | return nullptr; |
2882 | |
2883 | auto *NewGEP = cast<GetElementPtrInst>(Val: Op1->clone()); |
2884 | if (DI == -1) { |
2885 | // All the GEPs feeding the PHI are identical. Clone one down into our |
2886 | // BB so that it can be merged with the current GEP. |
2887 | } else { |
2888 | // All the GEPs feeding the PHI differ at a single offset. Clone a GEP |
2889 | // into the current block so it can be merged, and create a new PHI to |
2890 | // set that index. |
2891 | PHINode *NewPN; |
2892 | { |
2893 | IRBuilderBase::InsertPointGuard Guard(Builder); |
2894 | Builder.SetInsertPoint(PN); |
2895 | NewPN = Builder.CreatePHI(Ty: Op1->getOperand(i_nocapture: DI)->getType(), |
2896 | NumReservedValues: PN->getNumOperands()); |
2897 | } |
2898 | |
2899 | for (auto &I : PN->operands()) |
2900 | NewPN->addIncoming(V: cast<GEPOperator>(Val&: I)->getOperand(i_nocapture: DI), |
2901 | BB: PN->getIncomingBlock(U: I)); |
2902 | |
2903 | NewGEP->setOperand(i_nocapture: DI, Val_nocapture: NewPN); |
2904 | } |
2905 | |
2906 | NewGEP->insertBefore(BB&: *GEP.getParent(), InsertPos: GEP.getParent()->getFirstInsertionPt()); |
2907 | return replaceOperand(I&: GEP, OpNum: 0, V: NewGEP); |
2908 | } |
2909 | |
2910 | if (auto *Src = dyn_cast<GEPOperator>(Val: PtrOp)) |
2911 | if (Instruction *I = visitGEPOfGEP(GEP, Src)) |
2912 | return I; |
2913 | |
2914 | if (GEP.getNumIndices() == 1) { |
2915 | unsigned AS = GEP.getPointerAddressSpace(); |
2916 | if (GEP.getOperand(i_nocapture: 1)->getType()->getScalarSizeInBits() == |
2917 | DL.getIndexSizeInBits(AS)) { |
2918 | uint64_t TyAllocSize = DL.getTypeAllocSize(Ty: GEPEltType).getFixedValue(); |
2919 | |
2920 | if (TyAllocSize == 1) { |
// Canonicalize (gep i8* X, (ptrtoint Y)-(ptrtoint X)) to (bitcast Y),
// but only if the result pointer is used only as if it were an integer,
// or both pointers point to the same underlying object (otherwise
// provenance is not necessarily retained).
2925 | Value *X = GEP.getPointerOperand(); |
2926 | Value *Y; |
2927 | if (match(V: GEP.getOperand(i_nocapture: 1), |
2928 | P: m_Sub(L: m_PtrToInt(Op: m_Value(V&: Y)), R: m_PtrToInt(Op: m_Specific(V: X)))) && |
2929 | GEPType == Y->getType()) { |
2930 | bool HasSameUnderlyingObject = |
2931 | getUnderlyingObject(V: X) == getUnderlyingObject(V: Y); |
2932 | bool Changed = false; |
2933 | GEP.replaceUsesWithIf(New: Y, ShouldReplace: [&](Use &U) { |
2934 | bool ShouldReplace = HasSameUnderlyingObject || |
2935 | isa<ICmpInst>(Val: U.getUser()) || |
2936 | isa<PtrToIntInst>(Val: U.getUser()); |
2937 | Changed |= ShouldReplace; |
2938 | return ShouldReplace; |
2939 | }); |
2940 | return Changed ? &GEP : nullptr; |
2941 | } |
2942 | } else if (auto *ExactIns = |
2943 | dyn_cast<PossiblyExactOperator>(Val: GEP.getOperand(i_nocapture: 1))) { |
2944 | // Canonicalize (gep T* X, V / sizeof(T)) to (gep i8* X, V) |
2945 | Value *V; |
2946 | if (ExactIns->isExact()) { |
2947 | if ((has_single_bit(Value: TyAllocSize) && |
2948 | match(V: GEP.getOperand(i_nocapture: 1), |
2949 | P: m_Shr(L: m_Value(V), |
2950 | R: m_SpecificInt(V: countr_zero(Val: TyAllocSize))))) || |
2951 | match(V: GEP.getOperand(i_nocapture: 1), |
2952 | P: m_IDiv(L: m_Value(V), R: m_SpecificInt(V: TyAllocSize)))) { |
2953 | return GetElementPtrInst::Create(PointeeType: Builder.getInt8Ty(), |
2954 | Ptr: GEP.getPointerOperand(), IdxList: V, |
2955 | NW: GEP.getNoWrapFlags()); |
2956 | } |
2957 | } |
2958 | if (ExactIns->isExact() && ExactIns->hasOneUse()) { |
// Try to canonicalize a non-i8 element type to i8 when the index is an
// exact instruction (div/shr) with a constant RHS: the non-i8 element
// scale can then be folded into the div/shr (similar to the mul case,
// just inverted).
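// For example (illustrative, with a 4-byte i32):
//   %idx = lshr exact i64 %v, 3
//   %gep = getelementptr i32, ptr %p, i64 %idx
// becomes:
//   %idx2 = lshr exact i64 %v, 1
//   %gep = getelementptr i8, ptr %p, i64 %idx2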
2963 | const APInt *C; |
2964 | std::optional<APInt> NewC; |
2965 | if (has_single_bit(Value: TyAllocSize) && |
2966 | match(V: ExactIns, P: m_Shr(L: m_Value(V), R: m_APInt(Res&: C))) && |
2967 | C->uge(RHS: countr_zero(Val: TyAllocSize))) |
2968 | NewC = *C - countr_zero(Val: TyAllocSize); |
2969 | else if (match(V: ExactIns, P: m_UDiv(L: m_Value(V), R: m_APInt(Res&: C)))) { |
2970 | APInt Quot; |
2971 | uint64_t Rem; |
2972 | APInt::udivrem(LHS: *C, RHS: TyAllocSize, Quotient&: Quot, Remainder&: Rem); |
2973 | if (Rem == 0) |
2974 | NewC = Quot; |
2975 | } else if (match(V: ExactIns, P: m_SDiv(L: m_Value(V), R: m_APInt(Res&: C)))) { |
2976 | APInt Quot; |
2977 | int64_t Rem; |
2978 | APInt::sdivrem(LHS: *C, RHS: TyAllocSize, Quotient&: Quot, Remainder&: Rem); |
// For sdiv we need to make sure we aren't creating INT_MIN / -1.
2980 | if (!Quot.isAllOnes() && Rem == 0) |
2981 | NewC = Quot; |
2982 | } |
2983 | |
2984 | if (NewC.has_value()) { |
2985 | Value *NewOp = Builder.CreateBinOp( |
2986 | Opc: static_cast<Instruction::BinaryOps>(ExactIns->getOpcode()), LHS: V, |
2987 | RHS: ConstantInt::get(Ty: V->getType(), V: *NewC)); |
2988 | cast<BinaryOperator>(Val: NewOp)->setIsExact(); |
2989 | return GetElementPtrInst::Create(PointeeType: Builder.getInt8Ty(), |
2990 | Ptr: GEP.getPointerOperand(), IdxList: NewOp, |
2991 | NW: GEP.getNoWrapFlags()); |
2992 | } |
2993 | } |
2994 | } |
2995 | } |
2996 | } |
2997 | // We do not handle pointer-vector geps here. |
2998 | if (GEPType->isVectorTy()) |
2999 | return nullptr; |
3000 | |
3001 | if (GEP.getNumIndices() == 1) { |
3002 | // We can only preserve inbounds if the original gep is inbounds, the add |
3003 | // is nsw, and the add operands are non-negative. |
3004 | auto CanPreserveInBounds = [&](bool AddIsNSW, Value *Idx1, Value *Idx2) { |
3005 | SimplifyQuery Q = SQ.getWithInstruction(I: &GEP); |
3006 | return GEP.isInBounds() && AddIsNSW && isKnownNonNegative(V: Idx1, SQ: Q) && |
3007 | isKnownNonNegative(V: Idx2, SQ: Q); |
3008 | }; |
3009 | |
3010 | // Try to replace ADD + GEP with GEP + GEP. |
3011 | Value *Idx1, *Idx2; |
3012 | if (match(V: GEP.getOperand(i_nocapture: 1), |
3013 | P: m_OneUse(SubPattern: m_Add(L: m_Value(V&: Idx1), R: m_Value(V&: Idx2))))) { |
3014 | // %idx = add i64 %idx1, %idx2 |
3015 | // %gep = getelementptr i32, ptr %ptr, i64 %idx |
3016 | // as: |
3017 | // %newptr = getelementptr i32, ptr %ptr, i64 %idx1 |
3018 | // %newgep = getelementptr i32, ptr %newptr, i64 %idx2 |
3019 | bool IsInBounds = CanPreserveInBounds( |
3020 | cast<OverflowingBinaryOperator>(Val: GEP.getOperand(i_nocapture: 1))->hasNoSignedWrap(), |
3021 | Idx1, Idx2); |
3022 | auto *NewPtr = |
3023 | Builder.CreateGEP(Ty: GEP.getSourceElementType(), Ptr: GEP.getPointerOperand(), |
3024 | IdxList: Idx1, Name: "" , NW: IsInBounds); |
3025 | return replaceInstUsesWith( |
3026 | I&: GEP, V: Builder.CreateGEP(Ty: GEP.getSourceElementType(), Ptr: NewPtr, IdxList: Idx2, Name: "" , |
3027 | NW: IsInBounds)); |
3028 | } |
3029 | ConstantInt *C; |
3030 | if (match(V: GEP.getOperand(i_nocapture: 1), P: m_OneUse(SubPattern: m_SExtLike(Op: m_OneUse(SubPattern: m_NSWAdd( |
3031 | L: m_Value(V&: Idx1), R: m_ConstantInt(CI&: C))))))) { |
3032 | // %add = add nsw i32 %idx1, idx2 |
3033 | // %sidx = sext i32 %add to i64 |
3034 | // %gep = getelementptr i32, ptr %ptr, i64 %sidx |
3035 | // as: |
3036 | // %newptr = getelementptr i32, ptr %ptr, i32 %idx1 |
3037 | // %newgep = getelementptr i32, ptr %newptr, i32 idx2 |
3038 | bool IsInBounds = CanPreserveInBounds( |
3039 | /*IsNSW=*/true, Idx1, C); |
3040 | auto *NewPtr = Builder.CreateGEP( |
3041 | Ty: GEP.getSourceElementType(), Ptr: GEP.getPointerOperand(), |
3042 | IdxList: Builder.CreateSExt(V: Idx1, DestTy: GEP.getOperand(i_nocapture: 1)->getType()), Name: "" , |
3043 | NW: IsInBounds); |
3044 | return replaceInstUsesWith( |
3045 | I&: GEP, |
3046 | V: Builder.CreateGEP(Ty: GEP.getSourceElementType(), Ptr: NewPtr, |
3047 | IdxList: Builder.CreateSExt(V: C, DestTy: GEP.getOperand(i_nocapture: 1)->getType()), |
3048 | Name: "" , NW: IsInBounds)); |
3049 | } |
3050 | } |
3051 | |
3052 | if (!GEP.isInBounds()) { |
3053 | unsigned IdxWidth = |
3054 | DL.getIndexSizeInBits(AS: PtrOp->getType()->getPointerAddressSpace()); |
3055 | APInt BasePtrOffset(IdxWidth, 0); |
3056 | Value *UnderlyingPtrOp = |
3057 | PtrOp->stripAndAccumulateInBoundsConstantOffsets(DL, |
3058 | Offset&: BasePtrOffset); |
3059 | bool CanBeNull, CanBeFreed; |
3060 | uint64_t DerefBytes = UnderlyingPtrOp->getPointerDereferenceableBytes( |
3061 | DL, CanBeNull, CanBeFreed); |
3062 | if (!CanBeNull && !CanBeFreed && DerefBytes != 0) { |
3063 | if (GEP.accumulateConstantOffset(DL, Offset&: BasePtrOffset) && |
3064 | BasePtrOffset.isNonNegative()) { |
3065 | APInt AllocSize(IdxWidth, DerefBytes); |
3066 | if (BasePtrOffset.ule(RHS: AllocSize)) { |
3067 | return GetElementPtrInst::CreateInBounds( |
3068 | PointeeType: GEP.getSourceElementType(), Ptr: PtrOp, IdxList: Indices, NameStr: GEP.getName()); |
3069 | } |
3070 | } |
3071 | } |
3072 | } |
3073 | |
3074 | if (Instruction *R = foldSelectGEP(GEP, Builder)) |
3075 | return R; |
3076 | |
3077 | return nullptr; |
3078 | } |
3079 | |
3080 | static bool isNeverEqualToUnescapedAlloc(Value *V, const TargetLibraryInfo &TLI, |
3081 | Instruction *AI) { |
3082 | if (isa<ConstantPointerNull>(Val: V)) |
3083 | return true; |
3084 | if (auto *LI = dyn_cast<LoadInst>(Val: V)) |
3085 | return isa<GlobalVariable>(Val: LI->getPointerOperand()); |
3086 | // Two distinct allocations will never be equal. |
3087 | return isAllocLikeFn(V, TLI: &TLI) && V != AI; |
3088 | } |
3089 | |
/// Given a call CB which uses an address UsedV, return true if we can prove
/// the call's only possible effect is storing to UsedV.
3092 | static bool isRemovableWrite(CallBase &CB, Value *UsedV, |
3093 | const TargetLibraryInfo &TLI) { |
3094 | if (!CB.use_empty()) |
3095 | // TODO: add recursion if returned attribute is present |
3096 | return false; |
3097 | |
3098 | if (CB.isTerminator()) |
3099 | // TODO: remove implementation restriction |
3100 | return false; |
3101 | |
3102 | if (!CB.willReturn() || !CB.doesNotThrow()) |
3103 | return false; |
3104 | |
3105 | // If the only possible side effect of the call is writing to the alloca, |
3106 | // and the result isn't used, we can safely remove any reads implied by the |
3107 | // call including those which might read the alloca itself. |
3108 | std::optional<MemoryLocation> Dest = MemoryLocation::getForDest(CI: &CB, TLI); |
3109 | return Dest && Dest->Ptr == UsedV; |
3110 | } |
3111 | |
3112 | static bool isAllocSiteRemovable(Instruction *AI, |
3113 | SmallVectorImpl<WeakTrackingVH> &Users, |
3114 | const TargetLibraryInfo &TLI) { |
3115 | SmallVector<Instruction*, 4> Worklist; |
3116 | const std::optional<StringRef> Family = getAllocationFamily(I: AI, TLI: &TLI); |
3117 | Worklist.push_back(Elt: AI); |
3118 | |
3119 | do { |
3120 | Instruction *PI = Worklist.pop_back_val(); |
3121 | for (User *U : PI->users()) { |
3122 | Instruction *I = cast<Instruction>(Val: U); |
3123 | switch (I->getOpcode()) { |
3124 | default: |
3125 | // Give up the moment we see something we can't handle. |
3126 | return false; |
3127 | |
3128 | case Instruction::AddrSpaceCast: |
3129 | case Instruction::BitCast: |
3130 | case Instruction::GetElementPtr: |
3131 | Users.emplace_back(Args&: I); |
3132 | Worklist.push_back(Elt: I); |
3133 | continue; |
3134 | |
3135 | case Instruction::ICmp: { |
3136 | ICmpInst *ICI = cast<ICmpInst>(Val: I); |
3137 | // We can fold eq/ne comparisons with null to false/true, respectively. |
3138 | // We also fold comparisons in some conditions provided the alloc has |
3139 | // not escaped (see isNeverEqualToUnescapedAlloc). |
3140 | if (!ICI->isEquality()) |
3141 | return false; |
3142 | unsigned OtherIndex = (ICI->getOperand(i_nocapture: 0) == PI) ? 1 : 0; |
3143 | if (!isNeverEqualToUnescapedAlloc(V: ICI->getOperand(i_nocapture: OtherIndex), TLI, AI)) |
3144 | return false; |
3145 | |
3146 | // Do not fold compares to aligned_alloc calls, as they may have to |
3147 | // return null in case the required alignment cannot be satisfied, |
3148 | // unless we can prove that both alignment and size are valid. |
3149 | auto AlignmentAndSizeKnownValid = [](CallBase *CB) { |
// Check if the alignment and size of a call to aligned_alloc are valid,
// that is, the alignment is a power of 2 and the size is a multiple of
// the alignment.
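// For example (illustrative), aligned_alloc(32, 64) has a valid
// alignment/size pair, while aligned_alloc(32, 40) does not and may
// legitimately return null.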
3153 | const APInt *Alignment; |
3154 | const APInt *Size; |
3155 | return match(V: CB->getArgOperand(i: 0), P: m_APInt(Res&: Alignment)) && |
3156 | match(V: CB->getArgOperand(i: 1), P: m_APInt(Res&: Size)) && |
3157 | Alignment->isPowerOf2() && Size->urem(RHS: *Alignment).isZero(); |
3158 | }; |
3159 | auto *CB = dyn_cast<CallBase>(Val: AI); |
3160 | LibFunc TheLibFunc; |
3161 | if (CB && TLI.getLibFunc(FDecl: *CB->getCalledFunction(), F&: TheLibFunc) && |
3162 | TLI.has(F: TheLibFunc) && TheLibFunc == LibFunc_aligned_alloc && |
3163 | !AlignmentAndSizeKnownValid(CB)) |
3164 | return false; |
3165 | Users.emplace_back(Args&: I); |
3166 | continue; |
3167 | } |
3168 | |
3169 | case Instruction::Call: |
3170 | // Ignore no-op and store intrinsics. |
3171 | if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: I)) { |
3172 | switch (II->getIntrinsicID()) { |
3173 | default: |
3174 | return false; |
3175 | |
3176 | case Intrinsic::memmove: |
3177 | case Intrinsic::memcpy: |
3178 | case Intrinsic::memset: { |
3179 | MemIntrinsic *MI = cast<MemIntrinsic>(Val: II); |
3180 | if (MI->isVolatile() || MI->getRawDest() != PI) |
3181 | return false; |
3182 | [[fallthrough]]; |
3183 | } |
3184 | case Intrinsic::assume: |
3185 | case Intrinsic::invariant_start: |
3186 | case Intrinsic::invariant_end: |
3187 | case Intrinsic::lifetime_start: |
3188 | case Intrinsic::lifetime_end: |
3189 | case Intrinsic::objectsize: |
3190 | Users.emplace_back(Args&: I); |
3191 | continue; |
3192 | case Intrinsic::launder_invariant_group: |
3193 | case Intrinsic::strip_invariant_group: |
3194 | Users.emplace_back(Args&: I); |
3195 | Worklist.push_back(Elt: I); |
3196 | continue; |
3197 | } |
3198 | } |
3199 | |
3200 | if (isRemovableWrite(CB&: *cast<CallBase>(Val: I), UsedV: PI, TLI)) { |
3201 | Users.emplace_back(Args&: I); |
3202 | continue; |
3203 | } |
3204 | |
3205 | if (getFreedOperand(CB: cast<CallBase>(Val: I), TLI: &TLI) == PI && |
3206 | getAllocationFamily(I, TLI: &TLI) == Family) { |
3207 | assert(Family); |
3208 | Users.emplace_back(Args&: I); |
3209 | continue; |
3210 | } |
3211 | |
3212 | if (getReallocatedOperand(CB: cast<CallBase>(Val: I)) == PI && |
3213 | getAllocationFamily(I, TLI: &TLI) == Family) { |
3214 | assert(Family); |
3215 | Users.emplace_back(Args&: I); |
3216 | Worklist.push_back(Elt: I); |
3217 | continue; |
3218 | } |
3219 | |
3220 | return false; |
3221 | |
3222 | case Instruction::Store: { |
3223 | StoreInst *SI = cast<StoreInst>(Val: I); |
3224 | if (SI->isVolatile() || SI->getPointerOperand() != PI) |
3225 | return false; |
3226 | Users.emplace_back(Args&: I); |
3227 | continue; |
3228 | } |
3229 | } |
3230 | llvm_unreachable("missing a return?" ); |
3231 | } |
3232 | } while (!Worklist.empty()); |
3233 | return true; |
3234 | } |
3235 | |
3236 | Instruction *InstCombinerImpl::visitAllocSite(Instruction &MI) { |
3237 | assert(isa<AllocaInst>(MI) || isRemovableAlloc(&cast<CallBase>(MI), &TLI)); |
3238 | |
// If we have a malloc call which is only used in any number of comparisons to
// null and free calls, delete the calls and replace the comparisons with true
// or false as appropriate.
3242 | |
// This is based on the principle that we can substitute our own allocation
// function (which will never return null) rather than relying on knowledge of
// the specific function being called. In some sense this can change the
// permitted outputs of a program (when we convert a malloc to an alloca, the
// fact that the allocation is now on the stack is potentially visible, for
// example), but we believe it does so in a permissible manner.
3249 | SmallVector<WeakTrackingVH, 64> Users; |
3250 | |
3251 | // If we are removing an alloca with a dbg.declare, insert dbg.value calls |
3252 | // before each store. |
3253 | SmallVector<DbgVariableIntrinsic *, 8> DVIs; |
3254 | SmallVector<DbgVariableRecord *, 8> DVRs; |
3255 | std::unique_ptr<DIBuilder> DIB; |
3256 | if (isa<AllocaInst>(Val: MI)) { |
3257 | findDbgUsers(DbgInsts&: DVIs, V: &MI, DbgVariableRecords: &DVRs); |
3258 | DIB.reset(p: new DIBuilder(*MI.getModule(), /*AllowUnresolved=*/false)); |
3259 | } |
3260 | |
3261 | if (isAllocSiteRemovable(AI: &MI, Users, TLI)) { |
3262 | for (unsigned i = 0, e = Users.size(); i != e; ++i) { |
// Lower all @llvm.objectsize calls first because they may
// use a bitcast/GEP of the alloca we are removing.
3265 | if (!Users[i]) |
3266 | continue; |
3267 | |
3268 | Instruction *I = cast<Instruction>(Val: &*Users[i]); |
3269 | |
3270 | if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: I)) { |
3271 | if (II->getIntrinsicID() == Intrinsic::objectsize) { |
3272 | SmallVector<Instruction *> InsertedInstructions; |
3273 | Value *Result = lowerObjectSizeCall( |
3274 | ObjectSize: II, DL, TLI: &TLI, AA, /*MustSucceed=*/true, InsertedInstructions: &InsertedInstructions); |
3275 | for (Instruction *Inserted : InsertedInstructions) |
3276 | Worklist.add(I: Inserted); |
3277 | replaceInstUsesWith(I&: *I, V: Result); |
3278 | eraseInstFromFunction(I&: *I); |
3279 | Users[i] = nullptr; // Skip examining in the next loop. |
3280 | } |
3281 | } |
3282 | } |
3283 | for (unsigned i = 0, e = Users.size(); i != e; ++i) { |
3284 | if (!Users[i]) |
3285 | continue; |
3286 | |
3287 | Instruction *I = cast<Instruction>(Val: &*Users[i]); |
3288 | |
3289 | if (ICmpInst *C = dyn_cast<ICmpInst>(Val: I)) { |
3290 | replaceInstUsesWith(I&: *C, |
3291 | V: ConstantInt::get(Ty: Type::getInt1Ty(C&: C->getContext()), |
3292 | V: C->isFalseWhenEqual())); |
3293 | } else if (auto *SI = dyn_cast<StoreInst>(Val: I)) { |
3294 | for (auto *DVI : DVIs) |
3295 | if (DVI->isAddressOfVariable()) |
3296 | ConvertDebugDeclareToDebugValue(DII: DVI, SI, Builder&: *DIB); |
3297 | for (auto *DVR : DVRs) |
3298 | if (DVR->isAddressOfVariable()) |
3299 | ConvertDebugDeclareToDebugValue(DVR, SI, Builder&: *DIB); |
3300 | } else { |
// Casts, GEPs, or anything else: we're about to delete this instruction,
// so it cannot have any valid uses.
3303 | replaceInstUsesWith(I&: *I, V: PoisonValue::get(T: I->getType())); |
3304 | } |
3305 | eraseInstFromFunction(I&: *I); |
3306 | } |
3307 | |
3308 | if (InvokeInst *II = dyn_cast<InvokeInst>(Val: &MI)) { |
3309 | // Replace invoke with a NOP intrinsic to maintain the original CFG |
3310 | Module *M = II->getModule(); |
3311 | Function *F = Intrinsic::getDeclaration(M, id: Intrinsic::donothing); |
3312 | InvokeInst::Create(Func: F, IfNormal: II->getNormalDest(), IfException: II->getUnwindDest(), |
3313 | Args: std::nullopt, NameStr: "" , InsertBefore: II->getParent()); |
3314 | } |
3315 | |
3316 | // Remove debug intrinsics which describe the value contained within the |
3317 | // alloca. In addition to removing dbg.{declare,addr} which simply point to |
3318 | // the alloca, remove dbg.value(<alloca>, ..., DW_OP_deref)'s as well, e.g.: |
3319 | // |
3320 | // ``` |
3321 | // define void @foo(i32 %0) { |
3322 | // %a = alloca i32 ; Deleted. |
3323 | // store i32 %0, i32* %a |
3324 | // dbg.value(i32 %0, "arg0") ; Not deleted. |
3325 | // dbg.value(i32* %a, "arg0", DW_OP_deref) ; Deleted. |
3326 | // call void @trivially_inlinable_no_op(i32* %a) |
3327 | // ret void |
3328 | // } |
3329 | // ``` |
3330 | // |
3331 | // This may not be required if we stop describing the contents of allocas |
3332 | // using dbg.value(<alloca>, ..., DW_OP_deref), but we currently do this in |
3333 | // the LowerDbgDeclare utility. |
3334 | // |
3335 | // If there is a dead store to `%a` in @trivially_inlinable_no_op, the |
3336 | // "arg0" dbg.value may be stale after the call. However, failing to remove |
3337 | // the DW_OP_deref dbg.value causes large gaps in location coverage. |
3338 | // |
3339 | // FIXME: the Assignment Tracking project has now likely made this |
3340 | // redundant (and it's sometimes harmful). |
3341 | for (auto *DVI : DVIs) |
3342 | if (DVI->isAddressOfVariable() || DVI->getExpression()->startsWithDeref()) |
3343 | DVI->eraseFromParent(); |
3344 | for (auto *DVR : DVRs) |
3345 | if (DVR->isAddressOfVariable() || DVR->getExpression()->startsWithDeref()) |
3346 | DVR->eraseFromParent(); |
3347 | |
3348 | return eraseInstFromFunction(I&: MI); |
3349 | } |
3350 | return nullptr; |
3351 | } |
3352 | |
3353 | /// Move the call to free before a NULL test. |
3354 | /// |
/// Check if this free is accessed after its argument has been tested
/// against NULL (property 0).
/// If so, it is legal to move this call into its predecessor block.
3358 | /// |
3359 | /// The move is performed only if the block containing the call to free |
3360 | /// will be removed, i.e.: |
3361 | /// 1. it has only one predecessor P, and P has two successors |
3362 | /// 2. it contains the call, noops, and an unconditional branch |
3363 | /// 3. its successor is the same as its predecessor's successor |
3364 | /// |
/// Profitability is not a concern here; this function should be called
/// only if the caller knows this transformation would be profitable
/// (e.g., for code size).
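///
/// An illustrative sketch of the pattern being matched (value names are
/// made up for the example):
///   pred:
///     %cmp = icmp eq ptr %p, null
///     br i1 %cmp, label %succ, label %free_bb
///   free_bb:
///     call void @free(ptr %p)
///     br label %succ
/// The call to free is hoisted into %pred; %free_bb then becomes empty and
/// can later be removed by SimplifyCFG.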
3368 | static Instruction *tryToMoveFreeBeforeNullTest(CallInst &FI, |
3369 | const DataLayout &DL) { |
3370 | Value *Op = FI.getArgOperand(i: 0); |
3371 | BasicBlock *FreeInstrBB = FI.getParent(); |
3372 | BasicBlock *PredBB = FreeInstrBB->getSinglePredecessor(); |
3373 | |
3374 | // Validate part of constraint #1: Only one predecessor |
// FIXME: We could handle multiple predecessors, but in that case we
// would duplicate the call to free in each predecessor, and it may
// not be profitable even for code size.
3378 | if (!PredBB) |
3379 | return nullptr; |
3380 | |
// Validate constraint #2: Does this block contain only the call to
// free, noops, and an unconditional branch?
3383 | BasicBlock *SuccBB; |
3384 | Instruction *FreeInstrBBTerminator = FreeInstrBB->getTerminator(); |
3385 | if (!match(V: FreeInstrBBTerminator, P: m_UnconditionalBr(Succ&: SuccBB))) |
3386 | return nullptr; |
3387 | |
// If there are only 2 instructions in the block at this point,
// they are the call to free and the unconditional branch.
// If there are more than 2 instructions, check that the extra ones are
// noops, i.e., they won't hurt the performance of the generated code.
3392 | if (FreeInstrBB->size() != 2) { |
3393 | for (const Instruction &Inst : FreeInstrBB->instructionsWithoutDebug()) { |
3394 | if (&Inst == &FI || &Inst == FreeInstrBBTerminator) |
3395 | continue; |
3396 | auto *Cast = dyn_cast<CastInst>(Val: &Inst); |
3397 | if (!Cast || !Cast->isNoopCast(DL)) |
3398 | return nullptr; |
3399 | } |
3400 | } |
3401 | // Validate the rest of constraint #1 by matching on the pred branch. |
3402 | Instruction *TI = PredBB->getTerminator(); |
3403 | BasicBlock *TrueBB, *FalseBB; |
3404 | ICmpInst::Predicate Pred; |
3405 | if (!match(V: TI, P: m_Br(C: m_ICmp(Pred, |
3406 | L: m_CombineOr(L: m_Specific(V: Op), |
3407 | R: m_Specific(V: Op->stripPointerCasts())), |
3408 | R: m_Zero()), |
3409 | T&: TrueBB, F&: FalseBB))) |
3410 | return nullptr; |
3411 | if (Pred != ICmpInst::ICMP_EQ && Pred != ICmpInst::ICMP_NE) |
3412 | return nullptr; |
3413 | |
3414 | // Validate constraint #3: Ensure the null case just falls through. |
3415 | if (SuccBB != (Pred == ICmpInst::ICMP_EQ ? TrueBB : FalseBB)) |
3416 | return nullptr; |
3417 | assert(FreeInstrBB == (Pred == ICmpInst::ICMP_EQ ? FalseBB : TrueBB) && |
3418 | "Broken CFG: missing edge from predecessor to successor" ); |
3419 | |
3420 | // At this point, we know that everything in FreeInstrBB can be moved |
3421 | // before TI. |
3422 | for (Instruction &Instr : llvm::make_early_inc_range(Range&: *FreeInstrBB)) { |
3423 | if (&Instr == FreeInstrBBTerminator) |
3424 | break; |
3425 | Instr.moveBeforePreserving(MovePos: TI); |
3426 | } |
3427 | assert(FreeInstrBB->size() == 1 && |
3428 | "Only the branch instruction should remain" ); |
3429 | |
3430 | // Now that we've moved the call to free before the NULL check, we have to |
3431 | // remove any attributes on its parameter that imply it's non-null, because |
3432 | // those attributes might have only been valid because of the NULL check, and |
3433 | // we can get miscompiles if we keep them. This is conservative if non-null is |
3434 | // also implied by something other than the NULL check, but it's guaranteed to |
3435 | // be correct, and the conservativeness won't matter in practice, since the |
3436 | // attributes are irrelevant for the call to free itself and the pointer |
3437 | // shouldn't be used after the call. |
3438 | AttributeList Attrs = FI.getAttributes(); |
3439 | Attrs = Attrs.removeParamAttribute(C&: FI.getContext(), ArgNo: 0, Kind: Attribute::NonNull); |
3440 | Attribute Dereferenceable = Attrs.getParamAttr(ArgNo: 0, Kind: Attribute::Dereferenceable); |
3441 | if (Dereferenceable.isValid()) { |
3442 | uint64_t Bytes = Dereferenceable.getDereferenceableBytes(); |
3443 | Attrs = Attrs.removeParamAttribute(C&: FI.getContext(), ArgNo: 0, |
3444 | Kind: Attribute::Dereferenceable); |
3445 | Attrs = Attrs.addDereferenceableOrNullParamAttr(C&: FI.getContext(), ArgNo: 0, Bytes); |
3446 | } |
3447 | FI.setAttributes(Attrs); |
3448 | |
3449 | return &FI; |
3450 | } |
3451 | |
3452 | Instruction *InstCombinerImpl::visitFree(CallInst &FI, Value *Op) { |
3453 | // free undef -> unreachable. |
3454 | if (isa<UndefValue>(Val: Op)) { |
3455 | // Leave a marker since we can't modify the CFG here. |
3456 | CreateNonTerminatorUnreachable(InsertAt: &FI); |
3457 | return eraseInstFromFunction(I&: FI); |
3458 | } |
3459 | |
// If we have 'free null', delete the instruction. This can happen in STL code
// when lots of inlining happens.
3462 | if (isa<ConstantPointerNull>(Val: Op)) |
3463 | return eraseInstFromFunction(I&: FI); |
3464 | |
3465 | // If we had free(realloc(...)) with no intervening uses, then eliminate the |
3466 | // realloc() entirely. |
3467 | CallInst *CI = dyn_cast<CallInst>(Val: Op); |
3468 | if (CI && CI->hasOneUse()) |
3469 | if (Value *ReallocatedOp = getReallocatedOperand(CB: CI)) |
3470 | return eraseInstFromFunction(I&: *replaceInstUsesWith(I&: *CI, V: ReallocatedOp)); |
3471 | |
// If we optimize for code size, try to move the call to free before the null
// test so that SimplifyCFG can remove the empty block and dead code
// elimination can remove the branch. I.e., this helps to turn something like:
3475 | // if (foo) free(foo); |
3476 | // into |
3477 | // free(foo); |
3478 | // |
3479 | // Note that we can only do this for 'free' and not for any flavor of |
3480 | // 'operator delete'; there is no 'operator delete' symbol for which we are |
3481 | // permitted to invent a call, even if we're passing in a null pointer. |
3482 | if (MinimizeSize) { |
3483 | LibFunc Func; |
3484 | if (TLI.getLibFunc(CB: FI, F&: Func) && TLI.has(F: Func) && Func == LibFunc_free) |
3485 | if (Instruction *I = tryToMoveFreeBeforeNullTest(FI, DL)) |
3486 | return I; |
3487 | } |
3488 | |
3489 | return nullptr; |
3490 | } |
3491 | |
3492 | Instruction *InstCombinerImpl::visitReturnInst(ReturnInst &RI) { |
3493 | Value *RetVal = RI.getReturnValue(); |
3494 | if (!RetVal || !AttributeFuncs::isNoFPClassCompatibleType(Ty: RetVal->getType())) |
3495 | return nullptr; |
3496 | |
3497 | Function *F = RI.getFunction(); |
3498 | FPClassTest ReturnClass = F->getAttributes().getRetNoFPClass(); |
3499 | if (ReturnClass == fcNone) |
3500 | return nullptr; |
3501 | |
3502 | KnownFPClass KnownClass; |
3503 | Value *Simplified = |
3504 | SimplifyDemandedUseFPClass(V: RetVal, DemandedMask: ~ReturnClass, Known&: KnownClass, Depth: 0, CxtI: &RI); |
3505 | if (!Simplified) |
3506 | return nullptr; |
3507 | |
3508 | return ReturnInst::Create(C&: RI.getContext(), retVal: Simplified); |
3509 | } |
3510 | |
3511 | // WARNING: keep in sync with SimplifyCFGOpt::simplifyUnreachable()! |
3512 | bool InstCombinerImpl::removeInstructionsBeforeUnreachable(Instruction &I) { |
3513 | // Try to remove the previous instruction if it must lead to unreachable. |
3514 | // This includes instructions like stores and "llvm.assume" that may not get |
3515 | // removed by simple dead code elimination. |
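// For example (illustrative), in a block that ends with
//   store i32 1, ptr %p
//   call void @llvm.assume(i1 %c)
//   unreachable
// both the store and the assume are erased, because each is guaranteed to
// transfer execution to the unreachable terminator.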
3516 | bool Changed = false; |
3517 | while (Instruction *Prev = I.getPrevNonDebugInstruction()) { |
3518 | // While we theoretically can erase EH, that would result in a block that |
3519 | // used to start with an EH no longer starting with EH, which is invalid. |
3520 | // To make it valid, we'd need to fixup predecessors to no longer refer to |
3521 | // this block, but that changes CFG, which is not allowed in InstCombine. |
3522 | if (Prev->isEHPad()) |
3523 | break; // Can not drop any more instructions. We're done here. |
3524 | |
3525 | if (!isGuaranteedToTransferExecutionToSuccessor(I: Prev)) |
3526 | break; // Can not drop any more instructions. We're done here. |
3527 | // Otherwise, this instruction can be freely erased, |
3528 | // even if it is not side-effect free. |
3529 | |
3530 | // A value may still have uses before we process it here (for example, in |
3531 | // another unreachable block), so convert those to poison. |
3532 | replaceInstUsesWith(I&: *Prev, V: PoisonValue::get(T: Prev->getType())); |
3533 | eraseInstFromFunction(I&: *Prev); |
3534 | Changed = true; |
3535 | } |
3536 | return Changed; |
3537 | } |
3538 | |
3539 | Instruction *InstCombinerImpl::visitUnreachableInst(UnreachableInst &I) { |
3540 | removeInstructionsBeforeUnreachable(I); |
3541 | return nullptr; |
3542 | } |
3543 | |
3544 | Instruction *InstCombinerImpl::visitUnconditionalBranchInst(BranchInst &BI) { |
3545 | assert(BI.isUnconditional() && "Only for unconditional branches." ); |
3546 | |
// If the second-to-last instruction in the basic block is a store (excluding
// debug info and bitcasts of pointers) and the block ends with an
// unconditional branch, try to move the store to the successor block.
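//
// For example (illustrative IR): if this block ends with
//   store i32 %v, ptr %p
//   br label %succ
// and the other predecessor of %succ also stores to %p, the two stores may
// be merged by mergeStoreIntoSuccessor into a single store in %succ fed by a
// phi of the stored values.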
3550 | |
3551 | auto GetLastSinkableStore = [](BasicBlock::iterator BBI) { |
3552 | auto IsNoopInstrForStoreMerging = [](BasicBlock::iterator BBI) { |
3553 | return BBI->isDebugOrPseudoInst() || |
3554 | (isa<BitCastInst>(Val: BBI) && BBI->getType()->isPointerTy()); |
3555 | }; |
3556 | |
3557 | BasicBlock::iterator FirstInstr = BBI->getParent()->begin(); |
3558 | do { |
3559 | if (BBI != FirstInstr) |
3560 | --BBI; |
3561 | } while (BBI != FirstInstr && IsNoopInstrForStoreMerging(BBI)); |
3562 | |
3563 | return dyn_cast<StoreInst>(Val&: BBI); |
3564 | }; |
3565 | |
3566 | if (StoreInst *SI = GetLastSinkableStore(BasicBlock::iterator(BI))) |
3567 | if (mergeStoreIntoSuccessor(SI&: *SI)) |
3568 | return &BI; |
3569 | |
3570 | return nullptr; |
3571 | } |
3572 | |
3573 | void InstCombinerImpl::addDeadEdge(BasicBlock *From, BasicBlock *To, |
3574 | SmallVectorImpl<BasicBlock *> &Worklist) { |
3575 | if (!DeadEdges.insert(V: {From, To}).second) |
3576 | return; |
3577 | |
3578 | // Replace phi node operands in successor with poison. |
3579 | for (PHINode &PN : To->phis()) |
3580 | for (Use &U : PN.incoming_values()) |
3581 | if (PN.getIncomingBlock(U) == From && !isa<PoisonValue>(Val: U)) { |
3582 | replaceUse(U, NewValue: PoisonValue::get(T: PN.getType())); |
3583 | addToWorklist(I: &PN); |
3584 | MadeIRChange = true; |
3585 | } |
3586 | |
3587 | Worklist.push_back(Elt: To); |
3588 | } |
3589 | |
// Under the assumption that I is unreachable, remove it and the following
// instructions. Changes are reported directly to MadeIRChange.
3592 | void InstCombinerImpl::handleUnreachableFrom( |
3593 | Instruction *I, SmallVectorImpl<BasicBlock *> &Worklist) { |
3594 | BasicBlock *BB = I->getParent(); |
3595 | for (Instruction &Inst : make_early_inc_range( |
3596 | Range: make_range(x: std::next(x: BB->getTerminator()->getReverseIterator()), |
3597 | y: std::next(x: I->getReverseIterator())))) { |
3598 | if (!Inst.use_empty() && !Inst.getType()->isTokenTy()) { |
3599 | replaceInstUsesWith(I&: Inst, V: PoisonValue::get(T: Inst.getType())); |
3600 | MadeIRChange = true; |
3601 | } |
3602 | if (Inst.isEHPad() || Inst.getType()->isTokenTy()) |
3603 | continue; |
3604 | // RemoveDIs: erase debug-info on this instruction manually. |
3605 | Inst.dropDbgRecords(); |
3606 | eraseInstFromFunction(I&: Inst); |
3607 | MadeIRChange = true; |
3608 | } |
3609 | |
3610 | SmallVector<Value *> Changed; |
3611 | if (handleUnreachableTerminator(I: BB->getTerminator(), PoisonedValues&: Changed)) { |
3612 | MadeIRChange = true; |
3613 | for (Value *V : Changed) |
3614 | addToWorklist(I: cast<Instruction>(Val: V)); |
3615 | } |
3616 | |
3617 | // Handle potentially dead successors. |
3618 | for (BasicBlock *Succ : successors(BB)) |
3619 | addDeadEdge(From: BB, To: Succ, Worklist); |
3620 | } |
3621 | |
3622 | void InstCombinerImpl::handlePotentiallyDeadBlocks( |
3623 | SmallVectorImpl<BasicBlock *> &Worklist) { |
3624 | while (!Worklist.empty()) { |
3625 | BasicBlock *BB = Worklist.pop_back_val(); |
3626 | if (!all_of(Range: predecessors(BB), P: [&](BasicBlock *Pred) { |
3627 | return DeadEdges.contains(V: {Pred, BB}) || DT.dominates(A: BB, B: Pred); |
3628 | })) |
3629 | continue; |
3630 | |
3631 | handleUnreachableFrom(I: &BB->front(), Worklist); |
3632 | } |
3633 | } |
3634 | |
3635 | void InstCombinerImpl::handlePotentiallyDeadSuccessors(BasicBlock *BB, |
3636 | BasicBlock *LiveSucc) { |
3637 | SmallVector<BasicBlock *> Worklist; |
3638 | for (BasicBlock *Succ : successors(BB)) { |
3639 | // The live successor isn't dead. |
3640 | if (Succ == LiveSucc) |
3641 | continue; |
3642 | |
3643 | addDeadEdge(From: BB, To: Succ, Worklist); |
3644 | } |
3645 | |
3646 | handlePotentiallyDeadBlocks(Worklist); |
3647 | } |
3648 | |
3649 | Instruction *InstCombinerImpl::visitBranchInst(BranchInst &BI) { |
3650 | if (BI.isUnconditional()) |
3651 | return visitUnconditionalBranchInst(BI); |
3652 | |
3653 | // Change br (not X), label True, label False to: br X, label False, True |
3654 | Value *Cond = BI.getCondition(); |
3655 | Value *X; |
3656 | if (match(V: Cond, P: m_Not(V: m_Value(V&: X))) && !isa<Constant>(Val: X)) { |
3657 | // Swap Destinations and condition... |
3658 | BI.swapSuccessors(); |
3659 | if (BPI) |
3660 | BPI->swapSuccEdgesProbabilities(Src: BI.getParent()); |
3661 | return replaceOperand(I&: BI, OpNum: 0, V: X); |
3662 | } |
3663 | |
3664 | // Canonicalize logical-and-with-invert as logical-or-with-invert. |
3665 | // This is done by inverting the condition and swapping successors: |
3666 | // br (X && !Y), T, F --> br !(X && !Y), F, T --> br (!X || Y), F, T |
3667 | Value *Y; |
3668 | if (isa<SelectInst>(Val: Cond) && |
3669 | match(V: Cond, |
3670 | P: m_OneUse(SubPattern: m_LogicalAnd(L: m_Value(V&: X), R: m_OneUse(SubPattern: m_Not(V: m_Value(V&: Y))))))) { |
3671 | Value *NotX = Builder.CreateNot(V: X, Name: "not." + X->getName()); |
3672 | Value *Or = Builder.CreateLogicalOr(Cond1: NotX, Cond2: Y); |
3673 | BI.swapSuccessors(); |
3674 | if (BPI) |
3675 | BPI->swapSuccEdgesProbabilities(Src: BI.getParent()); |
3676 | return replaceOperand(I&: BI, OpNum: 0, V: Or); |
3677 | } |
3678 | |
3679 | // If the condition is irrelevant, remove the use so that other |
3680 | // transforms on the condition become more effective. |
3681 | if (!isa<ConstantInt>(Val: Cond) && BI.getSuccessor(i: 0) == BI.getSuccessor(i: 1)) |
3682 | return replaceOperand(I&: BI, OpNum: 0, V: ConstantInt::getFalse(Ty: Cond->getType())); |
3683 | |
3684 | // Canonicalize, for example, fcmp_one -> fcmp_oeq. |
3685 | CmpInst::Predicate Pred; |
3686 | if (match(V: Cond, P: m_OneUse(SubPattern: m_FCmp(Pred, L: m_Value(), R: m_Value()))) && |
3687 | !isCanonicalPredicate(Pred)) { |
3688 | // Swap destinations and condition. |
3689 | auto *Cmp = cast<CmpInst>(Val: Cond); |
3690 | Cmp->setPredicate(CmpInst::getInversePredicate(pred: Pred)); |
3691 | BI.swapSuccessors(); |
3692 | if (BPI) |
3693 | BPI->swapSuccEdgesProbabilities(Src: BI.getParent()); |
3694 | Worklist.push(I: Cmp); |
3695 | return &BI; |
3696 | } |
3697 | |
3698 | if (isa<UndefValue>(Val: Cond)) { |
3699 | handlePotentiallyDeadSuccessors(BB: BI.getParent(), /*LiveSucc*/ nullptr); |
3700 | return nullptr; |
3701 | } |
3702 | if (auto *CI = dyn_cast<ConstantInt>(Val: Cond)) { |
3703 | handlePotentiallyDeadSuccessors(BB: BI.getParent(), |
3704 | LiveSucc: BI.getSuccessor(i: !CI->getZExtValue())); |
3705 | return nullptr; |
3706 | } |
3707 | |
3708 | DC.registerBranch(BI: &BI); |
3709 | return nullptr; |
3710 | } |
3711 | |
3712 | // Replaces (switch (select cond, X, C)/(select cond, C, X)) with (switch X) if |
3713 | // we can prove that both (switch C) and (switch X) go to the default when cond |
3714 | // is false/true. |
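//
// For example (illustrative IR, with made-up value names):
//   %cond = icmp uge i32 %x, 4
//   %sel  = select i1 %cond, i32 7, i32 %x
//   switch i32 %sel, label %default [
//     i32 0, label %bb0
//     i32 1, label %bb1
//     i32 2, label %bb2
//   ]
// Case 7 is not handled, so whenever %cond is true the switch on %sel goes
// to %default, and any %x >= 4 goes to %default as well; the switch can
// therefore use %x directly.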
3715 | static Value *simplifySwitchOnSelectUsingRanges(SwitchInst &SI, |
3716 | SelectInst *Select, |
3717 | bool IsTrueArm) { |
3718 | unsigned CstOpIdx = IsTrueArm ? 1 : 2; |
3719 | auto *C = dyn_cast<ConstantInt>(Val: Select->getOperand(i_nocapture: CstOpIdx)); |
3720 | if (!C) |
3721 | return nullptr; |
3722 | |
3723 | BasicBlock *CstBB = SI.findCaseValue(C)->getCaseSuccessor(); |
3724 | if (CstBB != SI.getDefaultDest()) |
3725 | return nullptr; |
3726 | Value *X = Select->getOperand(i_nocapture: 3 - CstOpIdx); |
3727 | ICmpInst::Predicate Pred; |
3728 | const APInt *RHSC; |
3729 | if (!match(V: Select->getCondition(), |
3730 | P: m_ICmp(Pred, L: m_Specific(V: X), R: m_APInt(Res&: RHSC)))) |
3731 | return nullptr; |
3732 | if (IsTrueArm) |
3733 | Pred = ICmpInst::getInversePredicate(pred: Pred); |
3734 | |
3735 | // See whether we can replace the select with X |
3736 | ConstantRange CR = ConstantRange::makeExactICmpRegion(Pred, Other: *RHSC); |
3737 | for (auto Case : SI.cases()) |
3738 | if (!CR.contains(Val: Case.getCaseValue()->getValue())) |
3739 | return nullptr; |
3740 | |
3741 | return X; |
3742 | } |
3743 | |
3744 | Instruction *InstCombinerImpl::visitSwitchInst(SwitchInst &SI) { |
3745 | Value *Cond = SI.getCondition(); |
3746 | Value *Op0; |
3747 | ConstantInt *AddRHS; |
3748 | if (match(V: Cond, P: m_Add(L: m_Value(V&: Op0), R: m_ConstantInt(CI&: AddRHS)))) { |
3749 | // Change 'switch (X+4) case 1:' into 'switch (X) case -3'. |
3750 | for (auto Case : SI.cases()) { |
3751 | Constant *NewCase = ConstantExpr::getSub(C1: Case.getCaseValue(), C2: AddRHS); |
3752 | assert(isa<ConstantInt>(NewCase) && |
3753 | "Result of expression should be constant" ); |
3754 | Case.setValue(cast<ConstantInt>(Val: NewCase)); |
3755 | } |
3756 | return replaceOperand(I&: SI, OpNum: 0, V: Op0); |
3757 | } |
3758 | |
3759 | ConstantInt *SubLHS; |
3760 | if (match(V: Cond, P: m_Sub(L: m_ConstantInt(CI&: SubLHS), R: m_Value(V&: Op0)))) { |
3761 | // Change 'switch (1-X) case 1:' into 'switch (X) case 0'. |
3762 | for (auto Case : SI.cases()) { |
3763 | Constant *NewCase = ConstantExpr::getSub(C1: SubLHS, C2: Case.getCaseValue()); |
3764 | assert(isa<ConstantInt>(NewCase) && |
3765 | "Result of expression should be constant" ); |
3766 | Case.setValue(cast<ConstantInt>(Val: NewCase)); |
3767 | } |
3768 | return replaceOperand(I&: SI, OpNum: 0, V: Op0); |
3769 | } |
3770 | |
3771 | uint64_t ShiftAmt; |
3772 | if (match(V: Cond, P: m_Shl(L: m_Value(V&: Op0), R: m_ConstantInt(V&: ShiftAmt))) && |
3773 | ShiftAmt < Op0->getType()->getScalarSizeInBits() && |
3774 | all_of(Range: SI.cases(), P: [&](const auto &Case) { |
3775 | return Case.getCaseValue()->getValue().countr_zero() >= ShiftAmt; |
3776 | })) { |
3777 | // Change 'switch (X << 2) case 4:' into 'switch (X) case 1:'. |
3778 | OverflowingBinaryOperator *Shl = cast<OverflowingBinaryOperator>(Val: Cond); |
3779 | if (Shl->hasNoUnsignedWrap() || Shl->hasNoSignedWrap() || |
3780 | Shl->hasOneUse()) { |
3781 | Value *NewCond = Op0; |
3782 | if (!Shl->hasNoUnsignedWrap() && !Shl->hasNoSignedWrap()) { |
3783 | // If the shift may wrap, we need to mask off the shifted bits. |
3784 | unsigned BitWidth = Op0->getType()->getScalarSizeInBits(); |
3785 | NewCond = Builder.CreateAnd( |
3786 | LHS: Op0, RHS: APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: BitWidth - ShiftAmt)); |
3787 | } |
3788 | for (auto Case : SI.cases()) { |
3789 | const APInt &CaseVal = Case.getCaseValue()->getValue(); |
3790 | APInt ShiftedCase = Shl->hasNoSignedWrap() ? CaseVal.ashr(ShiftAmt) |
3791 | : CaseVal.lshr(shiftAmt: ShiftAmt); |
3792 | Case.setValue(ConstantInt::get(Context&: SI.getContext(), V: ShiftedCase)); |
3793 | } |
3794 | return replaceOperand(I&: SI, OpNum: 0, V: NewCond); |
3795 | } |
3796 | } |
3797 | |
3798 | // Fold switch(zext/sext(X)) into switch(X) if possible. |
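// For instance (an illustrative case): with
//   %wide = zext i8 %x to i32
//   switch i32 %wide, ...
// every case value in [0, 255] can be truncated back to i8 so the switch can
// test %x directly, whereas a case value such as 300 blocks the fold because
// it can never be produced by the zext.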
3799 | if (match(V: Cond, P: m_ZExtOrSExt(Op: m_Value(V&: Op0)))) { |
3800 | bool IsZExt = isa<ZExtInst>(Val: Cond); |
3801 | Type *SrcTy = Op0->getType(); |
3802 | unsigned NewWidth = SrcTy->getScalarSizeInBits(); |
3803 | |
3804 | if (all_of(Range: SI.cases(), P: [&](const auto &Case) { |
3805 | const APInt &CaseVal = Case.getCaseValue()->getValue(); |
3806 | return IsZExt ? CaseVal.isIntN(N: NewWidth) |
3807 | : CaseVal.isSignedIntN(N: NewWidth); |
3808 | })) { |
3809 | for (auto &Case : SI.cases()) { |
3810 | APInt TruncatedCase = Case.getCaseValue()->getValue().trunc(width: NewWidth); |
3811 | Case.setValue(ConstantInt::get(Context&: SI.getContext(), V: TruncatedCase)); |
3812 | } |
3813 | return replaceOperand(I&: SI, OpNum: 0, V: Op0); |
3814 | } |
3815 | } |
3816 | |
3817 | // Fold switch(select cond, X, Y) into switch(X/Y) if possible |
3818 | if (auto *Select = dyn_cast<SelectInst>(Val: Cond)) { |
3819 | if (Value *V = |
3820 | simplifySwitchOnSelectUsingRanges(SI, Select, /*IsTrueArm=*/true)) |
3821 | return replaceOperand(I&: SI, OpNum: 0, V); |
3822 | if (Value *V = |
3823 | simplifySwitchOnSelectUsingRanges(SI, Select, /*IsTrueArm=*/false)) |
3824 | return replaceOperand(I&: SI, OpNum: 0, V); |
3825 | } |
3826 | |
3827 | KnownBits Known = computeKnownBits(V: Cond, Depth: 0, CxtI: &SI); |
3828 | unsigned LeadingKnownZeros = Known.countMinLeadingZeros(); |
3829 | unsigned LeadingKnownOnes = Known.countMinLeadingOnes(); |
3830 | |
3831 | // Compute the number of leading bits we can ignore. |
3832 | // TODO: A better way to determine this would use ComputeNumSignBits(). |
3833 | for (const auto &C : SI.cases()) { |
3834 | LeadingKnownZeros = |
3835 | std::min(a: LeadingKnownZeros, b: C.getCaseValue()->getValue().countl_zero()); |
3836 | LeadingKnownOnes = |
3837 | std::min(a: LeadingKnownOnes, b: C.getCaseValue()->getValue().countl_one()); |
3838 | } |
3839 | |
3840 | unsigned NewWidth = Known.getBitWidth() - std::max(a: LeadingKnownZeros, b: LeadingKnownOnes); |
3841 | |
3842 | // Shrink the condition operand if the new type is smaller than the old type. |
3843 | // But do not shrink to a non-standard type, because the backend can't generate |
3844 | // good code for that yet. |
3845 | // TODO: We can make it aggressive again after fixing PR39569. |
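// As an illustrative example: if the condition is an i32 whose 24 high bits
// are known zero and every case constant also fits in 8 bits, the switch can
// be rewritten to test 'trunc i32 %cond to i8' against the correspondingly
// truncated case values (assuming i8 is an acceptable type here).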
3846 | if (NewWidth > 0 && NewWidth < Known.getBitWidth() && |
3847 | shouldChangeType(FromWidth: Known.getBitWidth(), ToWidth: NewWidth)) { |
3848 | IntegerType *Ty = IntegerType::get(C&: SI.getContext(), NumBits: NewWidth); |
3849 | Builder.SetInsertPoint(&SI); |
3850 | Value *NewCond = Builder.CreateTrunc(V: Cond, DestTy: Ty, Name: "trunc" ); |
3851 | |
3852 | for (auto Case : SI.cases()) { |
3853 | APInt TruncatedCase = Case.getCaseValue()->getValue().trunc(width: NewWidth); |
3854 | Case.setValue(ConstantInt::get(Context&: SI.getContext(), V: TruncatedCase)); |
3855 | } |
3856 | return replaceOperand(I&: SI, OpNum: 0, V: NewCond); |
3857 | } |
3858 | |
3859 | if (isa<UndefValue>(Val: Cond)) { |
3860 | handlePotentiallyDeadSuccessors(BB: SI.getParent(), /*LiveSucc*/ nullptr); |
3861 | return nullptr; |
3862 | } |
3863 | if (auto *CI = dyn_cast<ConstantInt>(Val: Cond)) { |
3864 | handlePotentiallyDeadSuccessors(BB: SI.getParent(), |
3865 | LiveSucc: SI.findCaseValue(C: CI)->getCaseSuccessor()); |
3866 | return nullptr; |
3867 | } |
3868 | |
3869 | return nullptr; |
3870 | } |
3871 | |
3872 | Instruction * |
3873 | InstCombinerImpl::foldExtractOfOverflowIntrinsic(ExtractValueInst &EV) { |
3874 | auto *WO = dyn_cast<WithOverflowInst>(Val: EV.getAggregateOperand()); |
3875 | if (!WO) |
3876 | return nullptr; |
3877 | |
3878 | Intrinsic::ID OvID = WO->getIntrinsicID(); |
3879 | const APInt *C = nullptr; |
3880 | if (match(V: WO->getRHS(), P: m_APIntAllowPoison(Res&: C))) { |
3881 | if (*EV.idx_begin() == 0 && (OvID == Intrinsic::smul_with_overflow || |
3882 | OvID == Intrinsic::umul_with_overflow)) { |
3883 | // extractvalue (any_mul_with_overflow X, -1), 0 --> -X |
3884 | if (C->isAllOnes()) |
3885 | return BinaryOperator::CreateNeg(Op: WO->getLHS()); |
3886 | // extractvalue (any_mul_with_overflow X, 2^n), 0 --> X << n |
3887 | if (C->isPowerOf2()) { |
3888 | return BinaryOperator::CreateShl( |
3889 | V1: WO->getLHS(), |
3890 | V2: ConstantInt::get(Ty: WO->getLHS()->getType(), V: C->logBase2())); |
3891 | } |
3892 | } |
3893 | } |
3894 | |
3895 | // We're extracting from an overflow intrinsic. See if we're the only user. |
3896 | // That allows us to simplify multiple result intrinsics to simpler things |
3897 | // that just get one value. |
3898 | if (!WO->hasOneUse()) |
3899 | return nullptr; |
3900 | |
3901 | // Check if we're grabbing only the result of a 'with overflow' intrinsic |
3902 | // and replace it with a traditional binary instruction. |
3903 | if (*EV.idx_begin() == 0) { |
3904 | Instruction::BinaryOps BinOp = WO->getBinaryOp(); |
3905 | Value *LHS = WO->getLHS(), *RHS = WO->getRHS(); |
3906 | // Replace the old instruction's uses with poison. |
3907 | replaceInstUsesWith(I&: *WO, V: PoisonValue::get(T: WO->getType())); |
3908 | eraseInstFromFunction(I&: *WO); |
3909 | return BinaryOperator::Create(Op: BinOp, S1: LHS, S2: RHS); |
3910 | } |
3911 | |
3912 | assert(*EV.idx_begin() == 1 && "Unexpected extract index for overflow inst" ); |
3913 | |
3914 | // (usub LHS, RHS) overflows when LHS is unsigned-less-than RHS. |
3915 | if (OvID == Intrinsic::usub_with_overflow) |
3916 | return new ICmpInst(ICmpInst::ICMP_ULT, WO->getLHS(), WO->getRHS()); |
3917 | |
3918 | // smul with i1 types overflows when both sides are set: -1 * -1 == +1, but |
3919 | // +1 is not possible because we assume signed values. |
3920 | if (OvID == Intrinsic::smul_with_overflow && |
3921 | WO->getLHS()->getType()->isIntOrIntVectorTy(BitWidth: 1)) |
3922 | return BinaryOperator::CreateAnd(V1: WO->getLHS(), V2: WO->getRHS()); |
3923 | |
3924 | // extractvalue (umul_with_overflow X, X), 1 -> X u> 2^(N/2)-1 |
3925 | if (OvID == Intrinsic::umul_with_overflow && WO->getLHS() == WO->getRHS()) { |
3926 | unsigned BitWidth = WO->getLHS()->getType()->getScalarSizeInBits(); |
3927 | // Only handle even bitwidths for performance reasons. |
3928 | if (BitWidth % 2 == 0) |
3929 | return new ICmpInst( |
3930 | ICmpInst::ICMP_UGT, WO->getLHS(), |
3931 | ConstantInt::get(Ty: WO->getLHS()->getType(), |
3932 | V: APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: BitWidth / 2))); |
3933 | } |
3934 | |
3935 | // If only the overflow result is used, and the right hand side is a |
3936 | // constant (or constant splat), we can remove the intrinsic by directly |
3937 | // checking for overflow. |
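// An illustrative instance (constants made up): for
//   extractvalue (sadd_with_overflow i8 %x, 100), 1
// the no-wrap range for %x is [-128, 28), so the overflow bit is equivalent to
// a signed comparison checking that %x is at least 28.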
3938 | if (C) { |
3939 | // Compute the no-wrap range for LHS given RHS=C, then construct an |
3940 | // equivalent icmp, potentially using an offset. |
3941 | ConstantRange NWR = ConstantRange::makeExactNoWrapRegion( |
3942 | BinOp: WO->getBinaryOp(), Other: *C, NoWrapKind: WO->getNoWrapKind()); |
3943 | |
3944 | CmpInst::Predicate Pred; |
3945 | APInt NewRHSC, Offset; |
3946 | NWR.getEquivalentICmp(Pred, RHS&: NewRHSC, Offset); |
3947 | auto *OpTy = WO->getRHS()->getType(); |
3948 | auto *NewLHS = WO->getLHS(); |
3949 | if (Offset != 0) |
3950 | NewLHS = Builder.CreateAdd(LHS: NewLHS, RHS: ConstantInt::get(Ty: OpTy, V: Offset)); |
3951 | return new ICmpInst(ICmpInst::getInversePredicate(pred: Pred), NewLHS, |
3952 | ConstantInt::get(Ty: OpTy, V: NewRHSC)); |
3953 | } |
3954 | |
3955 | return nullptr; |
3956 | } |
3957 | |
3958 | Instruction *InstCombinerImpl::visitExtractValueInst(ExtractValueInst &EV) { |
3959 | Value *Agg = EV.getAggregateOperand(); |
3960 | |
3961 | if (!EV.hasIndices()) |
3962 | return replaceInstUsesWith(I&: EV, V: Agg); |
3963 | |
3964 | if (Value *V = simplifyExtractValueInst(Agg, Idxs: EV.getIndices(), |
3965 | Q: SQ.getWithInstruction(I: &EV))) |
3966 | return replaceInstUsesWith(I&: EV, V); |
3967 | |
3968 | if (InsertValueInst *IV = dyn_cast<InsertValueInst>(Val: Agg)) { |
3969 | // We're extracting from an insertvalue instruction, compare the indices |
3970 | const unsigned *exti, *exte, *insi, *inse; |
3971 | for (exti = EV.idx_begin(), insi = IV->idx_begin(), |
3972 | exte = EV.idx_end(), inse = IV->idx_end(); |
3973 | exti != exte && insi != inse; |
3974 | ++exti, ++insi) { |
3975 | if (*insi != *exti) |
3976 | // The insert and extract both reference distinctly different elements. |
3977 | // This means the extract is not influenced by the insert, and we can |
3978 | // replace the aggregate operand of the extract with the aggregate |
3979 | // operand of the insert. i.e., replace |
3980 | // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1 |
3981 | // %E = extractvalue { i32, { i32 } } %I, 0 |
3982 | // with |
3983 | // %E = extractvalue { i32, { i32 } } %A, 0 |
3984 | return ExtractValueInst::Create(Agg: IV->getAggregateOperand(), |
3985 | Idxs: EV.getIndices()); |
3986 | } |
3987 | if (exti == exte && insi == inse) |
3988 | // Both iterators are at the end: Index lists are identical. Replace |
3989 | // %B = insertvalue { i32, { i32 } } %A, i32 42, 1, 0 |
3990 | // %C = extractvalue { i32, { i32 } } %B, 1, 0 |
3991 | // with "i32 42" |
3992 | return replaceInstUsesWith(I&: EV, V: IV->getInsertedValueOperand()); |
3993 | if (exti == exte) { |
3994 | // The extract list is a prefix of the insert list. i.e. replace |
3995 | // %I = insertvalue { i32, { i32 } } %A, i32 42, 1, 0 |
3996 | // %E = extractvalue { i32, { i32 } } %I, 1 |
3997 | // with |
3998 | // %X = extractvalue { i32, { i32 } } %A, 1 |
3999 | // %E = insertvalue { i32 } %X, i32 42, 0 |
4000 | // by switching the order of the insert and extract (though the |
4001 | // insertvalue should be left in, since it may have other uses). |
4002 | Value *NewEV = Builder.CreateExtractValue(Agg: IV->getAggregateOperand(), |
4003 | Idxs: EV.getIndices()); |
4004 | return InsertValueInst::Create(Agg: NewEV, Val: IV->getInsertedValueOperand(), |
4005 | Idxs: ArrayRef(insi, inse)); |
4006 | } |
4007 | if (insi == inse) |
4008 | // The insert list is a prefix of the extract list |
4009 | // We can simply remove the common indices from the extract and make it |
4010 | // operate on the inserted value instead of the insertvalue result. |
4011 | // i.e., replace |
4012 | // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1 |
4013 | // %E = extractvalue { i32, { i32 } } %I, 1, 0 |
4014 | // with |
4015 | // %E = extractvalue { i32 } { i32 42 }, 0 |
4016 | return ExtractValueInst::Create(Agg: IV->getInsertedValueOperand(), |
4017 | Idxs: ArrayRef(exti, exte)); |
4018 | } |
4019 | |
4020 | if (Instruction *R = foldExtractOfOverflowIntrinsic(EV)) |
4021 | return R; |
4022 | |
4023 | if (LoadInst *L = dyn_cast<LoadInst>(Val: Agg)) { |
4024 | // Bail out if the aggregate contains scalable vector type |
4025 | if (auto *STy = dyn_cast<StructType>(Val: Agg->getType()); |
4026 | STy && STy->containsScalableVectorType()) |
4027 | return nullptr; |
4028 | |
4029 | // If the (non-volatile) load only has one use, we can rewrite this to a |
4030 | // load from a GEP. This reduces the size of the load. If a load is used |
4031 | // only by extractvalue instructions then this either must have been |
4032 | // optimized before, or it is a struct with padding, in which case we |
4033 | // don't want to do the transformation as it loses padding knowledge. |
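// An illustrative sketch (types made up): extracting index 1 from a
// single-use simple load of a {i32, i64} struct becomes a load of just the
// i64 member through 'getelementptr {i32, i64}, ptr %p, i32 0, i32 1'.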
4034 | if (L->isSimple() && L->hasOneUse()) { |
4035 | // extractvalue has integer indices, getelementptr has Value*s. Convert. |
4036 | SmallVector<Value*, 4> Indices; |
4037 | // Prefix an i32 0 since we need the first element. |
4038 | Indices.push_back(Elt: Builder.getInt32(C: 0)); |
4039 | for (unsigned Idx : EV.indices()) |
4040 | Indices.push_back(Elt: Builder.getInt32(C: Idx)); |
4041 | |
4042 | // We need to insert these at the location of the old load, not at that of |
4043 | // the extractvalue. |
4044 | Builder.SetInsertPoint(L); |
4045 | Value *GEP = Builder.CreateInBoundsGEP(Ty: L->getType(), |
4046 | Ptr: L->getPointerOperand(), IdxList: Indices); |
4047 | Instruction *NL = Builder.CreateLoad(Ty: EV.getType(), Ptr: GEP); |
4048 | // Whatever aliasing information we had for the original load must also |
4049 | // hold for the smaller load, so propagate the annotations. |
4050 | NL->setAAMetadata(L->getAAMetadata()); |
4051 | // Returning the load directly will cause the main loop to insert it in |
4052 | // the wrong spot, so use replaceInstUsesWith(). |
4053 | return replaceInstUsesWith(I&: EV, V: NL); |
4054 | } |
4055 | } |
4056 | |
4057 | if (auto *PN = dyn_cast<PHINode>(Val: Agg)) |
4058 | if (Instruction *Res = foldOpIntoPhi(I&: EV, PN)) |
4059 | return Res; |
4060 | |
4061 | // Canonicalize extract (select Cond, TV, FV) |
4062 | // -> select cond, (extract TV), (extract FV) |
4063 | if (auto *SI = dyn_cast<SelectInst>(Val: Agg)) |
4064 | if (Instruction *R = FoldOpIntoSelect(Op&: EV, SI, /*FoldWithMultiUse=*/true)) |
4065 | return R; |
4066 | |
4067 | // We could simplify extracts from other values. Note that nested extracts may |
4068 | // already be simplified implicitly by the above: extract (extract (insert) ) |
4069 | // will be translated into extract ( insert ( extract ) ) first and then just |
4070 | // the value inserted, if appropriate. Similarly for extracts from single-use |
4071 | // loads: extract (extract (load)) will be translated to extract (load (gep)) |
4072 | // and if again single-use then via load (gep (gep)) to load (gep). |
4073 | // However, double extracts from e.g. function arguments or return values |
4074 | // aren't handled yet. |
4075 | return nullptr; |
4076 | } |
4077 | |
4078 | /// Return 'true' if the given typeinfo will match anything. |
4079 | static bool isCatchAll(EHPersonality Personality, Constant *TypeInfo) { |
4080 | switch (Personality) { |
4081 | case EHPersonality::GNU_C: |
4082 | case EHPersonality::GNU_C_SjLj: |
4083 | case EHPersonality::Rust: |
4084 | // The GCC C EH and Rust personalities exist only to support cleanups, so |
4085 | // it's not clear what the semantics of catch clauses are. |
4086 | return false; |
4087 | case EHPersonality::Unknown: |
4088 | return false; |
4089 | case EHPersonality::GNU_Ada: |
4090 | // While __gnat_all_others_value will match any Ada exception, it doesn't |
4091 | // match foreign exceptions (or didn't, before gcc-4.7). |
4092 | return false; |
4093 | case EHPersonality::GNU_CXX: |
4094 | case EHPersonality::GNU_CXX_SjLj: |
4095 | case EHPersonality::GNU_ObjC: |
4096 | case EHPersonality::MSVC_X86SEH: |
4097 | case EHPersonality::MSVC_TableSEH: |
4098 | case EHPersonality::MSVC_CXX: |
4099 | case EHPersonality::CoreCLR: |
4100 | case EHPersonality::Wasm_CXX: |
4101 | case EHPersonality::XL_CXX: |
4102 | case EHPersonality::ZOS_CXX: |
4103 | return TypeInfo->isNullValue(); |
4104 | } |
4105 | llvm_unreachable("invalid enum" ); |
4106 | } |
4107 | |
4108 | static bool shorter_filter(const Value *LHS, const Value *RHS) { |
4109 | return |
4110 | cast<ArrayType>(Val: LHS->getType())->getNumElements() |
4111 | < |
4112 | cast<ArrayType>(Val: RHS->getType())->getNumElements(); |
4113 | } |
4114 | |
4115 | Instruction *InstCombinerImpl::visitLandingPadInst(LandingPadInst &LI) { |
4116 | // The logic here should be correct for any real-world personality function. |
4117 | // However if that turns out not to be true, the offending logic can always |
4118 | // be conditioned on the personality function, like the catch-all logic is. |
4119 | EHPersonality Personality = |
4120 | classifyEHPersonality(Pers: LI.getParent()->getParent()->getPersonalityFn()); |
4121 | |
4122 | // Simplify the list of clauses, e.g. by removing repeated catch clauses |
4123 | // (these are often created by inlining). |
4124 | bool MakeNewInstruction = false; // If true, recreate using the following: |
4125 | SmallVector<Constant *, 16> NewClauses; // - Clauses for the new instruction; |
4126 | bool CleanupFlag = LI.isCleanup(); // - The new instruction is a cleanup. |
4127 | |
4128 | SmallPtrSet<Value *, 16> AlreadyCaught; // Typeinfos known caught already. |
4129 | for (unsigned i = 0, e = LI.getNumClauses(); i != e; ++i) { |
4130 | bool isLastClause = i + 1 == e; |
4131 | if (LI.isCatch(Idx: i)) { |
4132 | // A catch clause. |
4133 | Constant *CatchClause = LI.getClause(Idx: i); |
4134 | Constant *TypeInfo = CatchClause->stripPointerCasts(); |
4135 | |
4136 | // If we already saw this clause, there is no point in having a second |
4137 | // copy of it. |
4138 | if (AlreadyCaught.insert(Ptr: TypeInfo).second) { |
4139 | // This catch clause was not already seen. |
4140 | NewClauses.push_back(Elt: CatchClause); |
4141 | } else { |
4142 | // Repeated catch clause - drop the redundant copy. |
4143 | MakeNewInstruction = true; |
4144 | } |
4145 | |
4146 | // If this is a catch-all then there is no point in keeping any following |
4147 | // clauses or marking the landingpad as having a cleanup. |
4148 | if (isCatchAll(Personality, TypeInfo)) { |
4149 | if (!isLastClause) |
4150 | MakeNewInstruction = true; |
4151 | CleanupFlag = false; |
4152 | break; |
4153 | } |
4154 | } else { |
4155 | // A filter clause. If any of the filter elements were already caught |
4156 | // then they can be dropped from the filter. It is tempting to try to |
4157 | // exploit the filter further by saying that any typeinfo that does not |
4158 | // occur in the filter can't be caught later (and thus can be dropped). |
4159 | // However this would be wrong, since typeinfos can match without being |
4160 | // equal (for example if one represents a C++ class, and the other some |
4161 | // class derived from it). |
4162 | assert(LI.isFilter(i) && "Unsupported landingpad clause!" ); |
4163 | Constant *FilterClause = LI.getClause(Idx: i); |
4164 | ArrayType *FilterType = cast<ArrayType>(Val: FilterClause->getType()); |
4165 | unsigned NumTypeInfos = FilterType->getNumElements(); |
4166 | |
4167 | // An empty filter catches everything, so there is no point in keeping any |
4168 | // following clauses or marking the landingpad as having a cleanup. By |
4169 | // dealing with this case here the following code is made a bit simpler. |
4170 | if (!NumTypeInfos) { |
4171 | NewClauses.push_back(Elt: FilterClause); |
4172 | if (!isLastClause) |
4173 | MakeNewInstruction = true; |
4174 | CleanupFlag = false; |
4175 | break; |
4176 | } |
4177 | |
4178 | bool MakeNewFilter = false; // If true, make a new filter. |
4179 | SmallVector<Constant *, 16> NewFilterElts; // New elements. |
4180 | if (isa<ConstantAggregateZero>(Val: FilterClause)) { |
4181 | // Not an empty filter - it contains at least one null typeinfo. |
4182 | assert(NumTypeInfos > 0 && "Should have handled empty filter already!" ); |
4183 | Constant *TypeInfo = |
4184 | Constant::getNullValue(Ty: FilterType->getElementType()); |
4185 | // If this typeinfo is a catch-all then the filter can never match. |
4186 | if (isCatchAll(Personality, TypeInfo)) { |
4187 | // Throw the filter away. |
4188 | MakeNewInstruction = true; |
4189 | continue; |
4190 | } |
4191 | |
4192 | // There is no point in having multiple copies of this typeinfo, so |
4193 | // discard all but the first copy if there is more than one. |
4194 | NewFilterElts.push_back(Elt: TypeInfo); |
4195 | if (NumTypeInfos > 1) |
4196 | MakeNewFilter = true; |
4197 | } else { |
4198 | ConstantArray *Filter = cast<ConstantArray>(Val: FilterClause); |
4199 | SmallPtrSet<Value *, 16> SeenInFilter; // For uniquing the elements. |
4200 | NewFilterElts.reserve(N: NumTypeInfos); |
4201 | |
4202 | // Remove any filter elements that were already caught or that already |
4203 | // occurred in the filter. While there, see if any of the elements are |
4204 | // catch-alls. If so, the filter can be discarded. |
4205 | bool SawCatchAll = false; |
4206 | for (unsigned j = 0; j != NumTypeInfos; ++j) { |
4207 | Constant *Elt = Filter->getOperand(i_nocapture: j); |
4208 | Constant *TypeInfo = Elt->stripPointerCasts(); |
4209 | if (isCatchAll(Personality, TypeInfo)) { |
4210 | // This element is a catch-all. Bail out, noting this fact. |
4211 | SawCatchAll = true; |
4212 | break; |
4213 | } |
4214 | |
4215 | // Even if we've seen a type in a catch clause, we don't want to |
4216 | // remove it from the filter. An unexpected type handler may be |
4217 | // set up for a call site which throws an exception of the same |
4218 | // type caught. In order for the exception thrown by the unexpected |
4219 | // handler to propagate correctly, the filter must be correctly |
4220 | // described for the call site. |
4221 | // |
4222 | // Example: |
4223 | // |
4224 | // void unexpected() { throw 1;} |
4225 | // void foo() throw (int) { |
4226 | // std::set_unexpected(unexpected); |
4227 | // try { |
4228 | // throw 2.0; |
4229 | // } catch (int i) {} |
4230 | // } |
4231 | |
4232 | // There is no point in having multiple copies of the same typeinfo in |
4233 | // a filter, so only add it if we didn't already. |
4234 | if (SeenInFilter.insert(Ptr: TypeInfo).second) |
4235 | NewFilterElts.push_back(Elt: cast<Constant>(Val: Elt)); |
4236 | } |
4237 | // A filter containing a catch-all cannot match anything by definition. |
4238 | if (SawCatchAll) { |
4239 | // Throw the filter away. |
4240 | MakeNewInstruction = true; |
4241 | continue; |
4242 | } |
4243 | |
4244 | // If we dropped something from the filter, make a new one. |
4245 | if (NewFilterElts.size() < NumTypeInfos) |
4246 | MakeNewFilter = true; |
4247 | } |
4248 | if (MakeNewFilter) { |
4249 | FilterType = ArrayType::get(ElementType: FilterType->getElementType(), |
4250 | NumElements: NewFilterElts.size()); |
4251 | FilterClause = ConstantArray::get(T: FilterType, V: NewFilterElts); |
4252 | MakeNewInstruction = true; |
4253 | } |
4254 | |
4255 | NewClauses.push_back(Elt: FilterClause); |
4256 | |
4257 | // If the new filter is empty then it will catch everything so there is |
4258 | // no point in keeping any following clauses or marking the landingpad |
4259 | // as having a cleanup. The case of the original filter being empty was |
4260 | // already handled above. |
4261 | if (MakeNewFilter && !NewFilterElts.size()) { |
4262 | assert(MakeNewInstruction && "New filter but not a new instruction!" ); |
4263 | CleanupFlag = false; |
4264 | break; |
4265 | } |
4266 | } |
4267 | } |
4268 | |
4269 | // If several filters occur in a row then reorder them so that the shortest |
4270 | // filters come first (those with the smallest number of elements). This is |
4271 | // advantageous because shorter filters are more likely to match, speeding up |
4272 | // unwinding, but mostly because it increases the effectiveness of the other |
4273 | // filter optimizations below. |
4274 | for (unsigned i = 0, e = NewClauses.size(); i + 1 < e; ) { |
4275 | unsigned j; |
4276 | // Find the maximal 'j' s.t. the range [i, j) consists entirely of filters. |
4277 | for (j = i; j != e; ++j) |
4278 | if (!isa<ArrayType>(Val: NewClauses[j]->getType())) |
4279 | break; |
4280 | |
4281 | // Check whether the filters are already sorted by length. We need to know |
4282 | // if sorting them is actually going to do anything so that we only make a |
4283 | // new landingpad instruction if it does. |
4284 | for (unsigned k = i; k + 1 < j; ++k) |
4285 | if (shorter_filter(LHS: NewClauses[k+1], RHS: NewClauses[k])) { |
4286 | // Not sorted, so sort the filters now. Doing an unstable sort would be |
4287 | // correct too but reordering filters pointlessly might confuse users. |
4288 | std::stable_sort(first: NewClauses.begin() + i, last: NewClauses.begin() + j, |
4289 | comp: shorter_filter); |
4290 | MakeNewInstruction = true; |
4291 | break; |
4292 | } |
4293 | |
4294 | // Look for the next batch of filters. |
4295 | i = j + 1; |
4296 | } |
4297 | |
4298 | // If typeinfos matched if and only if equal, then the elements of a filter L |
4299 | // that occurs later than a filter F could be replaced by the intersection of |
4300 | // the elements of F and L. In reality two typeinfos can match without being |
4301 | // equal (for example if one represents a C++ class, and the other some class |
4302 | // derived from it) so it would be wrong to perform this transform in general. |
4303 | // However the transform is correct and useful if F is a subset of L. In that |
4304 | // case L can be replaced by F, and thus removed altogether since repeating a |
4305 | // filter is pointless. So here we look at all pairs of filters F and L where |
4306 | // L follows F in the list of clauses, and remove L if every element of F is |
4307 | // an element of L. This can occur when inlining C++ functions with exception |
4308 | // specifications. |
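// As an illustrative example, given the clauses
//   filter [1 x ptr] [ptr @TypeA]
//   filter [2 x ptr] [ptr @TypeA, ptr @TypeB]
// every element of the first filter is also in the second, so the second
// (longer) filter is redundant and can be dropped.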
4309 | for (unsigned i = 0; i + 1 < NewClauses.size(); ++i) { |
4310 | // Examine each filter in turn. |
4311 | Value *Filter = NewClauses[i]; |
4312 | ArrayType *FTy = dyn_cast<ArrayType>(Val: Filter->getType()); |
4313 | if (!FTy) |
4314 | // Not a filter - skip it. |
4315 | continue; |
4316 | unsigned FElts = FTy->getNumElements(); |
4317 | // Examine each filter following this one. Doing this backwards means that |
4318 | // we don't have to worry about filters disappearing under us when removed. |
4319 | for (unsigned j = NewClauses.size() - 1; j != i; --j) { |
4320 | Value *LFilter = NewClauses[j]; |
4321 | ArrayType *LTy = dyn_cast<ArrayType>(Val: LFilter->getType()); |
4322 | if (!LTy) |
4323 | // Not a filter - skip it. |
4324 | continue; |
4325 | // If Filter is a subset of LFilter, i.e. every element of Filter is also |
4326 | // an element of LFilter, then discard LFilter. |
4327 | SmallVectorImpl<Constant *>::iterator J = NewClauses.begin() + j; |
4328 | // If Filter is empty then it is a subset of LFilter. |
4329 | if (!FElts) { |
4330 | // Discard LFilter. |
4331 | NewClauses.erase(CI: J); |
4332 | MakeNewInstruction = true; |
4333 | // Move on to the next filter. |
4334 | continue; |
4335 | } |
4336 | unsigned LElts = LTy->getNumElements(); |
4337 | // If Filter is longer than LFilter then it cannot be a subset of it. |
4338 | if (FElts > LElts) |
4339 | // Move on to the next filter. |
4340 | continue; |
4341 | // At this point we know that LFilter has at least one element. |
4342 | if (isa<ConstantAggregateZero>(Val: LFilter)) { // LFilter only contains zeros. |
4343 | // Filter is a subset of LFilter iff Filter contains only zeros (as we |
4344 | // already know that Filter is not longer than LFilter). |
4345 | if (isa<ConstantAggregateZero>(Val: Filter)) { |
4346 | assert(FElts <= LElts && "Should have handled this case earlier!" ); |
4347 | // Discard LFilter. |
4348 | NewClauses.erase(CI: J); |
4349 | MakeNewInstruction = true; |
4350 | } |
4351 | // Move on to the next filter. |
4352 | continue; |
4353 | } |
4354 | ConstantArray *LArray = cast<ConstantArray>(Val: LFilter); |
4355 | if (isa<ConstantAggregateZero>(Val: Filter)) { // Filter only contains zeros. |
4356 | // Since Filter is non-empty and contains only zeros, it is a subset of |
4357 | // LFilter iff LFilter contains a zero. |
4358 | assert(FElts > 0 && "Should have eliminated the empty filter earlier!" ); |
4359 | for (unsigned l = 0; l != LElts; ++l) |
4360 | if (LArray->getOperand(i_nocapture: l)->isNullValue()) { |
4361 | // LFilter contains a zero - discard it. |
4362 | NewClauses.erase(CI: J); |
4363 | MakeNewInstruction = true; |
4364 | break; |
4365 | } |
4366 | // Move on to the next filter. |
4367 | continue; |
4368 | } |
4369 | // At this point we know that both filters are ConstantArrays. Loop over |
4370 | // operands to see whether every element of Filter is also an element of |
4371 | // LFilter. Since filters tend to be short this is probably faster than |
4372 | // using a method that scales nicely. |
4373 | ConstantArray *FArray = cast<ConstantArray>(Val: Filter); |
4374 | bool AllFound = true; |
4375 | for (unsigned f = 0; f != FElts; ++f) { |
4376 | Value *FTypeInfo = FArray->getOperand(i_nocapture: f)->stripPointerCasts(); |
4377 | AllFound = false; |
4378 | for (unsigned l = 0; l != LElts; ++l) { |
4379 | Value *LTypeInfo = LArray->getOperand(i_nocapture: l)->stripPointerCasts(); |
4380 | if (LTypeInfo == FTypeInfo) { |
4381 | AllFound = true; |
4382 | break; |
4383 | } |
4384 | } |
4385 | if (!AllFound) |
4386 | break; |
4387 | } |
4388 | if (AllFound) { |
4389 | // Discard LFilter. |
4390 | NewClauses.erase(CI: J); |
4391 | MakeNewInstruction = true; |
4392 | } |
4393 | // Move on to the next filter. |
4394 | } |
4395 | } |
4396 | |
4397 | // If we changed any of the clauses, replace the old landingpad instruction |
4398 | // with a new one. |
4399 | if (MakeNewInstruction) { |
4400 | LandingPadInst *NLI = LandingPadInst::Create(RetTy: LI.getType(), |
4401 | NumReservedClauses: NewClauses.size()); |
4402 | for (Constant *C : NewClauses) |
4403 | NLI->addClause(ClauseVal: C); |
4404 | // A landing pad with no clauses must have the cleanup flag set. It is |
4405 | // theoretically possible, though highly unlikely, that we eliminated all |
4406 | // clauses. If so, force the cleanup flag to true. |
4407 | if (NewClauses.empty()) |
4408 | CleanupFlag = true; |
4409 | NLI->setCleanup(CleanupFlag); |
4410 | return NLI; |
4411 | } |
4412 | |
4413 | // Even if none of the clauses changed, we may nonetheless have understood |
4414 | // that the cleanup flag is pointless. Clear it if so. |
4415 | if (LI.isCleanup() != CleanupFlag) { |
4416 | assert(!CleanupFlag && "Adding a cleanup, not removing one?!" ); |
4417 | LI.setCleanup(CleanupFlag); |
4418 | return &LI; |
4419 | } |
4420 | |
4421 | return nullptr; |
4422 | } |
4423 | |
4424 | Value * |
4425 | InstCombinerImpl::pushFreezeToPreventPoisonFromPropagating(FreezeInst &OrigFI) { |
4426 | // Try to push freeze through instructions that propagate but don't produce |
4427 | // poison as far as possible. If an operand of the freeze satisfies three |
4428 | // conditions: 1) it has one use, 2) it does not produce poison, and 3) all |
4429 | // but one of its operands are guaranteed non-poison, then push the freeze |
4430 | // through to the single operand that is not guaranteed non-poison. The |
4431 | // actual transform is as follows. |
4432 | // Op1 = ... ; Op1 can be poison |
4433 | // Op0 = Inst(Op1, NonPoisonOps...) ; Op0 has only one use and Op1 is its |
4434 | // ; only operand that may be poison |
4435 | // ... = Freeze(Op0) |
4436 | // => |
4437 | // Op1 = ... |
4438 | // Op1.fr = Freeze(Op1) |
4439 | // ... = Inst(Op1.fr, NonPoisonOps...) |
4440 | auto *OrigOp = OrigFI.getOperand(i_nocapture: 0); |
4441 | auto *OrigOpInst = dyn_cast<Instruction>(Val: OrigOp); |
4442 | |
4443 | // While we could change the other users of OrigOp to use freeze(OrigOp), that |
4444 | // potentially reduces their optimization potential, so let's only do this iff |
4445 | // the OrigOp is only used by the freeze. |
4446 | if (!OrigOpInst || !OrigOpInst->hasOneUse() || isa<PHINode>(Val: OrigOp)) |
4447 | return nullptr; |
4448 | |
4449 | // We can't push the freeze through an instruction which can itself create |
4450 | // poison. If the only source of new poison is flags, we can simply |
4451 | // strip them (since we know the only use is the freeze and nothing can |
4452 | // benefit from them.) |
4453 | if (canCreateUndefOrPoison(Op: cast<Operator>(Val: OrigOp), |
4454 | /*ConsiderFlagsAndMetadata*/ false)) |
4455 | return nullptr; |
4456 | |
4457 | // If operand is guaranteed not to be poison, there is no need to add freeze |
4458 | // to the operand. So we first find the operand that is not guaranteed to be |
4459 | // poison. |
4460 | Use *MaybePoisonOperand = nullptr; |
4461 | for (Use &U : OrigOpInst->operands()) { |
4462 | if (isa<MetadataAsValue>(Val: U.get()) || |
4463 | isGuaranteedNotToBeUndefOrPoison(V: U.get())) |
4464 | continue; |
4465 | if (!MaybePoisonOperand) |
4466 | MaybePoisonOperand = &U; |
4467 | else |
4468 | return nullptr; |
4469 | } |
4470 | |
4471 | OrigOpInst->dropPoisonGeneratingAnnotations(); |
4472 | |
4473 | // If all operands are guaranteed to be non-poison, we can drop freeze. |
4474 | if (!MaybePoisonOperand) |
4475 | return OrigOp; |
4476 | |
4477 | Builder.SetInsertPoint(OrigOpInst); |
4478 | auto *FrozenMaybePoisonOperand = Builder.CreateFreeze( |
4479 | V: MaybePoisonOperand->get(), Name: MaybePoisonOperand->get()->getName() + ".fr" ); |
4480 | |
4481 | replaceUse(U&: *MaybePoisonOperand, NewValue: FrozenMaybePoisonOperand); |
4482 | return OrigOp; |
4483 | } |
4484 | |
4485 | Instruction *InstCombinerImpl::foldFreezeIntoRecurrence(FreezeInst &FI, |
4486 | PHINode *PN) { |
4487 | // Detect whether this is a recurrence with a start value and some number of |
4488 | // backedge values. We'll check whether we can push the freeze through the |
4489 | // backedge values (possibly dropping poison flags along the way) until we |
4490 | // reach the phi again. In that case, we can move the freeze to the start |
4491 | // value. |
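// An illustrative sketch of the rewrite (names made up):
//   %iv  = phi i32 [ %start, %entry ], [ %inc, %loop ]
//   %inc = add nuw i32 %iv, 1
//   %fr  = freeze i32 %iv
// becomes
//   %start.fr = freeze i32 %start        ; inserted at the end of %entry
//   %iv  = phi i32 [ %start.fr, %entry ], [ %inc, %loop ]
//   %inc = add i32 %iv, 1                ; poison-generating flags dropped
// with every use of %fr replaced by %iv.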
4492 | Use *StartU = nullptr; |
4493 | SmallVector<Value *> Worklist; |
4494 | for (Use &U : PN->incoming_values()) { |
4495 | if (DT.dominates(A: PN->getParent(), B: PN->getIncomingBlock(U))) { |
4496 | // Add backedge value to worklist. |
4497 | Worklist.push_back(Elt: U.get()); |
4498 | continue; |
4499 | } |
4500 | |
4501 | // Don't bother handling multiple start values. |
4502 | if (StartU) |
4503 | return nullptr; |
4504 | StartU = &U; |
4505 | } |
4506 | |
4507 | if (!StartU || Worklist.empty()) |
4508 | return nullptr; // Not a recurrence. |
4509 | |
4510 | Value *StartV = StartU->get(); |
4511 | BasicBlock *StartBB = PN->getIncomingBlock(U: *StartU); |
4512 | bool StartNeedsFreeze = !isGuaranteedNotToBeUndefOrPoison(V: StartV); |
4513 | // We can't insert freeze if the start value is the result of the |
4514 | // terminator (e.g. an invoke). |
4515 | if (StartNeedsFreeze && StartBB->getTerminator() == StartV) |
4516 | return nullptr; |
4517 | |
4518 | SmallPtrSet<Value *, 32> Visited; |
4519 | SmallVector<Instruction *> DropFlags; |
4520 | while (!Worklist.empty()) { |
4521 | Value *V = Worklist.pop_back_val(); |
4522 | if (!Visited.insert(Ptr: V).second) |
4523 | continue; |
4524 | |
4525 | if (Visited.size() > 32) |
4526 | return nullptr; // Limit the total number of values we inspect. |
4527 | |
4528 | // Assume that PN is non-poison, because it will be after the transform. |
4529 | if (V == PN || isGuaranteedNotToBeUndefOrPoison(V)) |
4530 | continue; |
4531 | |
4532 | Instruction *I = dyn_cast<Instruction>(Val: V); |
4533 | if (!I || canCreateUndefOrPoison(Op: cast<Operator>(Val: I), |
4534 | /*ConsiderFlagsAndMetadata*/ false)) |
4535 | return nullptr; |
4536 | |
4537 | DropFlags.push_back(Elt: I); |
4538 | append_range(C&: Worklist, R: I->operands()); |
4539 | } |
4540 | |
4541 | for (Instruction *I : DropFlags) |
4542 | I->dropPoisonGeneratingAnnotations(); |
4543 | |
4544 | if (StartNeedsFreeze) { |
4545 | Builder.SetInsertPoint(StartBB->getTerminator()); |
4546 | Value *FrozenStartV = Builder.CreateFreeze(V: StartV, |
4547 | Name: StartV->getName() + ".fr" ); |
4548 | replaceUse(U&: *StartU, NewValue: FrozenStartV); |
4549 | } |
4550 | return replaceInstUsesWith(I&: FI, V: PN); |
4551 | } |
4552 | |
4553 | bool InstCombinerImpl::freezeOtherUses(FreezeInst &FI) { |
4554 | Value *Op = FI.getOperand(i_nocapture: 0); |
4555 | |
4556 | if (isa<Constant>(Val: Op) || Op->hasOneUse()) |
4557 | return false; |
4558 | |
4559 | // Move the freeze directly after the definition of its operand, so that |
4560 | // it dominates the maximum number of uses. Note that it may not dominate |
4561 | // *all* uses if the operand is an invoke/callbr and the use is in a phi on |
4562 | // the normal/default destination. This is why the domination check in the |
4563 | // replacement below is still necessary. |
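// For example (illustrative): if %op is defined near the top of a block but
// the freeze currently sits several blocks later, hoisting the freeze to just
// after the definition of %op lets every dominated use of %op be rewritten to
// use the frozen value below.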
4564 | BasicBlock::iterator MoveBefore; |
4565 | if (isa<Argument>(Val: Op)) { |
4566 | MoveBefore = |
4567 | FI.getFunction()->getEntryBlock().getFirstNonPHIOrDbgOrAlloca(); |
4568 | } else { |
4569 | auto MoveBeforeOpt = cast<Instruction>(Val: Op)->getInsertionPointAfterDef(); |
4570 | if (!MoveBeforeOpt) |
4571 | return false; |
4572 | MoveBefore = *MoveBeforeOpt; |
4573 | } |
4574 | |
4575 | // Don't move to the position of a debug intrinsic. |
4576 | if (isa<DbgInfoIntrinsic>(Val: MoveBefore)) |
4577 | MoveBefore = MoveBefore->getNextNonDebugInstruction()->getIterator(); |
4578 | // Re-point iterator to come after any debug-info records, if we're |
4579 | // running in "RemoveDIs" mode |
4580 | MoveBefore.setHeadBit(false); |
4581 | |
4582 | bool Changed = false; |
4583 | if (&FI != &*MoveBefore) { |
4584 | FI.moveBefore(BB&: *MoveBefore->getParent(), I: MoveBefore); |
4585 | Changed = true; |
4586 | } |
4587 | |
4588 | Op->replaceUsesWithIf(New: &FI, ShouldReplace: [&](Use &U) -> bool { |
4589 | bool Dominates = DT.dominates(Def: &FI, U); |
4590 | Changed |= Dominates; |
4591 | return Dominates; |
4592 | }); |
4593 | |
4594 | return Changed; |
4595 | } |
4596 | |
4597 | // Check if any direct or bitcast user of this value is a shuffle instruction. |
4598 | static bool isUsedWithinShuffleVector(Value *V) { |
4599 | for (auto *U : V->users()) { |
4600 | if (isa<ShuffleVectorInst>(Val: U)) |
4601 | return true; |
4602 | else if (match(V: U, P: m_BitCast(Op: m_Specific(V))) && isUsedWithinShuffleVector(V: U)) |
4603 | return true; |
4604 | } |
4605 | return false; |
4606 | } |
4607 | |
4608 | Instruction *InstCombinerImpl::visitFreeze(FreezeInst &I) { |
4609 | Value *Op0 = I.getOperand(i_nocapture: 0); |
4610 | |
4611 | if (Value *V = simplifyFreezeInst(Op: Op0, Q: SQ.getWithInstruction(I: &I))) |
4612 | return replaceInstUsesWith(I, V); |
4613 | |
4614 | // freeze (phi const, x) --> phi const, (freeze x) |
4615 | if (auto *PN = dyn_cast<PHINode>(Val: Op0)) { |
4616 | if (Instruction *NV = foldOpIntoPhi(I, PN)) |
4617 | return NV; |
4618 | if (Instruction *NV = foldFreezeIntoRecurrence(FI&: I, PN)) |
4619 | return NV; |
4620 | } |
4621 | |
4622 | if (Value *NI = pushFreezeToPreventPoisonFromPropagating(OrigFI&: I)) |
4623 | return replaceInstUsesWith(I, V: NI); |
4624 | |
4625 | // If I is freeze(undef), check its uses and fold it to a fixed constant. |
4626 | // - or: pick -1 |
4627 | // - select's condition: if the true value is constant, choose it by making |
4628 | // the condition true. |
4629 | // - default: pick 0 |
4630 | // |
4631 | // Note that this transform is intentionally done here rather than |
4632 | // via an analysis in InstSimplify or at individual user sites. That is |
4633 | // because we must produce the same value for all uses of the freeze - |
4634 | // it's the reason "freeze" exists! |
4635 | // |
4636 | // TODO: This could use getBinopAbsorber() / getBinopIdentity() to avoid |
4637 | // duplicating logic for binops at least. |
4638 | auto getUndefReplacement = [&I](Type *Ty) { |
4639 | Constant *BestValue = nullptr; |
4640 | Constant *NullValue = Constant::getNullValue(Ty); |
4641 | for (const auto *U : I.users()) { |
4642 | Constant *C = NullValue; |
4643 | if (match(V: U, P: m_Or(L: m_Value(), R: m_Value()))) |
4644 | C = ConstantInt::getAllOnesValue(Ty); |
4645 | else if (match(V: U, P: m_Select(C: m_Specific(V: &I), L: m_Constant(), R: m_Value()))) |
4646 | C = ConstantInt::getTrue(Ty); |
4647 | |
4648 | if (!BestValue) |
4649 | BestValue = C; |
4650 | else if (BestValue != C) |
4651 | BestValue = NullValue; |
4652 | } |
4653 | assert(BestValue && "Must have at least one use" ); |
4654 | return BestValue; |
4655 | }; |
4656 | |
4657 | if (match(V: Op0, P: m_Undef())) { |
4658 | // Don't fold freeze(undef/poison) if it's used as a vector operand in |
4659 | // a shuffle. This may improve codegen for shuffles that allow |
4660 | // unspecified inputs. |
4661 | if (isUsedWithinShuffleVector(V: &I)) |
4662 | return nullptr; |
4663 | return replaceInstUsesWith(I, V: getUndefReplacement(I.getType())); |
4664 | } |
4665 | |
4666 | Constant *C; |
4667 | if (match(V: Op0, P: m_Constant(C)) && C->containsUndefOrPoisonElement()) { |
4668 | Constant *ReplaceC = getUndefReplacement(I.getType()->getScalarType()); |
4669 | return replaceInstUsesWith(I, V: Constant::replaceUndefsWith(C, Replacement: ReplaceC)); |
4670 | } |
4671 | |
4672 | // Replace uses of Op with freeze(Op). |
4673 | if (freezeOtherUses(FI&: I)) |
4674 | return &I; |
4675 | |
4676 | return nullptr; |
4677 | } |
4678 | |
4679 | /// Check for case where the call writes to an otherwise dead alloca. This |
4680 | /// shows up for unused out-params in idiomatic C/C++ code. Note that this |
4681 | /// helper *only* analyzes the write; doesn't check any other legality aspect. |
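/// An illustrative sketch (the callee name is made up):
///   %out = alloca i32
///   call void @fill_out_param(ptr %out) ; assumed to only write through %out
/// Since %out has no users other than the call, the written value can never
/// be observed.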
4682 | static bool SoleWriteToDeadLocal(Instruction *I, TargetLibraryInfo &TLI) { |
4683 | auto *CB = dyn_cast<CallBase>(Val: I); |
4684 | if (!CB) |
4685 | // TODO: handle e.g. store to alloca here - only worth doing if we extend |
4686 | // to allow reload along used path as described below. Otherwise, this |
4687 | // is simply a store to a dead allocation which will be removed. |
4688 | return false; |
4689 | std::optional<MemoryLocation> Dest = MemoryLocation::getForDest(CI: CB, TLI); |
4690 | if (!Dest) |
4691 | return false; |
4692 | auto *AI = dyn_cast<AllocaInst>(Val: getUnderlyingObject(V: Dest->Ptr)); |
4693 | if (!AI) |
4694 | // TODO: allow malloc? |
4695 | return false; |
4696 | // TODO: allow memory access dominated by move point? Note that since AI |
4697 | // could have a reference to itself captured by the call, we would need to |
4698 | // account for cycles in doing so. |
4699 | SmallVector<const User *> AllocaUsers; |
4700 | SmallPtrSet<const User *, 4> Visited; |
4701 | auto pushUsers = [&](const Instruction &I) { |
4702 | for (const User *U : I.users()) { |
4703 | if (Visited.insert(Ptr: U).second) |
4704 | AllocaUsers.push_back(Elt: U); |
4705 | } |
4706 | }; |
4707 | pushUsers(*AI); |
4708 | while (!AllocaUsers.empty()) { |
4709 | auto *UserI = cast<Instruction>(Val: AllocaUsers.pop_back_val()); |
4710 | if (isa<BitCastInst>(Val: UserI) || isa<GetElementPtrInst>(Val: UserI) || |
4711 | isa<AddrSpaceCastInst>(Val: UserI)) { |
4712 | pushUsers(*UserI); |
4713 | continue; |
4714 | } |
4715 | if (UserI == CB) |
4716 | continue; |
4717 | // TODO: support lifetime.start/end here |
4718 | return false; |
4719 | } |
4720 | return true; |
4721 | } |
4722 | |
4723 | /// Try to move the specified instruction from its current block into the |
4724 | /// beginning of DestBlock, which can only happen if it's safe to move the |
4725 | /// instruction past all of the instructions between it and the end of its |
4726 | /// block. |
4727 | bool InstCombinerImpl::tryToSinkInstruction(Instruction *I, |
4728 | BasicBlock *DestBlock) { |
4729 | BasicBlock *SrcBlock = I->getParent(); |
4730 | |
4731 | // Cannot move control-flow-involving, volatile loads, vaarg, etc. |
4732 | if (isa<PHINode>(Val: I) || I->isEHPad() || I->mayThrow() || !I->willReturn() || |
4733 | I->isTerminator()) |
4734 | return false; |
4735 | |
4736 | // Do not sink static or dynamic alloca instructions. Static allocas must |
4737 | // remain in the entry block, and dynamic allocas must not be sunk in between |
4738 | // a stacksave / stackrestore pair, which would incorrectly shorten its |
4739 | // lifetime. |
4740 | if (isa<AllocaInst>(Val: I)) |
4741 | return false; |
4742 | |
4743 | // Do not sink into catchswitch blocks. |
4744 | if (isa<CatchSwitchInst>(Val: DestBlock->getTerminator())) |
4745 | return false; |
4746 | |
4747 | // Do not sink convergent call instructions. |
4748 | if (auto *CI = dyn_cast<CallInst>(Val: I)) { |
4749 | if (CI->isConvergent()) |
4750 | return false; |
4751 | } |
4752 | |
4753 | // Unless we can prove that the memory write isn't visible except on the |
4754 | // path we're sinking to, we must bail. |
4755 | if (I->mayWriteToMemory()) { |
4756 | if (!SoleWriteToDeadLocal(I, TLI)) |
4757 | return false; |
4758 | } |
4759 | |
4760 | // We can only sink load instructions if there is nothing between the load and |
4761 | // the end of block that could change the value. |
4762 | if (I->mayReadFromMemory()) { |
4763 | // We don't want to do any sophisticated alias analysis, so we only check |
4764 | // the instructions after I in I's parent block if we try to sink to its |
4765 | // successor block. |
4766 | if (DestBlock->getUniquePredecessor() != I->getParent()) |
4767 | return false; |
4768 | for (BasicBlock::iterator Scan = std::next(x: I->getIterator()), |
4769 | E = I->getParent()->end(); |
4770 | Scan != E; ++Scan) |
4771 | if (Scan->mayWriteToMemory()) |
4772 | return false; |
4773 | } |
4774 | |
4775 | I->dropDroppableUses(ShouldDrop: [&](const Use *U) { |
4776 | auto *I = dyn_cast<Instruction>(Val: U->getUser()); |
4777 | if (I && I->getParent() != DestBlock) { |
4778 | Worklist.add(I); |
4779 | return true; |
4780 | } |
4781 | return false; |
4782 | }); |
4783 | /// FIXME: We could remove droppable uses that are not dominated by |
4784 | /// the new position. |
4785 | |
4786 | BasicBlock::iterator InsertPos = DestBlock->getFirstInsertionPt(); |
4787 | I->moveBefore(BB&: *DestBlock, I: InsertPos); |
4788 | ++NumSunkInst; |
4789 | |
4790 | // Also sink all related debug uses from the source basic block. Otherwise we |
4791 | // get debug use before the def. Attempt to salvage debug uses first, to |
4792 | // maximise the range variables have location for. If we cannot salvage, then |
4793 | // mark the location undef: we know it was supposed to receive a new location |
4794 | // here, but that computation has been sunk. |
4795 | SmallVector<DbgVariableIntrinsic *, 2> DbgUsers; |
4796 | SmallVector<DbgVariableRecord *, 2> DbgVariableRecords; |
4797 | findDbgUsers(DbgInsts&: DbgUsers, V: I, DbgVariableRecords: &DbgVariableRecords); |
4798 | if (!DbgUsers.empty()) |
4799 | tryToSinkInstructionDbgValues(I, InsertPos, SrcBlock, DestBlock, DbgUsers); |
4800 | if (!DbgVariableRecords.empty()) |
4801 | tryToSinkInstructionDbgVariableRecords(I, InsertPos, SrcBlock, DestBlock, |
4802 | DPUsers&: DbgVariableRecords); |
4803 | |
4804 | // PS: there are numerous flaws with this behaviour, not least that right now |
4805 | // assignments can be re-ordered past other assignments to the same variable |
4806 | // if they use different Values. Creating more undef assignments can never be |
4807 | // undone. And salvaging all users outside of this block can unnecessarily |
4808 | // alter the lifetime of the live-value that the variable refers to. |
4809 | // Some of these things can be resolved by tolerating debug use-before-defs in |
4810 | // LLVM-IR, however it depends on the instruction-referencing CodeGen backend |
4811 | // being used for more architectures. |
4812 | |
4813 | return true; |
4814 | } |
4815 | |
4816 | void InstCombinerImpl::tryToSinkInstructionDbgValues( |
4817 | Instruction *I, BasicBlock::iterator InsertPos, BasicBlock *SrcBlock, |
4818 | BasicBlock *DestBlock, SmallVectorImpl<DbgVariableIntrinsic *> &DbgUsers) { |
4819 | // For all debug values in the destination block, the sunk instruction |
4820 | // will still be available, so they do not need to be dropped. |
4821 | SmallVector<DbgVariableIntrinsic *, 2> DbgUsersToSalvage; |
4822 | for (auto &DbgUser : DbgUsers) |
4823 | if (DbgUser->getParent() != DestBlock) |
4824 | DbgUsersToSalvage.push_back(Elt: DbgUser); |
4825 | |
4826 | // Process the sinking DbgUsersToSalvage in reverse order, as we only want |
4827 | // to clone the last appearing debug intrinsic for each given variable. |
4828 | SmallVector<DbgVariableIntrinsic *, 2> DbgUsersToSink; |
4829 | for (DbgVariableIntrinsic *DVI : DbgUsersToSalvage) |
4830 | if (DVI->getParent() == SrcBlock) |
4831 | DbgUsersToSink.push_back(Elt: DVI); |
4832 | llvm::sort(C&: DbgUsersToSink, |
4833 | Comp: [](auto *A, auto *B) { return B->comesBefore(A); }); |
4834 | |
4835 | SmallVector<DbgVariableIntrinsic *, 2> DIIClones; |
4836 | SmallSet<DebugVariable, 4> SunkVariables; |
4837 | for (auto *User : DbgUsersToSink) { |
4838 | // A dbg.declare instruction should not be cloned, since there can only be |
4839 | // one per variable fragment. It should be left in the original place |
4840 | // because the sunk instruction is not an alloca (otherwise we could not be |
4841 | // here). |
4842 | if (isa<DbgDeclareInst>(Val: User)) |
4843 | continue; |
4844 | |
4845 | DebugVariable DbgUserVariable = |
4846 | DebugVariable(User->getVariable(), User->getExpression(), |
4847 | User->getDebugLoc()->getInlinedAt()); |
4848 | |
4849 | if (!SunkVariables.insert(V: DbgUserVariable).second) |
4850 | continue; |
4851 | |
4852 | // Leave dbg.assign intrinsics in their original positions and there should |
4853 | // be no need to insert a clone. |
4854 | if (isa<DbgAssignIntrinsic>(Val: User)) |
4855 | continue; |
4856 | |
4857 | DIIClones.emplace_back(Args: cast<DbgVariableIntrinsic>(Val: User->clone())); |
4858 | if (isa<DbgDeclareInst>(Val: User) && isa<CastInst>(Val: I)) |
4859 | DIIClones.back()->replaceVariableLocationOp(OldValue: I, NewValue: I->getOperand(i: 0)); |
4860 | LLVM_DEBUG(dbgs() << "CLONE: " << *DIIClones.back() << '\n'); |
4861 | } |
4862 | |
4863 | // Perform salvaging without the clones, then sink the clones. |
4864 | if (!DIIClones.empty()) { |
4865 | salvageDebugInfoForDbgValues(I&: *I, Insns: DbgUsersToSalvage, DPInsns: {}); |
4866 | // The clones are in reverse order of original appearance, reverse again to |
4867 | // maintain the original order. |
4868 | for (auto &DIIClone : llvm::reverse(C&: DIIClones)) { |
4869 | DIIClone->insertBefore(InsertPos: &*InsertPos); |
4870 | LLVM_DEBUG(dbgs() << "SINK: " << *DIIClone << '\n'); |
4871 | } |
4872 | } |
4873 | } |
4874 | |
4875 | void InstCombinerImpl::tryToSinkInstructionDbgVariableRecords( |
4876 | Instruction *I, BasicBlock::iterator InsertPos, BasicBlock *SrcBlock, |
4877 | BasicBlock *DestBlock, |
4878 | SmallVectorImpl<DbgVariableRecord *> &DbgVariableRecords) { |
4879 | // Implementation of tryToSinkInstructionDbgValues, but for the |
4880 | // DbgVariableRecord of variable assignments rather than dbg.values. |
4881 | |
4882 | // Fetch all DbgVariableRecords not already in the destination. |
4883 | SmallVector<DbgVariableRecord *, 2> DbgVariableRecordsToSalvage; |
4884 | for (auto &DVR : DbgVariableRecords) |
4885 | if (DVR->getParent() != DestBlock) |
4886 | DbgVariableRecordsToSalvage.push_back(Elt: DVR); |
4887 | |
4888 | // Fetch a second collection, of DbgVariableRecords in the source block that |
4889 | // we're going to sink. |
4890 | SmallVector<DbgVariableRecord *> DbgVariableRecordsToSink; |
4891 | for (DbgVariableRecord *DVR : DbgVariableRecordsToSalvage) |
4892 | if (DVR->getParent() == SrcBlock) |
4893 | DbgVariableRecordsToSink.push_back(Elt: DVR); |
4894 | |
4895 | // Sort DbgVariableRecords according to their position in the block. This is a |
4896 | // partial order: DbgVariableRecords attached to different instructions will |
4897 | // be ordered by the instruction order, but DbgVariableRecords attached to the |
4898 | // same instruction won't have an order. |
4899 | auto Order = [](DbgVariableRecord *A, DbgVariableRecord *B) -> bool { |
4900 | return B->getInstruction()->comesBefore(Other: A->getInstruction()); |
4901 | }; |
4902 | llvm::stable_sort(Range&: DbgVariableRecordsToSink, C: Order); |
4903 | |
4904 | // If there are two assignments to the same variable attached to the same |
4905 | // instruction, the ordering between the two assignments is important. Scan |
4906 | // for this (rare) case and establish which is the last assignment. |
4907 | using InstVarPair = std::pair<const Instruction *, DebugVariable>; |
4908 | SmallDenseMap<InstVarPair, DbgVariableRecord *> FilterOutMap; |
4909 | if (DbgVariableRecordsToSink.size() > 1) { |
4910 | SmallDenseMap<InstVarPair, unsigned> CountMap; |
4911 | // Count how many assignments to each variable there is per instruction. |
4912 | for (DbgVariableRecord *DVR : DbgVariableRecordsToSink) { |
4913 | DebugVariable DbgUserVariable = |
4914 | DebugVariable(DVR->getVariable(), DVR->getExpression(), |
4915 | DVR->getDebugLoc()->getInlinedAt()); |
4916 | CountMap[std::make_pair(x: DVR->getInstruction(), y&: DbgUserVariable)] += 1; |
4917 | } |
4918 | |
4919 | // If there are any instructions with two assignments, add them to the |
4920 | // FilterOutMap to record that they need extra filtering. |
4921 | SmallPtrSet<const Instruction *, 4> DupSet; |
4922 | for (auto It : CountMap) { |
4923 | if (It.second > 1) { |
4924 | FilterOutMap[It.first] = nullptr; |
4925 | DupSet.insert(Ptr: It.first.first); |
4926 | } |
4927 | } |
4928 | |
4929 | // For all instruction/variable pairs needing extra filtering, find the |
4930 | // latest assignment. |
4931 | for (const Instruction *Inst : DupSet) { |
4932 | for (DbgVariableRecord &DVR : |
4933 | llvm::reverse(C: filterDbgVars(R: Inst->getDbgRecordRange()))) { |
4934 | DebugVariable DbgUserVariable = |
4935 | DebugVariable(DVR.getVariable(), DVR.getExpression(), |
4936 | DVR.getDebugLoc()->getInlinedAt()); |
4937 | auto FilterIt = |
4938 | FilterOutMap.find(Val: std::make_pair(x&: Inst, y&: DbgUserVariable)); |
4939 | if (FilterIt == FilterOutMap.end()) |
4940 | continue; |
4941 | if (FilterIt->second != nullptr) |
4942 | continue; |
4943 | FilterIt->second = &DVR; |
4944 | } |
4945 | } |
4946 | } |
4947 | |
4948 | // Perform cloning of the DbgVariableRecords that we plan on sinking, filter |
4949 | // out any duplicate assignments identified above. |
4950 | SmallVector<DbgVariableRecord *, 2> DVRClones; |
4951 | SmallSet<DebugVariable, 4> SunkVariables; |
4952 | for (DbgVariableRecord *DVR : DbgVariableRecordsToSink) { |
4953 | if (DVR->Type == DbgVariableRecord::LocationType::Declare) |
4954 | continue; |
4955 | |
4956 | DebugVariable DbgUserVariable = |
4957 | DebugVariable(DVR->getVariable(), DVR->getExpression(), |
4958 | DVR->getDebugLoc()->getInlinedAt()); |
4959 | |
4960 | // For any variable where there were multiple assignments in the same place, |
4961 | // ignore all but the last assignment. |
4962 | if (!FilterOutMap.empty()) { |
4963 | InstVarPair IVP = std::make_pair(x: DVR->getInstruction(), y&: DbgUserVariable); |
4964 | auto It = FilterOutMap.find(Val: IVP); |
4965 | |
4966 | // Filter out. |
4967 | if (It != FilterOutMap.end() && It->second != DVR) |
4968 | continue; |
4969 | } |
4970 | |
4971 | if (!SunkVariables.insert(V: DbgUserVariable).second) |
4972 | continue; |
4973 | |
4974 | if (DVR->isDbgAssign()) |
4975 | continue; |
4976 | |
4977 | DVRClones.emplace_back(Args: DVR->clone()); |
4978 | LLVM_DEBUG(dbgs() << "CLONE: " << *DVRClones.back() << '\n'); |
4979 | } |
4980 | |
4981 | // Perform salvaging without the clones, then sink the clones. |
4982 | if (DVRClones.empty()) |
4983 | return; |
4984 | |
4985 | salvageDebugInfoForDbgValues(I&: *I, Insns: {}, DPInsns: DbgVariableRecordsToSalvage); |
4986 | |
4987 | // The clones are in reverse order of original appearance. Assert that the |
4988 | // head bit is set on the iterator as we _should_ have received it via |
4989 | // getFirstInsertionPt. Inserting like this will reverse the clone order as |
4990 | // we'll repeatedly insert at the head, such as: |
4991 | // DVR-3 (third insertion goes here) |
4992 | // DVR-2 (second insertion goes here) |
4993 | // DVR-1 (first insertion goes here) |
4994 | // Any-Prior-DVRs |
4995 | // InsertPtInst |
4996 | assert(InsertPos.getHeadBit()); |
4997 | for (DbgVariableRecord *DVRClone : DVRClones) { |
    InsertPos->getParent()->insertDbgRecordBefore(DVRClone, InsertPos);
4999 | LLVM_DEBUG(dbgs() << "SINK: " << *DVRClone << '\n'); |
5000 | } |
5001 | } |
5002 | |
5003 | bool InstCombinerImpl::run() { |
5004 | while (!Worklist.isEmpty()) { |
5005 | // Walk deferred instructions in reverse order, and push them to the |
5006 | // worklist, which means they'll end up popped from the worklist in-order. |
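    // For example, a deferred list [A, B, C] is re-pushed as C, B, A, so
    // removeOne() below returns A, then B, then C.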
5007 | while (Instruction *I = Worklist.popDeferred()) { |
      // Check to see if we can DCE the instruction. We already do this here to
5009 | // reduce the number of uses and thus allow other folds to trigger. |
5010 | // Note that eraseInstFromFunction() may push additional instructions on |
5011 | // the deferred worklist, so this will DCE whole instruction chains. |
      if (isInstructionTriviallyDead(I, &TLI)) {
        eraseInstFromFunction(*I);
5014 | ++NumDeadInst; |
5015 | continue; |
5016 | } |
5017 | |
5018 | Worklist.push(I); |
5019 | } |
5020 | |
5021 | Instruction *I = Worklist.removeOne(); |
5022 | if (I == nullptr) continue; // skip null values. |
5023 | |
5024 | // Check to see if we can DCE the instruction. |
    if (isInstructionTriviallyDead(I, &TLI)) {
      eraseInstFromFunction(*I);
5027 | ++NumDeadInst; |
5028 | continue; |
5029 | } |
5030 | |
    if (!DebugCounter::shouldExecute(VisitCounter))
5032 | continue; |
5033 | |
5034 | // See if we can trivially sink this instruction to its user if we can |
5035 | // prove that the successor is not executed more frequently than our block. |
5036 | // Return the UserBlock if successful. |
5037 | auto getOptionalSinkBlockForInst = |
5038 | [this](Instruction *I) -> std::optional<BasicBlock *> { |
5039 | if (!EnableCodeSinking) |
5040 | return std::nullopt; |
5041 | |
5042 | BasicBlock *BB = I->getParent(); |
5043 | BasicBlock *UserParent = nullptr; |
5044 | unsigned NumUsers = 0; |
5045 | |
5046 | for (Use &U : I->uses()) { |
5047 | User *User = U.getUser(); |
5048 | if (User->isDroppable()) |
5049 | continue; |
5050 | if (NumUsers > MaxSinkNumUsers) |
5051 | return std::nullopt; |
5052 | |
        Instruction *UserInst = cast<Instruction>(User);
5054 | // Special handling for Phi nodes - get the block the use occurs in. |
5055 | BasicBlock *UserBB = UserInst->getParent(); |
        if (PHINode *PN = dyn_cast<PHINode>(UserInst))
5057 | UserBB = PN->getIncomingBlock(U); |
        // Bail out if we have uses in different blocks. We don't do any
        // sophisticated analysis (i.e. finding the NearestCommonDominator of
        // these use blocks).
5061 | if (UserParent && UserParent != UserBB) |
5062 | return std::nullopt; |
5063 | UserParent = UserBB; |
5064 | |
        // Make sure these checks are done only once; naturally, we do them
        // the first time we get the UserParent, which saves compile time.
5067 | if (NumUsers == 0) { |
5068 | // Try sinking to another block. If that block is unreachable, then do |
5069 | // not bother. SimplifyCFG should handle it. |
          if (UserParent == BB || !DT.isReachableFromEntry(UserParent))
5071 | return std::nullopt; |
5072 | |
5073 | auto *Term = UserParent->getTerminator(); |
5074 | // See if the user is one of our successors that has only one |
5075 | // predecessor, so that we don't have to split the critical edge. |
          // Another block we can sink to is one that ends with a terminator
          // that does not pass control to another block (such as return,
          // unreachable, or resume). In this case:
5079 | // - I dominates the User (by SSA form); |
5080 | // - the User will be executed at most once. |
5081 | // So sinking I down to User is always profitable or neutral. |
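          // Illustrative sketch (hypothetical blocks):
          //   BB:         %v = add i32 %a, %b
          //               br i1 %c, label %UserParent, label %Other
          //   UserParent: ; unique predecessor is BB
          //               %u = mul i32 %v, 2
          // Sinking %v into UserParent is fine: it then only executes when
          // that path is actually taken.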
          if (UserParent->getUniquePredecessor() != BB && !succ_empty(Term))
5083 | return std::nullopt; |
5084 | |
          assert(DT.dominates(BB, UserParent) && "Dominance relation broken?");
5086 | } |
5087 | |
5088 | NumUsers++; |
5089 | } |
5090 | |
5091 | // No user or only has droppable users. |
5092 | if (!UserParent) |
5093 | return std::nullopt; |
5094 | |
5095 | return UserParent; |
5096 | }; |
5097 | |
5098 | auto OptBB = getOptionalSinkBlockForInst(I); |
5099 | if (OptBB) { |
5100 | auto *UserParent = *OptBB; |
5101 | // Okay, the CFG is simple enough, try to sink this instruction. |
      if (tryToSinkInstruction(I, UserParent)) {
5103 | LLVM_DEBUG(dbgs() << "IC: Sink: " << *I << '\n'); |
5104 | MadeIRChange = true; |
        // We'll add uses of the sunk instruction below, but since
        // sinking can expose opportunities for its *operands*, add
        // them to the worklist.
5108 | for (Use &U : I->operands()) |
          if (Instruction *OpI = dyn_cast<Instruction>(U.get()))
            Worklist.push(OpI);
5111 | } |
5112 | } |
5113 | |
5114 | // Now that we have an instruction, try combining it to simplify it. |
5115 | Builder.SetInsertPoint(I); |
    Builder.CollectMetadataToCopy(
        I, {LLVMContext::MD_dbg, LLVMContext::MD_annotation});
5118 | |
5119 | #ifndef NDEBUG |
5120 | std::string OrigI; |
5121 | #endif |
5122 | LLVM_DEBUG(raw_string_ostream SS(OrigI); I->print(SS);); |
5123 | LLVM_DEBUG(dbgs() << "IC: Visiting: " << OrigI << '\n'); |
5124 | |
    if (Instruction *Result = visit(*I)) {
5126 | ++NumCombined; |
5127 | // Should we replace the old instruction with a new one? |
5128 | if (Result != I) { |
5129 | LLVM_DEBUG(dbgs() << "IC: Old = " << *I << '\n' |
5130 | << " New = " << *Result << '\n'); |
5131 | |
        Result->copyMetadata(*I,
                             {LLVMContext::MD_dbg, LLVMContext::MD_annotation});
5134 | // Everything uses the new instruction now. |
        I->replaceAllUsesWith(Result);
5136 | |
5137 | // Move the name to the new instruction first. |
        Result->takeName(I);
5139 | |
5140 | // Insert the new instruction into the basic block... |
5141 | BasicBlock *InstParent = I->getParent(); |
5142 | BasicBlock::iterator InsertPos = I->getIterator(); |
5143 | |
        // Are we replacing a PHI with something that isn't a PHI, or vice
        // versa?
        if (isa<PHINode>(Result) != isa<PHINode>(I)) {
          // We need to fix up the insertion point.
          if (isa<PHINode>(I)) // PHI -> Non-PHI
5148 | InsertPos = InstParent->getFirstInsertionPt(); |
5149 | else // Non-PHI -> PHI |
5150 | InsertPos = InstParent->getFirstNonPHIIt(); |
5151 | } |
5152 | |
        Result->insertInto(InstParent, InsertPos);
5154 | |
5155 | // Push the new instruction and any users onto the worklist. |
        Worklist.pushUsersToWorkList(*Result);
        Worklist.push(Result);
5158 | |
        eraseInstFromFunction(*I);
5160 | } else { |
5161 | LLVM_DEBUG(dbgs() << "IC: Mod = " << OrigI << '\n' |
5162 | << " New = " << *I << '\n'); |
5163 | |
        // If the instruction was modified, it's possible that it is now dead.
        // If so, remove it.
        if (isInstructionTriviallyDead(I, &TLI)) {
          eraseInstFromFunction(*I);
5168 | } else { |
          Worklist.pushUsersToWorkList(*I);
5170 | Worklist.push(I); |
5171 | } |
5172 | } |
5173 | MadeIRChange = true; |
5174 | } |
5175 | } |
5176 | |
5177 | Worklist.zap(); |
5178 | return MadeIRChange; |
5179 | } |
5180 | |
5181 | // Track the scopes used by !alias.scope and !noalias. In a function, a |
5182 | // @llvm.experimental.noalias.scope.decl is only useful if that scope is used |
5183 | // by both sets. If not, the declaration of the scope can be safely omitted. |
5184 | // The MDNode of the scope can be omitted as well for the instructions that are |
5185 | // part of this function. We do not do that at this point, as this might become |
// too time-consuming.
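//
// Illustrative sketch (hypothetical IR, for orientation only):
//   call void @llvm.experimental.noalias.scope.decl(metadata !2)
//   store i32 0, ptr %p, !alias.scope !2
//   %v = load i32, ptr %q, !noalias !2
// The scope declaration stays useful only while some instruction still lists
// the scope under !alias.scope and some other instruction lists it under
// !noalias.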
5187 | class AliasScopeTracker { |
5188 | SmallPtrSet<const MDNode *, 8> UsedAliasScopesAndLists; |
5189 | SmallPtrSet<const MDNode *, 8> UsedNoAliasScopesAndLists; |
5190 | |
5191 | public: |
5192 | void analyse(Instruction *I) { |
5193 | // This seems to be faster than checking 'mayReadOrWriteMemory()'. |
5194 | if (!I->hasMetadataOtherThanDebugLoc()) |
5195 | return; |
5196 | |
5197 | auto Track = [](Metadata *ScopeList, auto &Container) { |
      const auto *MDScopeList = dyn_cast_or_null<MDNode>(ScopeList);
5199 | if (!MDScopeList || !Container.insert(MDScopeList).second) |
5200 | return; |
5201 | for (const auto &MDOperand : MDScopeList->operands()) |
        if (auto *MDScope = dyn_cast<MDNode>(MDOperand))
5203 | Container.insert(MDScope); |
5204 | }; |
5205 | |
    Track(I->getMetadata(LLVMContext::MD_alias_scope), UsedAliasScopesAndLists);
    Track(I->getMetadata(LLVMContext::MD_noalias), UsedNoAliasScopesAndLists);
5208 | } |
5209 | |
5210 | bool isNoAliasScopeDeclDead(Instruction *Inst) { |
    NoAliasScopeDeclInst *Decl = dyn_cast<NoAliasScopeDeclInst>(Inst);
5212 | if (!Decl) |
5213 | return false; |
5214 | |
5215 | assert(Decl->use_empty() && |
5216 | "llvm.experimental.noalias.scope.decl in use ?" ); |
5217 | const MDNode *MDSL = Decl->getScopeList(); |
5218 | assert(MDSL->getNumOperands() == 1 && |
5219 | "llvm.experimental.noalias.scope should refer to a single scope" ); |
5220 | auto &MDOperand = MDSL->getOperand(I: 0); |
5221 | if (auto *MD = dyn_cast<MDNode>(Val: MDOperand)) |
5222 | return !UsedAliasScopesAndLists.contains(Ptr: MD) || |
5223 | !UsedNoAliasScopesAndLists.contains(Ptr: MD); |
5224 | |
    // Not an MDNode? Throw it away.
5226 | return true; |
5227 | } |
5228 | }; |
5229 | |
5230 | /// Populate the IC worklist from a function, by walking it in reverse |
5231 | /// post-order and adding all reachable code to the worklist. |
5232 | /// |
5233 | /// This has a couple of tricks to make the code faster and more powerful. In |
5234 | /// particular, we constant fold and DCE instructions as we go, to avoid adding |
5235 | /// them to the worklist (this significantly speeds up instcombine on code where |
5236 | /// many instructions are dead or constant). Additionally, if we find a branch |
5237 | /// whose condition is a known constant, we only visit the reachable successors. |
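///
/// For example (illustrative IR), given
///   br i1 true, label %live, label %dead
/// only %live's instructions are queued; the edge to %dead is marked dead and
/// %dead's PHI inputs along that edge are replaced with poison.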
5238 | bool InstCombinerImpl::prepareWorklist( |
5239 | Function &F, ReversePostOrderTraversal<BasicBlock *> &RPOT) { |
5240 | bool MadeIRChange = false; |
5241 | SmallPtrSet<BasicBlock *, 32> LiveBlocks; |
5242 | SmallVector<Instruction *, 128> InstrsForInstructionWorklist; |
5243 | DenseMap<Constant *, Constant *> FoldedConstants; |
5244 | AliasScopeTracker SeenAliasScopes; |
5245 | |
  // Mark every edge from BB to a successor other than LiveSucc as dead, and
  // replace the matching PHI incoming values in those successors with poison.
  // Passing a null LiveSucc marks all outgoing edges of BB as dead.
  auto HandleOnlyLiveSuccessor = [&](BasicBlock *BB, BasicBlock *LiveSucc) {
5247 | for (BasicBlock *Succ : successors(BB)) |
      if (Succ != LiveSucc && DeadEdges.insert({BB, Succ}).second)
5249 | for (PHINode &PN : Succ->phis()) |
5250 | for (Use &U : PN.incoming_values()) |
            if (PN.getIncomingBlock(U) == BB && !isa<PoisonValue>(U)) {
              U.set(PoisonValue::get(PN.getType()));
5253 | MadeIRChange = true; |
5254 | } |
5255 | }; |
5256 | |
5257 | for (BasicBlock *BB : RPOT) { |
    if (!BB->isEntryBlock() && all_of(predecessors(BB), [&](BasicBlock *Pred) {
          return DeadEdges.contains({Pred, BB}) || DT.dominates(BB, Pred);
5260 | })) { |
5261 | HandleOnlyLiveSuccessor(BB, nullptr); |
5262 | continue; |
5263 | } |
    LiveBlocks.insert(BB);
5265 | |
    for (Instruction &Inst : llvm::make_early_inc_range(*BB)) {
      // Constant-fold the instruction if it is trivially constant.
5268 | if (!Inst.use_empty() && |
          (Inst.getNumOperands() == 0 || isa<Constant>(Inst.getOperand(0))))
        if (Constant *C = ConstantFoldInstruction(&Inst, DL, &TLI)) {
5271 | LLVM_DEBUG(dbgs() << "IC: ConstFold to: " << *C << " from: " << Inst |
5272 | << '\n'); |
          Inst.replaceAllUsesWith(C);
          ++NumConstProp;
          if (isInstructionTriviallyDead(&Inst, &TLI))
5276 | Inst.eraseFromParent(); |
5277 | MadeIRChange = true; |
5278 | continue; |
5279 | } |
5280 | |
5281 | // See if we can constant fold its operands. |
5282 | for (Use &U : Inst.operands()) { |
        if (!isa<ConstantVector>(U) && !isa<ConstantExpr>(U))
5284 | continue; |
5285 | |
        auto *C = cast<Constant>(U);
        Constant *&FoldRes = FoldedConstants[C];
        if (!FoldRes)
          FoldRes = ConstantFoldConstant(C, DL, &TLI);
5290 | |
5291 | if (FoldRes != C) { |
5292 | LLVM_DEBUG(dbgs() << "IC: ConstFold operand of: " << Inst |
5293 | << "\n Old = " << *C |
5294 | << "\n New = " << *FoldRes << '\n'); |
5295 | U = FoldRes; |
5296 | MadeIRChange = true; |
5297 | } |
5298 | } |
5299 | |
      // Skip processing debug and pseudo intrinsics in InstCombine. Processing
      // these call instructions consumes a non-trivial amount of time and
      // provides no value for the optimization.
5303 | if (!Inst.isDebugOrPseudoInst()) { |
        InstrsForInstructionWorklist.push_back(&Inst);
        SeenAliasScopes.analyse(&Inst);
5306 | } |
5307 | } |
5308 | |
5309 | // If this is a branch or switch on a constant, mark only the single |
5310 | // live successor. Otherwise assume all successors are live. |
5311 | Instruction *TI = BB->getTerminator(); |
    if (BranchInst *BI = dyn_cast<BranchInst>(TI); BI && BI->isConditional()) {
      if (isa<UndefValue>(BI->getCondition())) {
5314 | // Branch on undef is UB. |
5315 | HandleOnlyLiveSuccessor(BB, nullptr); |
5316 | continue; |
5317 | } |
      if (auto *Cond = dyn_cast<ConstantInt>(BI->getCondition())) {
        bool CondVal = Cond->getZExtValue();
        HandleOnlyLiveSuccessor(BB, BI->getSuccessor(!CondVal));
5321 | continue; |
5322 | } |
    } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
      if (isa<UndefValue>(SI->getCondition())) {
5325 | // Switch on undef is UB. |
5326 | HandleOnlyLiveSuccessor(BB, nullptr); |
5327 | continue; |
5328 | } |
      if (auto *Cond = dyn_cast<ConstantInt>(SI->getCondition())) {
        HandleOnlyLiveSuccessor(BB,
                                SI->findCaseValue(Cond)->getCaseSuccessor());
5332 | continue; |
5333 | } |
5334 | } |
5335 | } |
5336 | |
5337 | // Remove instructions inside unreachable blocks. This prevents the |
5338 | // instcombine code from having to deal with some bad special cases, and |
5339 | // reduces use counts of instructions. |
5340 | for (BasicBlock &BB : F) { |
    if (LiveBlocks.count(&BB))
5342 | continue; |
5343 | |
5344 | unsigned NumDeadInstInBB; |
5345 | unsigned NumDeadDbgInstInBB; |
    std::tie(NumDeadInstInBB, NumDeadDbgInstInBB) =
        removeAllNonTerminatorAndEHPadInstructions(&BB);
5348 | |
5349 | MadeIRChange |= NumDeadInstInBB + NumDeadDbgInstInBB > 0; |
5350 | NumDeadInst += NumDeadInstInBB; |
5351 | } |
5352 | |
5353 | // Once we've found all of the instructions to add to instcombine's worklist, |
5354 | // add them in reverse order. This way instcombine will visit from the top |
  // of the function down. This jibes well with the way that it adds all uses
5356 | // of instructions to the worklist after doing a transformation, thus avoiding |
5357 | // some N^2 behavior in pathological cases. |
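  // For example, with program order [a, b = f(a), c = g(b)], we push c, b, a,
  // so removeOne() visits a first while b and c are still queued behind it.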
  Worklist.reserve(InstrsForInstructionWorklist.size());
  for (Instruction *Inst : reverse(InstrsForInstructionWorklist)) {
5360 | // DCE instruction if trivially dead. As we iterate in reverse program |
5361 | // order here, we will clean up whole chains of dead instructions. |
    if (isInstructionTriviallyDead(Inst, &TLI) ||
5363 | SeenAliasScopes.isNoAliasScopeDeclDead(Inst)) { |
5364 | ++NumDeadInst; |
5365 | LLVM_DEBUG(dbgs() << "IC: DCE: " << *Inst << '\n'); |
      salvageDebugInfo(*Inst);
5367 | Inst->eraseFromParent(); |
5368 | MadeIRChange = true; |
5369 | continue; |
5370 | } |
5371 | |
    Worklist.push(Inst);
5373 | } |
5374 | |
5375 | return MadeIRChange; |
5376 | } |
5377 | |
5378 | static bool combineInstructionsOverFunction( |
5379 | Function &F, InstructionWorklist &Worklist, AliasAnalysis *AA, |
5380 | AssumptionCache &AC, TargetLibraryInfo &TLI, TargetTransformInfo &TTI, |
5381 | DominatorTree &DT, OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI, |
5382 | BranchProbabilityInfo *BPI, ProfileSummaryInfo *PSI, LoopInfo *LI, |
5383 | const InstCombineOptions &Opts) { |
5384 | auto &DL = F.getDataLayout(); |
5385 | |
5386 | /// Builder - This is an IRBuilder that automatically inserts new |
5387 | /// instructions into the worklist when they are created. |
5388 | IRBuilder<TargetFolder, IRBuilderCallbackInserter> Builder( |
5389 | F.getContext(), TargetFolder(DL), |
5390 | IRBuilderCallbackInserter([&Worklist, &AC](Instruction *I) { |
5391 | Worklist.add(I); |
        if (auto *Assume = dyn_cast<AssumeInst>(I))
          AC.registerAssumption(Assume);
5394 | })); |
5395 | |
5396 | ReversePostOrderTraversal<BasicBlock *> RPOT(&F.front()); |
5397 | |
  // Lower dbg.declare intrinsics; otherwise their value may be clobbered by
  // the instcombiner.
5400 | bool MadeIRChange = false; |
5401 | if (ShouldLowerDbgDeclare) |
5402 | MadeIRChange = LowerDbgDeclare(F); |
5403 | |
5404 | // Iterate while there is work to do. |
5405 | unsigned Iteration = 0; |
5406 | while (true) { |
5407 | ++Iteration; |
5408 | |
5409 | if (Iteration > Opts.MaxIterations && !Opts.VerifyFixpoint) { |
5410 | LLVM_DEBUG(dbgs() << "\n\n[IC] Iteration limit #" << Opts.MaxIterations |
5411 | << " on " << F.getName() |
5412 | << " reached; stopping without verifying fixpoint\n" ); |
5413 | break; |
5414 | } |
5415 | |
5416 | ++NumWorklistIterations; |
5417 | LLVM_DEBUG(dbgs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on " |
5418 | << F.getName() << "\n" ); |
5419 | |
5420 | InstCombinerImpl IC(Worklist, Builder, F.hasMinSize(), AA, AC, TLI, TTI, DT, |
5421 | ORE, BFI, BPI, PSI, DL, LI); |
5422 | IC.MaxArraySizeForCombine = MaxArraySize; |
5423 | bool MadeChangeInThisIteration = IC.prepareWorklist(F, RPOT); |
5424 | MadeChangeInThisIteration |= IC.run(); |
5425 | if (!MadeChangeInThisIteration) |
5426 | break; |
5427 | |
5428 | MadeIRChange = true; |
5429 | if (Iteration > Opts.MaxIterations) { |
5430 | report_fatal_error( |
5431 | reason: "Instruction Combining did not reach a fixpoint after " + |
5432 | Twine(Opts.MaxIterations) + " iterations" , |
5433 | /*GenCrashDiag=*/gen_crash_diag: false); |
5434 | } |
5435 | } |
5436 | |
5437 | if (Iteration == 1) |
5438 | ++NumOneIteration; |
5439 | else if (Iteration == 2) |
5440 | ++NumTwoIterations; |
5441 | else if (Iteration == 3) |
5442 | ++NumThreeIterations; |
5443 | else |
5444 | ++NumFourOrMoreIterations; |
5445 | |
5446 | return MadeIRChange; |
5447 | } |
5448 | |
5449 | InstCombinePass::InstCombinePass(InstCombineOptions Opts) : Options(Opts) {} |
5450 | |
5451 | void InstCombinePass::printPipeline( |
5452 | raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) { |
5453 | static_cast<PassInfoMixin<InstCombinePass> *>(this)->printPipeline( |
5454 | OS, MapClassName2PassName); |
5455 | OS << '<'; |
5456 | OS << "max-iterations=" << Options.MaxIterations << ";" ; |
5457 | OS << (Options.UseLoopInfo ? "" : "no-" ) << "use-loop-info;" ; |
5458 | OS << (Options.VerifyFixpoint ? "" : "no-" ) << "verify-fixpoint" ; |
5459 | OS << '>'; |
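  // With hypothetical option values, the printed form looks like:
  //   instcombine<max-iterations=1;no-use-loop-info;verify-fixpoint>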
5460 | } |
5461 | |
5462 | PreservedAnalyses InstCombinePass::run(Function &F, |
5463 | FunctionAnalysisManager &AM) { |
  auto &AC = AM.getResult<AssumptionAnalysis>(F);
  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
  auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
  auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
  auto &TTI = AM.getResult<TargetIRAnalysis>(F);
5469 | |
5470 | // TODO: Only use LoopInfo when the option is set. This requires that the |
5471 | // callers in the pass pipeline explicitly set the option. |
  auto *LI = AM.getCachedResult<LoopAnalysis>(F);
  if (!LI && Options.UseLoopInfo)
    LI = &AM.getResult<LoopAnalysis>(F);
5475 | |
  auto *AA = &AM.getResult<AAManager>(F);
  auto &MAMProxy = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
  ProfileSummaryInfo *PSI =
      MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
  auto *BFI = (PSI && PSI->hasProfileSummary()) ?
      &AM.getResult<BlockFrequencyAnalysis>(F) : nullptr;
  auto *BPI = AM.getCachedResult<BranchProbabilityAnalysis>(F);
5483 | |
5484 | if (!combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, TTI, DT, ORE, |
                                       BFI, BPI, PSI, LI, Options))
5486 | // No changes, all analyses are preserved. |
5487 | return PreservedAnalyses::all(); |
5488 | |
5489 | // Mark all the analyses that instcombine updates as preserved. |
5490 | PreservedAnalyses PA; |
5491 | PA.preserveSet<CFGAnalyses>(); |
5492 | return PA; |
5493 | } |
5494 | |
5495 | void InstructionCombiningPass::getAnalysisUsage(AnalysisUsage &AU) const { |
5496 | AU.setPreservesCFG(); |
5497 | AU.addRequired<AAResultsWrapperPass>(); |
5498 | AU.addRequired<AssumptionCacheTracker>(); |
5499 | AU.addRequired<TargetLibraryInfoWrapperPass>(); |
5500 | AU.addRequired<TargetTransformInfoWrapperPass>(); |
5501 | AU.addRequired<DominatorTreeWrapperPass>(); |
5502 | AU.addRequired<OptimizationRemarkEmitterWrapperPass>(); |
5503 | AU.addPreserved<DominatorTreeWrapperPass>(); |
5504 | AU.addPreserved<AAResultsWrapperPass>(); |
5505 | AU.addPreserved<BasicAAWrapperPass>(); |
5506 | AU.addPreserved<GlobalsAAWrapperPass>(); |
5507 | AU.addRequired<ProfileSummaryInfoWrapperPass>(); |
5508 | LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU); |
5509 | } |
5510 | |
5511 | bool InstructionCombiningPass::runOnFunction(Function &F) { |
5512 | if (skipFunction(F)) |
5513 | return false; |
5514 | |
5515 | // Required analyses. |
5516 | auto AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); |
5517 | auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); |
5518 | auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); |
5519 | auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); |
5520 | auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); |
5521 | auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE(); |
5522 | |
5523 | // Optional analyses. |
5524 | auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>(); |
5525 | auto *LI = LIWP ? &LIWP->getLoopInfo() : nullptr; |
5526 | ProfileSummaryInfo *PSI = |
5527 | &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); |
5528 | BlockFrequencyInfo *BFI = |
5529 | (PSI && PSI->hasProfileSummary()) ? |
5530 | &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI() : |
5531 | nullptr; |
5532 | BranchProbabilityInfo *BPI = nullptr; |
5533 | if (auto *WrapperPass = |
5534 | getAnalysisIfAvailable<BranchProbabilityInfoWrapperPass>()) |
5535 | BPI = &WrapperPass->getBPI(); |
5536 | |
5537 | return combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, TTI, DT, ORE, |
5538 | BFI, BPI, PSI, LI, |
                                         InstCombineOptions());
5540 | } |
5541 | |
5542 | char InstructionCombiningPass::ID = 0; |
5543 | |
5544 | InstructionCombiningPass::InstructionCombiningPass() : FunctionPass(ID) { |
5545 | initializeInstructionCombiningPassPass(*PassRegistry::getPassRegistry()); |
5546 | } |
5547 | |
INITIALIZE_PASS_BEGIN(InstructionCombiningPass, "instcombine",
                      "Combine redundant instructions", false, false)
5550 | INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) |
5551 | INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) |
5552 | INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) |
5553 | INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) |
5554 | INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) |
5555 | INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) |
5556 | INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass) |
5557 | INITIALIZE_PASS_DEPENDENCY(LazyBlockFrequencyInfoPass) |
5558 | INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) |
INITIALIZE_PASS_END(InstructionCombiningPass, "instcombine",
                    "Combine redundant instructions", false, false)
5561 | |
5562 | // Initialization Routines |
5563 | void llvm::initializeInstCombine(PassRegistry &Registry) { |
5564 | initializeInstructionCombiningPassPass(Registry); |
5565 | } |
5566 | |
5567 | FunctionPass *llvm::createInstructionCombiningPass() { |
5568 | return new InstructionCombiningPass(); |
5569 | } |
5570 | |