1 | //===- InstructionCombining.cpp - Combine multiple instructions -----------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // InstructionCombining - Combine instructions to form fewer, simple |
10 | // instructions. This pass does not modify the CFG. This pass is where |
11 | // algebraic simplification happens. |
12 | // |
13 | // This pass combines things like: |
14 | // %Y = add i32 %X, 1 |
15 | // %Z = add i32 %Y, 1 |
16 | // into: |
17 | // %Z = add i32 %X, 2 |
18 | // |
19 | // This is a simple worklist driven algorithm. |
20 | // |
21 | // This pass guarantees that the following canonicalizations are performed on |
22 | // the program: |
23 | // 1. If a binary operator has a constant operand, it is moved to the RHS |
24 | // 2. Bitwise operators with constant operands are always grouped so that |
25 | // shifts are performed first, then or's, then and's, then xor's. |
26 | // 3. Compare instructions are converted from <,>,<=,>= to ==,!= if possible |
27 | // 4. All cmp instructions on boolean values are replaced with logical ops |
28 | // 5. add X, X is represented as (X*2) => (X << 1) |
29 | // 6. Multiplies with a power-of-two constant argument are transformed into |
30 | // shifts. |
31 | // ... etc. |
32 | // |
33 | //===----------------------------------------------------------------------===// |
34 | |
35 | #include "InstCombineInternal.h" |
36 | #include "llvm/ADT/APFloat.h" |
37 | #include "llvm/ADT/APInt.h" |
38 | #include "llvm/ADT/ArrayRef.h" |
39 | #include "llvm/ADT/DenseMap.h" |
40 | #include "llvm/ADT/SmallPtrSet.h" |
41 | #include "llvm/ADT/SmallVector.h" |
42 | #include "llvm/ADT/Statistic.h" |
43 | #include "llvm/Analysis/AliasAnalysis.h" |
44 | #include "llvm/Analysis/AssumptionCache.h" |
45 | #include "llvm/Analysis/BasicAliasAnalysis.h" |
46 | #include "llvm/Analysis/BlockFrequencyInfo.h" |
47 | #include "llvm/Analysis/CFG.h" |
48 | #include "llvm/Analysis/ConstantFolding.h" |
49 | #include "llvm/Analysis/GlobalsModRef.h" |
50 | #include "llvm/Analysis/InstructionSimplify.h" |
51 | #include "llvm/Analysis/LastRunTrackingAnalysis.h" |
52 | #include "llvm/Analysis/LazyBlockFrequencyInfo.h" |
53 | #include "llvm/Analysis/MemoryBuiltins.h" |
54 | #include "llvm/Analysis/OptimizationRemarkEmitter.h" |
55 | #include "llvm/Analysis/ProfileSummaryInfo.h" |
56 | #include "llvm/Analysis/TargetFolder.h" |
57 | #include "llvm/Analysis/TargetLibraryInfo.h" |
58 | #include "llvm/Analysis/TargetTransformInfo.h" |
59 | #include "llvm/Analysis/Utils/Local.h" |
60 | #include "llvm/Analysis/ValueTracking.h" |
61 | #include "llvm/Analysis/VectorUtils.h" |
62 | #include "llvm/IR/BasicBlock.h" |
63 | #include "llvm/IR/CFG.h" |
64 | #include "llvm/IR/Constant.h" |
65 | #include "llvm/IR/Constants.h" |
66 | #include "llvm/IR/DIBuilder.h" |
67 | #include "llvm/IR/DataLayout.h" |
68 | #include "llvm/IR/DebugInfo.h" |
69 | #include "llvm/IR/DerivedTypes.h" |
70 | #include "llvm/IR/Dominators.h" |
71 | #include "llvm/IR/EHPersonalities.h" |
72 | #include "llvm/IR/Function.h" |
73 | #include "llvm/IR/GetElementPtrTypeIterator.h" |
74 | #include "llvm/IR/IRBuilder.h" |
75 | #include "llvm/IR/InstrTypes.h" |
76 | #include "llvm/IR/Instruction.h" |
77 | #include "llvm/IR/Instructions.h" |
78 | #include "llvm/IR/IntrinsicInst.h" |
79 | #include "llvm/IR/Intrinsics.h" |
80 | #include "llvm/IR/Metadata.h" |
81 | #include "llvm/IR/Operator.h" |
82 | #include "llvm/IR/PassManager.h" |
83 | #include "llvm/IR/PatternMatch.h" |
84 | #include "llvm/IR/Type.h" |
85 | #include "llvm/IR/Use.h" |
86 | #include "llvm/IR/User.h" |
87 | #include "llvm/IR/Value.h" |
88 | #include "llvm/IR/ValueHandle.h" |
89 | #include "llvm/InitializePasses.h" |
90 | #include "llvm/Support/Casting.h" |
91 | #include "llvm/Support/CommandLine.h" |
92 | #include "llvm/Support/Compiler.h" |
93 | #include "llvm/Support/Debug.h" |
94 | #include "llvm/Support/DebugCounter.h" |
95 | #include "llvm/Support/ErrorHandling.h" |
96 | #include "llvm/Support/KnownBits.h" |
97 | #include "llvm/Support/KnownFPClass.h" |
98 | #include "llvm/Support/raw_ostream.h" |
99 | #include "llvm/Transforms/InstCombine/InstCombine.h" |
100 | #include "llvm/Transforms/Utils/BasicBlockUtils.h" |
101 | #include "llvm/Transforms/Utils/Local.h" |
102 | #include <algorithm> |
103 | #include <cassert> |
104 | #include <cstdint> |
105 | #include <memory> |
106 | #include <optional> |
107 | #include <string> |
108 | #include <utility> |
109 | |
110 | #define DEBUG_TYPE "instcombine" |
111 | #include "llvm/Transforms/Utils/InstructionWorklist.h" |
113 | |
114 | using namespace llvm; |
115 | using namespace llvm::PatternMatch; |
116 | |
117 | STATISTIC(NumWorklistIterations, |
118 | "Number of instruction combining iterations performed" ); |
119 | STATISTIC(NumOneIteration, "Number of functions with one iteration" ); |
120 | STATISTIC(NumTwoIterations, "Number of functions with two iterations" ); |
121 | STATISTIC(NumThreeIterations, "Number of functions with three iterations" ); |
122 | STATISTIC(NumFourOrMoreIterations, |
123 | "Number of functions with four or more iterations" ); |
124 | |
125 | STATISTIC(NumCombined , "Number of insts combined" ); |
126 | STATISTIC(NumConstProp, "Number of constant folds" ); |
127 | STATISTIC(NumDeadInst , "Number of dead inst eliminated" ); |
128 | STATISTIC(NumSunkInst , "Number of instructions sunk" ); |
129 | STATISTIC(NumExpand, "Number of expansions" ); |
130 | STATISTIC(NumFactor , "Number of factorizations" ); |
131 | STATISTIC(NumReassoc , "Number of reassociations" ); |
132 | DEBUG_COUNTER(VisitCounter, "instcombine-visit" , |
133 | "Controls which instructions are visited" ); |
134 | |
135 | static cl::opt<bool> |
136 | EnableCodeSinking("instcombine-code-sinking" , cl::desc("Enable code sinking" ), |
137 | cl::init(Val: true)); |
138 | |
139 | static cl::opt<unsigned> MaxSinkNumUsers( |
140 | "instcombine-max-sink-users" , cl::init(Val: 32), |
141 | cl::desc("Maximum number of undroppable users for instruction sinking" )); |
142 | |
143 | static cl::opt<unsigned> |
144 | MaxArraySize("instcombine-maxarray-size" , cl::init(Val: 1024), |
145 | cl::desc("Maximum array size considered when doing a combine" )); |
146 | |
147 | // FIXME: Remove this flag when it is no longer necessary to convert |
148 | // llvm.dbg.declare to avoid inaccurate debug info. Setting this to false |
149 | // increases variable availability at the cost of accuracy. Variables that |
150 | // cannot be promoted by mem2reg or SROA will be described as living in memory |
151 | // for their entire lifetime. However, passes like DSE and instcombine can |
152 | // delete stores to the alloca, leading to misleading and inaccurate debug |
153 | // information. This flag can be removed when those passes are fixed. |
154 | static cl::opt<unsigned> ShouldLowerDbgDeclare("instcombine-lower-dbg-declare" , |
155 | cl::Hidden, cl::init(Val: true)); |
156 | |
157 | std::optional<Instruction *> |
158 | InstCombiner::targetInstCombineIntrinsic(IntrinsicInst &II) { |
159 | // Handle target specific intrinsics |
160 | if (II.getCalledFunction()->isTargetIntrinsic()) { |
161 | return TTIForTargetIntrinsicsOnly.instCombineIntrinsic(IC&: *this, II); |
162 | } |
163 | return std::nullopt; |
164 | } |
165 | |
166 | std::optional<Value *> InstCombiner::targetSimplifyDemandedUseBitsIntrinsic( |
167 | IntrinsicInst &II, APInt DemandedMask, KnownBits &Known, |
168 | bool &KnownBitsComputed) { |
169 | // Handle target specific intrinsics |
170 | if (II.getCalledFunction()->isTargetIntrinsic()) { |
171 | return TTIForTargetIntrinsicsOnly.simplifyDemandedUseBitsIntrinsic( |
172 | IC&: *this, II, DemandedMask, Known, KnownBitsComputed); |
173 | } |
174 | return std::nullopt; |
175 | } |
176 | |
177 | std::optional<Value *> InstCombiner::targetSimplifyDemandedVectorEltsIntrinsic( |
178 | IntrinsicInst &II, APInt DemandedElts, APInt &PoisonElts, |
179 | APInt &PoisonElts2, APInt &PoisonElts3, |
180 | std::function<void(Instruction *, unsigned, APInt, APInt &)> |
181 | SimplifyAndSetOp) { |
182 | // Handle target specific intrinsics |
183 | if (II.getCalledFunction()->isTargetIntrinsic()) { |
184 | return TTIForTargetIntrinsicsOnly.simplifyDemandedVectorEltsIntrinsic( |
185 | IC&: *this, II, DemandedElts, UndefElts&: PoisonElts, UndefElts2&: PoisonElts2, UndefElts3&: PoisonElts3, |
186 | SimplifyAndSetOp); |
187 | } |
188 | return std::nullopt; |
189 | } |
190 | |
191 | bool InstCombiner::isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const { |
192 | // Approved exception for TTI use: This queries a legality property of the |
  // target, not a profitability heuristic. Ideally this should be part of
194 | // DataLayout instead. |
195 | return TTIForTargetIntrinsicsOnly.isValidAddrSpaceCast(FromAS, ToAS); |
196 | } |
197 | |
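/// Emit an explicit computation of the byte offset applied by \p GEP. If
/// \p RewriteGEP is set and the GEP is a non-trivial instruction (not all
/// constant indices and not already an i8 GEP), the GEP itself is also
/// rewritten into "getelementptr i8" form using the emitted offset so the
/// offset arithmetic is not duplicated.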
198 | Value *InstCombinerImpl::EmitGEPOffset(GEPOperator *GEP, bool RewriteGEP) { |
199 | if (!RewriteGEP) |
200 | return llvm::emitGEPOffset(Builder: &Builder, DL, GEP); |
201 | |
202 | IRBuilderBase::InsertPointGuard Guard(Builder); |
203 | auto *Inst = dyn_cast<Instruction>(Val: GEP); |
204 | if (Inst) |
205 | Builder.SetInsertPoint(Inst); |
206 | |
207 | Value *Offset = EmitGEPOffset(GEP); |
208 | // Rewrite non-trivial GEPs to avoid duplicating the offset arithmetic. |
209 | if (Inst && !GEP->hasAllConstantIndices() && |
210 | !GEP->getSourceElementType()->isIntegerTy(Bitwidth: 8)) { |
211 | replaceInstUsesWith( |
212 | I&: *Inst, V: Builder.CreateGEP(Ty: Builder.getInt8Ty(), Ptr: GEP->getPointerOperand(), |
213 | IdxList: Offset, Name: "" , NW: GEP->getNoWrapFlags())); |
214 | eraseInstFromFunction(I&: *Inst); |
215 | } |
216 | return Offset; |
217 | } |
218 | |
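/// Emit the combined offset of a list of GEPs by summing the offset of each
/// GEP (iterating the list in reverse), applying the no-wrap flags \p NW to
/// the adds. A scalar offset is splatted when the index type \p IdxTy is a
/// vector type. Returns a zero of \p IdxTy if \p GEPs is empty.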
219 | Value *InstCombinerImpl::EmitGEPOffsets(ArrayRef<GEPOperator *> GEPs, |
220 | GEPNoWrapFlags NW, Type *IdxTy, |
221 | bool RewriteGEPs) { |
222 | Value *Sum = nullptr; |
223 | for (GEPOperator *GEP : reverse(C&: GEPs)) { |
224 | Value *Offset = EmitGEPOffset(GEP, RewriteGEP: RewriteGEPs); |
225 | if (Offset->getType() != IdxTy) |
226 | Offset = Builder.CreateVectorSplat( |
227 | EC: cast<VectorType>(Val: IdxTy)->getElementCount(), V: Offset); |
228 | if (Sum) |
229 | Sum = Builder.CreateAdd(LHS: Sum, RHS: Offset, Name: "" , HasNUW: NW.hasNoUnsignedWrap(), |
230 | HasNSW: NW.isInBounds()); |
231 | else |
232 | Sum = Offset; |
233 | } |
234 | if (!Sum) |
235 | return Constant::getNullValue(Ty: IdxTy); |
236 | return Sum; |
237 | } |
238 | |
239 | /// Legal integers and common types are considered desirable. This is used to |
/// avoid creating instructions with types that may not be supported well by
/// the backend.
242 | /// NOTE: This treats i8, i16 and i32 specially because they are common |
243 | /// types in frontend languages. |
244 | bool InstCombinerImpl::isDesirableIntType(unsigned BitWidth) const { |
245 | switch (BitWidth) { |
246 | case 8: |
247 | case 16: |
248 | case 32: |
249 | return true; |
250 | default: |
251 | return DL.isLegalInteger(Width: BitWidth); |
252 | } |
253 | } |
254 | |
255 | /// Return true if it is desirable to convert an integer computation from a |
256 | /// given bit width to a new bit width. |
257 | /// We don't want to convert from a legal or desirable type (like i8) to an |
258 | /// illegal type or from a smaller to a larger illegal type. A width of '1' |
259 | /// is always treated as a desirable type because i1 is a fundamental type in |
260 | /// IR, and there are many specialized optimizations for i1 types. |
261 | /// Common/desirable widths are equally treated as legal to convert to, in |
262 | /// order to open up more combining opportunities. |
263 | bool InstCombinerImpl::shouldChangeType(unsigned FromWidth, |
264 | unsigned ToWidth) const { |
265 | bool FromLegal = FromWidth == 1 || DL.isLegalInteger(Width: FromWidth); |
266 | bool ToLegal = ToWidth == 1 || DL.isLegalInteger(Width: ToWidth); |
267 | |
268 | // Convert to desirable widths even if they are not legal types. |
269 | // Only shrink types, to prevent infinite loops. |
270 | if (ToWidth < FromWidth && isDesirableIntType(BitWidth: ToWidth)) |
271 | return true; |
272 | |
  // If the source type is a legal or desirable integer type and the result
  // would be an illegal type, don't do the transformation.
275 | if ((FromLegal || isDesirableIntType(BitWidth: FromWidth)) && !ToLegal) |
276 | return false; |
277 | |
278 | // Otherwise, if both are illegal, do not increase the size of the result. We |
279 | // do allow things like i160 -> i64, but not i64 -> i160. |
280 | if (!FromLegal && !ToLegal && ToWidth > FromWidth) |
281 | return false; |
282 | |
283 | return true; |
284 | } |
285 | |
286 | /// Return true if it is desirable to convert a computation from 'From' to 'To'. |
287 | /// We don't want to convert from a legal to an illegal type or from a smaller |
288 | /// to a larger illegal type. i1 is always treated as a legal type because it is |
289 | /// a fundamental type in IR, and there are many specialized optimizations for |
290 | /// i1 types. |
291 | bool InstCombinerImpl::shouldChangeType(Type *From, Type *To) const { |
292 | // TODO: This could be extended to allow vectors. Datalayout changes might be |
293 | // needed to properly support that. |
294 | if (!From->isIntegerTy() || !To->isIntegerTy()) |
295 | return false; |
296 | |
297 | unsigned FromWidth = From->getPrimitiveSizeInBits(); |
298 | unsigned ToWidth = To->getPrimitiveSizeInBits(); |
299 | return shouldChangeType(FromWidth, ToWidth); |
300 | } |
301 | |
// Return true if No Signed Wrap should be maintained for I.
303 | // The No Signed Wrap flag can be kept if the operation "B (I.getOpcode) C", |
304 | // where both B and C should be ConstantInts, results in a constant that does |
305 | // not overflow. This function only handles the Add/Sub/Mul opcodes. For |
306 | // all other opcodes, the function conservatively returns false. |
307 | static bool maintainNoSignedWrap(BinaryOperator &I, Value *B, Value *C) { |
308 | auto *OBO = dyn_cast<OverflowingBinaryOperator>(Val: &I); |
309 | if (!OBO || !OBO->hasNoSignedWrap()) |
310 | return false; |
311 | |
312 | const APInt *BVal, *CVal; |
313 | if (!match(V: B, P: m_APInt(Res&: BVal)) || !match(V: C, P: m_APInt(Res&: CVal))) |
314 | return false; |
315 | |
316 | // We reason about Add/Sub/Mul Only. |
317 | bool Overflow = false; |
318 | switch (I.getOpcode()) { |
319 | case Instruction::Add: |
320 | (void)BVal->sadd_ov(RHS: *CVal, Overflow); |
321 | break; |
322 | case Instruction::Sub: |
323 | (void)BVal->ssub_ov(RHS: *CVal, Overflow); |
324 | break; |
325 | case Instruction::Mul: |
326 | (void)BVal->smul_ov(RHS: *CVal, Overflow); |
327 | break; |
328 | default: |
329 | // Conservatively return false for other opcodes. |
330 | return false; |
331 | } |
332 | return !Overflow; |
333 | } |
334 | |
335 | static bool hasNoUnsignedWrap(BinaryOperator &I) { |
336 | auto *OBO = dyn_cast<OverflowingBinaryOperator>(Val: &I); |
337 | return OBO && OBO->hasNoUnsignedWrap(); |
338 | } |
339 | |
340 | static bool hasNoSignedWrap(BinaryOperator &I) { |
341 | auto *OBO = dyn_cast<OverflowingBinaryOperator>(Val: &I); |
342 | return OBO && OBO->hasNoSignedWrap(); |
343 | } |
344 | |
/// Conservatively clears subclassOptionalData after a reassociation or
/// commutation. Fast-math flags are the exception: when applicable they are
/// saved and restored across the clear.
348 | static void ClearSubclassDataAfterReassociation(BinaryOperator &I) { |
349 | FPMathOperator *FPMO = dyn_cast<FPMathOperator>(Val: &I); |
350 | if (!FPMO) { |
351 | I.clearSubclassOptionalData(); |
352 | return; |
353 | } |
354 | |
355 | FastMathFlags FMF = I.getFastMathFlags(); |
356 | I.clearSubclassOptionalData(); |
357 | I.setFastMathFlags(FMF); |
358 | } |
359 | |
360 | /// Combine constant operands of associative operations either before or after a |
361 | /// cast to eliminate one of the associative operations: |
362 | /// (op (cast (op X, C2)), C1) --> (cast (op X, op (C1, C2))) |
363 | /// (op (cast (op X, C2)), C1) --> (op (cast X), op (C1, C2)) |
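/// For example (currently only zext casts and bitwise-logic binops are
/// handled):
///   (or (zext (or i8 %x, 3) to i32), 8) --> (or (zext i8 %x to i32), 11)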
364 | static bool simplifyAssocCastAssoc(BinaryOperator *BinOp1, |
365 | InstCombinerImpl &IC) { |
366 | auto *Cast = dyn_cast<CastInst>(Val: BinOp1->getOperand(i_nocapture: 0)); |
367 | if (!Cast || !Cast->hasOneUse()) |
368 | return false; |
369 | |
370 | // TODO: Enhance logic for other casts and remove this check. |
371 | auto CastOpcode = Cast->getOpcode(); |
372 | if (CastOpcode != Instruction::ZExt) |
373 | return false; |
374 | |
375 | // TODO: Enhance logic for other BinOps and remove this check. |
376 | if (!BinOp1->isBitwiseLogicOp()) |
377 | return false; |
378 | |
379 | auto AssocOpcode = BinOp1->getOpcode(); |
380 | auto *BinOp2 = dyn_cast<BinaryOperator>(Val: Cast->getOperand(i_nocapture: 0)); |
381 | if (!BinOp2 || !BinOp2->hasOneUse() || BinOp2->getOpcode() != AssocOpcode) |
382 | return false; |
383 | |
384 | Constant *C1, *C2; |
385 | if (!match(V: BinOp1->getOperand(i_nocapture: 1), P: m_Constant(C&: C1)) || |
386 | !match(V: BinOp2->getOperand(i_nocapture: 1), P: m_Constant(C&: C2))) |
387 | return false; |
388 | |
389 | // TODO: This assumes a zext cast. |
390 | // Eg, if it was a trunc, we'd cast C1 to the source type because casting C2 |
391 | // to the destination type might lose bits. |
392 | |
393 | // Fold the constants together in the destination type: |
394 | // (op (cast (op X, C2)), C1) --> (op (cast X), FoldedC) |
395 | const DataLayout &DL = IC.getDataLayout(); |
396 | Type *DestTy = C1->getType(); |
397 | Constant *CastC2 = ConstantFoldCastOperand(Opcode: CastOpcode, C: C2, DestTy, DL); |
398 | if (!CastC2) |
399 | return false; |
400 | Constant *FoldedC = ConstantFoldBinaryOpOperands(Opcode: AssocOpcode, LHS: C1, RHS: CastC2, DL); |
401 | if (!FoldedC) |
402 | return false; |
403 | |
404 | IC.replaceOperand(I&: *Cast, OpNum: 0, V: BinOp2->getOperand(i_nocapture: 0)); |
405 | IC.replaceOperand(I&: *BinOp1, OpNum: 1, V: FoldedC); |
406 | BinOp1->dropPoisonGeneratingFlags(); |
407 | Cast->dropPoisonGeneratingFlags(); |
408 | return true; |
409 | } |
410 | |
411 | // Simplifies IntToPtr/PtrToInt RoundTrip Cast. |
412 | // inttoptr ( ptrtoint (x) ) --> x |
413 | Value *InstCombinerImpl::simplifyIntToPtrRoundTripCast(Value *Val) { |
414 | auto *IntToPtr = dyn_cast<IntToPtrInst>(Val); |
415 | if (IntToPtr && DL.getTypeSizeInBits(Ty: IntToPtr->getDestTy()) == |
416 | DL.getTypeSizeInBits(Ty: IntToPtr->getSrcTy())) { |
417 | auto *PtrToInt = dyn_cast<PtrToIntInst>(Val: IntToPtr->getOperand(i_nocapture: 0)); |
418 | Type *CastTy = IntToPtr->getDestTy(); |
419 | if (PtrToInt && |
420 | CastTy->getPointerAddressSpace() == |
421 | PtrToInt->getSrcTy()->getPointerAddressSpace() && |
422 | DL.getTypeSizeInBits(Ty: PtrToInt->getSrcTy()) == |
423 | DL.getTypeSizeInBits(Ty: PtrToInt->getDestTy())) |
424 | return PtrToInt->getOperand(i_nocapture: 0); |
425 | } |
426 | return nullptr; |
427 | } |
428 | |
429 | /// This performs a few simplifications for operators that are associative or |
430 | /// commutative: |
431 | /// |
432 | /// Commutative operators: |
433 | /// |
434 | /// 1. Order operands such that they are listed from right (least complex) to |
435 | /// left (most complex). This puts constants before unary operators before |
436 | /// binary operators. |
437 | /// |
438 | /// Associative operators: |
439 | /// |
440 | /// 2. Transform: "(A op B) op C" ==> "A op (B op C)" if "B op C" simplifies. |
441 | /// 3. Transform: "A op (B op C)" ==> "(A op B) op C" if "A op B" simplifies. |
442 | /// |
443 | /// Associative and commutative operators: |
444 | /// |
445 | /// 4. Transform: "(A op B) op C" ==> "(C op A) op B" if "C op A" simplifies. |
446 | /// 5. Transform: "A op (B op C)" ==> "B op (C op A)" if "C op A" simplifies. |
447 | /// 6. Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)" |
448 | /// if C1 and C2 are constants. |
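/// For example, transform 2 turns "(add (add %x, 1), 2)" into "add %x, 3"
/// because the inner "1 add 2" folds to a constant, and transform 6 turns
/// "(add (add %a, 1), (add %b, 2))" into "(add (add %a, %b), 3)" when the
/// inner adds have no other uses.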
449 | bool InstCombinerImpl::SimplifyAssociativeOrCommutative(BinaryOperator &I) { |
450 | Instruction::BinaryOps Opcode = I.getOpcode(); |
451 | bool Changed = false; |
452 | |
453 | do { |
454 | // Order operands such that they are listed from right (least complex) to |
455 | // left (most complex). This puts constants before unary operators before |
456 | // binary operators. |
457 | if (I.isCommutative() && getComplexity(V: I.getOperand(i_nocapture: 0)) < |
458 | getComplexity(V: I.getOperand(i_nocapture: 1))) |
459 | Changed = !I.swapOperands(); |
460 | |
461 | if (I.isCommutative()) { |
462 | if (auto Pair = matchSymmetricPair(LHS: I.getOperand(i_nocapture: 0), RHS: I.getOperand(i_nocapture: 1))) { |
463 | replaceOperand(I, OpNum: 0, V: Pair->first); |
464 | replaceOperand(I, OpNum: 1, V: Pair->second); |
465 | Changed = true; |
466 | } |
467 | } |
468 | |
469 | BinaryOperator *Op0 = dyn_cast<BinaryOperator>(Val: I.getOperand(i_nocapture: 0)); |
470 | BinaryOperator *Op1 = dyn_cast<BinaryOperator>(Val: I.getOperand(i_nocapture: 1)); |
471 | |
472 | if (I.isAssociative()) { |
473 | // Transform: "(A op B) op C" ==> "A op (B op C)" if "B op C" simplifies. |
474 | if (Op0 && Op0->getOpcode() == Opcode) { |
475 | Value *A = Op0->getOperand(i_nocapture: 0); |
476 | Value *B = Op0->getOperand(i_nocapture: 1); |
477 | Value *C = I.getOperand(i_nocapture: 1); |
478 | |
479 | // Does "B op C" simplify? |
480 | if (Value *V = simplifyBinOp(Opcode, LHS: B, RHS: C, Q: SQ.getWithInstruction(I: &I))) { |
481 | // It simplifies to V. Form "A op V". |
482 | replaceOperand(I, OpNum: 0, V: A); |
483 | replaceOperand(I, OpNum: 1, V); |
484 | bool IsNUW = hasNoUnsignedWrap(I) && hasNoUnsignedWrap(I&: *Op0); |
485 | bool IsNSW = maintainNoSignedWrap(I, B, C) && hasNoSignedWrap(I&: *Op0); |
486 | |
487 | // Conservatively clear all optional flags since they may not be |
488 | // preserved by the reassociation. Reset nsw/nuw based on the above |
489 | // analysis. |
490 | ClearSubclassDataAfterReassociation(I); |
491 | |
492 | // Note: this is only valid because SimplifyBinOp doesn't look at |
493 | // the operands to Op0. |
494 | if (IsNUW) |
495 | I.setHasNoUnsignedWrap(true); |
496 | |
497 | if (IsNSW) |
498 | I.setHasNoSignedWrap(true); |
499 | |
500 | Changed = true; |
501 | ++NumReassoc; |
502 | continue; |
503 | } |
504 | } |
505 | |
506 | // Transform: "A op (B op C)" ==> "(A op B) op C" if "A op B" simplifies. |
507 | if (Op1 && Op1->getOpcode() == Opcode) { |
508 | Value *A = I.getOperand(i_nocapture: 0); |
509 | Value *B = Op1->getOperand(i_nocapture: 0); |
510 | Value *C = Op1->getOperand(i_nocapture: 1); |
511 | |
512 | // Does "A op B" simplify? |
513 | if (Value *V = simplifyBinOp(Opcode, LHS: A, RHS: B, Q: SQ.getWithInstruction(I: &I))) { |
514 | // It simplifies to V. Form "V op C". |
515 | replaceOperand(I, OpNum: 0, V); |
516 | replaceOperand(I, OpNum: 1, V: C); |
517 | // Conservatively clear the optional flags, since they may not be |
518 | // preserved by the reassociation. |
519 | ClearSubclassDataAfterReassociation(I); |
520 | Changed = true; |
521 | ++NumReassoc; |
522 | continue; |
523 | } |
524 | } |
525 | } |
526 | |
527 | if (I.isAssociative() && I.isCommutative()) { |
528 | if (simplifyAssocCastAssoc(BinOp1: &I, IC&: *this)) { |
529 | Changed = true; |
530 | ++NumReassoc; |
531 | continue; |
532 | } |
533 | |
534 | // Transform: "(A op B) op C" ==> "(C op A) op B" if "C op A" simplifies. |
535 | if (Op0 && Op0->getOpcode() == Opcode) { |
536 | Value *A = Op0->getOperand(i_nocapture: 0); |
537 | Value *B = Op0->getOperand(i_nocapture: 1); |
538 | Value *C = I.getOperand(i_nocapture: 1); |
539 | |
540 | // Does "C op A" simplify? |
541 | if (Value *V = simplifyBinOp(Opcode, LHS: C, RHS: A, Q: SQ.getWithInstruction(I: &I))) { |
542 | // It simplifies to V. Form "V op B". |
543 | replaceOperand(I, OpNum: 0, V); |
544 | replaceOperand(I, OpNum: 1, V: B); |
545 | // Conservatively clear the optional flags, since they may not be |
546 | // preserved by the reassociation. |
547 | ClearSubclassDataAfterReassociation(I); |
548 | Changed = true; |
549 | ++NumReassoc; |
550 | continue; |
551 | } |
552 | } |
553 | |
554 | // Transform: "A op (B op C)" ==> "B op (C op A)" if "C op A" simplifies. |
555 | if (Op1 && Op1->getOpcode() == Opcode) { |
556 | Value *A = I.getOperand(i_nocapture: 0); |
557 | Value *B = Op1->getOperand(i_nocapture: 0); |
558 | Value *C = Op1->getOperand(i_nocapture: 1); |
559 | |
560 | // Does "C op A" simplify? |
561 | if (Value *V = simplifyBinOp(Opcode, LHS: C, RHS: A, Q: SQ.getWithInstruction(I: &I))) { |
562 | // It simplifies to V. Form "B op V". |
563 | replaceOperand(I, OpNum: 0, V: B); |
564 | replaceOperand(I, OpNum: 1, V); |
565 | // Conservatively clear the optional flags, since they may not be |
566 | // preserved by the reassociation. |
567 | ClearSubclassDataAfterReassociation(I); |
568 | Changed = true; |
569 | ++NumReassoc; |
570 | continue; |
571 | } |
572 | } |
573 | |
574 | // Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)" |
575 | // if C1 and C2 are constants. |
576 | Value *A, *B; |
577 | Constant *C1, *C2, *CRes; |
578 | if (Op0 && Op1 && |
579 | Op0->getOpcode() == Opcode && Op1->getOpcode() == Opcode && |
580 | match(V: Op0, P: m_OneUse(SubPattern: m_BinOp(L: m_Value(V&: A), R: m_Constant(C&: C1)))) && |
581 | match(V: Op1, P: m_OneUse(SubPattern: m_BinOp(L: m_Value(V&: B), R: m_Constant(C&: C2)))) && |
582 | (CRes = ConstantFoldBinaryOpOperands(Opcode, LHS: C1, RHS: C2, DL))) { |
583 | bool IsNUW = hasNoUnsignedWrap(I) && |
584 | hasNoUnsignedWrap(I&: *Op0) && |
585 | hasNoUnsignedWrap(I&: *Op1); |
586 | BinaryOperator *NewBO = (IsNUW && Opcode == Instruction::Add) ? |
587 | BinaryOperator::CreateNUW(Opc: Opcode, V1: A, V2: B) : |
588 | BinaryOperator::Create(Op: Opcode, S1: A, S2: B); |
589 | |
590 | if (isa<FPMathOperator>(Val: NewBO)) { |
591 | FastMathFlags Flags = I.getFastMathFlags() & |
592 | Op0->getFastMathFlags() & |
593 | Op1->getFastMathFlags(); |
594 | NewBO->setFastMathFlags(Flags); |
595 | } |
596 | InsertNewInstWith(New: NewBO, Old: I.getIterator()); |
597 | NewBO->takeName(V: Op1); |
598 | replaceOperand(I, OpNum: 0, V: NewBO); |
599 | replaceOperand(I, OpNum: 1, V: CRes); |
600 | // Conservatively clear the optional flags, since they may not be |
601 | // preserved by the reassociation. |
602 | ClearSubclassDataAfterReassociation(I); |
603 | if (IsNUW) |
604 | I.setHasNoUnsignedWrap(true); |
605 | |
606 | Changed = true; |
607 | continue; |
608 | } |
609 | } |
610 | |
611 | // No further simplifications. |
612 | return Changed; |
613 | } while (true); |
614 | } |
615 | |
616 | /// Return whether "X LOp (Y ROp Z)" is always equal to |
617 | /// "(X LOp Y) ROp (X LOp Z)". |
618 | static bool leftDistributesOverRight(Instruction::BinaryOps LOp, |
619 | Instruction::BinaryOps ROp) { |
620 | // X & (Y | Z) <--> (X & Y) | (X & Z) |
621 | // X & (Y ^ Z) <--> (X & Y) ^ (X & Z) |
622 | if (LOp == Instruction::And) |
623 | return ROp == Instruction::Or || ROp == Instruction::Xor; |
624 | |
625 | // X | (Y & Z) <--> (X | Y) & (X | Z) |
626 | if (LOp == Instruction::Or) |
627 | return ROp == Instruction::And; |
628 | |
629 | // X * (Y + Z) <--> (X * Y) + (X * Z) |
630 | // X * (Y - Z) <--> (X * Y) - (X * Z) |
631 | if (LOp == Instruction::Mul) |
632 | return ROp == Instruction::Add || ROp == Instruction::Sub; |
633 | |
634 | return false; |
635 | } |
636 | |
637 | /// Return whether "(X LOp Y) ROp Z" is always equal to |
638 | /// "(X ROp Z) LOp (Y ROp Z)". |
639 | static bool rightDistributesOverLeft(Instruction::BinaryOps LOp, |
640 | Instruction::BinaryOps ROp) { |
641 | if (Instruction::isCommutative(Opcode: ROp)) |
642 | return leftDistributesOverRight(LOp: ROp, ROp: LOp); |
643 | |
644 | // (X {&|^} Y) >> Z <--> (X >> Z) {&|^} (Y >> Z) for all shifts. |
645 | return Instruction::isBitwiseLogicOp(Opcode: LOp) && Instruction::isShift(Opcode: ROp); |
646 | |
647 | // TODO: It would be nice to handle division, aka "(X + Y)/Z = X/Z + Y/Z", |
648 | // but this requires knowing that the addition does not overflow and other |
649 | // such subtleties. |
650 | } |
651 | |
652 | /// This function returns identity value for given opcode, which can be used to |
653 | /// factor patterns like (X * 2) + X ==> (X * 2) + (X * 1) ==> X * (2 + 1). |
654 | static Value *getIdentityValue(Instruction::BinaryOps Opcode, Value *V) { |
655 | if (isa<Constant>(Val: V)) |
656 | return nullptr; |
657 | |
658 | return ConstantExpr::getBinOpIdentity(Opcode, Ty: V->getType()); |
659 | } |
660 | |
661 | /// This function predicates factorization using distributive laws. By default, |
662 | /// it just returns the 'Op' inputs. But for special-cases like |
663 | /// 'add(shl(X, 5), ...)', this function will have TopOpcode == Instruction::Add |
664 | /// and Op = shl(X, 5). The 'shl' is treated as the more general 'mul X, 32' to |
665 | /// allow more factorization opportunities. |
666 | static Instruction::BinaryOps |
667 | getBinOpsForFactorization(Instruction::BinaryOps TopOpcode, BinaryOperator *Op, |
668 | Value *&LHS, Value *&RHS, BinaryOperator *OtherOp) { |
669 | assert(Op && "Expected a binary operator" ); |
670 | LHS = Op->getOperand(i_nocapture: 0); |
671 | RHS = Op->getOperand(i_nocapture: 1); |
672 | if (TopOpcode == Instruction::Add || TopOpcode == Instruction::Sub) { |
673 | Constant *C; |
674 | if (match(V: Op, P: m_Shl(L: m_Value(), R: m_ImmConstant(C)))) { |
675 | // X << C --> X * (1 << C) |
676 | RHS = ConstantFoldBinaryInstruction( |
677 | Opcode: Instruction::Shl, V1: ConstantInt::get(Ty: Op->getType(), V: 1), V2: C); |
678 | assert(RHS && "Constant folding of immediate constants failed" ); |
679 | return Instruction::Mul; |
680 | } |
681 | // TODO: We can add other conversions e.g. shr => div etc. |
682 | } |
683 | if (Instruction::isBitwiseLogicOp(Opcode: TopOpcode)) { |
684 | if (OtherOp && OtherOp->getOpcode() == Instruction::AShr && |
685 | match(V: Op, P: m_LShr(L: m_NonNegative(), R: m_Value()))) { |
686 | // lshr nneg C, X --> ashr nneg C, X |
687 | return Instruction::AShr; |
688 | } |
689 | } |
690 | return Op->getOpcode(); |
691 | } |
692 | |
693 | /// This tries to simplify binary operations by factorizing out common terms |
694 | /// (e. g. "(A*B)+(A*C)" -> "A*(B+C)"). |
695 | static Value *tryFactorization(BinaryOperator &I, const SimplifyQuery &SQ, |
696 | InstCombiner::BuilderTy &Builder, |
697 | Instruction::BinaryOps InnerOpcode, Value *A, |
698 | Value *B, Value *C, Value *D) { |
699 | assert(A && B && C && D && "All values must be provided" ); |
700 | |
701 | Value *V = nullptr; |
702 | Value *RetVal = nullptr; |
703 | Value *LHS = I.getOperand(i_nocapture: 0), *RHS = I.getOperand(i_nocapture: 1); |
704 | Instruction::BinaryOps TopLevelOpcode = I.getOpcode(); |
705 | |
706 | // Does "X op' Y" always equal "Y op' X"? |
707 | bool InnerCommutative = Instruction::isCommutative(Opcode: InnerOpcode); |
708 | |
709 | // Does "X op' (Y op Z)" always equal "(X op' Y) op (X op' Z)"? |
710 | if (leftDistributesOverRight(LOp: InnerOpcode, ROp: TopLevelOpcode)) { |
711 | // Does the instruction have the form "(A op' B) op (A op' D)" or, in the |
712 | // commutative case, "(A op' B) op (C op' A)"? |
713 | if (A == C || (InnerCommutative && A == D)) { |
714 | if (A != C) |
715 | std::swap(a&: C, b&: D); |
716 | // Consider forming "A op' (B op D)". |
717 | // If "B op D" simplifies then it can be formed with no cost. |
718 | V = simplifyBinOp(Opcode: TopLevelOpcode, LHS: B, RHS: D, Q: SQ.getWithInstruction(I: &I)); |
719 | |
720 | // If "B op D" doesn't simplify then only go on if one of the existing |
721 | // operations "A op' B" and "C op' D" will be zapped as no longer used. |
722 | if (!V && (LHS->hasOneUse() || RHS->hasOneUse())) |
723 | V = Builder.CreateBinOp(Opc: TopLevelOpcode, LHS: B, RHS: D, Name: RHS->getName()); |
724 | if (V) |
725 | RetVal = Builder.CreateBinOp(Opc: InnerOpcode, LHS: A, RHS: V); |
726 | } |
727 | } |
728 | |
729 | // Does "(X op Y) op' Z" always equal "(X op' Z) op (Y op' Z)"? |
730 | if (!RetVal && rightDistributesOverLeft(LOp: TopLevelOpcode, ROp: InnerOpcode)) { |
731 | // Does the instruction have the form "(A op' B) op (C op' B)" or, in the |
732 | // commutative case, "(A op' B) op (B op' D)"? |
733 | if (B == D || (InnerCommutative && B == C)) { |
734 | if (B != D) |
735 | std::swap(a&: C, b&: D); |
736 | // Consider forming "(A op C) op' B". |
737 | // If "A op C" simplifies then it can be formed with no cost. |
738 | V = simplifyBinOp(Opcode: TopLevelOpcode, LHS: A, RHS: C, Q: SQ.getWithInstruction(I: &I)); |
739 | |
740 | // If "A op C" doesn't simplify then only go on if one of the existing |
741 | // operations "A op' B" and "C op' D" will be zapped as no longer used. |
742 | if (!V && (LHS->hasOneUse() || RHS->hasOneUse())) |
743 | V = Builder.CreateBinOp(Opc: TopLevelOpcode, LHS: A, RHS: C, Name: LHS->getName()); |
744 | if (V) |
745 | RetVal = Builder.CreateBinOp(Opc: InnerOpcode, LHS: V, RHS: B); |
746 | } |
747 | } |
748 | |
749 | if (!RetVal) |
750 | return nullptr; |
751 | |
752 | ++NumFactor; |
753 | RetVal->takeName(V: &I); |
754 | |
755 | // Try to add no-overflow flags to the final value. |
756 | if (isa<BinaryOperator>(Val: RetVal)) { |
757 | bool HasNSW = false; |
758 | bool HasNUW = false; |
759 | if (isa<OverflowingBinaryOperator>(Val: &I)) { |
760 | HasNSW = I.hasNoSignedWrap(); |
761 | HasNUW = I.hasNoUnsignedWrap(); |
762 | } |
763 | if (auto *LOBO = dyn_cast<OverflowingBinaryOperator>(Val: LHS)) { |
764 | HasNSW &= LOBO->hasNoSignedWrap(); |
765 | HasNUW &= LOBO->hasNoUnsignedWrap(); |
766 | } |
767 | |
768 | if (auto *ROBO = dyn_cast<OverflowingBinaryOperator>(Val: RHS)) { |
769 | HasNSW &= ROBO->hasNoSignedWrap(); |
770 | HasNUW &= ROBO->hasNoUnsignedWrap(); |
771 | } |
772 | |
773 | if (TopLevelOpcode == Instruction::Add && InnerOpcode == Instruction::Mul) { |
774 | // We can propagate 'nsw' if we know that |
775 | // %Y = mul nsw i16 %X, C |
776 | // %Z = add nsw i16 %Y, %X |
777 | // => |
778 | // %Z = mul nsw i16 %X, C+1 |
779 | // |
780 | // iff C+1 isn't INT_MIN |
781 | const APInt *CInt; |
782 | if (match(V, P: m_APInt(Res&: CInt)) && !CInt->isMinSignedValue()) |
783 | cast<Instruction>(Val: RetVal)->setHasNoSignedWrap(HasNSW); |
784 | |
785 | // nuw can be propagated with any constant or nuw value. |
786 | cast<Instruction>(Val: RetVal)->setHasNoUnsignedWrap(HasNUW); |
787 | } |
788 | } |
789 | return RetVal; |
790 | } |
791 | |
792 | // If `I` has one Const operand and the other matches `(ctpop (not x))`, |
793 | // replace `(ctpop (not x))` with `(sub nuw nsw BitWidth(x), (ctpop x))`. |
// This is only useful if the new subtract can fold so we only handle the
// following cases:
//    1) (add/sub/disjoint_or C, (ctpop (not x)))
//        -> (add/sub/disjoint_or C', (ctpop x))
//    2) (cmp pred C, (ctpop (not x)))
//        -> (cmp pred C', (ctpop x))
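// For example, with i8 %x:
//    (add i8 (ctpop (xor %x, -1)), 10) -> (sub i8 18, (ctpop %x))
// because (ctpop (not %x)) == 8 - (ctpop %x).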
800 | Instruction *InstCombinerImpl::tryFoldInstWithCtpopWithNot(Instruction *I) { |
801 | unsigned Opc = I->getOpcode(); |
802 | unsigned ConstIdx = 1; |
803 | switch (Opc) { |
804 | default: |
805 | return nullptr; |
806 | // (ctpop (not x)) <-> (sub nuw nsw BitWidth(x) - (ctpop x)) |
807 | // We can fold the BitWidth(x) with add/sub/icmp as long the other operand |
808 | // is constant. |
809 | case Instruction::Sub: |
810 | ConstIdx = 0; |
811 | break; |
812 | case Instruction::ICmp: |
    // Signed predicates aren't correct in some edge cases like for i2 types;
    // also, since (ctpop x) is known to be in [0, BitWidth(x)], almost all
    // signed comparisons against it are simplified to unsigned.
816 | if (cast<ICmpInst>(Val: I)->isSigned()) |
817 | return nullptr; |
818 | break; |
819 | case Instruction::Or: |
820 | if (!match(V: I, P: m_DisjointOr(L: m_Value(), R: m_Value()))) |
821 | return nullptr; |
822 | [[fallthrough]]; |
823 | case Instruction::Add: |
824 | break; |
825 | } |
826 | |
827 | Value *Op; |
828 | // Find ctpop. |
829 | if (!match(V: I->getOperand(i: 1 - ConstIdx), |
830 | P: m_OneUse(SubPattern: m_Intrinsic<Intrinsic::ctpop>(Op0: m_Value(V&: Op))))) |
831 | return nullptr; |
832 | |
833 | Constant *C; |
834 | // Check other operand is ImmConstant. |
835 | if (!match(V: I->getOperand(i: ConstIdx), P: m_ImmConstant(C))) |
836 | return nullptr; |
837 | |
838 | Type *Ty = Op->getType(); |
839 | Constant *BitWidthC = ConstantInt::get(Ty, V: Ty->getScalarSizeInBits()); |
840 | // Need extra check for icmp. Note if this check is true, it generally means |
841 | // the icmp will simplify to true/false. |
842 | if (Opc == Instruction::ICmp && !cast<ICmpInst>(Val: I)->isEquality()) { |
843 | Constant *Cmp = |
844 | ConstantFoldCompareInstOperands(Predicate: ICmpInst::ICMP_UGT, LHS: C, RHS: BitWidthC, DL); |
845 | if (!Cmp || !Cmp->isZeroValue()) |
846 | return nullptr; |
847 | } |
848 | |
849 | // Check we can invert `(not x)` for free. |
850 | bool Consumes = false; |
851 | if (!isFreeToInvert(V: Op, WillInvertAllUses: Op->hasOneUse(), DoesConsume&: Consumes) || !Consumes) |
852 | return nullptr; |
853 | Value *NotOp = getFreelyInverted(V: Op, WillInvertAllUses: Op->hasOneUse(), Builder: &Builder); |
854 | assert(NotOp != nullptr && |
855 | "Desync between isFreeToInvert and getFreelyInverted" ); |
856 | |
857 | Value *CtpopOfNotOp = Builder.CreateIntrinsic(RetTy: Ty, ID: Intrinsic::ctpop, Args: NotOp); |
858 | |
859 | Value *R = nullptr; |
860 | |
861 | // Do the transformation here to avoid potentially introducing an infinite |
862 | // loop. |
863 | switch (Opc) { |
864 | case Instruction::Sub: |
865 | R = Builder.CreateAdd(LHS: CtpopOfNotOp, RHS: ConstantExpr::getSub(C1: C, C2: BitWidthC)); |
866 | break; |
867 | case Instruction::Or: |
868 | case Instruction::Add: |
869 | R = Builder.CreateSub(LHS: ConstantExpr::getAdd(C1: C, C2: BitWidthC), RHS: CtpopOfNotOp); |
870 | break; |
871 | case Instruction::ICmp: |
872 | R = Builder.CreateICmp(P: cast<ICmpInst>(Val: I)->getSwappedPredicate(), |
873 | LHS: CtpopOfNotOp, RHS: ConstantExpr::getSub(C1: BitWidthC, C2: C)); |
874 | break; |
875 | default: |
876 | llvm_unreachable("Unhandled Opcode" ); |
877 | } |
878 | assert(R != nullptr); |
879 | return replaceInstUsesWith(I&: *I, V: R); |
880 | } |
881 | |
882 | // (Binop1 (Binop2 (logic_shift X, C), C1), (logic_shift Y, C)) |
883 | // IFF |
884 | // 1) the logic_shifts match |
//  2) either BinOp1 is `and`, or the binop/shift pair is fully
//      distributable and either BinOp2 is `and` or
//      (logic_shift (inv_logic_shift C1, C), C) == C1
888 | // |
889 | // -> (logic_shift (Binop1 (Binop2 X, inv_logic_shift(C1, C)), Y), C) |
890 | // |
891 | // (Binop1 (Binop2 (logic_shift X, Amt), Mask), (logic_shift Y, Amt)) |
892 | // IFF |
893 | // 1) the logic_shifts match |
894 | // 2) BinOp1 == BinOp2 (if BinOp == `add`, then also requires `shl`). |
895 | // |
896 | // -> (BinOp (logic_shift (BinOp X, Y)), Mask) |
897 | // |
898 | // (Binop1 (Binop2 (arithmetic_shift X, Amt), Mask), (arithmetic_shift Y, Amt)) |
899 | // IFF |
900 | // 1) Binop1 is bitwise logical operator `and`, `or` or `xor` |
901 | // 2) Binop2 is `not` |
902 | // |
903 | // -> (arithmetic_shift Binop1((not X), Y), Amt) |
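//
// For example, for the second pattern with BinOp1 == BinOp2 == `xor`:
//    (xor (xor (lshr X, 4), Mask), (lshr Y, 4))
//      -> (xor (lshr (xor X, Y), 4), Mask)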
904 | |
905 | Instruction *InstCombinerImpl::foldBinOpShiftWithShift(BinaryOperator &I) { |
906 | const DataLayout &DL = I.getDataLayout(); |
907 | auto IsValidBinOpc = [](unsigned Opc) { |
908 | switch (Opc) { |
909 | default: |
910 | return false; |
911 | case Instruction::And: |
912 | case Instruction::Or: |
913 | case Instruction::Xor: |
914 | case Instruction::Add: |
915 | // Skip Sub as we only match constant masks which will canonicalize to use |
916 | // add. |
917 | return true; |
918 | } |
919 | }; |
920 | |
921 | // Check if we can distribute binop arbitrarily. `add` + `lshr` has extra |
922 | // constraints. |
923 | auto IsCompletelyDistributable = [](unsigned BinOpc1, unsigned BinOpc2, |
924 | unsigned ShOpc) { |
925 | assert(ShOpc != Instruction::AShr); |
926 | return (BinOpc1 != Instruction::Add && BinOpc2 != Instruction::Add) || |
927 | ShOpc == Instruction::Shl; |
928 | }; |
929 | |
930 | auto GetInvShift = [](unsigned ShOpc) { |
931 | assert(ShOpc != Instruction::AShr); |
932 | return ShOpc == Instruction::LShr ? Instruction::Shl : Instruction::LShr; |
933 | }; |
934 | |
935 | auto CanDistributeBinops = [&](unsigned BinOpc1, unsigned BinOpc2, |
936 | unsigned ShOpc, Constant *CMask, |
937 | Constant *CShift) { |
938 | // If the BinOp1 is `and` we don't need to check the mask. |
939 | if (BinOpc1 == Instruction::And) |
940 | return true; |
941 | |
    // For all other possible transforms we need a completely distributable
    // binop/shift pair (anything but `add` + `lshr`).
944 | if (!IsCompletelyDistributable(BinOpc1, BinOpc2, ShOpc)) |
945 | return false; |
946 | |
947 | // If BinOp2 is `and`, any mask works (this only really helps for non-splat |
948 | // vecs, otherwise the mask will be simplified and the following check will |
949 | // handle it). |
950 | if (BinOpc2 == Instruction::And) |
951 | return true; |
952 | |
953 | // Otherwise, need mask that meets the below requirement. |
954 | // (logic_shift (inv_logic_shift Mask, ShAmt), ShAmt) == Mask |
955 | Constant *MaskInvShift = |
956 | ConstantFoldBinaryOpOperands(Opcode: GetInvShift(ShOpc), LHS: CMask, RHS: CShift, DL); |
957 | return ConstantFoldBinaryOpOperands(Opcode: ShOpc, LHS: MaskInvShift, RHS: CShift, DL) == |
958 | CMask; |
959 | }; |
960 | |
961 | auto MatchBinOp = [&](unsigned ShOpnum) -> Instruction * { |
962 | Constant *CMask, *CShift; |
963 | Value *X, *Y, *ShiftedX, *Mask, *Shift; |
964 | if (!match(V: I.getOperand(i_nocapture: ShOpnum), |
965 | P: m_OneUse(SubPattern: m_Shift(L: m_Value(V&: Y), R: m_Value(V&: Shift))))) |
966 | return nullptr; |
967 | if (!match(V: I.getOperand(i_nocapture: 1 - ShOpnum), |
968 | P: m_c_BinOp(L: m_CombineAnd( |
969 | L: m_OneUse(SubPattern: m_Shift(L: m_Value(V&: X), R: m_Specific(V: Shift))), |
970 | R: m_Value(V&: ShiftedX)), |
971 | R: m_Value(V&: Mask)))) |
972 | return nullptr; |
973 | // Make sure we are matching instruction shifts and not ConstantExpr |
974 | auto *IY = dyn_cast<Instruction>(Val: I.getOperand(i_nocapture: ShOpnum)); |
975 | auto *IX = dyn_cast<Instruction>(Val: ShiftedX); |
976 | if (!IY || !IX) |
977 | return nullptr; |
978 | |
979 | // LHS and RHS need same shift opcode |
980 | unsigned ShOpc = IY->getOpcode(); |
981 | if (ShOpc != IX->getOpcode()) |
982 | return nullptr; |
983 | |
984 | // Make sure binop is real instruction and not ConstantExpr |
985 | auto *BO2 = dyn_cast<Instruction>(Val: I.getOperand(i_nocapture: 1 - ShOpnum)); |
986 | if (!BO2) |
987 | return nullptr; |
988 | |
989 | unsigned BinOpc = BO2->getOpcode(); |
990 | // Make sure we have valid binops. |
991 | if (!IsValidBinOpc(I.getOpcode()) || !IsValidBinOpc(BinOpc)) |
992 | return nullptr; |
993 | |
994 | if (ShOpc == Instruction::AShr) { |
995 | if (Instruction::isBitwiseLogicOp(Opcode: I.getOpcode()) && |
996 | BinOpc == Instruction::Xor && match(V: Mask, P: m_AllOnes())) { |
997 | Value *NotX = Builder.CreateNot(V: X); |
998 | Value *NewBinOp = Builder.CreateBinOp(Opc: I.getOpcode(), LHS: Y, RHS: NotX); |
999 | return BinaryOperator::Create( |
1000 | Op: static_cast<Instruction::BinaryOps>(ShOpc), S1: NewBinOp, S2: Shift); |
1001 | } |
1002 | |
1003 | return nullptr; |
1004 | } |
1005 | |
    // If BinOp1 == BinOp2 and it's bitwise, or it's shl with add, then just
    // distribute to drop the shift regardless of the constants.
1008 | if (BinOpc == I.getOpcode() && |
1009 | IsCompletelyDistributable(I.getOpcode(), BinOpc, ShOpc)) { |
1010 | Value *NewBinOp2 = Builder.CreateBinOp(Opc: I.getOpcode(), LHS: X, RHS: Y); |
1011 | Value *NewBinOp1 = Builder.CreateBinOp( |
1012 | Opc: static_cast<Instruction::BinaryOps>(ShOpc), LHS: NewBinOp2, RHS: Shift); |
1013 | return BinaryOperator::Create(Op: I.getOpcode(), S1: NewBinOp1, S2: Mask); |
1014 | } |
1015 | |
1016 | // Otherwise we can only distribute by constant shifting the mask, so |
1017 | // ensure we have constants. |
1018 | if (!match(V: Shift, P: m_ImmConstant(C&: CShift))) |
1019 | return nullptr; |
1020 | if (!match(V: Mask, P: m_ImmConstant(C&: CMask))) |
1021 | return nullptr; |
1022 | |
1023 | // Check if we can distribute the binops. |
1024 | if (!CanDistributeBinops(I.getOpcode(), BinOpc, ShOpc, CMask, CShift)) |
1025 | return nullptr; |
1026 | |
1027 | Constant *NewCMask = |
1028 | ConstantFoldBinaryOpOperands(Opcode: GetInvShift(ShOpc), LHS: CMask, RHS: CShift, DL); |
1029 | Value *NewBinOp2 = Builder.CreateBinOp( |
1030 | Opc: static_cast<Instruction::BinaryOps>(BinOpc), LHS: X, RHS: NewCMask); |
1031 | Value *NewBinOp1 = Builder.CreateBinOp(Opc: I.getOpcode(), LHS: Y, RHS: NewBinOp2); |
1032 | return BinaryOperator::Create(Op: static_cast<Instruction::BinaryOps>(ShOpc), |
1033 | S1: NewBinOp1, S2: CShift); |
1034 | }; |
1035 | |
1036 | if (Instruction *R = MatchBinOp(0)) |
1037 | return R; |
1038 | return MatchBinOp(1); |
1039 | } |
1040 | |
1041 | // (Binop (zext C), (select C, T, F)) |
1042 | // -> (select C, (binop 1, T), (binop 0, F)) |
1043 | // |
1044 | // (Binop (sext C), (select C, T, F)) |
1045 | // -> (select C, (binop -1, T), (binop 0, F)) |
1046 | // |
1047 | // Attempt to simplify binary operations into a select with folded args, when |
1048 | // one operand of the binop is a select instruction and the other operand is a |
1049 | // zext/sext extension, whose value is the select condition. |
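// For example:
//   (add (zext i1 %c to i32), (select i1 %c, i32 %t, i32 %f))
//     -> (select i1 %c, (add 1, %t), (add 0, %f))
// where the new arms then simplify further (here the false arm becomes %f).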
1050 | Instruction * |
1051 | InstCombinerImpl::foldBinOpOfSelectAndCastOfSelectCondition(BinaryOperator &I) { |
1052 | // TODO: this simplification may be extended to any speculatable instruction, |
1053 | // not just binops, and would possibly be handled better in FoldOpIntoSelect. |
1054 | Instruction::BinaryOps Opc = I.getOpcode(); |
1055 | Value *LHS = I.getOperand(i_nocapture: 0), *RHS = I.getOperand(i_nocapture: 1); |
1056 | Value *A, *CondVal, *TrueVal, *FalseVal; |
1057 | Value *CastOp; |
1058 | |
1059 | auto MatchSelectAndCast = [&](Value *CastOp, Value *SelectOp) { |
1060 | return match(V: CastOp, P: m_ZExtOrSExt(Op: m_Value(V&: A))) && |
1061 | A->getType()->getScalarSizeInBits() == 1 && |
1062 | match(V: SelectOp, P: m_Select(C: m_Value(V&: CondVal), L: m_Value(V&: TrueVal), |
1063 | R: m_Value(V&: FalseVal))); |
1064 | }; |
1065 | |
  // Make sure one side of the binop is a select instruction, and the other is a
  // zero/sign extension operating on an i1.
1068 | if (MatchSelectAndCast(LHS, RHS)) |
1069 | CastOp = LHS; |
1070 | else if (MatchSelectAndCast(RHS, LHS)) |
1071 | CastOp = RHS; |
1072 | else |
1073 | return nullptr; |
1074 | |
1075 | auto NewFoldedConst = [&](bool IsTrueArm, Value *V) { |
1076 | bool IsCastOpRHS = (CastOp == RHS); |
1077 | bool IsZExt = isa<ZExtInst>(Val: CastOp); |
1078 | Constant *C; |
1079 | |
1080 | if (IsTrueArm) { |
1081 | C = Constant::getNullValue(Ty: V->getType()); |
1082 | } else if (IsZExt) { |
1083 | unsigned BitWidth = V->getType()->getScalarSizeInBits(); |
1084 | C = Constant::getIntegerValue(Ty: V->getType(), V: APInt(BitWidth, 1)); |
1085 | } else { |
1086 | C = Constant::getAllOnesValue(Ty: V->getType()); |
1087 | } |
1088 | |
1089 | return IsCastOpRHS ? Builder.CreateBinOp(Opc, LHS: V, RHS: C) |
1090 | : Builder.CreateBinOp(Opc, LHS: C, RHS: V); |
1091 | }; |
1092 | |
  // If the value used in the zext/sext is the select condition, or the
  // negation of the select condition, the binop can be simplified.
1095 | if (CondVal == A) { |
1096 | Value *NewTrueVal = NewFoldedConst(false, TrueVal); |
1097 | return SelectInst::Create(C: CondVal, S1: NewTrueVal, |
1098 | S2: NewFoldedConst(true, FalseVal)); |
1099 | } |
1100 | |
1101 | if (match(V: A, P: m_Not(V: m_Specific(V: CondVal)))) { |
1102 | Value *NewTrueVal = NewFoldedConst(true, TrueVal); |
1103 | return SelectInst::Create(C: CondVal, S1: NewTrueVal, |
1104 | S2: NewFoldedConst(false, FalseVal)); |
1105 | } |
1106 | |
1107 | return nullptr; |
1108 | } |
1109 | |
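/// Try to factor out a common term from a binary operation, treating an
/// operand that is not itself a suitable binop as "X op' Identity" when an
/// identity value exists; e.g. "(A*B) + A" is viewed as "(A*B) + (A*1)" and
/// factored to "A*(B+1)".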
1110 | Value *InstCombinerImpl::tryFactorizationFolds(BinaryOperator &I) { |
1111 | Value *LHS = I.getOperand(i_nocapture: 0), *RHS = I.getOperand(i_nocapture: 1); |
1112 | BinaryOperator *Op0 = dyn_cast<BinaryOperator>(Val: LHS); |
1113 | BinaryOperator *Op1 = dyn_cast<BinaryOperator>(Val: RHS); |
1114 | Instruction::BinaryOps TopLevelOpcode = I.getOpcode(); |
1115 | Value *A, *B, *C, *D; |
1116 | Instruction::BinaryOps LHSOpcode, RHSOpcode; |
1117 | |
1118 | if (Op0) |
1119 | LHSOpcode = getBinOpsForFactorization(TopOpcode: TopLevelOpcode, Op: Op0, LHS&: A, RHS&: B, OtherOp: Op1); |
1120 | if (Op1) |
1121 | RHSOpcode = getBinOpsForFactorization(TopOpcode: TopLevelOpcode, Op: Op1, LHS&: C, RHS&: D, OtherOp: Op0); |
1122 | |
1123 | // The instruction has the form "(A op' B) op (C op' D)". Try to factorize |
1124 | // a common term. |
1125 | if (Op0 && Op1 && LHSOpcode == RHSOpcode) |
1126 | if (Value *V = tryFactorization(I, SQ, Builder, InnerOpcode: LHSOpcode, A, B, C, D)) |
1127 | return V; |
1128 | |
1129 | // The instruction has the form "(A op' B) op (C)". Try to factorize common |
1130 | // term. |
1131 | if (Op0) |
1132 | if (Value *Ident = getIdentityValue(Opcode: LHSOpcode, V: RHS)) |
1133 | if (Value *V = |
1134 | tryFactorization(I, SQ, Builder, InnerOpcode: LHSOpcode, A, B, C: RHS, D: Ident)) |
1135 | return V; |
1136 | |
1137 | // The instruction has the form "(B) op (C op' D)". Try to factorize common |
1138 | // term. |
1139 | if (Op1) |
1140 | if (Value *Ident = getIdentityValue(Opcode: RHSOpcode, V: LHS)) |
1141 | if (Value *V = |
1142 | tryFactorization(I, SQ, Builder, InnerOpcode: RHSOpcode, A: LHS, B: Ident, C, D)) |
1143 | return V; |
1144 | |
1145 | return nullptr; |
1146 | } |
1147 | |
1148 | /// This tries to simplify binary operations which some other binary operation |
1149 | /// distributes over either by factorizing out common terms |
1150 | /// (eg "(A*B)+(A*C)" -> "A*(B+C)") or expanding out if this results in |
1151 | /// simplifications (eg: "A & (B | C) -> (A&B) | (A&C)" if this is a win). |
1152 | /// Returns the simplified value, or null if it didn't simplify. |
1153 | Value *InstCombinerImpl::foldUsingDistributiveLaws(BinaryOperator &I) { |
1154 | Value *LHS = I.getOperand(i_nocapture: 0), *RHS = I.getOperand(i_nocapture: 1); |
1155 | BinaryOperator *Op0 = dyn_cast<BinaryOperator>(Val: LHS); |
1156 | BinaryOperator *Op1 = dyn_cast<BinaryOperator>(Val: RHS); |
1157 | Instruction::BinaryOps TopLevelOpcode = I.getOpcode(); |
1158 | |
1159 | // Factorization. |
1160 | if (Value *R = tryFactorizationFolds(I)) |
1161 | return R; |
1162 | |
1163 | // Expansion. |
1164 | if (Op0 && rightDistributesOverLeft(LOp: Op0->getOpcode(), ROp: TopLevelOpcode)) { |
1165 | // The instruction has the form "(A op' B) op C". See if expanding it out |
1166 | // to "(A op C) op' (B op C)" results in simplifications. |
1167 | Value *A = Op0->getOperand(i_nocapture: 0), *B = Op0->getOperand(i_nocapture: 1), *C = RHS; |
1168 | Instruction::BinaryOps InnerOpcode = Op0->getOpcode(); // op' |
1169 | |
1170 | // Disable the use of undef because it's not safe to distribute undef. |
1171 | auto SQDistributive = SQ.getWithInstruction(I: &I).getWithoutUndef(); |
1172 | Value *L = simplifyBinOp(Opcode: TopLevelOpcode, LHS: A, RHS: C, Q: SQDistributive); |
1173 | Value *R = simplifyBinOp(Opcode: TopLevelOpcode, LHS: B, RHS: C, Q: SQDistributive); |
1174 | |
1175 | // Do "A op C" and "B op C" both simplify? |
1176 | if (L && R) { |
1177 | // They do! Return "L op' R". |
1178 | ++NumExpand; |
1179 | C = Builder.CreateBinOp(Opc: InnerOpcode, LHS: L, RHS: R); |
1180 | C->takeName(V: &I); |
1181 | return C; |
1182 | } |
1183 | |
1184 | // Does "A op C" simplify to the identity value for the inner opcode? |
1185 | if (L && L == ConstantExpr::getBinOpIdentity(Opcode: InnerOpcode, Ty: L->getType())) { |
      // It does! Return "B op C".
1187 | ++NumExpand; |
1188 | C = Builder.CreateBinOp(Opc: TopLevelOpcode, LHS: B, RHS: C); |
1189 | C->takeName(V: &I); |
1190 | return C; |
1191 | } |
1192 | |
1193 | // Does "B op C" simplify to the identity value for the inner opcode? |
1194 | if (R && R == ConstantExpr::getBinOpIdentity(Opcode: InnerOpcode, Ty: R->getType())) { |
      // It does! Return "A op C".
1196 | ++NumExpand; |
1197 | C = Builder.CreateBinOp(Opc: TopLevelOpcode, LHS: A, RHS: C); |
1198 | C->takeName(V: &I); |
1199 | return C; |
1200 | } |
1201 | } |
1202 | |
1203 | if (Op1 && leftDistributesOverRight(LOp: TopLevelOpcode, ROp: Op1->getOpcode())) { |
1204 | // The instruction has the form "A op (B op' C)". See if expanding it out |
1205 | // to "(A op B) op' (A op C)" results in simplifications. |
1206 | Value *A = LHS, *B = Op1->getOperand(i_nocapture: 0), *C = Op1->getOperand(i_nocapture: 1); |
1207 | Instruction::BinaryOps InnerOpcode = Op1->getOpcode(); // op' |
1208 | |
1209 | // Disable the use of undef because it's not safe to distribute undef. |
1210 | auto SQDistributive = SQ.getWithInstruction(I: &I).getWithoutUndef(); |
1211 | Value *L = simplifyBinOp(Opcode: TopLevelOpcode, LHS: A, RHS: B, Q: SQDistributive); |
1212 | Value *R = simplifyBinOp(Opcode: TopLevelOpcode, LHS: A, RHS: C, Q: SQDistributive); |
1213 | |
1214 | // Do "A op B" and "A op C" both simplify? |
1215 | if (L && R) { |
1216 | // They do! Return "L op' R". |
1217 | ++NumExpand; |
1218 | A = Builder.CreateBinOp(Opc: InnerOpcode, LHS: L, RHS: R); |
1219 | A->takeName(V: &I); |
1220 | return A; |
1221 | } |
1222 | |
1223 | // Does "A op B" simplify to the identity value for the inner opcode? |
1224 | if (L && L == ConstantExpr::getBinOpIdentity(Opcode: InnerOpcode, Ty: L->getType())) { |
      // It does! Return "A op C".
1226 | ++NumExpand; |
1227 | A = Builder.CreateBinOp(Opc: TopLevelOpcode, LHS: A, RHS: C); |
1228 | A->takeName(V: &I); |
1229 | return A; |
1230 | } |
1231 | |
1232 | // Does "A op C" simplify to the identity value for the inner opcode? |
1233 | if (R && R == ConstantExpr::getBinOpIdentity(Opcode: InnerOpcode, Ty: R->getType())) { |
      // It does! Return "A op B".
1235 | ++NumExpand; |
1236 | A = Builder.CreateBinOp(Opc: TopLevelOpcode, LHS: A, RHS: B); |
1237 | A->takeName(V: &I); |
1238 | return A; |
1239 | } |
1240 | } |
1241 | |
1242 | return SimplifySelectsFeedingBinaryOp(I, LHS, RHS); |
1243 | } |
1244 | |
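// Match a pair of phi nodes in the same block whose incoming values per
// predecessor are the same two values, possibly swapped, e.g.
//    %l = phi [ %a, %bb0 ], [ %b, %bb1 ]
//    %r = phi [ %b, %bb0 ], [ %a, %bb1 ]
// and return that value pair (here {%a, %b}).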
1245 | static std::optional<std::pair<Value *, Value *>> |
1246 | matchSymmetricPhiNodesPair(PHINode *LHS, PHINode *RHS) { |
1247 | if (LHS->getParent() != RHS->getParent()) |
1248 | return std::nullopt; |
1249 | |
1250 | if (LHS->getNumIncomingValues() < 2) |
1251 | return std::nullopt; |
1252 | |
1253 | if (!equal(LRange: LHS->blocks(), RRange: RHS->blocks())) |
1254 | return std::nullopt; |
1255 | |
1256 | Value *L0 = LHS->getIncomingValue(i: 0); |
1257 | Value *R0 = RHS->getIncomingValue(i: 0); |
1258 | |
1259 | for (unsigned I = 1, E = LHS->getNumIncomingValues(); I != E; ++I) { |
1260 | Value *L1 = LHS->getIncomingValue(i: I); |
1261 | Value *R1 = RHS->getIncomingValue(i: I); |
1262 | |
1263 | if ((L0 == L1 && R0 == R1) || (L0 == R1 && R0 == L1)) |
1264 | continue; |
1265 | |
1266 | return std::nullopt; |
1267 | } |
1268 | |
1269 | return std::optional(std::pair(L0, R0)); |
1270 | } |
1271 | |
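// Match a pair of operands that a commutative operation can replace with a
// simpler symmetric pair: two phis with swapped incoming values, two selects
// on the same condition with swapped arms, or a min/max pair over the same
// operands. Returns the pair of underlying values to use instead.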
1272 | std::optional<std::pair<Value *, Value *>> |
1273 | InstCombinerImpl::matchSymmetricPair(Value *LHS, Value *RHS) { |
1274 | Instruction *LHSInst = dyn_cast<Instruction>(Val: LHS); |
1275 | Instruction *RHSInst = dyn_cast<Instruction>(Val: RHS); |
1276 | if (!LHSInst || !RHSInst || LHSInst->getOpcode() != RHSInst->getOpcode()) |
1277 | return std::nullopt; |
1278 | switch (LHSInst->getOpcode()) { |
1279 | case Instruction::PHI: |
1280 | return matchSymmetricPhiNodesPair(LHS: cast<PHINode>(Val: LHS), RHS: cast<PHINode>(Val: RHS)); |
1281 | case Instruction::Select: { |
1282 | Value *Cond = LHSInst->getOperand(i: 0); |
1283 | Value *TrueVal = LHSInst->getOperand(i: 1); |
1284 | Value *FalseVal = LHSInst->getOperand(i: 2); |
1285 | if (Cond == RHSInst->getOperand(i: 0) && TrueVal == RHSInst->getOperand(i: 2) && |
1286 | FalseVal == RHSInst->getOperand(i: 1)) |
1287 | return std::pair(TrueVal, FalseVal); |
1288 | return std::nullopt; |
1289 | } |
1290 | case Instruction::Call: { |
1291 | // Match min(a, b) and max(a, b) |
1292 | MinMaxIntrinsic *LHSMinMax = dyn_cast<MinMaxIntrinsic>(Val: LHSInst); |
1293 | MinMaxIntrinsic *RHSMinMax = dyn_cast<MinMaxIntrinsic>(Val: RHSInst); |
1294 | if (LHSMinMax && RHSMinMax && |
1295 | LHSMinMax->getPredicate() == |
1296 | ICmpInst::getSwappedPredicate(pred: RHSMinMax->getPredicate()) && |
1297 | ((LHSMinMax->getLHS() == RHSMinMax->getLHS() && |
1298 | LHSMinMax->getRHS() == RHSMinMax->getRHS()) || |
1299 | (LHSMinMax->getLHS() == RHSMinMax->getRHS() && |
1300 | LHSMinMax->getRHS() == RHSMinMax->getLHS()))) |
1301 | return std::pair(LHSMinMax->getLHS(), LHSMinMax->getRHS()); |
1302 | return std::nullopt; |
1303 | } |
1304 | default: |
1305 | return std::nullopt; |
1306 | } |
1307 | } |
1308 | |
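// Try to push a binop into the arms of a one-use select operand when the
// resulting arms simplify. A minimal illustrative sketch (names hypothetical,
// assuming the select has one use and both arms simplify):
//   %s = select i1 %c, i32 0, i32 -1
//   %r = and i32 %s, %x
//   -->
//   %r = select i1 %c, i32 0, i32 %x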
1309 | Value *InstCombinerImpl::SimplifySelectsFeedingBinaryOp(BinaryOperator &I, |
1310 | Value *LHS, |
1311 | Value *RHS) { |
1312 | Value *A, *B, *C, *D, *E, *F; |
1313 | bool LHSIsSelect = match(V: LHS, P: m_Select(C: m_Value(V&: A), L: m_Value(V&: B), R: m_Value(V&: C))); |
1314 | bool RHSIsSelect = match(V: RHS, P: m_Select(C: m_Value(V&: D), L: m_Value(V&: E), R: m_Value(V&: F))); |
1315 | if (!LHSIsSelect && !RHSIsSelect) |
1316 | return nullptr; |
1317 | |
1318 | FastMathFlags FMF; |
1319 | BuilderTy::FastMathFlagGuard Guard(Builder); |
1320 | if (isa<FPMathOperator>(Val: &I)) { |
1321 | FMF = I.getFastMathFlags(); |
1322 | Builder.setFastMathFlags(FMF); |
1323 | } |
1324 | |
1325 | Instruction::BinaryOps Opcode = I.getOpcode(); |
1326 | SimplifyQuery Q = SQ.getWithInstruction(I: &I); |
1327 | |
1328 | Value *Cond, *True = nullptr, *False = nullptr; |
1329 | |
1330 | // Special-case for add/negate combination. Replace the zero in the negation |
1331 | // with the trailing add operand: |
1332 | // (Cond ? TVal : -N) + Z --> Cond ? True : (Z - N) |
1333 | // (Cond ? -N : FVal) + Z --> Cond ? (Z - N) : False |
1334 | auto foldAddNegate = [&](Value *TVal, Value *FVal, Value *Z) -> Value * { |
1335 | // We need an 'add' and exactly 1 arm of the select to have been simplified. |
1336 | if (Opcode != Instruction::Add || (!True && !False) || (True && False)) |
1337 | return nullptr; |
1338 | |
1339 | Value *N; |
1340 | if (True && match(V: FVal, P: m_Neg(V: m_Value(V&: N)))) { |
1341 | Value *Sub = Builder.CreateSub(LHS: Z, RHS: N); |
1342 | return Builder.CreateSelect(C: Cond, True, False: Sub, Name: I.getName()); |
1343 | } |
1344 | if (False && match(V: TVal, P: m_Neg(V: m_Value(V&: N)))) { |
1345 | Value *Sub = Builder.CreateSub(LHS: Z, RHS: N); |
1346 | return Builder.CreateSelect(C: Cond, True: Sub, False, Name: I.getName()); |
1347 | } |
1348 | return nullptr; |
1349 | }; |
1350 | |
1351 | if (LHSIsSelect && RHSIsSelect && A == D) { |
1352 | // (A ? B : C) op (A ? E : F) -> A ? (B op E) : (C op F) |
1353 | Cond = A; |
1354 | True = simplifyBinOp(Opcode, LHS: B, RHS: E, FMF, Q); |
1355 | False = simplifyBinOp(Opcode, LHS: C, RHS: F, FMF, Q); |
1356 | |
1357 | if (LHS->hasOneUse() && RHS->hasOneUse()) { |
1358 | if (False && !True) |
1359 | True = Builder.CreateBinOp(Opc: Opcode, LHS: B, RHS: E); |
1360 | else if (True && !False) |
1361 | False = Builder.CreateBinOp(Opc: Opcode, LHS: C, RHS: F); |
1362 | } |
1363 | } else if (LHSIsSelect && LHS->hasOneUse()) { |
1364 | // (A ? B : C) op Y -> A ? (B op Y) : (C op Y) |
1365 | Cond = A; |
1366 | True = simplifyBinOp(Opcode, LHS: B, RHS, FMF, Q); |
1367 | False = simplifyBinOp(Opcode, LHS: C, RHS, FMF, Q); |
1368 | if (Value *NewSel = foldAddNegate(B, C, RHS)) |
1369 | return NewSel; |
1370 | } else if (RHSIsSelect && RHS->hasOneUse()) { |
1371 | // X op (D ? E : F) -> D ? (X op E) : (X op F) |
1372 | Cond = D; |
1373 | True = simplifyBinOp(Opcode, LHS, RHS: E, FMF, Q); |
1374 | False = simplifyBinOp(Opcode, LHS, RHS: F, FMF, Q); |
1375 | if (Value *NewSel = foldAddNegate(E, F, LHS)) |
1376 | return NewSel; |
1377 | } |
1378 | |
1379 | if (!True || !False) |
1380 | return nullptr; |
1381 | |
1382 | Value *SI = Builder.CreateSelect(C: Cond, True, False); |
1383 | SI->takeName(V: &I); |
1384 | return SI; |
1385 | } |
1386 | |
1387 | /// Freely adapt every user of V as-if V was changed to !V. |
1388 | /// WARNING: only if canFreelyInvertAllUsersOf() said this can be done. |
1389 | void InstCombinerImpl::freelyInvertAllUsersOf(Value *I, Value *IgnoredUser) { |
assert(!isa<Constant>(I) && "Shouldn't invert users of constant");
1391 | for (User *U : make_early_inc_range(Range: I->users())) { |
1392 | if (U == IgnoredUser) |
1393 | continue; // Don't consider this user. |
1394 | switch (cast<Instruction>(Val: U)->getOpcode()) { |
1395 | case Instruction::Select: { |
1396 | auto *SI = cast<SelectInst>(Val: U); |
1397 | SI->swapValues(); |
1398 | SI->swapProfMetadata(); |
1399 | break; |
1400 | } |
1401 | case Instruction::Br: { |
1402 | BranchInst *BI = cast<BranchInst>(Val: U); |
1403 | BI->swapSuccessors(); // swaps prof metadata too |
1404 | if (BPI) |
1405 | BPI->swapSuccEdgesProbabilities(Src: BI->getParent()); |
1406 | break; |
1407 | } |
1408 | case Instruction::Xor: |
1409 | replaceInstUsesWith(I&: cast<Instruction>(Val&: *U), V: I); |
1410 | // Add to worklist for DCE. |
1411 | addToWorklist(I: cast<Instruction>(Val: U)); |
1412 | break; |
1413 | default: |
1414 | llvm_unreachable("Got unexpected user - out of sync with " |
1415 | "canFreelyInvertAllUsersOf() ?" ); |
1416 | } |
1417 | } |
1418 | |
1419 | // Update pre-existing debug value uses. |
1420 | SmallVector<DbgValueInst *, 4> DbgValues; |
1421 | SmallVector<DbgVariableRecord *, 4> DbgVariableRecords; |
1422 | llvm::findDbgValues(DbgValues, V: I, DbgVariableRecords: &DbgVariableRecords); |
1423 | |
1424 | auto InvertDbgValueUse = [&](auto *DbgVal) { |
1425 | SmallVector<uint64_t, 1> Ops = {dwarf::DW_OP_not}; |
1426 | for (unsigned Idx = 0, End = DbgVal->getNumVariableLocationOps(); |
1427 | Idx != End; ++Idx) |
1428 | if (DbgVal->getVariableLocationOp(Idx) == I) |
1429 | DbgVal->setExpression( |
1430 | DIExpression::appendOpsToArg(Expr: DbgVal->getExpression(), Ops, ArgNo: Idx)); |
1431 | }; |
1432 | |
1433 | for (DbgValueInst *DVI : DbgValues) |
1434 | InvertDbgValueUse(DVI); |
1435 | |
1436 | for (DbgVariableRecord *DVR : DbgVariableRecords) |
1437 | InvertDbgValueUse(DVR); |
1438 | } |
1439 | |
1440 | /// Given a 'sub' instruction, return the RHS of the instruction if the LHS is a |
1441 | /// constant zero (which is the 'negate' form). |
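/// For example, for 'sub i32 0, %x' this returns %x, and for the constant
/// i32 5 it returns the folded constant i32 -5 (illustrative).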
1442 | Value *InstCombinerImpl::dyn_castNegVal(Value *V) const { |
1443 | Value *NegV; |
1444 | if (match(V, P: m_Neg(V: m_Value(V&: NegV)))) |
1445 | return NegV; |
1446 | |
1447 | // Constants can be considered to be negated values if they can be folded. |
1448 | if (ConstantInt *C = dyn_cast<ConstantInt>(Val: V)) |
1449 | return ConstantExpr::getNeg(C); |
1450 | |
1451 | if (ConstantDataVector *C = dyn_cast<ConstantDataVector>(Val: V)) |
1452 | if (C->getType()->getElementType()->isIntegerTy()) |
1453 | return ConstantExpr::getNeg(C); |
1454 | |
1455 | if (ConstantVector *CV = dyn_cast<ConstantVector>(Val: V)) { |
1456 | for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) { |
1457 | Constant *Elt = CV->getAggregateElement(Elt: i); |
1458 | if (!Elt) |
1459 | return nullptr; |
1460 | |
1461 | if (isa<UndefValue>(Val: Elt)) |
1462 | continue; |
1463 | |
1464 | if (!isa<ConstantInt>(Val: Elt)) |
1465 | return nullptr; |
1466 | } |
1467 | return ConstantExpr::getNeg(C: CV); |
1468 | } |
1469 | |
1470 | // Negate integer vector splats. |
1471 | if (auto *CV = dyn_cast<Constant>(Val: V)) |
1472 | if (CV->getType()->isVectorTy() && |
1473 | CV->getType()->getScalarType()->isIntegerTy() && CV->getSplatValue()) |
1474 | return ConstantExpr::getNeg(C: CV); |
1475 | |
1476 | return nullptr; |
1477 | } |
1478 | |
1479 | // Try to fold: |
1480 | // 1) (fp_binop ({s|u}itofp x), ({s|u}itofp y)) |
1481 | // -> ({s|u}itofp (int_binop x, y)) |
1482 | // 2) (fp_binop ({s|u}itofp x), FpC) |
1483 | // -> ({s|u}itofp (int_binop x, (fpto{s|u}i FpC))) |
1484 | // |
1485 | // Assuming the sign of the cast for x/y is `OpsFromSigned`. |
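//
// A minimal illustrative sketch (assuming the narrow add can be proven not to
// overflow):
//   fadd (sitofp i8 %x to float), (sitofp i8 %y to float)
//     --> sitofp (add nsw i8 %x, %y) to float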
1486 | Instruction *InstCombinerImpl::foldFBinOpOfIntCastsFromSign( |
1487 | BinaryOperator &BO, bool OpsFromSigned, std::array<Value *, 2> IntOps, |
1488 | Constant *Op1FpC, SmallVectorImpl<WithCache<const Value *>> &OpsKnown) { |
1489 | |
1490 | Type *FPTy = BO.getType(); |
1491 | Type *IntTy = IntOps[0]->getType(); |
1492 | |
1493 | unsigned IntSz = IntTy->getScalarSizeInBits(); |
// This is the maximum number of in-use bits in the integer for which the
// int -> fp cast is exact.
1496 | unsigned MaxRepresentableBits = |
1497 | APFloat::semanticsPrecision(FPTy->getScalarType()->getFltSemantics()); |
1498 | |
// Preserve the known number of leading bits. This can allow us to trivially
// pass the nsw/nuw checks later on.
1501 | unsigned NumUsedLeadingBits[2] = {IntSz, IntSz}; |
1502 | |
1503 | // NB: This only comes up if OpsFromSigned is true, so there is no need to |
// cache it between calls to `foldFBinOpOfIntCastsFromSign`.
1505 | auto IsNonZero = [&](unsigned OpNo) -> bool { |
1506 | if (OpsKnown[OpNo].hasKnownBits() && |
1507 | OpsKnown[OpNo].getKnownBits(Q: SQ).isNonZero()) |
1508 | return true; |
1509 | return isKnownNonZero(V: IntOps[OpNo], Q: SQ); |
1510 | }; |
1511 | |
1512 | auto IsNonNeg = [&](unsigned OpNo) -> bool { |
1513 | // NB: This matches the impl in ValueTracking, we just try to use cached |
1514 | // knownbits here. If we ever start supporting WithCache for |
1515 | // `isKnownNonNegative`, change this to an explicit call. |
1516 | return OpsKnown[OpNo].getKnownBits(Q: SQ).isNonNegative(); |
1517 | }; |
1518 | |
1519 | // Check if we know for certain that ({s|u}itofp op) is exact. |
1520 | auto IsValidPromotion = [&](unsigned OpNo) -> bool { |
1521 | // Can we treat this operand as the desired sign? |
1522 | if (OpsFromSigned != isa<SIToFPInst>(Val: BO.getOperand(i_nocapture: OpNo)) && |
1523 | !IsNonNeg(OpNo)) |
1524 | return false; |
1525 | |
// If fp precision >= bitwidth(op) then it's exact.
1527 | // NB: This is slightly conservative for `sitofp`. For signed conversion, we |
1528 | // can handle `MaxRepresentableBits == IntSz - 1` as the sign bit will be |
1529 | // handled specially. We can't, however, increase the bound arbitrarily for |
1530 | // `sitofp` as for larger sizes, it won't sign extend. |
1531 | if (MaxRepresentableBits < IntSz) { |
// Otherwise, if it's a signed cast, check that fp precision >= bitwidth(op) -
// numSignBits(op).
1534 | // TODO: If we add support for `WithCache` in `ComputeNumSignBits`, change |
1535 | // `IntOps[OpNo]` arguments to `KnownOps[OpNo]`. |
1536 | if (OpsFromSigned) |
1537 | NumUsedLeadingBits[OpNo] = IntSz - ComputeNumSignBits(Op: IntOps[OpNo]); |
// Finally, for an unsigned cast, check that fp precision >= bitwidth(op) -
// numLeadingZeros(op).
1540 | else { |
1541 | NumUsedLeadingBits[OpNo] = |
1542 | IntSz - OpsKnown[OpNo].getKnownBits(Q: SQ).countMinLeadingZeros(); |
1543 | } |
1544 | } |
1545 | // NB: We could also check if op is known to be a power of 2 or zero (which |
// will always be representable). It's unlikely, however, that if we are
// unable to bound op in any way, we will be able to pass the overflow checks
// later on.
1549 | |
1550 | if (MaxRepresentableBits < NumUsedLeadingBits[OpNo]) |
1551 | return false; |
1552 | // Signed + Mul also requires that op is non-zero to avoid -0 cases. |
1553 | return !OpsFromSigned || BO.getOpcode() != Instruction::FMul || |
1554 | IsNonZero(OpNo); |
1555 | }; |
1556 | |
1557 | // If we have a constant rhs, see if we can losslessly convert it to an int. |
1558 | if (Op1FpC != nullptr) { |
// Signed + Mul requires a non-zero constant to avoid -0 cases.
1560 | if (OpsFromSigned && BO.getOpcode() == Instruction::FMul && |
1561 | !match(V: Op1FpC, P: m_NonZeroFP())) |
1562 | return nullptr; |
1563 | |
1564 | Constant *Op1IntC = ConstantFoldCastOperand( |
1565 | Opcode: OpsFromSigned ? Instruction::FPToSI : Instruction::FPToUI, C: Op1FpC, |
1566 | DestTy: IntTy, DL); |
1567 | if (Op1IntC == nullptr) |
1568 | return nullptr; |
1569 | if (ConstantFoldCastOperand(Opcode: OpsFromSigned ? Instruction::SIToFP |
1570 | : Instruction::UIToFP, |
1571 | C: Op1IntC, DestTy: FPTy, DL) != Op1FpC) |
1572 | return nullptr; |
1573 | |
1574 | // First try to keep sign of cast the same. |
1575 | IntOps[1] = Op1IntC; |
1576 | } |
1577 | |
1578 | // Ensure lhs/rhs integer types match. |
1579 | if (IntTy != IntOps[1]->getType()) |
1580 | return nullptr; |
1581 | |
1582 | if (Op1FpC == nullptr) { |
1583 | if (!IsValidPromotion(1)) |
1584 | return nullptr; |
1585 | } |
1586 | if (!IsValidPromotion(0)) |
1587 | return nullptr; |
1588 | |
// Finally, we check that the integer version of the binop will not overflow.
1590 | BinaryOperator::BinaryOps IntOpc; |
1591 | // Because of the precision check, we can often rule out overflows. |
1592 | bool NeedsOverflowCheck = true; |
1593 | // Try to conservatively rule out overflow based on the already done precision |
1594 | // checks. |
1595 | unsigned OverflowMaxOutputBits = OpsFromSigned ? 2 : 1; |
1596 | unsigned OverflowMaxCurBits = |
1597 | std::max(a: NumUsedLeadingBits[0], b: NumUsedLeadingBits[1]); |
1598 | bool OutputSigned = OpsFromSigned; |
1599 | switch (BO.getOpcode()) { |
1600 | case Instruction::FAdd: |
1601 | IntOpc = Instruction::Add; |
1602 | OverflowMaxOutputBits += OverflowMaxCurBits; |
1603 | break; |
1604 | case Instruction::FSub: |
1605 | IntOpc = Instruction::Sub; |
1606 | OverflowMaxOutputBits += OverflowMaxCurBits; |
1607 | break; |
1608 | case Instruction::FMul: |
1609 | IntOpc = Instruction::Mul; |
1610 | OverflowMaxOutputBits += OverflowMaxCurBits * 2; |
1611 | break; |
1612 | default: |
1613 | llvm_unreachable("Unsupported binop" ); |
1614 | } |
1615 | // The precision check may have already ruled out overflow. |
1616 | if (OverflowMaxOutputBits < IntSz) { |
1617 | NeedsOverflowCheck = false; |
// Although an unsigned sub may wrap, its result is still bounded to an
// in-range signed value; this is what allows us to avoid the overflow check
// for sub.
1620 | if (IntOpc == Instruction::Sub) |
1621 | OutputSigned = true; |
1622 | } |
1623 | |
1624 | // Precision check did not rule out overflow, so need to check. |
1625 | // TODO: If we add support for `WithCache` in `willNotOverflow`, change |
1626 | // `IntOps[...]` arguments to `KnownOps[...]`. |
1627 | if (NeedsOverflowCheck && |
1628 | !willNotOverflow(Opcode: IntOpc, LHS: IntOps[0], RHS: IntOps[1], CxtI: BO, IsSigned: OutputSigned)) |
1629 | return nullptr; |
1630 | |
1631 | Value *IntBinOp = Builder.CreateBinOp(Opc: IntOpc, LHS: IntOps[0], RHS: IntOps[1]); |
1632 | if (auto *IntBO = dyn_cast<BinaryOperator>(Val: IntBinOp)) { |
1633 | IntBO->setHasNoSignedWrap(OutputSigned); |
1634 | IntBO->setHasNoUnsignedWrap(!OutputSigned); |
1635 | } |
1636 | if (OutputSigned) |
1637 | return new SIToFPInst(IntBinOp, FPTy); |
1638 | return new UIToFPInst(IntBinOp, FPTy); |
1639 | } |
1640 | |
1641 | // Try to fold: |
1642 | // 1) (fp_binop ({s|u}itofp x), ({s|u}itofp y)) |
1643 | // -> ({s|u}itofp (int_binop x, y)) |
1644 | // 2) (fp_binop ({s|u}itofp x), FpC) |
1645 | // -> ({s|u}itofp (int_binop x, (fpto{s|u}i FpC))) |
1646 | Instruction *InstCombinerImpl::foldFBinOpOfIntCasts(BinaryOperator &BO) { |
1647 | std::array<Value *, 2> IntOps = {nullptr, nullptr}; |
1648 | Constant *Op1FpC = nullptr; |
1649 | // Check for: |
1650 | // 1) (binop ({s|u}itofp x), ({s|u}itofp y)) |
1651 | // 2) (binop ({s|u}itofp x), FpC) |
1652 | if (!match(V: BO.getOperand(i_nocapture: 0), P: m_SIToFP(Op: m_Value(V&: IntOps[0]))) && |
1653 | !match(V: BO.getOperand(i_nocapture: 0), P: m_UIToFP(Op: m_Value(V&: IntOps[0])))) |
1654 | return nullptr; |
1655 | |
1656 | if (!match(V: BO.getOperand(i_nocapture: 1), P: m_Constant(C&: Op1FpC)) && |
1657 | !match(V: BO.getOperand(i_nocapture: 1), P: m_SIToFP(Op: m_Value(V&: IntOps[1]))) && |
1658 | !match(V: BO.getOperand(i_nocapture: 1), P: m_UIToFP(Op: m_Value(V&: IntOps[1])))) |
1659 | return nullptr; |
1660 | |
1661 | // Cache KnownBits a bit to potentially save some analysis. |
1662 | SmallVector<WithCache<const Value *>, 2> OpsKnown = {IntOps[0], IntOps[1]}; |
1663 | |
1664 | // Try treating x/y as coming from both `uitofp` and `sitofp`. There are |
1665 | // different constraints depending on the sign of the cast. |
1666 | // NB: `(uitofp nneg X)` == `(sitofp nneg X)`. |
1667 | if (Instruction *R = foldFBinOpOfIntCastsFromSign(BO, /*OpsFromSigned=*/false, |
1668 | IntOps, Op1FpC, OpsKnown)) |
1669 | return R; |
1670 | return foldFBinOpOfIntCastsFromSign(BO, /*OpsFromSigned=*/true, IntOps, |
1671 | Op1FpC, OpsKnown); |
1672 | } |
1673 | |
1674 | /// A binop with a constant operand and a sign-extended boolean operand may be |
1675 | /// converted into a select of constants by applying the binary operation to |
1676 | /// the constant with the two possible values of the extended boolean (0 or -1). |
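/// Illustrative example:
///   mul (sext i1 %b to i32), 7 --> select i1 %b, i32 -7, i32 0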
1677 | Instruction *InstCombinerImpl::foldBinopOfSextBoolToSelect(BinaryOperator &BO) { |
1678 | // TODO: Handle non-commutative binop (constant is operand 0). |
1679 | // TODO: Handle zext. |
1680 | // TODO: Peek through 'not' of cast. |
1681 | Value *BO0 = BO.getOperand(i_nocapture: 0); |
1682 | Value *BO1 = BO.getOperand(i_nocapture: 1); |
1683 | Value *X; |
1684 | Constant *C; |
1685 | if (!match(V: BO0, P: m_SExt(Op: m_Value(V&: X))) || !match(V: BO1, P: m_ImmConstant(C)) || |
1686 | !X->getType()->isIntOrIntVectorTy(BitWidth: 1)) |
1687 | return nullptr; |
1688 | |
1689 | // bo (sext i1 X), C --> select X, (bo -1, C), (bo 0, C) |
1690 | Constant *Ones = ConstantInt::getAllOnesValue(Ty: BO.getType()); |
1691 | Constant *Zero = ConstantInt::getNullValue(Ty: BO.getType()); |
1692 | Value *TVal = Builder.CreateBinOp(Opc: BO.getOpcode(), LHS: Ones, RHS: C); |
1693 | Value *FVal = Builder.CreateBinOp(Opc: BO.getOpcode(), LHS: Zero, RHS: C); |
1694 | return SelectInst::Create(C: X, S1: TVal, S2: FVal); |
1695 | } |
1696 | |
1697 | static Value *simplifyOperationIntoSelectOperand(Instruction &I, SelectInst *SI, |
1698 | bool IsTrueArm) { |
1699 | SmallVector<Value *> Ops; |
1700 | for (Value *Op : I.operands()) { |
1701 | Value *V = nullptr; |
1702 | if (Op == SI) { |
1703 | V = IsTrueArm ? SI->getTrueValue() : SI->getFalseValue(); |
1704 | } else if (match(V: SI->getCondition(), |
1705 | P: m_SpecificICmp(MatchPred: IsTrueArm ? ICmpInst::ICMP_EQ |
1706 | : ICmpInst::ICMP_NE, |
1707 | L: m_Specific(V: Op), R: m_Value(V))) && |
1708 | isGuaranteedNotToBeUndefOrPoison(V)) { |
1709 | // Pass |
1710 | } else { |
1711 | V = Op; |
1712 | } |
1713 | Ops.push_back(Elt: V); |
1714 | } |
1715 | |
1716 | return simplifyInstructionWithOperands(I: &I, NewOps: Ops, Q: I.getDataLayout()); |
1717 | } |
1718 | |
1719 | static Value *foldOperationIntoSelectOperand(Instruction &I, SelectInst *SI, |
1720 | Value *NewOp, InstCombiner &IC) { |
1721 | Instruction *Clone = I.clone(); |
1722 | Clone->replaceUsesOfWith(From: SI, To: NewOp); |
1723 | Clone->dropUBImplyingAttrsAndMetadata(); |
1724 | IC.InsertNewInstBefore(New: Clone, Old: I.getIterator()); |
1725 | return Clone; |
1726 | } |
1727 | |
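// Fold op(select(C, T, F), X) into select(C, op(T, X), op(F, X)) when at least
// one arm simplifies. A minimal illustrative sketch (names hypothetical,
// assuming the select has one use):
//   %s = select i1 %c, i32 0, i32 %x
//   %r = add i32 %s, %y
//   -->
//   %x.plus.y = add i32 %x, %y   ; clone created for the arm that did not fold
//   %r = select i1 %c, i32 %y, i32 %x.plus.y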
1728 | Instruction *InstCombinerImpl::FoldOpIntoSelect(Instruction &Op, SelectInst *SI, |
1729 | bool FoldWithMultiUse) { |
// Don't modify shared select instructions unless FoldWithMultiUse is set.
1731 | if (!SI->hasOneUse() && !FoldWithMultiUse) |
1732 | return nullptr; |
1733 | |
1734 | Value *TV = SI->getTrueValue(); |
1735 | Value *FV = SI->getFalseValue(); |
1736 | |
1737 | // Bool selects with constant operands can be folded to logical ops. |
1738 | if (SI->getType()->isIntOrIntVectorTy(BitWidth: 1)) |
1739 | return nullptr; |
1740 | |
1741 | // Avoid breaking min/max reduction pattern, |
1742 | // which is necessary for vectorization later. |
1743 | if (isa<MinMaxIntrinsic>(Val: &Op)) |
1744 | for (Value *IntrinOp : Op.operands()) |
1745 | if (auto *PN = dyn_cast<PHINode>(Val: IntrinOp)) |
1746 | for (Value *PhiOp : PN->operands()) |
1747 | if (PhiOp == &Op) |
1748 | return nullptr; |
1749 | |
1750 | // Test if a FCmpInst instruction is used exclusively by a select as |
1751 | // part of a minimum or maximum operation. If so, refrain from doing |
1752 | // any other folding. This helps out other analyses which understand |
1753 | // non-obfuscated minimum and maximum idioms. And in this case, at |
1754 | // least one of the comparison operands has at least one user besides |
1755 | // the compare (the select), which would often largely negate the |
1756 | // benefit of folding anyway. |
1757 | if (auto *CI = dyn_cast<FCmpInst>(Val: SI->getCondition())) { |
1758 | if (CI->hasOneUse()) { |
1759 | Value *Op0 = CI->getOperand(i_nocapture: 0), *Op1 = CI->getOperand(i_nocapture: 1); |
1760 | if (((TV == Op0 && FV == Op1) || (FV == Op0 && TV == Op1)) && |
1761 | !CI->isCommutative()) |
1762 | return nullptr; |
1763 | } |
1764 | } |
1765 | |
1766 | // Make sure that one of the select arms folds successfully. |
1767 | Value *NewTV = simplifyOperationIntoSelectOperand(I&: Op, SI, /*IsTrueArm=*/true); |
1768 | Value *NewFV = |
1769 | simplifyOperationIntoSelectOperand(I&: Op, SI, /*IsTrueArm=*/false); |
1770 | if (!NewTV && !NewFV) |
1771 | return nullptr; |
1772 | |
1773 | // Create an instruction for the arm that did not fold. |
1774 | if (!NewTV) |
1775 | NewTV = foldOperationIntoSelectOperand(I&: Op, SI, NewOp: TV, IC&: *this); |
1776 | if (!NewFV) |
1777 | NewFV = foldOperationIntoSelectOperand(I&: Op, SI, NewOp: FV, IC&: *this); |
1778 | return SelectInst::Create(C: SI->getCondition(), S1: NewTV, S2: NewFV, NameStr: "" , InsertBefore: nullptr, MDFrom: SI); |
1779 | } |
1780 | |
1781 | static Value *simplifyInstructionWithPHI(Instruction &I, PHINode *PN, |
1782 | Value *InValue, BasicBlock *InBB, |
1783 | const DataLayout &DL, |
1784 | const SimplifyQuery SQ) { |
1785 | // NB: It is a precondition of this transform that the operands be |
1786 | // phi translatable! |
1787 | SmallVector<Value *> Ops; |
1788 | for (Value *Op : I.operands()) { |
1789 | if (Op == PN) |
1790 | Ops.push_back(Elt: InValue); |
1791 | else |
1792 | Ops.push_back(Elt: Op->DoPHITranslation(CurBB: PN->getParent(), PredBB: InBB)); |
1793 | } |
1794 | |
1795 | // Don't consider the simplification successful if we get back a constant |
1796 | // expression. That's just an instruction in hiding. |
1797 | // Also reject the case where we simplify back to the phi node. We wouldn't |
1798 | // be able to remove it in that case. |
1799 | Value *NewVal = simplifyInstructionWithOperands( |
1800 | I: &I, NewOps: Ops, Q: SQ.getWithInstruction(I: InBB->getTerminator())); |
1801 | if (NewVal && NewVal != PN && !match(V: NewVal, P: m_ConstantExpr())) |
1802 | return NewVal; |
1803 | |
1804 | // Check if incoming PHI value can be replaced with constant |
1805 | // based on implied condition. |
1806 | BranchInst *TerminatorBI = dyn_cast<BranchInst>(Val: InBB->getTerminator()); |
1807 | const ICmpInst *ICmp = dyn_cast<ICmpInst>(Val: &I); |
1808 | if (TerminatorBI && TerminatorBI->isConditional() && |
1809 | TerminatorBI->getSuccessor(i: 0) != TerminatorBI->getSuccessor(i: 1) && ICmp) { |
1810 | bool LHSIsTrue = TerminatorBI->getSuccessor(i: 0) == PN->getParent(); |
1811 | std::optional<bool> ImpliedCond = isImpliedCondition( |
1812 | LHS: TerminatorBI->getCondition(), RHSPred: ICmp->getCmpPredicate(), RHSOp0: Ops[0], RHSOp1: Ops[1], |
1813 | DL, LHSIsTrue); |
1814 | if (ImpliedCond) |
1815 | return ConstantInt::getBool(Ty: I.getType(), V: ImpliedCond.value()); |
1816 | } |
1817 | |
1818 | return nullptr; |
1819 | } |
1820 | |
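// Fold op(phi(...), C) by applying the op to each incoming value. A minimal
// illustrative sketch (names hypothetical; assumes the phi has one use, %bb1
// branches unconditionally to the phi block, and the edge is not a backedge):
//   %p = phi i32 [ 0, %bb0 ], [ %x, %bb1 ]
//   %r = add i32 %p, 5
//   -->
//   (in %bb1)  %x.plus.5 = add i32 %x, 5
//   %r = phi i32 [ 5, %bb0 ], [ %x.plus.5, %bb1 ]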
1821 | Instruction *InstCombinerImpl::foldOpIntoPhi(Instruction &I, PHINode *PN, |
1822 | bool AllowMultipleUses) { |
1823 | unsigned NumPHIValues = PN->getNumIncomingValues(); |
1824 | if (NumPHIValues == 0) |
1825 | return nullptr; |
1826 | |
1827 | // We normally only transform phis with a single use. However, if a PHI has |
1828 | // multiple uses and they are all the same operation, we can fold *all* of the |
1829 | // uses into the PHI. |
1830 | bool OneUse = PN->hasOneUse(); |
1831 | bool IdenticalUsers = false; |
1832 | if (!AllowMultipleUses && !OneUse) { |
1833 | // Walk the use list for the instruction, comparing them to I. |
1834 | for (User *U : PN->users()) { |
1835 | Instruction *UI = cast<Instruction>(Val: U); |
1836 | if (UI != &I && !I.isIdenticalTo(I: UI)) |
1837 | return nullptr; |
1838 | } |
1839 | // Otherwise, we can replace *all* users with the new PHI we form. |
1840 | IdenticalUsers = true; |
1841 | } |
1842 | |
1843 | // Check that all operands are phi-translatable. |
1844 | for (Value *Op : I.operands()) { |
1845 | if (Op == PN) |
1846 | continue; |
1847 | |
1848 | // Non-instructions never require phi-translation. |
1849 | auto *I = dyn_cast<Instruction>(Val: Op); |
1850 | if (!I) |
1851 | continue; |
1852 | |
1853 | // Phi-translate can handle phi nodes in the same block. |
1854 | if (isa<PHINode>(Val: I)) |
1855 | if (I->getParent() == PN->getParent()) |
1856 | continue; |
1857 | |
1858 | // Operand dominates the block, no phi-translation necessary. |
1859 | if (DT.dominates(Def: I, BB: PN->getParent())) |
1860 | continue; |
1861 | |
1862 | // Not phi-translatable, bail out. |
1863 | return nullptr; |
1864 | } |
1865 | |
1866 | // Check to see whether the instruction can be folded into each phi operand. |
1867 | // If there is one operand that does not fold, remember the BB it is in. |
1868 | SmallVector<Value *> NewPhiValues; |
1869 | SmallVector<unsigned int> OpsToMoveUseToIncomingBB; |
1870 | bool SeenNonSimplifiedInVal = false; |
1871 | for (unsigned i = 0; i != NumPHIValues; ++i) { |
1872 | Value *InVal = PN->getIncomingValue(i); |
1873 | BasicBlock *InBB = PN->getIncomingBlock(i); |
1874 | |
1875 | if (auto *NewVal = simplifyInstructionWithPHI(I, PN, InValue: InVal, InBB, DL, SQ)) { |
1876 | NewPhiValues.push_back(Elt: NewVal); |
1877 | continue; |
1878 | } |
1879 | |
1880 | // Handle some cases that can't be fully simplified, but where we know that |
1881 | // the two instructions will fold into one. |
1882 | auto WillFold = [&]() { |
1883 | if (!InVal->hasUseList() || !InVal->hasOneUser()) |
1884 | return false; |
1885 | |
1886 | // icmp of ucmp/scmp with constant will fold to icmp. |
1887 | const APInt *Ignored; |
1888 | if (isa<CmpIntrinsic>(Val: InVal) && |
1889 | match(V: &I, P: m_ICmp(L: m_Specific(V: PN), R: m_APInt(Res&: Ignored)))) |
1890 | return true; |
1891 | |
1892 | // icmp eq zext(bool), 0 will fold to !bool. |
1893 | if (isa<ZExtInst>(Val: InVal) && |
1894 | cast<ZExtInst>(Val: InVal)->getSrcTy()->isIntOrIntVectorTy(BitWidth: 1) && |
1895 | match(V: &I, |
1896 | P: m_SpecificICmp(MatchPred: ICmpInst::ICMP_EQ, L: m_Specific(V: PN), R: m_Zero()))) |
1897 | return true; |
1898 | |
1899 | return false; |
1900 | }; |
1901 | |
1902 | if (WillFold()) { |
1903 | OpsToMoveUseToIncomingBB.push_back(Elt: i); |
1904 | NewPhiValues.push_back(Elt: nullptr); |
1905 | continue; |
1906 | } |
1907 | |
1908 | if (!OneUse && !IdenticalUsers) |
1909 | return nullptr; |
1910 | |
1911 | if (SeenNonSimplifiedInVal) |
1912 | return nullptr; // More than one non-simplified value. |
1913 | SeenNonSimplifiedInVal = true; |
1914 | |
1915 | // If there is exactly one non-simplified value, we can insert a copy of the |
1916 | // operation in that block. However, if this is a critical edge, we would |
1917 | // be inserting the computation on some other paths (e.g. inside a loop). |
1918 | // Only do this if the pred block is unconditionally branching into the phi |
1919 | // block. Also, make sure that the pred block is not dead code. |
1920 | BranchInst *BI = dyn_cast<BranchInst>(Val: InBB->getTerminator()); |
1921 | if (!BI || !BI->isUnconditional() || !DT.isReachableFromEntry(A: InBB)) |
1922 | return nullptr; |
1923 | |
1924 | NewPhiValues.push_back(Elt: nullptr); |
1925 | OpsToMoveUseToIncomingBB.push_back(Elt: i); |
1926 | |
1927 | // If the InVal is an invoke at the end of the pred block, then we can't |
1928 | // insert a computation after it without breaking the edge. |
1929 | if (isa<InvokeInst>(Val: InVal)) |
1930 | if (cast<Instruction>(Val: InVal)->getParent() == InBB) |
1931 | return nullptr; |
1932 | |
1933 | // Do not push the operation across a loop backedge. This could result in |
1934 | // an infinite combine loop, and is generally non-profitable (especially |
1935 | // if the operation was originally outside the loop). |
1936 | if (isBackEdge(From: InBB, To: PN->getParent())) |
1937 | return nullptr; |
1938 | } |
1939 | |
1940 | // Clone the instruction that uses the phi node and move it into the incoming |
1941 | // BB because we know that the next iteration of InstCombine will simplify it. |
1942 | SmallDenseMap<BasicBlock *, Instruction *> Clones; |
1943 | for (auto OpIndex : OpsToMoveUseToIncomingBB) { |
1944 | Value *Op = PN->getIncomingValue(i: OpIndex); |
1945 | BasicBlock *OpBB = PN->getIncomingBlock(i: OpIndex); |
1946 | |
1947 | Instruction *Clone = Clones.lookup(Val: OpBB); |
1948 | if (!Clone) { |
1949 | Clone = I.clone(); |
1950 | for (Use &U : Clone->operands()) { |
1951 | if (U == PN) |
1952 | U = Op; |
1953 | else |
1954 | U = U->DoPHITranslation(CurBB: PN->getParent(), PredBB: OpBB); |
1955 | } |
1956 | Clone = InsertNewInstBefore(New: Clone, Old: OpBB->getTerminator()->getIterator()); |
1957 | Clones.insert(KV: {OpBB, Clone}); |
1958 | } |
1959 | |
1960 | NewPhiValues[OpIndex] = Clone; |
1961 | } |
1962 | |
1963 | // Okay, we can do the transformation: create the new PHI node. |
1964 | PHINode *NewPN = PHINode::Create(Ty: I.getType(), NumReservedValues: PN->getNumIncomingValues()); |
1965 | InsertNewInstBefore(New: NewPN, Old: PN->getIterator()); |
1966 | NewPN->takeName(V: PN); |
1967 | NewPN->setDebugLoc(PN->getDebugLoc()); |
1968 | |
1969 | for (unsigned i = 0; i != NumPHIValues; ++i) |
1970 | NewPN->addIncoming(V: NewPhiValues[i], BB: PN->getIncomingBlock(i)); |
1971 | |
1972 | if (IdenticalUsers) { |
1973 | for (User *U : make_early_inc_range(Range: PN->users())) { |
1974 | Instruction *User = cast<Instruction>(Val: U); |
1975 | if (User == &I) |
1976 | continue; |
1977 | replaceInstUsesWith(I&: *User, V: NewPN); |
1978 | eraseInstFromFunction(I&: *User); |
1979 | } |
1980 | OneUse = true; |
1981 | } |
1982 | |
1983 | if (OneUse) { |
1984 | replaceAllDbgUsesWith(From&: const_cast<PHINode &>(*PN), |
1985 | To&: const_cast<PHINode &>(*NewPN), |
1986 | DomPoint&: const_cast<PHINode &>(*PN), DT); |
1987 | } |
1988 | return replaceInstUsesWith(I, V: NewPN); |
1989 | } |
1990 | |
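// Combine two interleaved recurrences that feed an associative binop into a
// single recurrence. A minimal illustrative sketch (names hypothetical; the
// phis and the inner binops must have no uses other than the ones shown):
//   %p0 = phi i32 [ 1, %entry ], [ %a0, %loop ]
//   %p1 = phi i32 [ 2, %entry ], [ %a1, %loop ]
//   %a0 = add i32 %p0, 3
//   %a1 = add i32 %p1, 4
//   %r  = add i32 %a0, %a1
//   -->
//   %reduced.phi = phi i32 [ 3, %entry ], [ %r, %loop ]
//   %r = add i32 %reduced.phi, 7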
1991 | Instruction *InstCombinerImpl::foldBinopWithRecurrence(BinaryOperator &BO) { |
1992 | if (!BO.isAssociative()) |
1993 | return nullptr; |
1994 | |
1995 | // Find the interleaved binary ops. |
1996 | auto Opc = BO.getOpcode(); |
1997 | auto *BO0 = dyn_cast<BinaryOperator>(Val: BO.getOperand(i_nocapture: 0)); |
1998 | auto *BO1 = dyn_cast<BinaryOperator>(Val: BO.getOperand(i_nocapture: 1)); |
1999 | if (!BO0 || !BO1 || !BO0->hasNUses(N: 2) || !BO1->hasNUses(N: 2) || |
2000 | BO0->getOpcode() != Opc || BO1->getOpcode() != Opc || |
2001 | !BO0->isAssociative() || !BO1->isAssociative() || |
2002 | BO0->getParent() != BO1->getParent()) |
2003 | return nullptr; |
2004 | |
2005 | assert(BO.isCommutative() && BO0->isCommutative() && BO1->isCommutative() && |
2006 | "Expected commutative instructions!" ); |
2007 | |
2008 | // Find the matching phis, forming the recurrences. |
2009 | PHINode *PN0, *PN1; |
2010 | Value *Start0, *Step0, *Start1, *Step1; |
2011 | if (!matchSimpleRecurrence(I: BO0, P&: PN0, Start&: Start0, Step&: Step0) || !PN0->hasOneUse() || |
2012 | !matchSimpleRecurrence(I: BO1, P&: PN1, Start&: Start1, Step&: Step1) || !PN1->hasOneUse() || |
2013 | PN0->getParent() != PN1->getParent()) |
2014 | return nullptr; |
2015 | |
2016 | assert(PN0->getNumIncomingValues() == 2 && PN1->getNumIncomingValues() == 2 && |
2017 | "Expected PHIs with two incoming values!" ); |
2018 | |
2019 | // Convert the start and step values to constants. |
2020 | auto *Init0 = dyn_cast<Constant>(Val: Start0); |
2021 | auto *Init1 = dyn_cast<Constant>(Val: Start1); |
2022 | auto *C0 = dyn_cast<Constant>(Val: Step0); |
2023 | auto *C1 = dyn_cast<Constant>(Val: Step1); |
2024 | if (!Init0 || !Init1 || !C0 || !C1) |
2025 | return nullptr; |
2026 | |
2027 | // Fold the recurrence constants. |
2028 | auto *Init = ConstantFoldBinaryInstruction(Opcode: Opc, V1: Init0, V2: Init1); |
2029 | auto *C = ConstantFoldBinaryInstruction(Opcode: Opc, V1: C0, V2: C1); |
2030 | if (!Init || !C) |
2031 | return nullptr; |
2032 | |
2033 | // Create the reduced PHI. |
2034 | auto *NewPN = PHINode::Create(Ty: PN0->getType(), NumReservedValues: PN0->getNumIncomingValues(), |
2035 | NameStr: "reduced.phi" ); |
2036 | |
2037 | // Create the new binary op. |
2038 | auto *NewBO = BinaryOperator::Create(Op: Opc, S1: NewPN, S2: C); |
2039 | if (Opc == Instruction::FAdd || Opc == Instruction::FMul) { |
2040 | // Intersect FMF flags for FADD and FMUL. |
2041 | FastMathFlags Intersect = BO0->getFastMathFlags() & |
2042 | BO1->getFastMathFlags() & BO.getFastMathFlags(); |
2043 | NewBO->setFastMathFlags(Intersect); |
2044 | } else { |
2045 | OverflowTracking Flags; |
2046 | Flags.AllKnownNonNegative = false; |
2047 | Flags.AllKnownNonZero = false; |
2048 | Flags.mergeFlags(I&: *BO0); |
2049 | Flags.mergeFlags(I&: *BO1); |
2050 | Flags.mergeFlags(I&: BO); |
2051 | Flags.applyFlags(I&: *NewBO); |
2052 | } |
2053 | NewBO->takeName(V: &BO); |
2054 | |
2055 | for (unsigned I = 0, E = PN0->getNumIncomingValues(); I != E; ++I) { |
2056 | auto *V = PN0->getIncomingValue(i: I); |
2057 | auto *BB = PN0->getIncomingBlock(i: I); |
2058 | if (V == Init0) { |
2059 | assert(((PN1->getIncomingValue(0) == Init1 && |
2060 | PN1->getIncomingBlock(0) == BB) || |
2061 | (PN1->getIncomingValue(1) == Init1 && |
2062 | PN1->getIncomingBlock(1) == BB)) && |
2063 | "Invalid incoming block!" ); |
2064 | NewPN->addIncoming(V: Init, BB); |
2065 | } else if (V == BO0) { |
2066 | assert(((PN1->getIncomingValue(0) == BO1 && |
2067 | PN1->getIncomingBlock(0) == BB) || |
2068 | (PN1->getIncomingValue(1) == BO1 && |
2069 | PN1->getIncomingBlock(1) == BB)) && |
2070 | "Invalid incoming block!" ); |
2071 | NewPN->addIncoming(V: NewBO, BB); |
2072 | } else |
2073 | llvm_unreachable("Unexpected incoming value!" ); |
2074 | } |
2075 | |
2076 | LLVM_DEBUG(dbgs() << " Combined " << *PN0 << "\n " << *BO0 |
2077 | << "\n with " << *PN1 << "\n " << *BO1 |
2078 | << '\n'); |
2079 | |
2080 | // Insert the new recurrence and remove the old (dead) ones. |
2081 | InsertNewInstWith(New: NewPN, Old: PN0->getIterator()); |
2082 | InsertNewInstWith(New: NewBO, Old: BO0->getIterator()); |
2083 | |
2084 | eraseInstFromFunction( |
2085 | I&: *replaceInstUsesWith(I&: *BO0, V: PoisonValue::get(T: BO0->getType()))); |
2086 | eraseInstFromFunction( |
2087 | I&: *replaceInstUsesWith(I&: *BO1, V: PoisonValue::get(T: BO1->getType()))); |
2088 | eraseInstFromFunction(I&: *PN0); |
2089 | eraseInstFromFunction(I&: *PN1); |
2090 | |
2091 | return replaceInstUsesWith(I&: BO, V: NewBO); |
2092 | } |
2093 | |
2094 | Instruction *InstCombinerImpl::foldBinopWithPhiOperands(BinaryOperator &BO) { |
2095 | // Attempt to fold binary operators whose operands are simple recurrences. |
2096 | if (auto *NewBO = foldBinopWithRecurrence(BO)) |
2097 | return NewBO; |
2098 | |
2099 | // TODO: This should be similar to the incoming values check in foldOpIntoPhi: |
2100 | // we are guarding against replicating the binop in >1 predecessor. |
2101 | // This could miss matching a phi with 2 constant incoming values. |
2102 | auto *Phi0 = dyn_cast<PHINode>(Val: BO.getOperand(i_nocapture: 0)); |
2103 | auto *Phi1 = dyn_cast<PHINode>(Val: BO.getOperand(i_nocapture: 1)); |
2104 | if (!Phi0 || !Phi1 || !Phi0->hasOneUse() || !Phi1->hasOneUse() || |
2105 | Phi0->getNumOperands() != Phi1->getNumOperands()) |
2106 | return nullptr; |
2107 | |
2108 | // TODO: Remove the restriction for binop being in the same block as the phis. |
2109 | if (BO.getParent() != Phi0->getParent() || |
2110 | BO.getParent() != Phi1->getParent()) |
2111 | return nullptr; |
2112 | |
// Fold if, for every predecessor block, one of the two incoming values (from
// phi0 or phi1) is the identity constant of the binary operator; the binop
// then reduces to a phi of the other incoming values.
2116 | // For example: |
2117 | // %phi0 = phi i32 [0, %bb0], [%i, %bb1] |
2118 | // %phi1 = phi i32 [%j, %bb0], [0, %bb1] |
2119 | // %add = add i32 %phi0, %phi1 |
2120 | // ==> |
2121 | // %add = phi i32 [%j, %bb0], [%i, %bb1] |
2122 | Constant *C = ConstantExpr::getBinOpIdentity(Opcode: BO.getOpcode(), Ty: BO.getType(), |
2123 | /*AllowRHSConstant*/ false); |
2124 | if (C) { |
2125 | SmallVector<Value *, 4> NewIncomingValues; |
2126 | auto CanFoldIncomingValuePair = [&](std::tuple<Use &, Use &> T) { |
2127 | auto &Phi0Use = std::get<0>(t&: T); |
2128 | auto &Phi1Use = std::get<1>(t&: T); |
2129 | if (Phi0->getIncomingBlock(U: Phi0Use) != Phi1->getIncomingBlock(U: Phi1Use)) |
2130 | return false; |
2131 | Value *Phi0UseV = Phi0Use.get(); |
2132 | Value *Phi1UseV = Phi1Use.get(); |
2133 | if (Phi0UseV == C) |
2134 | NewIncomingValues.push_back(Elt: Phi1UseV); |
2135 | else if (Phi1UseV == C) |
2136 | NewIncomingValues.push_back(Elt: Phi0UseV); |
2137 | else |
2138 | return false; |
2139 | return true; |
2140 | }; |
2141 | |
2142 | if (all_of(Range: zip(t: Phi0->operands(), u: Phi1->operands()), |
2143 | P: CanFoldIncomingValuePair)) { |
2144 | PHINode *NewPhi = |
2145 | PHINode::Create(Ty: Phi0->getType(), NumReservedValues: Phi0->getNumOperands()); |
2146 | assert(NewIncomingValues.size() == Phi0->getNumOperands() && |
2147 | "The number of collected incoming values should equal the number " |
2148 | "of the original PHINode operands!" ); |
2149 | for (unsigned I = 0; I < Phi0->getNumOperands(); I++) |
2150 | NewPhi->addIncoming(V: NewIncomingValues[I], BB: Phi0->getIncomingBlock(i: I)); |
2151 | return NewPhi; |
2152 | } |
2153 | } |
2154 | |
2155 | if (Phi0->getNumOperands() != 2 || Phi1->getNumOperands() != 2) |
2156 | return nullptr; |
2157 | |
2158 | // Match a pair of incoming constants for one of the predecessor blocks. |
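// An illustrative sketch (block names hypothetical; %other.bb must branch
// unconditionally to this block):
//   %phi0 = phi i32 [ 7, %const.bb ], [ %x, %other.bb ]
//   %phi1 = phi i32 [ 3, %const.bb ], [ %y, %other.bb ]
//   %r = add i32 %phi0, %phi1
//   -->
//   (in %other.bb)  %xy = add i32 %x, %y
//   %r = phi i32 [ 10, %const.bb ], [ %xy, %other.bb ]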
2159 | BasicBlock *ConstBB, *OtherBB; |
2160 | Constant *C0, *C1; |
2161 | if (match(V: Phi0->getIncomingValue(i: 0), P: m_ImmConstant(C&: C0))) { |
2162 | ConstBB = Phi0->getIncomingBlock(i: 0); |
2163 | OtherBB = Phi0->getIncomingBlock(i: 1); |
2164 | } else if (match(V: Phi0->getIncomingValue(i: 1), P: m_ImmConstant(C&: C0))) { |
2165 | ConstBB = Phi0->getIncomingBlock(i: 1); |
2166 | OtherBB = Phi0->getIncomingBlock(i: 0); |
2167 | } else { |
2168 | return nullptr; |
2169 | } |
2170 | if (!match(V: Phi1->getIncomingValueForBlock(BB: ConstBB), P: m_ImmConstant(C&: C1))) |
2171 | return nullptr; |
2172 | |
2173 | // The block that we are hoisting to must reach here unconditionally. |
2174 | // Otherwise, we could be speculatively executing an expensive or |
// non-speculatable op.
2176 | auto *PredBlockBranch = dyn_cast<BranchInst>(Val: OtherBB->getTerminator()); |
2177 | if (!PredBlockBranch || PredBlockBranch->isConditional() || |
2178 | !DT.isReachableFromEntry(A: OtherBB)) |
2179 | return nullptr; |
2180 | |
2181 | // TODO: This check could be tightened to only apply to binops (div/rem) that |
2182 | // are not safe to speculatively execute. But that could allow hoisting |
2183 | // potentially expensive instructions (fdiv for example). |
2184 | for (auto BBIter = BO.getParent()->begin(); &*BBIter != &BO; ++BBIter) |
2185 | if (!isGuaranteedToTransferExecutionToSuccessor(I: &*BBIter)) |
2186 | return nullptr; |
2187 | |
2188 | // Fold constants for the predecessor block with constant incoming values. |
2189 | Constant *NewC = ConstantFoldBinaryOpOperands(Opcode: BO.getOpcode(), LHS: C0, RHS: C1, DL); |
2190 | if (!NewC) |
2191 | return nullptr; |
2192 | |
2193 | // Make a new binop in the predecessor block with the non-constant incoming |
2194 | // values. |
2195 | Builder.SetInsertPoint(PredBlockBranch); |
2196 | Value *NewBO = Builder.CreateBinOp(Opc: BO.getOpcode(), |
2197 | LHS: Phi0->getIncomingValueForBlock(BB: OtherBB), |
2198 | RHS: Phi1->getIncomingValueForBlock(BB: OtherBB)); |
2199 | if (auto *NotFoldedNewBO = dyn_cast<BinaryOperator>(Val: NewBO)) |
2200 | NotFoldedNewBO->copyIRFlags(V: &BO); |
2201 | |
2202 | // Replace the binop with a phi of the new values. The old phis are dead. |
2203 | PHINode *NewPhi = PHINode::Create(Ty: BO.getType(), NumReservedValues: 2); |
2204 | NewPhi->addIncoming(V: NewBO, BB: OtherBB); |
2205 | NewPhi->addIncoming(V: NewC, BB: ConstBB); |
2206 | return NewPhi; |
2207 | } |
2208 | |
2209 | Instruction *InstCombinerImpl::foldBinOpIntoSelectOrPhi(BinaryOperator &I) { |
2210 | if (!isa<Constant>(Val: I.getOperand(i_nocapture: 1))) |
2211 | return nullptr; |
2212 | |
2213 | if (auto *Sel = dyn_cast<SelectInst>(Val: I.getOperand(i_nocapture: 0))) { |
2214 | if (Instruction *NewSel = FoldOpIntoSelect(Op&: I, SI: Sel)) |
2215 | return NewSel; |
2216 | } else if (auto *PN = dyn_cast<PHINode>(Val: I.getOperand(i_nocapture: 0))) { |
2217 | if (Instruction *NewPhi = foldOpIntoPhi(I, PN)) |
2218 | return NewPhi; |
2219 | } |
2220 | return nullptr; |
2221 | } |
2222 | |
2223 | static bool shouldMergeGEPs(GEPOperator &GEP, GEPOperator &Src) { |
2224 | // If this GEP has only 0 indices, it is the same pointer as |
2225 | // Src. If Src is not a trivial GEP too, don't combine |
2226 | // the indices. |
2227 | if (GEP.hasAllZeroIndices() && !Src.hasAllZeroIndices() && |
2228 | !Src.hasOneUse()) |
2229 | return false; |
2230 | return true; |
2231 | } |
2232 | |
2233 | /// Find a constant NewC that has property: |
2234 | /// shuffle(NewC, ShMask) = C |
2235 | /// Returns nullptr if such a constant does not exist e.g. ShMask=<0,0> C=<1,2> |
2236 | /// |
2237 | /// A 1-to-1 mapping is not required. Example: |
2238 | /// ShMask = <1,1,2,2> and C = <5,5,6,6> --> NewC = <poison,5,6,poison> |
2239 | Constant *InstCombinerImpl::unshuffleConstant(ArrayRef<int> ShMask, Constant *C, |
2240 | VectorType *NewCTy) { |
2241 | if (isa<ScalableVectorType>(Val: NewCTy)) { |
2242 | Constant *Splat = C->getSplatValue(); |
2243 | if (!Splat) |
2244 | return nullptr; |
2245 | return ConstantVector::getSplat(EC: NewCTy->getElementCount(), Elt: Splat); |
2246 | } |
2247 | |
2248 | if (cast<FixedVectorType>(Val: NewCTy)->getNumElements() > |
2249 | cast<FixedVectorType>(Val: C->getType())->getNumElements()) |
2250 | return nullptr; |
2251 | |
2252 | unsigned NewCNumElts = cast<FixedVectorType>(Val: NewCTy)->getNumElements(); |
2253 | PoisonValue *PoisonScalar = PoisonValue::get(T: C->getType()->getScalarType()); |
2254 | SmallVector<Constant *, 16> NewVecC(NewCNumElts, PoisonScalar); |
2255 | unsigned NumElts = cast<FixedVectorType>(Val: C->getType())->getNumElements(); |
2256 | for (unsigned I = 0; I < NumElts; ++I) { |
2257 | Constant *CElt = C->getAggregateElement(Elt: I); |
2258 | if (ShMask[I] >= 0) { |
2259 | assert(ShMask[I] < (int)NumElts && "Not expecting narrowing shuffle" ); |
2260 | Constant *NewCElt = NewVecC[ShMask[I]]; |
2261 | // Bail out if: |
2262 | // 1. The constant vector contains a constant expression. |
2263 | // 2. The shuffle needs an element of the constant vector that can't |
2264 | // be mapped to a new constant vector. |
2265 | // 3. This is a widening shuffle that copies elements of V1 into the |
2266 | // extended elements (extending with poison is allowed). |
2267 | if (!CElt || (!isa<PoisonValue>(Val: NewCElt) && NewCElt != CElt) || |
2268 | I >= NewCNumElts) |
2269 | return nullptr; |
2270 | NewVecC[ShMask[I]] = CElt; |
2271 | } |
2272 | } |
2273 | return ConstantVector::get(V: NewVecC); |
2274 | } |
2275 | |
2276 | Instruction *InstCombinerImpl::foldVectorBinop(BinaryOperator &Inst) { |
2277 | if (!isa<VectorType>(Val: Inst.getType())) |
2278 | return nullptr; |
2279 | |
2280 | BinaryOperator::BinaryOps Opcode = Inst.getOpcode(); |
2281 | Value *LHS = Inst.getOperand(i_nocapture: 0), *RHS = Inst.getOperand(i_nocapture: 1); |
2282 | assert(cast<VectorType>(LHS->getType())->getElementCount() == |
2283 | cast<VectorType>(Inst.getType())->getElementCount()); |
2284 | assert(cast<VectorType>(RHS->getType())->getElementCount() == |
2285 | cast<VectorType>(Inst.getType())->getElementCount()); |
2286 | |
2287 | // If both operands of the binop are vector concatenations, then perform the |
2288 | // narrow binop on each pair of the source operands followed by concatenation |
2289 | // of the results. |
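// For example, with two <2 x i32> halves concatenated into <4 x i32> (one-use
// shuffles assumed):
//   LHS = shuffle L0, L1, <0, 1, 2, 3>
//   RHS = shuffle R0, R1, <0, 1, 2, 3>
//   LHS op RHS --> shuffle (L0 op R0), (L1 op R1), <0, 1, 2, 3>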
2290 | Value *L0, *L1, *R0, *R1; |
2291 | ArrayRef<int> Mask; |
2292 | if (match(V: LHS, P: m_Shuffle(v1: m_Value(V&: L0), v2: m_Value(V&: L1), mask: m_Mask(Mask))) && |
2293 | match(V: RHS, P: m_Shuffle(v1: m_Value(V&: R0), v2: m_Value(V&: R1), mask: m_SpecificMask(Mask))) && |
2294 | LHS->hasOneUse() && RHS->hasOneUse() && |
2295 | cast<ShuffleVectorInst>(Val: LHS)->isConcat() && |
2296 | cast<ShuffleVectorInst>(Val: RHS)->isConcat()) { |
2297 | // This transform does not have the speculative execution constraint as |
2298 | // below because the shuffle is a concatenation. The new binops are |
2299 | // operating on exactly the same elements as the existing binop. |
2300 | // TODO: We could ease the mask requirement to allow different undef lanes, |
2301 | // but that requires an analysis of the binop-with-undef output value. |
2302 | Value *NewBO0 = Builder.CreateBinOp(Opc: Opcode, LHS: L0, RHS: R0); |
2303 | if (auto *BO = dyn_cast<BinaryOperator>(Val: NewBO0)) |
2304 | BO->copyIRFlags(V: &Inst); |
2305 | Value *NewBO1 = Builder.CreateBinOp(Opc: Opcode, LHS: L1, RHS: R1); |
2306 | if (auto *BO = dyn_cast<BinaryOperator>(Val: NewBO1)) |
2307 | BO->copyIRFlags(V: &Inst); |
2308 | return new ShuffleVectorInst(NewBO0, NewBO1, Mask); |
2309 | } |
2310 | |
2311 | auto createBinOpReverse = [&](Value *X, Value *Y) { |
2312 | Value *V = Builder.CreateBinOp(Opc: Opcode, LHS: X, RHS: Y, Name: Inst.getName()); |
2313 | if (auto *BO = dyn_cast<BinaryOperator>(Val: V)) |
2314 | BO->copyIRFlags(V: &Inst); |
2315 | Module *M = Inst.getModule(); |
2316 | Function *F = Intrinsic::getOrInsertDeclaration( |
2317 | M, id: Intrinsic::vector_reverse, Tys: V->getType()); |
2318 | return CallInst::Create(Func: F, Args: V); |
2319 | }; |
2320 | |
2321 | // NOTE: Reverse shuffles don't require the speculative execution protection |
2322 | // below because they don't affect which lanes take part in the computation. |
2323 | |
2324 | Value *V1, *V2; |
2325 | if (match(V: LHS, P: m_VecReverse(Op0: m_Value(V&: V1)))) { |
2326 | // Op(rev(V1), rev(V2)) -> rev(Op(V1, V2)) |
2327 | if (match(V: RHS, P: m_VecReverse(Op0: m_Value(V&: V2))) && |
2328 | (LHS->hasOneUse() || RHS->hasOneUse() || |
2329 | (LHS == RHS && LHS->hasNUses(N: 2)))) |
2330 | return createBinOpReverse(V1, V2); |
2331 | |
2332 | // Op(rev(V1), RHSSplat)) -> rev(Op(V1, RHSSplat)) |
2333 | if (LHS->hasOneUse() && isSplatValue(V: RHS)) |
2334 | return createBinOpReverse(V1, RHS); |
2335 | } |
2336 | // Op(LHSSplat, rev(V2)) -> rev(Op(LHSSplat, V2)) |
2337 | else if (isSplatValue(V: LHS) && match(V: RHS, P: m_OneUse(SubPattern: m_VecReverse(Op0: m_Value(V&: V2))))) |
2338 | return createBinOpReverse(LHS, V2); |
2339 | |
2340 | auto createBinOpVPReverse = [&](Value *X, Value *Y, Value *EVL) { |
2341 | Value *V = Builder.CreateBinOp(Opc: Opcode, LHS: X, RHS: Y, Name: Inst.getName()); |
2342 | if (auto *BO = dyn_cast<BinaryOperator>(Val: V)) |
2343 | BO->copyIRFlags(V: &Inst); |
2344 | |
2345 | ElementCount EC = cast<VectorType>(Val: V->getType())->getElementCount(); |
2346 | Value *AllTrueMask = Builder.CreateVectorSplat(EC, V: Builder.getTrue()); |
2347 | Module *M = Inst.getModule(); |
2348 | Function *F = Intrinsic::getOrInsertDeclaration( |
2349 | M, id: Intrinsic::experimental_vp_reverse, Tys: V->getType()); |
2350 | return CallInst::Create(Func: F, Args: {V, AllTrueMask, EVL}); |
2351 | }; |
2352 | |
2353 | Value *EVL; |
2354 | if (match(V: LHS, P: m_Intrinsic<Intrinsic::experimental_vp_reverse>( |
2355 | Op0: m_Value(V&: V1), Op1: m_AllOnes(), Op2: m_Value(V&: EVL)))) { |
2356 | // Op(rev(V1), rev(V2)) -> rev(Op(V1, V2)) |
2357 | if (match(V: RHS, P: m_Intrinsic<Intrinsic::experimental_vp_reverse>( |
2358 | Op0: m_Value(V&: V2), Op1: m_AllOnes(), Op2: m_Specific(V: EVL))) && |
2359 | (LHS->hasOneUse() || RHS->hasOneUse() || |
2360 | (LHS == RHS && LHS->hasNUses(N: 2)))) |
2361 | return createBinOpVPReverse(V1, V2, EVL); |
2362 | |
2363 | // Op(rev(V1), RHSSplat)) -> rev(Op(V1, RHSSplat)) |
2364 | if (LHS->hasOneUse() && isSplatValue(V: RHS)) |
2365 | return createBinOpVPReverse(V1, RHS, EVL); |
2366 | } |
2367 | // Op(LHSSplat, rev(V2)) -> rev(Op(LHSSplat, V2)) |
2368 | else if (isSplatValue(V: LHS) && |
2369 | match(V: RHS, P: m_Intrinsic<Intrinsic::experimental_vp_reverse>( |
2370 | Op0: m_Value(V&: V2), Op1: m_AllOnes(), Op2: m_Value(V&: EVL)))) |
2371 | return createBinOpVPReverse(LHS, V2, EVL); |
2372 | |
2373 | // It may not be safe to reorder shuffles and things like div, urem, etc. |
2374 | // because we may trap when executing those ops on unknown vector elements. |
2375 | // See PR20059. |
2376 | if (!isSafeToSpeculativelyExecuteWithVariableReplaced(I: &Inst)) |
2377 | return nullptr; |
2378 | |
2379 | auto createBinOpShuffle = [&](Value *X, Value *Y, ArrayRef<int> M) { |
2380 | Value *XY = Builder.CreateBinOp(Opc: Opcode, LHS: X, RHS: Y); |
2381 | if (auto *BO = dyn_cast<BinaryOperator>(Val: XY)) |
2382 | BO->copyIRFlags(V: &Inst); |
2383 | return new ShuffleVectorInst(XY, M); |
2384 | }; |
2385 | |
2386 | // If both arguments of the binary operation are shuffles that use the same |
2387 | // mask and shuffle within a single vector, move the shuffle after the binop. |
2388 | if (match(V: LHS, P: m_Shuffle(v1: m_Value(V&: V1), v2: m_Poison(), mask: m_Mask(Mask))) && |
2389 | match(V: RHS, P: m_Shuffle(v1: m_Value(V&: V2), v2: m_Poison(), mask: m_SpecificMask(Mask))) && |
2390 | V1->getType() == V2->getType() && |
2391 | (LHS->hasOneUse() || RHS->hasOneUse() || LHS == RHS)) { |
2392 | // Op(shuffle(V1, Mask), shuffle(V2, Mask)) -> shuffle(Op(V1, V2), Mask) |
2393 | return createBinOpShuffle(V1, V2, Mask); |
2394 | } |
2395 | |
2396 | // If both arguments of a commutative binop are select-shuffles that use the |
2397 | // same mask with commuted operands, the shuffles are unnecessary. |
2398 | if (Inst.isCommutative() && |
2399 | match(V: LHS, P: m_Shuffle(v1: m_Value(V&: V1), v2: m_Value(V&: V2), mask: m_Mask(Mask))) && |
2400 | match(V: RHS, |
2401 | P: m_Shuffle(v1: m_Specific(V: V2), v2: m_Specific(V: V1), mask: m_SpecificMask(Mask)))) { |
2402 | auto *LShuf = cast<ShuffleVectorInst>(Val: LHS); |
2403 | auto *RShuf = cast<ShuffleVectorInst>(Val: RHS); |
2404 | // TODO: Allow shuffles that contain undefs in the mask? |
2405 | // That is legal, but it reduces undef knowledge. |
2406 | // TODO: Allow arbitrary shuffles by shuffling after binop? |
2407 | // That might be legal, but we have to deal with poison. |
2408 | if (LShuf->isSelect() && |
2409 | !is_contained(Range: LShuf->getShuffleMask(), Element: PoisonMaskElem) && |
2410 | RShuf->isSelect() && |
2411 | !is_contained(Range: RShuf->getShuffleMask(), Element: PoisonMaskElem)) { |
2412 | // Example: |
2413 | // LHS = shuffle V1, V2, <0, 5, 6, 3> |
2414 | // RHS = shuffle V2, V1, <0, 5, 6, 3> |
2415 | // LHS + RHS --> (V10+V20, V21+V11, V22+V12, V13+V23) --> V1 + V2 |
2416 | Instruction *NewBO = BinaryOperator::Create(Op: Opcode, S1: V1, S2: V2); |
2417 | NewBO->copyIRFlags(V: &Inst); |
2418 | return NewBO; |
2419 | } |
2420 | } |
2421 | |
2422 | // If one argument is a shuffle within one vector and the other is a constant, |
2423 | // try moving the shuffle after the binary operation. This canonicalization |
2424 | // intends to move shuffles closer to other shuffles and binops closer to |
2425 | // other binops, so they can be folded. It may also enable demanded elements |
2426 | // transforms. |
2427 | Constant *C; |
2428 | if (match(V: &Inst, P: m_c_BinOp(L: m_OneUse(SubPattern: m_Shuffle(v1: m_Value(V&: V1), v2: m_Poison(), |
2429 | mask: m_Mask(Mask))), |
2430 | R: m_ImmConstant(C)))) { |
2431 | assert(Inst.getType()->getScalarType() == V1->getType()->getScalarType() && |
2432 | "Shuffle should not change scalar type" ); |
2433 | |
2434 | bool ConstOp1 = isa<Constant>(Val: RHS); |
2435 | if (Constant *NewC = |
2436 | unshuffleConstant(ShMask: Mask, C, NewCTy: cast<VectorType>(Val: V1->getType()))) { |
2437 | // For fixed vectors, lanes of NewC not used by the shuffle will be poison |
2438 | // which will cause UB for div/rem. Mask them with a safe constant. |
2439 | if (isa<FixedVectorType>(Val: V1->getType()) && Inst.isIntDivRem()) |
2440 | NewC = getSafeVectorConstantForBinop(Opcode, In: NewC, IsRHSConstant: ConstOp1); |
2441 | |
2442 | // Op(shuffle(V1, Mask), C) -> shuffle(Op(V1, NewC), Mask) |
2443 | // Op(C, shuffle(V1, Mask)) -> shuffle(Op(NewC, V1), Mask) |
2444 | Value *NewLHS = ConstOp1 ? V1 : NewC; |
2445 | Value *NewRHS = ConstOp1 ? NewC : V1; |
2446 | return createBinOpShuffle(NewLHS, NewRHS, Mask); |
2447 | } |
2448 | } |
2449 | |
2450 | // Try to reassociate to sink a splat shuffle after a binary operation. |
2451 | if (Inst.isAssociative() && Inst.isCommutative()) { |
2452 | // Canonicalize shuffle operand as LHS. |
2453 | if (isa<ShuffleVectorInst>(Val: RHS)) |
2454 | std::swap(a&: LHS, b&: RHS); |
2455 | |
2456 | Value *X; |
2457 | ArrayRef<int> MaskC; |
2458 | int SplatIndex; |
2459 | Value *Y, *OtherOp; |
2460 | if (!match(V: LHS, |
2461 | P: m_OneUse(SubPattern: m_Shuffle(v1: m_Value(V&: X), v2: m_Undef(), mask: m_Mask(MaskC)))) || |
2462 | !match(Mask: MaskC, P: m_SplatOrPoisonMask(SplatIndex)) || |
2463 | X->getType() != Inst.getType() || |
2464 | !match(V: RHS, P: m_OneUse(SubPattern: m_BinOp(Opcode, L: m_Value(V&: Y), R: m_Value(V&: OtherOp))))) |
2465 | return nullptr; |
2466 | |
2467 | // FIXME: This may not be safe if the analysis allows undef elements. By |
2468 | // moving 'Y' before the splat shuffle, we are implicitly assuming |
2469 | // that it is not undef/poison at the splat index. |
2470 | if (isSplatValue(V: OtherOp, Index: SplatIndex)) { |
2471 | std::swap(a&: Y, b&: OtherOp); |
2472 | } else if (!isSplatValue(V: Y, Index: SplatIndex)) { |
2473 | return nullptr; |
2474 | } |
2475 | |
2476 | // X and Y are splatted values, so perform the binary operation on those |
2477 | // values followed by a splat followed by the 2nd binary operation: |
2478 | // bo (splat X), (bo Y, OtherOp) --> bo (splat (bo X, Y)), OtherOp |
2479 | Value *NewBO = Builder.CreateBinOp(Opc: Opcode, LHS: X, RHS: Y); |
2480 | SmallVector<int, 8> NewMask(MaskC.size(), SplatIndex); |
2481 | Value *NewSplat = Builder.CreateShuffleVector(V: NewBO, Mask: NewMask); |
2482 | Instruction *R = BinaryOperator::Create(Op: Opcode, S1: NewSplat, S2: OtherOp); |
2483 | |
2484 | // Intersect FMF on both new binops. Other (poison-generating) flags are |
2485 | // dropped to be safe. |
2486 | if (isa<FPMathOperator>(Val: R)) { |
2487 | R->copyFastMathFlags(I: &Inst); |
2488 | R->andIRFlags(V: RHS); |
2489 | } |
2490 | if (auto *NewInstBO = dyn_cast<BinaryOperator>(Val: NewBO)) |
2491 | NewInstBO->copyIRFlags(V: R); |
2492 | return R; |
2493 | } |
2494 | |
2495 | return nullptr; |
2496 | } |
2497 | |
/// Try to narrow the width of a binop if at least 1 operand is an extend of
/// a value. This requires a potentially expensive known bits check to make
2500 | /// sure the narrow op does not overflow. |
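///
/// A minimal illustrative sketch (assuming known bits prove the narrow add
/// cannot overflow unsigned):
///   add (zext i8 %x to i32), (zext i8 %y to i32)
///     --> zext (add nuw i8 %x, %y) to i32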
2501 | Instruction *InstCombinerImpl::narrowMathIfNoOverflow(BinaryOperator &BO) { |
2502 | // We need at least one extended operand. |
2503 | Value *Op0 = BO.getOperand(i_nocapture: 0), *Op1 = BO.getOperand(i_nocapture: 1); |
2504 | |
2505 | // If this is a sub, we swap the operands since we always want an extension |
2506 | // on the RHS. The LHS can be an extension or a constant. |
2507 | if (BO.getOpcode() == Instruction::Sub) |
2508 | std::swap(a&: Op0, b&: Op1); |
2509 | |
2510 | Value *X; |
2511 | bool IsSext = match(V: Op0, P: m_SExt(Op: m_Value(V&: X))); |
2512 | if (!IsSext && !match(V: Op0, P: m_ZExt(Op: m_Value(V&: X)))) |
2513 | return nullptr; |
2514 | |
2515 | // If both operands are the same extension from the same source type and we |
2516 | // can eliminate at least one (hasOneUse), this might work. |
2517 | CastInst::CastOps CastOpc = IsSext ? Instruction::SExt : Instruction::ZExt; |
2518 | Value *Y; |
2519 | if (!(match(V: Op1, P: m_ZExtOrSExt(Op: m_Value(V&: Y))) && X->getType() == Y->getType() && |
2520 | cast<Operator>(Val: Op1)->getOpcode() == CastOpc && |
2521 | (Op0->hasOneUse() || Op1->hasOneUse()))) { |
2522 | // If that did not match, see if we have a suitable constant operand. |
2523 | // Truncating and extending must produce the same constant. |
2524 | Constant *WideC; |
2525 | if (!Op0->hasOneUse() || !match(V: Op1, P: m_Constant(C&: WideC))) |
2526 | return nullptr; |
2527 | Constant *NarrowC = getLosslessTrunc(C: WideC, TruncTy: X->getType(), ExtOp: CastOpc); |
2528 | if (!NarrowC) |
2529 | return nullptr; |
2530 | Y = NarrowC; |
2531 | } |
2532 | |
2533 | // Swap back now that we found our operands. |
2534 | if (BO.getOpcode() == Instruction::Sub) |
2535 | std::swap(a&: X, b&: Y); |
2536 | |
2537 | // Both operands have narrow versions. Last step: the math must not overflow |
2538 | // in the narrow width. |
2539 | if (!willNotOverflow(Opcode: BO.getOpcode(), LHS: X, RHS: Y, CxtI: BO, IsSigned: IsSext)) |
2540 | return nullptr; |
2541 | |
2542 | // bo (ext X), (ext Y) --> ext (bo X, Y) |
2543 | // bo (ext X), C --> ext (bo X, C') |
2544 | Value *NarrowBO = Builder.CreateBinOp(Opc: BO.getOpcode(), LHS: X, RHS: Y, Name: "narrow" ); |
2545 | if (auto *NewBinOp = dyn_cast<BinaryOperator>(Val: NarrowBO)) { |
2546 | if (IsSext) |
2547 | NewBinOp->setHasNoSignedWrap(); |
2548 | else |
2549 | NewBinOp->setHasNoUnsignedWrap(); |
2550 | } |
2551 | return CastInst::Create(CastOpc, S: NarrowBO, Ty: BO.getType()); |
2552 | } |
2553 | |
2554 | /// Determine nowrap flags for (gep (gep p, x), y) to (gep p, (x + y)) |
2555 | /// transform. |
2556 | static GEPNoWrapFlags getMergedGEPNoWrapFlags(GEPOperator &GEP1, |
2557 | GEPOperator &GEP2) { |
2558 | return GEP1.getNoWrapFlags().intersectForOffsetAdd(Other: GEP2.getNoWrapFlags()); |
2559 | } |
2560 | |
2561 | /// Thread a GEP operation with constant indices through the constant true/false |
2562 | /// arms of a select. |
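///
/// For example (illustrative; @A and @B stand for arbitrary constant pointers):
///   %p = select i1 %c, ptr @A, ptr @B
///   %g = getelementptr inbounds i8, ptr %p, i64 4
/// -->
///   %g = select i1 %c, ptr getelementptr inbounds (i8, ptr @A, i64 4),
///                      ptr getelementptr inbounds (i8, ptr @B, i64 4)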
2563 | static Instruction *foldSelectGEP(GetElementPtrInst &GEP, |
2564 | InstCombiner::BuilderTy &Builder) { |
2565 | if (!GEP.hasAllConstantIndices()) |
2566 | return nullptr; |
2567 | |
2568 | Instruction *Sel; |
2569 | Value *Cond; |
2570 | Constant *TrueC, *FalseC; |
2571 | if (!match(V: GEP.getPointerOperand(), P: m_Instruction(I&: Sel)) || |
2572 | !match(V: Sel, |
2573 | P: m_Select(C: m_Value(V&: Cond), L: m_Constant(C&: TrueC), R: m_Constant(C&: FalseC)))) |
2574 | return nullptr; |
2575 | |
2576 | // gep (select Cond, TrueC, FalseC), IndexC --> select Cond, TrueC', FalseC' |
2577 | // Propagate 'inbounds' and metadata from existing instructions. |
2578 | // Note: using IRBuilder to create the constants for efficiency. |
2579 | SmallVector<Value *, 4> IndexC(GEP.indices()); |
2580 | GEPNoWrapFlags NW = GEP.getNoWrapFlags(); |
2581 | Type *Ty = GEP.getSourceElementType(); |
2582 | Value *NewTrueC = Builder.CreateGEP(Ty, Ptr: TrueC, IdxList: IndexC, Name: "" , NW); |
2583 | Value *NewFalseC = Builder.CreateGEP(Ty, Ptr: FalseC, IdxList: IndexC, Name: "" , NW); |
2584 | return SelectInst::Create(C: Cond, S1: NewTrueC, S2: NewFalseC, NameStr: "" , InsertBefore: nullptr, MDFrom: Sel); |
2585 | } |
2586 | |
2587 | // Canonicalization: |
2588 | // gep T, (gep i8, base, C1), (Index + C2) into |
2589 | // gep T, (gep i8, base, C1 + C2 * sizeof(T)), Index |
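//
// For example (illustrative), with T = i32, C1 = 8, C2 = 2, and assuming the
// intermediate values have no other uses:
//   %p = getelementptr i8, ptr %base, i64 8
//   %i = add i64 %idx, 2
//   %g = getelementptr i32, ptr %p, i64 %i
// -->
//   %q = getelementptr i8, ptr %base, i64 16   ; 8 + 2 * 4
//   %g = getelementptr i32, ptr %q, i64 %idx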
2590 | static Instruction *canonicalizeGEPOfConstGEPI8(GetElementPtrInst &GEP, |
2591 | GEPOperator *Src, |
2592 | InstCombinerImpl &IC) { |
2593 | if (GEP.getNumIndices() != 1) |
2594 | return nullptr; |
2595 | auto &DL = IC.getDataLayout(); |
2596 | Value *Base; |
2597 | const APInt *C1; |
2598 | if (!match(V: Src, P: m_PtrAdd(PointerOp: m_Value(V&: Base), OffsetOp: m_APInt(Res&: C1)))) |
2599 | return nullptr; |
2600 | Value *VarIndex; |
2601 | const APInt *C2; |
2602 | Type *PtrTy = Src->getType()->getScalarType(); |
2603 | unsigned IndexSizeInBits = DL.getIndexTypeSizeInBits(Ty: PtrTy); |
2604 | if (!match(V: GEP.getOperand(i_nocapture: 1), P: m_AddLike(L: m_Value(V&: VarIndex), R: m_APInt(Res&: C2)))) |
2605 | return nullptr; |
2606 | if (C1->getBitWidth() != IndexSizeInBits || |
2607 | C2->getBitWidth() != IndexSizeInBits) |
2608 | return nullptr; |
2609 | Type *BaseType = GEP.getSourceElementType(); |
2610 | if (isa<ScalableVectorType>(Val: BaseType)) |
2611 | return nullptr; |
2612 | APInt TypeSize(IndexSizeInBits, DL.getTypeAllocSize(Ty: BaseType)); |
2613 | APInt NewOffset = TypeSize * *C2 + *C1; |
2614 | if (NewOffset.isZero() || |
2615 | (Src->hasOneUse() && GEP.getOperand(i_nocapture: 1)->hasOneUse())) { |
2616 | Value *GEPConst = |
2617 | IC.Builder.CreatePtrAdd(Ptr: Base, Offset: IC.Builder.getInt(AI: NewOffset)); |
2618 | return GetElementPtrInst::Create(PointeeType: BaseType, Ptr: GEPConst, IdxList: VarIndex); |
2619 | } |
2620 | |
2621 | return nullptr; |
2622 | } |
2623 | |
2624 | Instruction *InstCombinerImpl::visitGEPOfGEP(GetElementPtrInst &GEP, |
2625 | GEPOperator *Src) { |
2626 | // Combine Indices - If the source pointer to this getelementptr instruction |
2627 | // is a getelementptr instruction with matching element type, combine the |
2628 | // indices of the two getelementptr instructions into a single instruction. |
2629 | if (!shouldMergeGEPs(GEP&: *cast<GEPOperator>(Val: &GEP), Src&: *Src)) |
2630 | return nullptr; |
2631 | |
2632 | if (auto *I = canonicalizeGEPOfConstGEPI8(GEP, Src, IC&: *this)) |
2633 | return I; |
2634 | |
2635 | // For constant GEPs, use a more general offset-based folding approach. |
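// For example (illustrative):
//   %s = getelementptr [4 x i32], ptr %p, i64 %i, i64 1
//   %g = getelementptr i8, ptr %s, i64 8
// -->
//   %g = getelementptr [4 x i32], ptr %p, i64 %i, i64 3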
2636 | Type *PtrTy = Src->getType()->getScalarType(); |
2637 | if (GEP.hasAllConstantIndices() && |
2638 | (Src->hasOneUse() || Src->hasAllConstantIndices())) { |
2639 | // Split Src into a variable part and a constant suffix. |
2640 | gep_type_iterator GTI = gep_type_begin(GEP: *Src); |
2641 | Type *BaseType = GTI.getIndexedType(); |
2642 | bool IsFirstType = true; |
2643 | unsigned NumVarIndices = 0; |
2644 | for (auto Pair : enumerate(First: Src->indices())) { |
2645 | if (!isa<ConstantInt>(Val: Pair.value())) { |
2646 | BaseType = GTI.getIndexedType(); |
2647 | IsFirstType = false; |
2648 | NumVarIndices = Pair.index() + 1; |
2649 | } |
2650 | ++GTI; |
2651 | } |
2652 | |
2653 | // Determine the offset for the constant suffix of Src. |
2654 | APInt Offset(DL.getIndexTypeSizeInBits(Ty: PtrTy), 0); |
2655 | if (NumVarIndices != Src->getNumIndices()) { |
2656 | // FIXME: getIndexedOffsetInType() does not handle scalable vectors. |
2657 | if (BaseType->isScalableTy()) |
2658 | return nullptr; |
2659 | |
2660 | SmallVector<Value *> ConstantIndices; |
2661 | if (!IsFirstType) |
2662 | ConstantIndices.push_back( |
2663 | Elt: Constant::getNullValue(Ty: Type::getInt32Ty(C&: GEP.getContext()))); |
2664 | append_range(C&: ConstantIndices, R: drop_begin(RangeOrContainer: Src->indices(), N: NumVarIndices)); |
2665 | Offset += DL.getIndexedOffsetInType(ElemTy: BaseType, Indices: ConstantIndices); |
2666 | } |
2667 | |
2668 | // Add the offset for GEP (which is fully constant). |
2669 | if (!GEP.accumulateConstantOffset(DL, Offset)) |
2670 | return nullptr; |
2671 | |
2672 | // Convert the total offset back into indices. |
2673 | SmallVector<APInt> ConstIndices = |
2674 | DL.getGEPIndicesForOffset(ElemTy&: BaseType, Offset); |
2675 | if (!Offset.isZero() || (!IsFirstType && !ConstIndices[0].isZero())) |
2676 | return nullptr; |
2677 | |
2678 | GEPNoWrapFlags NW = getMergedGEPNoWrapFlags(GEP1&: *Src, GEP2&: *cast<GEPOperator>(Val: &GEP)); |
2679 | SmallVector<Value *> Indices( |
2680 | drop_end(RangeOrContainer: Src->indices(), N: Src->getNumIndices() - NumVarIndices)); |
2681 | for (const APInt &Idx : drop_begin(RangeOrContainer&: ConstIndices, N: !IsFirstType)) { |
2682 | Indices.push_back(Elt: ConstantInt::get(Context&: GEP.getContext(), V: Idx)); |
2683 | // Even if the total offset is inbounds, we may end up representing it |
2684 | // by first performing a larger negative offset, and then a smaller |
2685 | // positive one. The large negative offset might go out of bounds. Only |
2686 | // preserve inbounds if all signs are the same. |
2687 | if (Idx.isNonNegative() != ConstIndices[0].isNonNegative()) |
2688 | NW = NW.withoutNoUnsignedSignedWrap(); |
2689 | if (!Idx.isNonNegative()) |
2690 | NW = NW.withoutNoUnsignedWrap(); |
2691 | } |
2692 | |
2693 | return replaceInstUsesWith( |
2694 | I&: GEP, V: Builder.CreateGEP(Ty: Src->getSourceElementType(), Ptr: Src->getOperand(i_nocapture: 0), |
2695 | IdxList: Indices, Name: "" , NW)); |
2696 | } |
2697 | |
2698 | if (Src->getResultElementType() != GEP.getSourceElementType()) |
2699 | return nullptr; |
2700 | |
2701 | SmallVector<Value*, 8> Indices; |
2702 | |
2703 | // Find out whether the last index in the source GEP is a sequential idx. |
2704 | bool EndsWithSequential = false; |
2705 | for (gep_type_iterator I = gep_type_begin(GEP: *Src), E = gep_type_end(GEP: *Src); |
2706 | I != E; ++I) |
2707 | EndsWithSequential = I.isSequential(); |
2708 | |
2709 | // Can we combine the offsets of the two pointer arithmetic operations? |
2710 | if (EndsWithSequential) { |
2711 | // Replace: gep (gep %P, long B), long A, ... |
2712 | // With: T = long A+B; gep %P, T, ... |
2713 | Value *SO1 = Src->getOperand(i_nocapture: Src->getNumOperands()-1); |
2714 | Value *GO1 = GEP.getOperand(i_nocapture: 1); |
2715 | |
2716 | // If they aren't the same type, then the input hasn't been processed |
2717 | // by the loop above yet (which canonicalizes sequential index types to |
2718 | // intptr_t). Just avoid transforming this until the input has been |
2719 | // normalized. |
2720 | if (SO1->getType() != GO1->getType()) |
2721 | return nullptr; |
2722 | |
2723 | Value *Sum = |
2724 | simplifyAddInst(LHS: GO1, RHS: SO1, IsNSW: false, IsNUW: false, Q: SQ.getWithInstruction(I: &GEP)); |
2725 | // Only do the combine when the add simplifies away, so we are sure the |
2726 | // cost after the merge is never more than the cost before the merge. |
2727 | if (Sum == nullptr) |
2728 | return nullptr; |
2729 | |
2730 | Indices.append(in_start: Src->op_begin()+1, in_end: Src->op_end()-1); |
2731 | Indices.push_back(Elt: Sum); |
2732 | Indices.append(in_start: GEP.op_begin()+2, in_end: GEP.op_end()); |
2733 | } else if (isa<Constant>(Val: *GEP.idx_begin()) && |
2734 | cast<Constant>(Val&: *GEP.idx_begin())->isNullValue() && |
2735 | Src->getNumOperands() != 1) { |
2736 | // Otherwise we can do the fold if the first index of the GEP is zero. |
2737 | Indices.append(in_start: Src->op_begin()+1, in_end: Src->op_end()); |
2738 | Indices.append(in_start: GEP.idx_begin()+1, in_end: GEP.idx_end()); |
2739 | } |
2740 | |
2741 | if (!Indices.empty()) |
2742 | return replaceInstUsesWith( |
2743 | I&: GEP, V: Builder.CreateGEP( |
2744 | Ty: Src->getSourceElementType(), Ptr: Src->getOperand(i_nocapture: 0), IdxList: Indices, Name: "" , |
2745 | NW: getMergedGEPNoWrapFlags(GEP1&: *Src, GEP2&: *cast<GEPOperator>(Val: &GEP)))); |
2746 | |
2747 | return nullptr; |
2748 | } |
2749 | |
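/// Return a value equivalent to ~V that can be materialized "for free", or
/// nullptr if none is known. When \p Builder is null, no instructions are
/// created and a non-null sentinel is returned instead; \p DoesConsume is set
/// when the returned value reuses an existing 'not' of V.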
2750 | Value *InstCombiner::getFreelyInvertedImpl(Value *V, bool WillInvertAllUses, |
2751 | BuilderTy *Builder, |
2752 | bool &DoesConsume, unsigned Depth) { |
2753 | static Value *const NonNull = reinterpret_cast<Value *>(uintptr_t(1)); |
2754 | // ~(~(X)) -> X. |
2755 | Value *A, *B; |
2756 | if (match(V, P: m_Not(V: m_Value(V&: A)))) { |
2757 | DoesConsume = true; |
2758 | return A; |
2759 | } |
2760 | |
2761 | Constant *C; |
2762 | // Constants can be considered to be not'ed values. |
2763 | if (match(V, P: m_ImmConstant(C))) |
2764 | return ConstantExpr::getNot(C); |
2765 | |
2766 | if (Depth++ >= MaxAnalysisRecursionDepth) |
2767 | return nullptr; |
2768 | |
2769 | // The rest of the cases require that we invert all uses so don't bother |
2770 | // doing the analysis if we know we can't use the result. |
2771 | if (!WillInvertAllUses) |
2772 | return nullptr; |
2773 | |
2774 | // Compares can be inverted if all of their uses are being modified to use |
2775 | // the ~V. |
2776 | if (auto *I = dyn_cast<CmpInst>(Val: V)) { |
2777 | if (Builder != nullptr) |
2778 | return Builder->CreateCmp(Pred: I->getInversePredicate(), LHS: I->getOperand(i_nocapture: 0), |
2779 | RHS: I->getOperand(i_nocapture: 1)); |
2780 | return NonNull; |
2781 | } |
2782 | |
2783 | // If `V` is of the form `A + B` then `-1 - V` can be folded into |
2784 | // `(-1 - B) - A` if we are willing to invert all of the uses. |
2785 | if (match(V, P: m_Add(L: m_Value(V&: A), R: m_Value(V&: B)))) { |
2786 | if (auto *BV = getFreelyInvertedImpl(V: B, WillInvertAllUses: B->hasOneUse(), Builder, |
2787 | DoesConsume, Depth)) |
2788 | return Builder ? Builder->CreateSub(LHS: BV, RHS: A) : NonNull; |
2789 | if (auto *AV = getFreelyInvertedImpl(V: A, WillInvertAllUses: A->hasOneUse(), Builder, |
2790 | DoesConsume, Depth)) |
2791 | return Builder ? Builder->CreateSub(LHS: AV, RHS: B) : NonNull; |
2792 | return nullptr; |
2793 | } |
2794 | |
2795 | // If `V` is of the form `A ^ ~B` then `~(A ^ ~B)` can be folded |
2796 | // into `A ^ B` if we are willing to invert all of the uses. |
2797 | if (match(V, P: m_Xor(L: m_Value(V&: A), R: m_Value(V&: B)))) { |
2798 | if (auto *BV = getFreelyInvertedImpl(V: B, WillInvertAllUses: B->hasOneUse(), Builder, |
2799 | DoesConsume, Depth)) |
2800 | return Builder ? Builder->CreateXor(LHS: A, RHS: BV) : NonNull; |
2801 | if (auto *AV = getFreelyInvertedImpl(V: A, WillInvertAllUses: A->hasOneUse(), Builder, |
2802 | DoesConsume, Depth)) |
2803 | return Builder ? Builder->CreateXor(LHS: AV, RHS: B) : NonNull; |
2804 | return nullptr; |
2805 | } |
2806 | |
2807 | // If `V` is of the form `B - A` then `-1 - V` can be folded into |
2808 | // `A + (-1 - B)` if we are willing to invert all of the uses. |
2809 | if (match(V, P: m_Sub(L: m_Value(V&: A), R: m_Value(V&: B)))) { |
2810 | if (auto *AV = getFreelyInvertedImpl(V: A, WillInvertAllUses: A->hasOneUse(), Builder, |
2811 | DoesConsume, Depth)) |
2812 | return Builder ? Builder->CreateAdd(LHS: AV, RHS: B) : NonNull; |
2813 | return nullptr; |
2814 | } |
2815 | |
2816 | // If `V` is of the form `(~A) s>> B` then `~((~A) s>> B)` can be folded |
2817 | // into `A s>> B` if we are willing to invert all of the uses. |
2818 | if (match(V, P: m_AShr(L: m_Value(V&: A), R: m_Value(V&: B)))) { |
2819 | if (auto *AV = getFreelyInvertedImpl(V: A, WillInvertAllUses: A->hasOneUse(), Builder, |
2820 | DoesConsume, Depth)) |
2821 | return Builder ? Builder->CreateAShr(LHS: AV, RHS: B) : NonNull; |
2822 | return nullptr; |
2823 | } |
2824 | |
2825 | Value *Cond; |
2826 | // LogicOps are special in that we canonicalize them at the cost of an |
2827 | // instruction. |
2828 | bool IsSelect = match(V, P: m_Select(C: m_Value(V&: Cond), L: m_Value(V&: A), R: m_Value(V&: B))) && |
2829 | !shouldAvoidAbsorbingNotIntoSelect(SI: *cast<SelectInst>(Val: V)); |
2830 | // Selects/min/max with invertible operands are freely invertible |
2831 | if (IsSelect || match(V, P: m_MaxOrMin(L: m_Value(V&: A), R: m_Value(V&: B)))) { |
2832 | bool LocalDoesConsume = DoesConsume; |
2833 | if (!getFreelyInvertedImpl(V: B, WillInvertAllUses: B->hasOneUse(), /*Builder*/ nullptr, |
2834 | DoesConsume&: LocalDoesConsume, Depth)) |
2835 | return nullptr; |
2836 | if (Value *NotA = getFreelyInvertedImpl(V: A, WillInvertAllUses: A->hasOneUse(), Builder, |
2837 | DoesConsume&: LocalDoesConsume, Depth)) { |
2838 | DoesConsume = LocalDoesConsume; |
2839 | if (Builder != nullptr) { |
2840 | Value *NotB = getFreelyInvertedImpl(V: B, WillInvertAllUses: B->hasOneUse(), Builder, |
2841 | DoesConsume, Depth); |
2842 | assert(NotB != nullptr && |
2843 | "Unable to build inverted value for known freely invertible op"); |
2844 | if (auto *II = dyn_cast<IntrinsicInst>(Val: V)) |
2845 | return Builder->CreateBinaryIntrinsic( |
2846 | ID: getInverseMinMaxIntrinsic(MinMaxID: II->getIntrinsicID()), LHS: NotA, RHS: NotB); |
2847 | return Builder->CreateSelect(C: Cond, True: NotA, False: NotB); |
2848 | } |
2849 | return NonNull; |
2850 | } |
2851 | } |
2852 | |
2853 | if (PHINode *PN = dyn_cast<PHINode>(Val: V)) { |
2854 | bool LocalDoesConsume = DoesConsume; |
2855 | SmallVector<std::pair<Value *, BasicBlock *>, 8> IncomingValues; |
2856 | for (Use &U : PN->operands()) { |
2857 | BasicBlock *IncomingBlock = PN->getIncomingBlock(U); |
2858 | Value *NewIncomingVal = getFreelyInvertedImpl( |
2859 | V: U.get(), /*WillInvertAllUses=*/false, |
2860 | /*Builder=*/nullptr, DoesConsume&: LocalDoesConsume, Depth: MaxAnalysisRecursionDepth - 1); |
2861 | if (NewIncomingVal == nullptr) |
2862 | return nullptr; |
2863 | // Make sure that we can safely erase the original PHI node. |
2864 | if (NewIncomingVal == V) |
2865 | return nullptr; |
2866 | if (Builder != nullptr) |
2867 | IncomingValues.emplace_back(Args&: NewIncomingVal, Args&: IncomingBlock); |
2868 | } |
2869 | |
2870 | DoesConsume = LocalDoesConsume; |
2871 | if (Builder != nullptr) { |
2872 | IRBuilderBase::InsertPointGuard Guard(*Builder); |
2873 | Builder->SetInsertPoint(PN); |
2874 | PHINode *NewPN = |
2875 | Builder->CreatePHI(Ty: PN->getType(), NumReservedValues: PN->getNumIncomingValues()); |
2876 | for (auto [Val, Pred] : IncomingValues) |
2877 | NewPN->addIncoming(V: Val, BB: Pred); |
2878 | return NewPN; |
2879 | } |
2880 | return NonNull; |
2881 | } |
2882 | |
2883 | if (match(V, P: m_SExtLike(Op: m_Value(V&: A)))) { |
2884 | if (auto *AV = getFreelyInvertedImpl(V: A, WillInvertAllUses: A->hasOneUse(), Builder, |
2885 | DoesConsume, Depth)) |
2886 | return Builder ? Builder->CreateSExt(V: AV, DestTy: V->getType()) : NonNull; |
2887 | return nullptr; |
2888 | } |
2889 | |
2890 | if (match(V, P: m_Trunc(Op: m_Value(V&: A)))) { |
2891 | if (auto *AV = getFreelyInvertedImpl(V: A, WillInvertAllUses: A->hasOneUse(), Builder, |
2892 | DoesConsume, Depth)) |
2893 | return Builder ? Builder->CreateTrunc(V: AV, DestTy: V->getType()) : NonNull; |
2894 | return nullptr; |
2895 | } |
2896 | |
2897 | // De Morgan's Laws: |
2898 | // (~(A | B)) -> (~A & ~B) |
2899 | // (~(A & B)) -> (~A | ~B) |
2900 | auto TryInvertAndOrUsingDeMorgan = [&](Instruction::BinaryOps Opcode, |
2901 | bool IsLogical, Value *A, |
2902 | Value *B) -> Value * { |
2903 | bool LocalDoesConsume = DoesConsume; |
2904 | if (!getFreelyInvertedImpl(V: B, WillInvertAllUses: B->hasOneUse(), /*Builder=*/nullptr, |
2905 | DoesConsume&: LocalDoesConsume, Depth)) |
2906 | return nullptr; |
2907 | if (auto *NotA = getFreelyInvertedImpl(V: A, WillInvertAllUses: A->hasOneUse(), Builder, |
2908 | DoesConsume&: LocalDoesConsume, Depth)) { |
2909 | auto *NotB = getFreelyInvertedImpl(V: B, WillInvertAllUses: B->hasOneUse(), Builder, |
2910 | DoesConsume&: LocalDoesConsume, Depth); |
2911 | DoesConsume = LocalDoesConsume; |
2912 | if (IsLogical) |
2913 | return Builder ? Builder->CreateLogicalOp(Opc: Opcode, Cond1: NotA, Cond2: NotB) : NonNull; |
2914 | return Builder ? Builder->CreateBinOp(Opc: Opcode, LHS: NotA, RHS: NotB) : NonNull; |
2915 | } |
2916 | |
2917 | return nullptr; |
2918 | }; |
2919 | |
2920 | if (match(V, P: m_Or(L: m_Value(V&: A), R: m_Value(V&: B)))) |
2921 | return TryInvertAndOrUsingDeMorgan(Instruction::And, /*IsLogical=*/false, A, |
2922 | B); |
2923 | |
2924 | if (match(V, P: m_And(L: m_Value(V&: A), R: m_Value(V&: B)))) |
2925 | return TryInvertAndOrUsingDeMorgan(Instruction::Or, /*IsLogical=*/false, A, |
2926 | B); |
2927 | |
2928 | if (match(V, P: m_LogicalOr(L: m_Value(V&: A), R: m_Value(V&: B)))) |
2929 | return TryInvertAndOrUsingDeMorgan(Instruction::And, /*IsLogical=*/true, A, |
2930 | B); |
2931 | |
2932 | if (match(V, P: m_LogicalAnd(L: m_Value(V&: A), R: m_Value(V&: B)))) |
2933 | return TryInvertAndOrUsingDeMorgan(Instruction::Or, /*IsLogical=*/true, A, |
2934 | B); |
2935 | |
2936 | return nullptr; |
2937 | } |
2938 | |
2939 | /// Return true if we should canonicalize the gep to an i8 ptradd. |
2940 | static bool shouldCanonicalizeGEPToPtrAdd(GetElementPtrInst &GEP) { |
2941 | Value *PtrOp = GEP.getOperand(i_nocapture: 0); |
2942 | Type *GEPEltType = GEP.getSourceElementType(); |
2943 | if (GEPEltType->isIntegerTy(Bitwidth: 8)) |
2944 | return false; |
2945 | |
2946 | // Canonicalize scalable GEPs to an explicit offset using the llvm.vscale |
2947 | // intrinsic. This has better support in BasicAA. |
2948 | if (GEPEltType->isScalableTy()) |
2949 | return true; |
2950 | |
2951 | // gep i32 p, mul(O, C) -> gep i8, p, mul(O, C*4) to fold the two multiplies |
2952 | // together. |
2953 | if (GEP.getNumIndices() == 1 && |
2954 | match(V: GEP.getOperand(i_nocapture: 1), |
2955 | P: m_OneUse(SubPattern: m_CombineOr(L: m_Mul(L: m_Value(), R: m_ConstantInt()), |
2956 | R: m_Shl(L: m_Value(), R: m_ConstantInt()))))) |
2957 | return true; |
2958 | |
2959 | // gep (gep %p, C1), %x, C2 is expanded so the two constants can |
2960 | // possibly be merged together. |
2961 | auto PtrOpGep = dyn_cast<GEPOperator>(Val: PtrOp); |
2962 | return PtrOpGep && PtrOpGep->hasAllConstantIndices() && |
2963 | any_of(Range: GEP.indices(), P: [](Value *V) { |
2964 | const APInt *C; |
2965 | return match(V, P: m_APInt(Res&: C)) && !C->isZero(); |
2966 | }); |
2967 | } |
2968 | |
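// If all incoming values of the PHI are GEPs that differ in at most one
// operand, clone one of them into the block containing GEP (creating a new
// PHI for the differing operand if necessary) so it can be merged with GEP.
// For example (illustrative):
//   %g1  = getelementptr i32, ptr %p, i64 %a    ; in %pred1
//   %g2  = getelementptr i32, ptr %p, i64 %b    ; in %pred2
//   %phi = phi ptr [ %g1, %pred1 ], [ %g2, %pred2 ]
// -->
//   %idx = phi i64 [ %a, %pred1 ], [ %b, %pred2 ]
//   %gep = getelementptr i32, ptr %p, i64 %idx  ; in the block of GEP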
2969 | static Instruction *foldGEPOfPhi(GetElementPtrInst &GEP, PHINode *PN, |
2970 | IRBuilderBase &Builder) { |
2971 | auto *Op1 = dyn_cast<GetElementPtrInst>(Val: PN->getOperand(i_nocapture: 0)); |
2972 | if (!Op1) |
2973 | return nullptr; |
2974 | |
2975 | // Don't fold a GEP into itself through a PHI node. This can only happen |
2976 | // through the back-edge of a loop. Folding a GEP into itself means that |
2977 | // the value of the previous iteration needs to be stored in the meantime, |
2978 | // thus requiring an additional register variable to be live, but not |
2979 | // actually achieving anything (the GEP still needs to be executed once per |
2980 | // loop iteration). |
2981 | if (Op1 == &GEP) |
2982 | return nullptr; |
2983 | GEPNoWrapFlags NW = Op1->getNoWrapFlags(); |
2984 | |
2985 | int DI = -1; |
2986 | |
2987 | for (auto I = PN->op_begin()+1, E = PN->op_end(); I !=E; ++I) { |
2988 | auto *Op2 = dyn_cast<GetElementPtrInst>(Val&: *I); |
2989 | if (!Op2 || Op1->getNumOperands() != Op2->getNumOperands() || |
2990 | Op1->getSourceElementType() != Op2->getSourceElementType()) |
2991 | return nullptr; |
2992 | |
2993 | // As for Op1 above, don't try to fold a GEP into itself. |
2994 | if (Op2 == &GEP) |
2995 | return nullptr; |
2996 | |
2997 | // Keep track of the type as we walk the GEP. |
2998 | Type *CurTy = nullptr; |
2999 | |
3000 | for (unsigned J = 0, F = Op1->getNumOperands(); J != F; ++J) { |
3001 | if (Op1->getOperand(i_nocapture: J)->getType() != Op2->getOperand(i_nocapture: J)->getType()) |
3002 | return nullptr; |
3003 | |
3004 | if (Op1->getOperand(i_nocapture: J) != Op2->getOperand(i_nocapture: J)) { |
3005 | if (DI == -1) { |
3006 | // We have not seen any differences in the GEPs feeding the |
3007 | // PHI yet, so we record this one if it is allowed to be a |
3008 | // variable. |
3009 | |
3010 | // The first two arguments can vary for any GEP; the rest have to be |
3011 | // static for struct slots. |
3012 | if (J > 1) { |
3013 | assert(CurTy && "No current type?" ); |
3014 | if (CurTy->isStructTy()) |
3015 | return nullptr; |
3016 | } |
3017 | |
3018 | DI = J; |
3019 | } else { |
3020 | // The GEPs differ in more than one input. While this could be |
3021 | // extended to support GEPs that vary by more than one variable, it |
3022 | // doesn't make sense: it greatly increases the complexity and |
3023 | // would result in an R+R+R addressing mode, which no backend |
3024 | // directly supports and which would need to be broken into several |
3025 | // simpler instructions anyway. |
3026 | return nullptr; |
3027 | } |
3028 | } |
3029 | |
3030 | // Sink down a layer of the type for the next iteration. |
3031 | if (J > 0) { |
3032 | if (J == 1) { |
3033 | CurTy = Op1->getSourceElementType(); |
3034 | } else { |
3035 | CurTy = |
3036 | GetElementPtrInst::getTypeAtIndex(Ty: CurTy, Idx: Op1->getOperand(i_nocapture: J)); |
3037 | } |
3038 | } |
3039 | } |
3040 | |
3041 | NW &= Op2->getNoWrapFlags(); |
3042 | } |
3043 | |
3044 | // If not all GEPs are identical we'll have to create a new PHI node. |
3045 | // Check that the old PHI node has only one use so that it will get |
3046 | // removed. |
3047 | if (DI != -1 && !PN->hasOneUse()) |
3048 | return nullptr; |
3049 | |
3050 | auto *NewGEP = cast<GetElementPtrInst>(Val: Op1->clone()); |
3051 | NewGEP->setNoWrapFlags(NW); |
3052 | |
3053 | if (DI == -1) { |
3054 | // All the GEPs feeding the PHI are identical. Clone one down into our |
3055 | // BB so that it can be merged with the current GEP. |
3056 | } else { |
3057 | // All the GEPs feeding the PHI differ at a single offset. Clone a GEP |
3058 | // into the current block so it can be merged, and create a new PHI to |
3059 | // set that index. |
3060 | PHINode *NewPN; |
3061 | { |
3062 | IRBuilderBase::InsertPointGuard Guard(Builder); |
3063 | Builder.SetInsertPoint(PN); |
3064 | NewPN = Builder.CreatePHI(Ty: Op1->getOperand(i_nocapture: DI)->getType(), |
3065 | NumReservedValues: PN->getNumOperands()); |
3066 | } |
3067 | |
3068 | for (auto &I : PN->operands()) |
3069 | NewPN->addIncoming(V: cast<GEPOperator>(Val&: I)->getOperand(i_nocapture: DI), |
3070 | BB: PN->getIncomingBlock(U: I)); |
3071 | |
3072 | NewGEP->setOperand(i_nocapture: DI, Val_nocapture: NewPN); |
3073 | } |
3074 | |
3075 | NewGEP->insertBefore(BB&: *GEP.getParent(), InsertPos: GEP.getParent()->getFirstInsertionPt()); |
3076 | return NewGEP; |
3077 | } |
3078 | |
3079 | Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) { |
3080 | Value *PtrOp = GEP.getOperand(i_nocapture: 0); |
3081 | SmallVector<Value *, 8> Indices(GEP.indices()); |
3082 | Type *GEPType = GEP.getType(); |
3083 | Type *GEPEltType = GEP.getSourceElementType(); |
3084 | if (Value *V = |
3085 | simplifyGEPInst(SrcTy: GEPEltType, Ptr: PtrOp, Indices, NW: GEP.getNoWrapFlags(), |
3086 | Q: SQ.getWithInstruction(I: &GEP))) |
3087 | return replaceInstUsesWith(I&: GEP, V); |
3088 | |
3089 | // For vector geps, use the generic demanded vector support. |
3090 | // Skip if the GEP return type is scalable, since the number of elements is |
3091 | // unknown at compile time. |
3092 | if (auto *GEPFVTy = dyn_cast<FixedVectorType>(Val: GEPType)) { |
3093 | auto VWidth = GEPFVTy->getNumElements(); |
3094 | APInt PoisonElts(VWidth, 0); |
3095 | APInt AllOnesEltMask(APInt::getAllOnes(numBits: VWidth)); |
3096 | if (Value *V = SimplifyDemandedVectorElts(V: &GEP, DemandedElts: AllOnesEltMask, |
3097 | PoisonElts)) { |
3098 | if (V != &GEP) |
3099 | return replaceInstUsesWith(I&: GEP, V); |
3100 | return &GEP; |
3101 | } |
3102 | } |
3103 | |
3104 | // Eliminate unneeded casts for indices, and replace indices which displace |
3105 | // by multiples of a zero size type with zero. |
3106 | bool MadeChange = false; |
3107 | |
3108 | // Index width may not be the same width as pointer width. |
3109 | // Data layout chooses the right type based on supported integer types. |
3110 | Type *NewScalarIndexTy = |
3111 | DL.getIndexType(PtrTy: GEP.getPointerOperandType()->getScalarType()); |
3112 | |
3113 | gep_type_iterator GTI = gep_type_begin(GEP); |
3114 | for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end(); I != E; |
3115 | ++I, ++GTI) { |
3116 | // Skip indices into struct types. |
3117 | if (GTI.isStruct()) |
3118 | continue; |
3119 | |
3120 | Type *IndexTy = (*I)->getType(); |
3121 | Type *NewIndexType = |
3122 | IndexTy->isVectorTy() |
3123 | ? VectorType::get(ElementType: NewScalarIndexTy, |
3124 | EC: cast<VectorType>(Val: IndexTy)->getElementCount()) |
3125 | : NewScalarIndexTy; |
3126 | |
3127 | // If the element type has zero size then any index over it is equivalent |
3128 | // to an index of zero, so replace it with zero if it is not zero already. |
3129 | Type *EltTy = GTI.getIndexedType(); |
3130 | if (EltTy->isSized() && DL.getTypeAllocSize(Ty: EltTy).isZero()) |
3131 | if (!isa<Constant>(Val: *I) || !match(V: I->get(), P: m_Zero())) { |
3132 | *I = Constant::getNullValue(Ty: NewIndexType); |
3133 | MadeChange = true; |
3134 | } |
3135 | |
3136 | if (IndexTy != NewIndexType) { |
3137 | // If we are using a wider index than needed for this platform, shrink |
3138 | // it to what we need. If narrower, sign-extend it to what we need. |
3139 | // This explicit cast can make subsequent optimizations more obvious. |
3140 | *I = Builder.CreateIntCast(V: *I, DestTy: NewIndexType, isSigned: true); |
3141 | MadeChange = true; |
3142 | } |
3143 | } |
3144 | if (MadeChange) |
3145 | return &GEP; |
3146 | |
3147 | // Canonicalize constant GEPs to i8 type. |
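// For example (illustrative):
//   getelementptr inbounds i32, ptr %p, i64 3
// -->
//   getelementptr inbounds i8, ptr %p, i64 12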
3148 | if (!GEPEltType->isIntegerTy(Bitwidth: 8) && GEP.hasAllConstantIndices()) { |
3149 | APInt Offset(DL.getIndexTypeSizeInBits(Ty: GEPType), 0); |
3150 | if (GEP.accumulateConstantOffset(DL, Offset)) |
3151 | return replaceInstUsesWith( |
3152 | I&: GEP, V: Builder.CreatePtrAdd(Ptr: PtrOp, Offset: Builder.getInt(AI: Offset), Name: "" , |
3153 | NW: GEP.getNoWrapFlags())); |
3154 | } |
3155 | |
3156 | if (shouldCanonicalizeGEPToPtrAdd(GEP)) { |
3157 | Value *Offset = EmitGEPOffset(GEP: cast<GEPOperator>(Val: &GEP)); |
3158 | Value *NewGEP = |
3159 | Builder.CreatePtrAdd(Ptr: PtrOp, Offset, Name: "" , NW: GEP.getNoWrapFlags()); |
3160 | return replaceInstUsesWith(I&: GEP, V: NewGEP); |
3161 | } |
3162 | |
3163 | // Scalarize vector operands; prefer splat-of-gep as the canonical form. |
3164 | // Note that this loses information about undef lanes; we run it after the |
3165 | // demanded-elements simplification above to partially mitigate that loss. |
3166 | if (GEPType->isVectorTy() && llvm::any_of(Range: GEP.operands(), P: [](Value *Op) { |
3167 | return Op->getType()->isVectorTy() && getSplatValue(V: Op); |
3168 | })) { |
3169 | SmallVector<Value *> NewOps; |
3170 | for (auto &Op : GEP.operands()) { |
3171 | if (Op->getType()->isVectorTy()) |
3172 | if (Value *Scalar = getSplatValue(V: Op)) { |
3173 | NewOps.push_back(Elt: Scalar); |
3174 | continue; |
3175 | } |
3176 | NewOps.push_back(Elt: Op); |
3177 | } |
3178 | |
3179 | Value *Res = Builder.CreateGEP(Ty: GEP.getSourceElementType(), Ptr: NewOps[0], |
3180 | IdxList: ArrayRef(NewOps).drop_front(), Name: GEP.getName(), |
3181 | NW: GEP.getNoWrapFlags()); |
3182 | if (!Res->getType()->isVectorTy()) { |
3183 | ElementCount EC = cast<VectorType>(Val: GEPType)->getElementCount(); |
3184 | Res = Builder.CreateVectorSplat(EC, V: Res); |
3185 | } |
3186 | return replaceInstUsesWith(I&: GEP, V: Res); |
3187 | } |
3188 | |
3189 | // Check to see if the inputs to the PHI node are getelementptr instructions. |
3190 | if (auto *PN = dyn_cast<PHINode>(Val: PtrOp)) { |
3191 | if (Value *NewPtrOp = foldGEPOfPhi(GEP, PN, Builder)) |
3192 | return replaceOperand(I&: GEP, OpNum: 0, V: NewPtrOp); |
3193 | } |
3194 | |
3195 | if (auto *Src = dyn_cast<GEPOperator>(Val: PtrOp)) |
3196 | if (Instruction *I = visitGEPOfGEP(GEP, Src)) |
3197 | return I; |
3198 | |
3199 | if (GEP.getNumIndices() == 1) { |
3200 | unsigned AS = GEP.getPointerAddressSpace(); |
3201 | if (GEP.getOperand(i_nocapture: 1)->getType()->getScalarSizeInBits() == |
3202 | DL.getIndexSizeInBits(AS)) { |
3203 | uint64_t TyAllocSize = DL.getTypeAllocSize(Ty: GEPEltType).getFixedValue(); |
3204 | |
3205 | if (TyAllocSize == 1) { |
3206 | // Canonicalize (gep i8* X, (ptrtoint Y)-(ptrtoint X)) to (bitcast Y), |
3207 | // but only if the result pointer is used only as if it were an integer, |
3208 | // or both pointers point to the same underlying object (otherwise |
3209 | // provenance is not necessarily retained). |
3210 | Value *X = GEP.getPointerOperand(); |
3211 | Value *Y; |
3212 | if (match(V: GEP.getOperand(i_nocapture: 1), |
3213 | P: m_Sub(L: m_PtrToInt(Op: m_Value(V&: Y)), R: m_PtrToInt(Op: m_Specific(V: X)))) && |
3214 | GEPType == Y->getType()) { |
3215 | bool HasSameUnderlyingObject = |
3216 | getUnderlyingObject(V: X) == getUnderlyingObject(V: Y); |
3217 | bool Changed = false; |
3218 | GEP.replaceUsesWithIf(New: Y, ShouldReplace: [&](Use &U) { |
3219 | bool ShouldReplace = HasSameUnderlyingObject || |
3220 | isa<ICmpInst>(Val: U.getUser()) || |
3221 | isa<PtrToIntInst>(Val: U.getUser()); |
3222 | Changed |= ShouldReplace; |
3223 | return ShouldReplace; |
3224 | }); |
3225 | return Changed ? &GEP : nullptr; |
3226 | } |
3227 | } else if (auto *ExactIns = |
3228 | dyn_cast<PossiblyExactOperator>(Val: GEP.getOperand(i_nocapture: 1))) { |
3229 | // Canonicalize (gep T* X, V / sizeof(T)) to (gep i8* X, V) |
3230 | Value *V; |
3231 | if (ExactIns->isExact()) { |
3232 | if ((has_single_bit(Value: TyAllocSize) && |
3233 | match(V: GEP.getOperand(i_nocapture: 1), |
3234 | P: m_Shr(L: m_Value(V), |
3235 | R: m_SpecificInt(V: countr_zero(Val: TyAllocSize))))) || |
3236 | match(V: GEP.getOperand(i_nocapture: 1), |
3237 | P: m_IDiv(L: m_Value(V), R: m_SpecificInt(V: TyAllocSize)))) { |
3238 | return GetElementPtrInst::Create(PointeeType: Builder.getInt8Ty(), |
3239 | Ptr: GEP.getPointerOperand(), IdxList: V, |
3240 | NW: GEP.getNoWrapFlags()); |
3241 | } |
3242 | } |
3243 | if (ExactIns->isExact() && ExactIns->hasOneUse()) { |
3244 | // Try to canonicalize a non-i8 element type to i8. If the index is an |
3245 | // exact instruction (div/shr) with a constant RHS, we can fold the |
3246 | // non-i8 element scale into the div/shr (similar to the mul case, |
3247 | // just inverted). |
3248 | const APInt *C; |
3249 | std::optional<APInt> NewC; |
3250 | if (has_single_bit(Value: TyAllocSize) && |
3251 | match(V: ExactIns, P: m_Shr(L: m_Value(V), R: m_APInt(Res&: C))) && |
3252 | C->uge(RHS: countr_zero(Val: TyAllocSize))) |
3253 | NewC = *C - countr_zero(Val: TyAllocSize); |
3254 | else if (match(V: ExactIns, P: m_UDiv(L: m_Value(V), R: m_APInt(Res&: C)))) { |
3255 | APInt Quot; |
3256 | uint64_t Rem; |
3257 | APInt::udivrem(LHS: *C, RHS: TyAllocSize, Quotient&: Quot, Remainder&: Rem); |
3258 | if (Rem == 0) |
3259 | NewC = Quot; |
3260 | } else if (match(V: ExactIns, P: m_SDiv(L: m_Value(V), R: m_APInt(Res&: C)))) { |
3261 | APInt Quot; |
3262 | int64_t Rem; |
3263 | APInt::sdivrem(LHS: *C, RHS: TyAllocSize, Quotient&: Quot, Remainder&: Rem); |
3264 | // For sdiv we need to make sure we aren't creating INT_MIN / -1. |
3265 | if (!Quot.isAllOnes() && Rem == 0) |
3266 | NewC = Quot; |
3267 | } |
3268 | |
3269 | if (NewC.has_value()) { |
3270 | Value *NewOp = Builder.CreateBinOp( |
3271 | Opc: static_cast<Instruction::BinaryOps>(ExactIns->getOpcode()), LHS: V, |
3272 | RHS: ConstantInt::get(Ty: V->getType(), V: *NewC)); |
3273 | cast<BinaryOperator>(Val: NewOp)->setIsExact(); |
3274 | return GetElementPtrInst::Create(PointeeType: Builder.getInt8Ty(), |
3275 | Ptr: GEP.getPointerOperand(), IdxList: NewOp, |
3276 | NW: GEP.getNoWrapFlags()); |
3277 | } |
3278 | } |
3279 | } |
3280 | } |
3281 | } |
3282 | // We do not handle pointer-vector geps here. |
3283 | if (GEPType->isVectorTy()) |
3284 | return nullptr; |
3285 | |
3286 | if (!GEP.isInBounds()) { |
3287 | unsigned IdxWidth = |
3288 | DL.getIndexSizeInBits(AS: PtrOp->getType()->getPointerAddressSpace()); |
3289 | APInt BasePtrOffset(IdxWidth, 0); |
3290 | Value *UnderlyingPtrOp = |
3291 | PtrOp->stripAndAccumulateInBoundsConstantOffsets(DL, Offset&: BasePtrOffset); |
3292 | bool CanBeNull, CanBeFreed; |
3293 | uint64_t DerefBytes = UnderlyingPtrOp->getPointerDereferenceableBytes( |
3294 | DL, CanBeNull, CanBeFreed); |
3295 | if (!CanBeNull && !CanBeFreed && DerefBytes != 0) { |
3296 | if (GEP.accumulateConstantOffset(DL, Offset&: BasePtrOffset) && |
3297 | BasePtrOffset.isNonNegative()) { |
3298 | APInt AllocSize(IdxWidth, DerefBytes); |
3299 | if (BasePtrOffset.ule(RHS: AllocSize)) { |
3300 | return GetElementPtrInst::CreateInBounds( |
3301 | PointeeType: GEP.getSourceElementType(), Ptr: PtrOp, IdxList: Indices, NameStr: GEP.getName()); |
3302 | } |
3303 | } |
3304 | } |
3305 | } |
3306 | |
3307 | // nusw + nneg -> nuw |
3308 | if (GEP.hasNoUnsignedSignedWrap() && !GEP.hasNoUnsignedWrap() && |
3309 | all_of(Range: GEP.indices(), P: [&](Value *Idx) { |
3310 | return isKnownNonNegative(V: Idx, SQ: SQ.getWithInstruction(I: &GEP)); |
3311 | })) { |
3312 | GEP.setNoWrapFlags(GEP.getNoWrapFlags() | GEPNoWrapFlags::noUnsignedWrap()); |
3313 | return &GEP; |
3314 | } |
3315 | |
3316 | // These rewrites are trying to preserve inbounds/nuw attributes. So we want |
3317 | // to do this after having tried to derive "nuw" above. |
3318 | if (GEP.getNumIndices() == 1) { |
3319 | // Given (gep p, x+y) we want to determine the common nowrap flags for both |
3320 | // geps if transforming into (gep (gep p, x), y). |
3321 | auto GetPreservedNoWrapFlags = [&](bool AddIsNUW) { |
3322 | // We can preserve "inbounds nuw", "nusw nuw", and plain "nuw" if we know |
3323 | // that x + y does not have unsigned wrap. |
3324 | if (GEP.hasNoUnsignedWrap() && AddIsNUW) |
3325 | return GEP.getNoWrapFlags(); |
3326 | return GEPNoWrapFlags::none(); |
3327 | }; |
3328 | |
3329 | // Try to replace ADD + GEP with GEP + GEP. |
3330 | Value *Idx1, *Idx2; |
3331 | if (match(V: GEP.getOperand(i_nocapture: 1), |
3332 | P: m_OneUse(SubPattern: m_AddLike(L: m_Value(V&: Idx1), R: m_Value(V&: Idx2))))) { |
3333 | // %idx = add i64 %idx1, %idx2 |
3334 | // %gep = getelementptr i32, ptr %ptr, i64 %idx |
3335 | // as: |
3336 | // %newptr = getelementptr i32, ptr %ptr, i64 %idx1 |
3337 | // %newgep = getelementptr i32, ptr %newptr, i64 %idx2 |
3338 | bool NUW = match(V: GEP.getOperand(i_nocapture: 1), P: m_NUWAddLike(L: m_Value(), R: m_Value())); |
3339 | GEPNoWrapFlags NWFlags = GetPreservedNoWrapFlags(NUW); |
3340 | auto *NewPtr = |
3341 | Builder.CreateGEP(Ty: GEP.getSourceElementType(), Ptr: GEP.getPointerOperand(), |
3342 | IdxList: Idx1, Name: "" , NW: NWFlags); |
3343 | return replaceInstUsesWith(I&: GEP, |
3344 | V: Builder.CreateGEP(Ty: GEP.getSourceElementType(), |
3345 | Ptr: NewPtr, IdxList: Idx2, Name: "" , NW: NWFlags)); |
3346 | } |
3347 | ConstantInt *C; |
3348 | if (match(V: GEP.getOperand(i_nocapture: 1), P: m_OneUse(SubPattern: m_SExtLike(Op: m_OneUse(SubPattern: m_NSWAddLike( |
3349 | L: m_Value(V&: Idx1), R: m_ConstantInt(CI&: C))))))) { |
3350 | // %add = add nsw i32 %idx1, idx2 |
3351 | // %sidx = sext i32 %add to i64 |
3352 | // %gep = getelementptr i32, ptr %ptr, i64 %sidx |
3353 | // as: |
3354 | // %newptr = getelementptr i32, ptr %ptr, i32 %idx1 |
3355 | // %newgep = getelementptr i32, ptr %newptr, i32 idx2 |
3356 | bool NUW = match(V: GEP.getOperand(i_nocapture: 1), |
3357 | P: m_NNegZExt(Op: m_NUWAddLike(L: m_Value(), R: m_Value()))); |
3358 | GEPNoWrapFlags NWFlags = GetPreservedNoWrapFlags(NUW); |
3359 | auto *NewPtr = Builder.CreateGEP( |
3360 | Ty: GEP.getSourceElementType(), Ptr: GEP.getPointerOperand(), |
3361 | IdxList: Builder.CreateSExt(V: Idx1, DestTy: GEP.getOperand(i_nocapture: 1)->getType()), Name: "" , NW: NWFlags); |
3362 | return replaceInstUsesWith( |
3363 | I&: GEP, |
3364 | V: Builder.CreateGEP(Ty: GEP.getSourceElementType(), Ptr: NewPtr, |
3365 | IdxList: Builder.CreateSExt(V: C, DestTy: GEP.getOperand(i_nocapture: 1)->getType()), |
3366 | Name: "" , NW: NWFlags)); |
3367 | } |
3368 | } |
3369 | |
3370 | if (Instruction *R = foldSelectGEP(GEP, Builder)) |
3371 | return R; |
3372 | |
3373 | return nullptr; |
3374 | } |
3375 | |
3376 | static bool isNeverEqualToUnescapedAlloc(Value *V, const TargetLibraryInfo &TLI, |
3377 | Instruction *AI) { |
3378 | if (isa<ConstantPointerNull>(Val: V)) |
3379 | return true; |
3380 | if (auto *LI = dyn_cast<LoadInst>(Val: V)) |
3381 | return isa<GlobalVariable>(Val: LI->getPointerOperand()); |
3382 | // Two distinct allocations will never be equal. |
3383 | return isAllocLikeFn(V, TLI: &TLI) && V != AI; |
3384 | } |
3385 | |
3386 | /// Given a call CB which uses an address UsedV, return true if we can prove |
3387 | /// that the call's only possible effect is storing to UsedV. |
3388 | static bool isRemovableWrite(CallBase &CB, Value *UsedV, |
3389 | const TargetLibraryInfo &TLI) { |
3390 | if (!CB.use_empty()) |
3391 | // TODO: add recursion if returned attribute is present |
3392 | return false; |
3393 | |
3394 | if (CB.isTerminator()) |
3395 | // TODO: remove implementation restriction |
3396 | return false; |
3397 | |
3398 | if (!CB.willReturn() || !CB.doesNotThrow()) |
3399 | return false; |
3400 | |
3401 | // If the only possible side effect of the call is writing to the alloca, |
3402 | // and the result isn't used, we can safely remove any reads implied by the |
3403 | // call including those which might read the alloca itself. |
3404 | std::optional<MemoryLocation> Dest = MemoryLocation::getForDest(CI: &CB, TLI); |
3405 | return Dest && Dest->Ptr == UsedV; |
3406 | } |
3407 | |
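/// Walk the (transitive) users of allocation site \p AI and determine whether
/// the allocation is removable: every user must be something we know how to
/// erase or rewrite (casts, GEPs, equality compares against null, frees from
/// the same allocation family, non-volatile loads/stores of the allocation,
/// and a handful of intrinsics). On success, return the combined kind of
/// access observed (Mod, Ref, or NoModRef); otherwise return std::nullopt.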
3408 | static std::optional<ModRefInfo> |
3409 | isAllocSiteRemovable(Instruction *AI, SmallVectorImpl<WeakTrackingVH> &Users, |
3410 | const TargetLibraryInfo &TLI, bool KnowInit) { |
3411 | SmallVector<Instruction*, 4> Worklist; |
3412 | const std::optional<StringRef> Family = getAllocationFamily(I: AI, TLI: &TLI); |
3413 | Worklist.push_back(Elt: AI); |
3414 | ModRefInfo Access = KnowInit ? ModRefInfo::NoModRef : ModRefInfo::Mod; |
3415 | |
3416 | do { |
3417 | Instruction *PI = Worklist.pop_back_val(); |
3418 | for (User *U : PI->users()) { |
3419 | Instruction *I = cast<Instruction>(Val: U); |
3420 | switch (I->getOpcode()) { |
3421 | default: |
3422 | // Give up the moment we see something we can't handle. |
3423 | return std::nullopt; |
3424 | |
3425 | case Instruction::AddrSpaceCast: |
3426 | case Instruction::BitCast: |
3427 | case Instruction::GetElementPtr: |
3428 | Users.emplace_back(Args&: I); |
3429 | Worklist.push_back(Elt: I); |
3430 | continue; |
3431 | |
3432 | case Instruction::ICmp: { |
3433 | ICmpInst *ICI = cast<ICmpInst>(Val: I); |
3434 | // We can fold eq/ne comparisons with null to false/true, respectively. |
3435 | // We also fold comparisons in some conditions provided the alloc has |
3436 | // not escaped (see isNeverEqualToUnescapedAlloc). |
3437 | if (!ICI->isEquality()) |
3438 | return std::nullopt; |
3439 | unsigned OtherIndex = (ICI->getOperand(i_nocapture: 0) == PI) ? 1 : 0; |
3440 | if (!isNeverEqualToUnescapedAlloc(V: ICI->getOperand(i_nocapture: OtherIndex), TLI, AI)) |
3441 | return std::nullopt; |
3442 | |
3443 | // Do not fold compares to aligned_alloc calls, as they may have to |
3444 | // return null in case the required alignment cannot be satisfied, |
3445 | // unless we can prove that both alignment and size are valid. |
3446 | auto AlignmentAndSizeKnownValid = [](CallBase *CB) { |
3447 | // Check if the alignment and size of a call to aligned_alloc are valid: |
3448 | // the alignment must be a power of 2 and the size a multiple of the |
3449 | // alignment. |
3450 | const APInt *Alignment; |
3451 | const APInt *Size; |
3452 | return match(V: CB->getArgOperand(i: 0), P: m_APInt(Res&: Alignment)) && |
3453 | match(V: CB->getArgOperand(i: 1), P: m_APInt(Res&: Size)) && |
3454 | Alignment->isPowerOf2() && Size->urem(RHS: *Alignment).isZero(); |
3455 | }; |
3456 | auto *CB = dyn_cast<CallBase>(Val: AI); |
3457 | LibFunc TheLibFunc; |
3458 | if (CB && TLI.getLibFunc(FDecl: *CB->getCalledFunction(), F&: TheLibFunc) && |
3459 | TLI.has(F: TheLibFunc) && TheLibFunc == LibFunc_aligned_alloc && |
3460 | !AlignmentAndSizeKnownValid(CB)) |
3461 | return std::nullopt; |
3462 | Users.emplace_back(Args&: I); |
3463 | continue; |
3464 | } |
3465 | |
3466 | case Instruction::Call: |
3467 | // Ignore no-op and store intrinsics. |
3468 | if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: I)) { |
3469 | switch (II->getIntrinsicID()) { |
3470 | default: |
3471 | return std::nullopt; |
3472 | |
3473 | case Intrinsic::memmove: |
3474 | case Intrinsic::memcpy: |
3475 | case Intrinsic::memset: { |
3476 | MemIntrinsic *MI = cast<MemIntrinsic>(Val: II); |
3477 | if (MI->isVolatile()) |
3478 | return std::nullopt; |
3479 | // Note: this could also be ModRef, but we can still interpret that |
3480 | // as just Mod in that case. |
3481 | ModRefInfo NewAccess = |
3482 | MI->getRawDest() == PI ? ModRefInfo::Mod : ModRefInfo::Ref; |
3483 | if ((Access & ~NewAccess) != ModRefInfo::NoModRef) |
3484 | return std::nullopt; |
3485 | Access |= NewAccess; |
3486 | [[fallthrough]]; |
3487 | } |
3488 | case Intrinsic::assume: |
3489 | case Intrinsic::invariant_start: |
3490 | case Intrinsic::invariant_end: |
3491 | case Intrinsic::lifetime_start: |
3492 | case Intrinsic::lifetime_end: |
3493 | case Intrinsic::objectsize: |
3494 | Users.emplace_back(Args&: I); |
3495 | continue; |
3496 | case Intrinsic::launder_invariant_group: |
3497 | case Intrinsic::strip_invariant_group: |
3498 | Users.emplace_back(Args&: I); |
3499 | Worklist.push_back(Elt: I); |
3500 | continue; |
3501 | } |
3502 | } |
3503 | |
3504 | if (Family && getFreedOperand(CB: cast<CallBase>(Val: I), TLI: &TLI) == PI && |
3505 | getAllocationFamily(I, TLI: &TLI) == Family) { |
3506 | Users.emplace_back(Args&: I); |
3507 | continue; |
3508 | } |
3509 | |
3510 | if (Family && getReallocatedOperand(CB: cast<CallBase>(Val: I)) == PI && |
3511 | getAllocationFamily(I, TLI: &TLI) == Family) { |
3512 | Users.emplace_back(Args&: I); |
3513 | Worklist.push_back(Elt: I); |
3514 | continue; |
3515 | } |
3516 | |
3517 | if (!isRefSet(MRI: Access) && |
3518 | isRemovableWrite(CB&: *cast<CallBase>(Val: I), UsedV: PI, TLI)) { |
3519 | Access |= ModRefInfo::Mod; |
3520 | Users.emplace_back(Args&: I); |
3521 | continue; |
3522 | } |
3523 | |
3524 | return std::nullopt; |
3525 | |
3526 | case Instruction::Store: { |
3527 | StoreInst *SI = cast<StoreInst>(Val: I); |
3528 | if (SI->isVolatile() || SI->getPointerOperand() != PI) |
3529 | return std::nullopt; |
3530 | if (isRefSet(MRI: Access)) |
3531 | return std::nullopt; |
3532 | Access |= ModRefInfo::Mod; |
3533 | Users.emplace_back(Args&: I); |
3534 | continue; |
3535 | } |
3536 | |
3537 | case Instruction::Load: { |
3538 | LoadInst *LI = cast<LoadInst>(Val: I); |
3539 | if (LI->isVolatile() || LI->getPointerOperand() != PI) |
3540 | return std::nullopt; |
3541 | if (isModSet(MRI: Access)) |
3542 | return std::nullopt; |
3543 | Access |= ModRefInfo::Ref; |
3544 | Users.emplace_back(Args&: I); |
3545 | continue; |
3546 | } |
3547 | } |
3548 | llvm_unreachable("missing a return?" ); |
3549 | } |
3550 | } while (!Worklist.empty()); |
3551 | |
3552 | assert(Access != ModRefInfo::ModRef); |
3553 | return Access; |
3554 | } |
3555 | |
3556 | Instruction *InstCombinerImpl::visitAllocSite(Instruction &MI) { |
3557 | assert(isa<AllocaInst>(MI) || isRemovableAlloc(&cast<CallBase>(MI), &TLI)); |
3558 | |
3559 | // If we have a malloc call that is used only in comparisons to null and in |
3560 | // free calls, delete the calls and replace the comparisons with true or |
3561 | // false as appropriate. |
3562 | |
3563 | // This is based on the principle that we can substitute our own allocation |
3564 | // function (which will never return null) rather than relying on knowledge of |
3565 | // the specific function being called. In some sense this can change the |
3566 | // permitted outputs of a program (when we convert a malloc to an alloca, the |
3567 | // fact that the allocation is now on the stack is potentially visible, for |
3568 | // example), but we believe it does so in a permissible manner. |
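//
// For example (illustrative):
//   %p = call ptr @malloc(i64 16)
//   %c = icmp eq ptr %p, null
//   store i8 0, ptr %p
//   call void @free(ptr %p)
// Here all four instructions can be removed, with %c replaced by false.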
3569 | SmallVector<WeakTrackingVH, 64> Users; |
3570 | |
3571 | // If we are removing an alloca with a dbg.declare, insert dbg.value calls |
3572 | // before each store. |
3573 | SmallVector<DbgVariableIntrinsic *, 8> DVIs; |
3574 | SmallVector<DbgVariableRecord *, 8> DVRs; |
3575 | std::unique_ptr<DIBuilder> DIB; |
3576 | if (isa<AllocaInst>(Val: MI)) { |
3577 | findDbgUsers(DbgInsts&: DVIs, V: &MI, DbgVariableRecords: &DVRs); |
3578 | DIB.reset(p: new DIBuilder(*MI.getModule(), /*AllowUnresolved=*/false)); |
3579 | } |
3580 | |
3581 | // Determine what getInitialValueOfAllocation would return without actually |
3582 | // allocating the result. |
3583 | bool KnowInitUndef = false; |
3584 | bool KnowInitZero = false; |
3585 | Constant *Init = |
3586 | getInitialValueOfAllocation(V: &MI, TLI: &TLI, Ty: Type::getInt8Ty(C&: MI.getContext())); |
3587 | if (Init) { |
3588 | if (isa<UndefValue>(Val: Init)) |
3589 | KnowInitUndef = true; |
3590 | else if (Init->isNullValue()) |
3591 | KnowInitZero = true; |
3592 | } |
3593 | // The various sanitizers don't actually return undef memory, but rather |
3594 | // memory initialized with special forms of runtime poison. |
3595 | auto &F = *MI.getFunction(); |
3596 | if (F.hasFnAttribute(Kind: Attribute::SanitizeMemory) || |
3597 | F.hasFnAttribute(Kind: Attribute::SanitizeAddress)) |
3598 | KnowInitUndef = false; |
3599 | |
3600 | auto Removable = |
3601 | isAllocSiteRemovable(AI: &MI, Users, TLI, KnowInit: KnowInitZero | KnowInitUndef); |
3602 | if (Removable) { |
3603 | for (WeakTrackingVH &User : Users) { |
3604 | // Lower all @llvm.objectsize and MTI calls first, because they may use |
3605 | // a bitcast/GEP of the alloca we are removing. |
3606 | if (!User) |
3607 | continue; |
3608 | |
3609 | Instruction *I = cast<Instruction>(Val: &*User); |
3610 | |
3611 | if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: I)) { |
3612 | if (II->getIntrinsicID() == Intrinsic::objectsize) { |
3613 | SmallVector<Instruction *> InsertedInstructions; |
3614 | Value *Result = lowerObjectSizeCall( |
3615 | ObjectSize: II, DL, TLI: &TLI, AA, /*MustSucceed=*/true, InsertedInstructions: &InsertedInstructions); |
3616 | for (Instruction *Inserted : InsertedInstructions) |
3617 | Worklist.add(I: Inserted); |
3618 | replaceInstUsesWith(I&: *I, V: Result); |
3619 | eraseInstFromFunction(I&: *I); |
3620 | User = nullptr; // Skip examining in the next loop. |
3621 | continue; |
3622 | } |
3623 | if (auto *MTI = dyn_cast<MemTransferInst>(Val: I)) { |
3624 | if (KnowInitZero && isRefSet(MRI: *Removable)) { |
3625 | IRBuilderBase::InsertPointGuard Guard(Builder); |
3626 | Builder.SetInsertPoint(MTI); |
3627 | auto *M = Builder.CreateMemSet( |
3628 | Ptr: MTI->getRawDest(), |
3629 | Val: ConstantInt::get(Ty: Type::getInt8Ty(C&: MI.getContext()), V: 0), |
3630 | Size: MTI->getLength(), Align: MTI->getDestAlign()); |
3631 | M->copyMetadata(SrcInst: *MTI); |
3632 | } |
3633 | } |
3634 | } |
3635 | } |
3636 | for (WeakTrackingVH &User : Users) { |
3637 | if (!User) |
3638 | continue; |
3639 | |
3640 | Instruction *I = cast<Instruction>(Val: &*User); |
3641 | |
3642 | if (ICmpInst *C = dyn_cast<ICmpInst>(Val: I)) { |
3643 | replaceInstUsesWith(I&: *C, |
3644 | V: ConstantInt::get(Ty: Type::getInt1Ty(C&: C->getContext()), |
3645 | V: C->isFalseWhenEqual())); |
3646 | } else if (auto *SI = dyn_cast<StoreInst>(Val: I)) { |
3647 | for (auto *DVI : DVIs) |
3648 | if (DVI->isAddressOfVariable()) |
3649 | ConvertDebugDeclareToDebugValue(DII: DVI, SI, Builder&: *DIB); |
3650 | for (auto *DVR : DVRs) |
3651 | if (DVR->isAddressOfVariable()) |
3652 | ConvertDebugDeclareToDebugValue(DVR, SI, Builder&: *DIB); |
3653 | } else { |
3654 | // Casts, GEP, or anything else: we're about to delete this instruction, |
3655 | // so it cannot have any valid uses. |
3656 | Constant *Replace; |
3657 | if (isa<LoadInst>(Val: I)) { |
3658 | assert(KnowInitZero || KnowInitUndef); |
3659 | Replace = KnowInitUndef ? UndefValue::get(T: I->getType()) |
3660 | : Constant::getNullValue(Ty: I->getType()); |
3661 | } else |
3662 | Replace = PoisonValue::get(T: I->getType()); |
3663 | replaceInstUsesWith(I&: *I, V: Replace); |
3664 | } |
3665 | eraseInstFromFunction(I&: *I); |
3666 | } |
3667 | |
3668 | if (InvokeInst *II = dyn_cast<InvokeInst>(Val: &MI)) { |
3669 | // Replace invoke with a NOP intrinsic to maintain the original CFG |
3670 | Module *M = II->getModule(); |
3671 | Function *F = Intrinsic::getOrInsertDeclaration(M, id: Intrinsic::donothing); |
3672 | auto *NewII = InvokeInst::Create( |
3673 | Func: F, IfNormal: II->getNormalDest(), IfException: II->getUnwindDest(), Args: {}, NameStr: "" , InsertBefore: II->getParent()); |
3674 | NewII->setDebugLoc(II->getDebugLoc()); |
3675 | } |
3676 | |
3677 | // Remove debug intrinsics which describe the value contained within the |
3678 | // alloca. In addition to removing dbg.{declare,addr} which simply point to |
3679 | // the alloca, remove dbg.value(<alloca>, ..., DW_OP_deref)'s as well, e.g.: |
3680 | // |
3681 | // ``` |
3682 | // define void @foo(i32 %0) { |
3683 | // %a = alloca i32 ; Deleted. |
3684 | // store i32 %0, i32* %a |
3685 | // dbg.value(i32 %0, "arg0") ; Not deleted. |
3686 | // dbg.value(i32* %a, "arg0", DW_OP_deref) ; Deleted. |
3687 | // call void @trivially_inlinable_no_op(i32* %a) |
3688 | // ret void |
3689 | // } |
3690 | // ``` |
3691 | // |
3692 | // This may not be required if we stop describing the contents of allocas |
3693 | // using dbg.value(<alloca>, ..., DW_OP_deref), but we currently do this in |
3694 | // the LowerDbgDeclare utility. |
3695 | // |
3696 | // If there is a dead store to `%a` in @trivially_inlinable_no_op, the |
3697 | // "arg0" dbg.value may be stale after the call. However, failing to remove |
3698 | // the DW_OP_deref dbg.value causes large gaps in location coverage. |
3699 | // |
3700 | // FIXME: the Assignment Tracking project has now likely made this |
3701 | // redundant (and it's sometimes harmful). |
3702 | for (auto *DVI : DVIs) |
3703 | if (DVI->isAddressOfVariable() || DVI->getExpression()->startsWithDeref()) |
3704 | DVI->eraseFromParent(); |
3705 | for (auto *DVR : DVRs) |
3706 | if (DVR->isAddressOfVariable() || DVR->getExpression()->startsWithDeref()) |
3707 | DVR->eraseFromParent(); |
3708 | |
3709 | return eraseInstFromFunction(I&: MI); |
3710 | } |
3711 | return nullptr; |
3712 | } |
3713 | |
3714 | /// Move the call to free before a NULL test. |
3715 | /// |
3716 | /// Check if this call to free is executed only after its argument has been |
3717 | /// tested against NULL (property 0). |
3718 | /// If so, it is legal to move this call into its predecessor block. |
3719 | /// |
3720 | /// The move is performed only if the block containing the call to free |
3721 | /// will be removed, i.e.: |
3722 | /// 1. it has only one predecessor P, and P has two successors |
3723 | /// 2. it contains the call, noops, and an unconditional branch |
3724 | /// 3. its successor is the same as its predecessor's successor |
3725 | /// |
3726 | /// Profitability is not a concern here; this function should be called only |
3727 | /// if the caller knows this transformation would be profitable (e.g., for |
3728 | /// code size). |
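///
/// For example (illustrative):
///   pred:
///     %cmp = icmp eq ptr %p, null
///     br i1 %cmp, label %exit, label %free.bb
///   free.bb:
///     call void @free(ptr %p)
///     br label %exit
/// The call is hoisted into %pred (freeing a null pointer is a no-op), after
/// which %free.bb becomes an empty block that later passes can remove.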
3729 | static Instruction *tryToMoveFreeBeforeNullTest(CallInst &FI, |
3730 | const DataLayout &DL) { |
3731 | Value *Op = FI.getArgOperand(i: 0); |
3732 | BasicBlock *FreeInstrBB = FI.getParent(); |
3733 | BasicBlock *PredBB = FreeInstrBB->getSinglePredecessor(); |
3734 | |
3735 | // Validate part of constraint #1: Only one predecessor |
3736 | // FIXME: We could allow more than one predecessor, but in that case we |
3737 | // would have to duplicate the call to free in each predecessor, and that |
3738 | // may not be profitable even for code size. |
3739 | if (!PredBB) |
3740 | return nullptr; |
3741 | |
3742 | // Validate constraint #2: Does this block contain only the call to |
3743 | // free, noops, and an unconditional branch? |
3744 | BasicBlock *SuccBB; |
3745 | Instruction *FreeInstrBBTerminator = FreeInstrBB->getTerminator(); |
3746 | if (!match(V: FreeInstrBBTerminator, P: m_UnconditionalBr(Succ&: SuccBB))) |
3747 | return nullptr; |
3748 | |
3749 | // If there are only 2 instructions in the block at this point, they must |
3750 | // be the call to free and the unconditional branch. |
3751 | // If there are more than 2 instructions, check that the extra ones are |
3752 | // noops, i.e., they won't hurt the performance of the generated code. |
3753 | if (FreeInstrBB->size() != 2) { |
3754 | for (const Instruction &Inst : FreeInstrBB->instructionsWithoutDebug()) { |
3755 | if (&Inst == &FI || &Inst == FreeInstrBBTerminator) |
3756 | continue; |
3757 | auto *Cast = dyn_cast<CastInst>(Val: &Inst); |
3758 | if (!Cast || !Cast->isNoopCast(DL)) |
3759 | return nullptr; |
3760 | } |
3761 | } |
3762 | // Validate the rest of constraint #1 by matching on the pred branch. |
3763 | Instruction *TI = PredBB->getTerminator(); |
3764 | BasicBlock *TrueBB, *FalseBB; |
3765 | CmpPredicate Pred; |
3766 | if (!match(V: TI, P: m_Br(C: m_ICmp(Pred, |
3767 | L: m_CombineOr(L: m_Specific(V: Op), |
3768 | R: m_Specific(V: Op->stripPointerCasts())), |
3769 | R: m_Zero()), |
3770 | T&: TrueBB, F&: FalseBB))) |
3771 | return nullptr; |
3772 | if (Pred != ICmpInst::ICMP_EQ && Pred != ICmpInst::ICMP_NE) |
3773 | return nullptr; |
3774 | |
3775 | // Validate constraint #3: Ensure the null case just falls through. |
3776 | if (SuccBB != (Pred == ICmpInst::ICMP_EQ ? TrueBB : FalseBB)) |
3777 | return nullptr; |
3778 | assert(FreeInstrBB == (Pred == ICmpInst::ICMP_EQ ? FalseBB : TrueBB) && |
3779 | "Broken CFG: missing edge from predecessor to successor" ); |
3780 | |
3781 | // At this point, we know that everything in FreeInstrBB can be moved |
3782 | // before TI. |
3783 | for (Instruction &Instr : llvm::make_early_inc_range(Range&: *FreeInstrBB)) { |
3784 | if (&Instr == FreeInstrBBTerminator) |
3785 | break; |
3786 | Instr.moveBeforePreserving(MovePos: TI->getIterator()); |
3787 | } |
3788 | assert(FreeInstrBB->size() == 1 && |
3789 | "Only the branch instruction should remain" ); |
3790 | |
3791 | // Now that we've moved the call to free before the NULL check, we have to |
3792 | // remove any attributes on its parameter that imply it's non-null, because |
3793 | // those attributes might have only been valid because of the NULL check, and |
3794 | // we can get miscompiles if we keep them. This is conservative if non-null is |
3795 | // also implied by something other than the NULL check, but it's guaranteed to |
3796 | // be correct, and the conservativeness won't matter in practice, since the |
3797 | // attributes are irrelevant for the call to free itself and the pointer |
3798 | // shouldn't be used after the call. |
3799 | AttributeList Attrs = FI.getAttributes(); |
3800 | Attrs = Attrs.removeParamAttribute(C&: FI.getContext(), ArgNo: 0, Kind: Attribute::NonNull); |
3801 | Attribute Dereferenceable = Attrs.getParamAttr(ArgNo: 0, Kind: Attribute::Dereferenceable); |
3802 | if (Dereferenceable.isValid()) { |
3803 | uint64_t Bytes = Dereferenceable.getDereferenceableBytes(); |
3804 | Attrs = Attrs.removeParamAttribute(C&: FI.getContext(), ArgNo: 0, |
3805 | Kind: Attribute::Dereferenceable); |
3806 | Attrs = Attrs.addDereferenceableOrNullParamAttr(C&: FI.getContext(), ArgNo: 0, Bytes); |
3807 | } |
3808 | FI.setAttributes(Attrs); |
3809 | |
3810 | return &FI; |
3811 | } |
3812 | |
3813 | Instruction *InstCombinerImpl::visitFree(CallInst &FI, Value *Op) { |
3814 | // free undef -> unreachable. |
3815 | if (isa<UndefValue>(Val: Op)) { |
3816 | // Leave a marker since we can't modify the CFG here. |
3817 | CreateNonTerminatorUnreachable(InsertAt: &FI); |
3818 | return eraseInstFromFunction(I&: FI); |
3819 | } |
3820 | |
3821 | // If we have 'free null' delete the instruction. This can happen in stl code |
3822 | // when lots of inlining happens. |
3823 | if (isa<ConstantPointerNull>(Val: Op)) |
3824 | return eraseInstFromFunction(I&: FI); |
3825 | |
3826 | // If we had free(realloc(...)) with no intervening uses, then eliminate the |
3827 | // realloc() entirely. |
3828 | CallInst *CI = dyn_cast<CallInst>(Val: Op); |
3829 | if (CI && CI->hasOneUse()) |
3830 | if (Value *ReallocatedOp = getReallocatedOperand(CB: CI)) |
3831 | return eraseInstFromFunction(I&: *replaceInstUsesWith(I&: *CI, V: ReallocatedOp)); |
3832 | |
  // If we optimize for code size, try to move the call to free before the null
  // test so that SimplifyCFG can remove the empty block and dead code
  // elimination can remove the branch. I.e., this helps to turn something like:
3836 | // if (foo) free(foo); |
3837 | // into |
3838 | // free(foo); |
3839 | // |
3840 | // Note that we can only do this for 'free' and not for any flavor of |
3841 | // 'operator delete'; there is no 'operator delete' symbol for which we are |
3842 | // permitted to invent a call, even if we're passing in a null pointer. |
3843 | if (MinimizeSize) { |
3844 | LibFunc Func; |
3845 | if (TLI.getLibFunc(CB: FI, F&: Func) && TLI.has(F: Func) && Func == LibFunc_free) |
3846 | if (Instruction *I = tryToMoveFreeBeforeNullTest(FI, DL)) |
3847 | return I; |
3848 | } |
3849 | |
3850 | return nullptr; |
3851 | } |
3852 | |
3853 | Instruction *InstCombinerImpl::visitReturnInst(ReturnInst &RI) { |
3854 | Value *RetVal = RI.getReturnValue(); |
3855 | if (!RetVal) |
3856 | return nullptr; |
3857 | |
3858 | Function *F = RI.getFunction(); |
3859 | Type *RetTy = RetVal->getType(); |
3860 | if (RetTy->isPointerTy()) { |
3861 | bool HasDereferenceable = |
3862 | F->getAttributes().getRetDereferenceableBytes() > 0; |
3863 | if (F->hasRetAttribute(Kind: Attribute::NonNull) || |
3864 | (HasDereferenceable && |
3865 | !NullPointerIsDefined(F, AS: RetTy->getPointerAddressSpace()))) { |
3866 | if (Value *V = simplifyNonNullOperand(V: RetVal, HasDereferenceable)) |
3867 | return replaceOperand(I&: RI, OpNum: 0, V); |
3868 | } |
3869 | } |
3870 | |
3871 | if (!AttributeFuncs::isNoFPClassCompatibleType(Ty: RetTy)) |
3872 | return nullptr; |
3873 | |
3874 | FPClassTest ReturnClass = F->getAttributes().getRetNoFPClass(); |
3875 | if (ReturnClass == fcNone) |
3876 | return nullptr; |
3877 | |
3878 | KnownFPClass KnownClass; |
3879 | Value *Simplified = |
3880 | SimplifyDemandedUseFPClass(V: RetVal, DemandedMask: ~ReturnClass, Known&: KnownClass, CxtI: &RI); |
3881 | if (!Simplified) |
3882 | return nullptr; |
3883 | |
3884 | return ReturnInst::Create(C&: RI.getContext(), retVal: Simplified); |
3885 | } |
3886 | |
3887 | // WARNING: keep in sync with SimplifyCFGOpt::simplifyUnreachable()! |
3888 | bool InstCombinerImpl::removeInstructionsBeforeUnreachable(Instruction &I) { |
3889 | // Try to remove the previous instruction if it must lead to unreachable. |
3890 | // This includes instructions like stores and "llvm.assume" that may not get |
3891 | // removed by simple dead code elimination. |
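  // For example (illustrative):
  //   store i32 1, ptr %p
  //   call void @llvm.assume(i1 %c)
  //   unreachable
  // Both the store and the assume are guaranteed to transfer execution to
  // the unreachable terminator, so both can be erased.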
3892 | bool Changed = false; |
3893 | while (Instruction *Prev = I.getPrevNonDebugInstruction()) { |
3894 | // While we theoretically can erase EH, that would result in a block that |
3895 | // used to start with an EH no longer starting with EH, which is invalid. |
3896 | // To make it valid, we'd need to fixup predecessors to no longer refer to |
3897 | // this block, but that changes CFG, which is not allowed in InstCombine. |
3898 | if (Prev->isEHPad()) |
3899 | break; // Can not drop any more instructions. We're done here. |
3900 | |
3901 | if (!isGuaranteedToTransferExecutionToSuccessor(I: Prev)) |
3902 | break; // Can not drop any more instructions. We're done here. |
3903 | // Otherwise, this instruction can be freely erased, |
3904 | // even if it is not side-effect free. |
3905 | |
3906 | // A value may still have uses before we process it here (for example, in |
3907 | // another unreachable block), so convert those to poison. |
3908 | replaceInstUsesWith(I&: *Prev, V: PoisonValue::get(T: Prev->getType())); |
3909 | eraseInstFromFunction(I&: *Prev); |
3910 | Changed = true; |
3911 | } |
3912 | return Changed; |
3913 | } |
3914 | |
3915 | Instruction *InstCombinerImpl::visitUnreachableInst(UnreachableInst &I) { |
3916 | removeInstructionsBeforeUnreachable(I); |
3917 | return nullptr; |
3918 | } |
3919 | |
3920 | Instruction *InstCombinerImpl::visitUnconditionalBranchInst(BranchInst &BI) { |
3921 | assert(BI.isUnconditional() && "Only for unconditional branches." ); |
3922 | |
  // If the second-to-last instruction in this basic block (excluding debug
  // info) is a store and the block ends with an unconditional branch, try to
  // move the store into the successor block.
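  //
  // For example (illustrative), a block such as:
  //   then:
  //     store i32 1, ptr %p
  //     br label %end
  // may have its store merged into %end, e.g. by combining it with a matching
  // store in %end's other predecessor into a single store of a phi.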
3926 | |
3927 | auto GetLastSinkableStore = [](BasicBlock::iterator BBI) { |
3928 | BasicBlock::iterator FirstInstr = BBI->getParent()->begin(); |
3929 | do { |
3930 | if (BBI != FirstInstr) |
3931 | --BBI; |
3932 | } while (BBI != FirstInstr && BBI->isDebugOrPseudoInst()); |
3933 | |
3934 | return dyn_cast<StoreInst>(Val&: BBI); |
3935 | }; |
3936 | |
3937 | if (StoreInst *SI = GetLastSinkableStore(BasicBlock::iterator(BI))) |
3938 | if (mergeStoreIntoSuccessor(SI&: *SI)) |
3939 | return &BI; |
3940 | |
3941 | return nullptr; |
3942 | } |
3943 | |
3944 | void InstCombinerImpl::addDeadEdge(BasicBlock *From, BasicBlock *To, |
3945 | SmallVectorImpl<BasicBlock *> &Worklist) { |
3946 | if (!DeadEdges.insert(V: {From, To}).second) |
3947 | return; |
3948 | |
3949 | // Replace phi node operands in successor with poison. |
3950 | for (PHINode &PN : To->phis()) |
3951 | for (Use &U : PN.incoming_values()) |
3952 | if (PN.getIncomingBlock(U) == From && !isa<PoisonValue>(Val: U)) { |
3953 | replaceUse(U, NewValue: PoisonValue::get(T: PN.getType())); |
3954 | addToWorklist(I: &PN); |
3955 | MadeIRChange = true; |
3956 | } |
3957 | |
3958 | Worklist.push_back(Elt: To); |
3959 | } |
3960 | |
3961 | // Under the assumption that I is unreachable, remove it and following |
3962 | // instructions. Changes are reported directly to MadeIRChange. |
3963 | void InstCombinerImpl::handleUnreachableFrom( |
3964 | Instruction *I, SmallVectorImpl<BasicBlock *> &Worklist) { |
3965 | BasicBlock *BB = I->getParent(); |
3966 | for (Instruction &Inst : make_early_inc_range( |
3967 | Range: make_range(x: std::next(x: BB->getTerminator()->getReverseIterator()), |
3968 | y: std::next(x: I->getReverseIterator())))) { |
3969 | if (!Inst.use_empty() && !Inst.getType()->isTokenTy()) { |
3970 | replaceInstUsesWith(I&: Inst, V: PoisonValue::get(T: Inst.getType())); |
3971 | MadeIRChange = true; |
3972 | } |
3973 | if (Inst.isEHPad() || Inst.getType()->isTokenTy()) |
3974 | continue; |
3975 | // RemoveDIs: erase debug-info on this instruction manually. |
3976 | Inst.dropDbgRecords(); |
3977 | eraseInstFromFunction(I&: Inst); |
3978 | MadeIRChange = true; |
3979 | } |
3980 | |
3981 | SmallVector<Value *> Changed; |
3982 | if (handleUnreachableTerminator(I: BB->getTerminator(), PoisonedValues&: Changed)) { |
3983 | MadeIRChange = true; |
3984 | for (Value *V : Changed) |
3985 | addToWorklist(I: cast<Instruction>(Val: V)); |
3986 | } |
3987 | |
3988 | // Handle potentially dead successors. |
3989 | for (BasicBlock *Succ : successors(BB)) |
3990 | addDeadEdge(From: BB, To: Succ, Worklist); |
3991 | } |
3992 | |
3993 | void InstCombinerImpl::handlePotentiallyDeadBlocks( |
3994 | SmallVectorImpl<BasicBlock *> &Worklist) { |
3995 | while (!Worklist.empty()) { |
3996 | BasicBlock *BB = Worklist.pop_back_val(); |
3997 | if (!all_of(Range: predecessors(BB), P: [&](BasicBlock *Pred) { |
3998 | return DeadEdges.contains(V: {Pred, BB}) || DT.dominates(A: BB, B: Pred); |
3999 | })) |
4000 | continue; |
4001 | |
4002 | handleUnreachableFrom(I: &BB->front(), Worklist); |
4003 | } |
4004 | } |
4005 | |
4006 | void InstCombinerImpl::handlePotentiallyDeadSuccessors(BasicBlock *BB, |
4007 | BasicBlock *LiveSucc) { |
4008 | SmallVector<BasicBlock *> Worklist; |
4009 | for (BasicBlock *Succ : successors(BB)) { |
4010 | // The live successor isn't dead. |
4011 | if (Succ == LiveSucc) |
4012 | continue; |
4013 | |
4014 | addDeadEdge(From: BB, To: Succ, Worklist); |
4015 | } |
4016 | |
4017 | handlePotentiallyDeadBlocks(Worklist); |
4018 | } |
4019 | |
4020 | Instruction *InstCombinerImpl::visitBranchInst(BranchInst &BI) { |
4021 | if (BI.isUnconditional()) |
4022 | return visitUnconditionalBranchInst(BI); |
4023 | |
4024 | // Change br (not X), label True, label False to: br X, label False, True |
4025 | Value *Cond = BI.getCondition(); |
4026 | Value *X; |
4027 | if (match(V: Cond, P: m_Not(V: m_Value(V&: X))) && !isa<Constant>(Val: X)) { |
4028 | // Swap Destinations and condition... |
4029 | BI.swapSuccessors(); |
4030 | if (BPI) |
4031 | BPI->swapSuccEdgesProbabilities(Src: BI.getParent()); |
4032 | return replaceOperand(I&: BI, OpNum: 0, V: X); |
4033 | } |
4034 | |
4035 | // Canonicalize logical-and-with-invert as logical-or-with-invert. |
4036 | // This is done by inverting the condition and swapping successors: |
4037 | // br (X && !Y), T, F --> br !(X && !Y), F, T --> br (!X || Y), F, T |
4038 | Value *Y; |
4039 | if (isa<SelectInst>(Val: Cond) && |
4040 | match(V: Cond, |
4041 | P: m_OneUse(SubPattern: m_LogicalAnd(L: m_Value(V&: X), R: m_OneUse(SubPattern: m_Not(V: m_Value(V&: Y))))))) { |
4042 | Value *NotX = Builder.CreateNot(V: X, Name: "not." + X->getName()); |
4043 | Value *Or = Builder.CreateLogicalOr(Cond1: NotX, Cond2: Y); |
4044 | BI.swapSuccessors(); |
4045 | if (BPI) |
4046 | BPI->swapSuccEdgesProbabilities(Src: BI.getParent()); |
4047 | return replaceOperand(I&: BI, OpNum: 0, V: Or); |
4048 | } |
4049 | |
4050 | // If the condition is irrelevant, remove the use so that other |
4051 | // transforms on the condition become more effective. |
4052 | if (!isa<ConstantInt>(Val: Cond) && BI.getSuccessor(i: 0) == BI.getSuccessor(i: 1)) |
4053 | return replaceOperand(I&: BI, OpNum: 0, V: ConstantInt::getFalse(Ty: Cond->getType())); |
4054 | |
4055 | // Canonicalize, for example, fcmp_one -> fcmp_oeq. |
4056 | CmpPredicate Pred; |
4057 | if (match(V: Cond, P: m_OneUse(SubPattern: m_FCmp(Pred, L: m_Value(), R: m_Value()))) && |
4058 | !isCanonicalPredicate(Pred)) { |
4059 | // Swap destinations and condition. |
4060 | auto *Cmp = cast<CmpInst>(Val: Cond); |
4061 | Cmp->setPredicate(CmpInst::getInversePredicate(pred: Pred)); |
4062 | BI.swapSuccessors(); |
4063 | if (BPI) |
4064 | BPI->swapSuccEdgesProbabilities(Src: BI.getParent()); |
4065 | Worklist.push(I: Cmp); |
4066 | return &BI; |
4067 | } |
4068 | |
4069 | if (isa<UndefValue>(Val: Cond)) { |
4070 | handlePotentiallyDeadSuccessors(BB: BI.getParent(), /*LiveSucc*/ nullptr); |
4071 | return nullptr; |
4072 | } |
4073 | if (auto *CI = dyn_cast<ConstantInt>(Val: Cond)) { |
4074 | handlePotentiallyDeadSuccessors(BB: BI.getParent(), |
4075 | LiveSucc: BI.getSuccessor(i: !CI->getZExtValue())); |
4076 | return nullptr; |
4077 | } |
4078 | |
4079 | // Replace all dominated uses of the condition with true/false |
4080 | // Ignore constant expressions to avoid iterating over uses on other |
4081 | // functions. |
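  // For example (illustrative), given 'br i1 %c, label %t, label %f', uses of
  // %c dominated by the edge to %t are replaced with 'true' and uses dominated
  // by the edge to %f with 'false'.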
4082 | if (!isa<Constant>(Val: Cond) && BI.getSuccessor(i: 0) != BI.getSuccessor(i: 1)) { |
4083 | for (auto &U : make_early_inc_range(Range: Cond->uses())) { |
4084 | BasicBlockEdge Edge0(BI.getParent(), BI.getSuccessor(i: 0)); |
4085 | if (DT.dominates(BBE: Edge0, U)) { |
4086 | replaceUse(U, NewValue: ConstantInt::getTrue(Ty: Cond->getType())); |
4087 | addToWorklist(I: cast<Instruction>(Val: U.getUser())); |
4088 | continue; |
4089 | } |
4090 | BasicBlockEdge Edge1(BI.getParent(), BI.getSuccessor(i: 1)); |
4091 | if (DT.dominates(BBE: Edge1, U)) { |
4092 | replaceUse(U, NewValue: ConstantInt::getFalse(Ty: Cond->getType())); |
4093 | addToWorklist(I: cast<Instruction>(Val: U.getUser())); |
4094 | } |
4095 | } |
4096 | } |
4097 | |
4098 | DC.registerBranch(BI: &BI); |
4099 | return nullptr; |
4100 | } |
4101 | |
4102 | // Replaces (switch (select cond, X, C)/(select cond, C, X)) with (switch X) if |
4103 | // we can prove that both (switch C) and (switch X) go to the default when cond |
4104 | // is false/true. |
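//
// For example (illustrative):
//   %cond = icmp ult i32 %x, 4
//   %sel = select i1 %cond, i32 %x, i32 100
//   switch i32 %sel, label %default [ i32 0, label %a
//                                     i32 3, label %b ]
// The constant arm (100) is not a case, so it reaches the default, and
// whenever %cond is false %x is >= 4 and also reaches the default; the switch
// can therefore be performed on %x directly.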
4105 | static Value *simplifySwitchOnSelectUsingRanges(SwitchInst &SI, |
4106 | SelectInst *Select, |
4107 | bool IsTrueArm) { |
4108 | unsigned CstOpIdx = IsTrueArm ? 1 : 2; |
4109 | auto *C = dyn_cast<ConstantInt>(Val: Select->getOperand(i_nocapture: CstOpIdx)); |
4110 | if (!C) |
4111 | return nullptr; |
4112 | |
4113 | BasicBlock *CstBB = SI.findCaseValue(C)->getCaseSuccessor(); |
4114 | if (CstBB != SI.getDefaultDest()) |
4115 | return nullptr; |
4116 | Value *X = Select->getOperand(i_nocapture: 3 - CstOpIdx); |
4117 | CmpPredicate Pred; |
4118 | const APInt *RHSC; |
4119 | if (!match(V: Select->getCondition(), |
4120 | P: m_ICmp(Pred, L: m_Specific(V: X), R: m_APInt(Res&: RHSC)))) |
4121 | return nullptr; |
4122 | if (IsTrueArm) |
4123 | Pred = ICmpInst::getInversePredicate(pred: Pred); |
4124 | |
4125 | // See whether we can replace the select with X |
4126 | ConstantRange CR = ConstantRange::makeExactICmpRegion(Pred, Other: *RHSC); |
4127 | for (auto Case : SI.cases()) |
4128 | if (!CR.contains(Val: Case.getCaseValue()->getValue())) |
4129 | return nullptr; |
4130 | |
4131 | return X; |
4132 | } |
4133 | |
4134 | Instruction *InstCombinerImpl::visitSwitchInst(SwitchInst &SI) { |
4135 | Value *Cond = SI.getCondition(); |
4136 | Value *Op0; |
4137 | ConstantInt *AddRHS; |
4138 | if (match(V: Cond, P: m_Add(L: m_Value(V&: Op0), R: m_ConstantInt(CI&: AddRHS)))) { |
4139 | // Change 'switch (X+4) case 1:' into 'switch (X) case -3'. |
4140 | for (auto Case : SI.cases()) { |
4141 | Constant *NewCase = ConstantExpr::getSub(C1: Case.getCaseValue(), C2: AddRHS); |
4142 | assert(isa<ConstantInt>(NewCase) && |
4143 | "Result of expression should be constant" ); |
4144 | Case.setValue(cast<ConstantInt>(Val: NewCase)); |
4145 | } |
4146 | return replaceOperand(I&: SI, OpNum: 0, V: Op0); |
4147 | } |
4148 | |
4149 | ConstantInt *SubLHS; |
4150 | if (match(V: Cond, P: m_Sub(L: m_ConstantInt(CI&: SubLHS), R: m_Value(V&: Op0)))) { |
4151 | // Change 'switch (1-X) case 1:' into 'switch (X) case 0'. |
4152 | for (auto Case : SI.cases()) { |
4153 | Constant *NewCase = ConstantExpr::getSub(C1: SubLHS, C2: Case.getCaseValue()); |
4154 | assert(isa<ConstantInt>(NewCase) && |
4155 | "Result of expression should be constant" ); |
4156 | Case.setValue(cast<ConstantInt>(Val: NewCase)); |
4157 | } |
4158 | return replaceOperand(I&: SI, OpNum: 0, V: Op0); |
4159 | } |
4160 | |
4161 | uint64_t ShiftAmt; |
4162 | if (match(V: Cond, P: m_Shl(L: m_Value(V&: Op0), R: m_ConstantInt(V&: ShiftAmt))) && |
4163 | ShiftAmt < Op0->getType()->getScalarSizeInBits() && |
4164 | all_of(Range: SI.cases(), P: [&](const auto &Case) { |
4165 | return Case.getCaseValue()->getValue().countr_zero() >= ShiftAmt; |
4166 | })) { |
4167 | // Change 'switch (X << 2) case 4:' into 'switch (X) case 1:'. |
4168 | OverflowingBinaryOperator *Shl = cast<OverflowingBinaryOperator>(Val: Cond); |
4169 | if (Shl->hasNoUnsignedWrap() || Shl->hasNoSignedWrap() || |
4170 | Shl->hasOneUse()) { |
4171 | Value *NewCond = Op0; |
4172 | if (!Shl->hasNoUnsignedWrap() && !Shl->hasNoSignedWrap()) { |
4173 | // If the shift may wrap, we need to mask off the shifted bits. |
4174 | unsigned BitWidth = Op0->getType()->getScalarSizeInBits(); |
4175 | NewCond = Builder.CreateAnd( |
4176 | LHS: Op0, RHS: APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: BitWidth - ShiftAmt)); |
4177 | } |
4178 | for (auto Case : SI.cases()) { |
4179 | const APInt &CaseVal = Case.getCaseValue()->getValue(); |
4180 | APInt ShiftedCase = Shl->hasNoSignedWrap() ? CaseVal.ashr(ShiftAmt) |
4181 | : CaseVal.lshr(shiftAmt: ShiftAmt); |
4182 | Case.setValue(ConstantInt::get(Context&: SI.getContext(), V: ShiftedCase)); |
4183 | } |
4184 | return replaceOperand(I&: SI, OpNum: 0, V: NewCond); |
4185 | } |
4186 | } |
4187 | |
4188 | // Fold switch(zext/sext(X)) into switch(X) if possible. |
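  // For example (illustrative), 'switch i32 %w' with %w = zext i8 %x can
  // become 'switch i8 %x' with truncated case values, provided every case
  // value fits in 8 bits.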
4189 | if (match(V: Cond, P: m_ZExtOrSExt(Op: m_Value(V&: Op0)))) { |
4190 | bool IsZExt = isa<ZExtInst>(Val: Cond); |
4191 | Type *SrcTy = Op0->getType(); |
4192 | unsigned NewWidth = SrcTy->getScalarSizeInBits(); |
4193 | |
4194 | if (all_of(Range: SI.cases(), P: [&](const auto &Case) { |
4195 | const APInt &CaseVal = Case.getCaseValue()->getValue(); |
4196 | return IsZExt ? CaseVal.isIntN(N: NewWidth) |
4197 | : CaseVal.isSignedIntN(N: NewWidth); |
4198 | })) { |
4199 | for (auto &Case : SI.cases()) { |
4200 | APInt TruncatedCase = Case.getCaseValue()->getValue().trunc(width: NewWidth); |
4201 | Case.setValue(ConstantInt::get(Context&: SI.getContext(), V: TruncatedCase)); |
4202 | } |
4203 | return replaceOperand(I&: SI, OpNum: 0, V: Op0); |
4204 | } |
4205 | } |
4206 | |
4207 | // Fold switch(select cond, X, Y) into switch(X/Y) if possible |
4208 | if (auto *Select = dyn_cast<SelectInst>(Val: Cond)) { |
4209 | if (Value *V = |
4210 | simplifySwitchOnSelectUsingRanges(SI, Select, /*IsTrueArm=*/true)) |
4211 | return replaceOperand(I&: SI, OpNum: 0, V); |
4212 | if (Value *V = |
4213 | simplifySwitchOnSelectUsingRanges(SI, Select, /*IsTrueArm=*/false)) |
4214 | return replaceOperand(I&: SI, OpNum: 0, V); |
4215 | } |
4216 | |
4217 | KnownBits Known = computeKnownBits(V: Cond, CxtI: &SI); |
4218 | unsigned LeadingKnownZeros = Known.countMinLeadingZeros(); |
4219 | unsigned LeadingKnownOnes = Known.countMinLeadingOnes(); |
4220 | |
4221 | // Compute the number of leading bits we can ignore. |
4222 | // TODO: A better way to determine this would use ComputeNumSignBits(). |
4223 | for (const auto &C : SI.cases()) { |
4224 | LeadingKnownZeros = |
4225 | std::min(a: LeadingKnownZeros, b: C.getCaseValue()->getValue().countl_zero()); |
4226 | LeadingKnownOnes = |
4227 | std::min(a: LeadingKnownOnes, b: C.getCaseValue()->getValue().countl_one()); |
4228 | } |
4229 | |
4230 | unsigned NewWidth = Known.getBitWidth() - std::max(a: LeadingKnownZeros, b: LeadingKnownOnes); |
4231 | |
4232 | // Shrink the condition operand if the new type is smaller than the old type. |
4233 | // But do not shrink to a non-standard type, because backend can't generate |
4234 | // good code for that yet. |
4235 | // TODO: We can make it aggressive again after fixing PR39569. |
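  // For example (illustrative), if the condition is an i32 whose top 24 bits
  // are known to be zero and every case value also has at least 24 leading
  // zeros, the switch can be performed on an i8 truncation of the condition.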
4236 | if (NewWidth > 0 && NewWidth < Known.getBitWidth() && |
4237 | shouldChangeType(FromWidth: Known.getBitWidth(), ToWidth: NewWidth)) { |
4238 | IntegerType *Ty = IntegerType::get(C&: SI.getContext(), NumBits: NewWidth); |
4239 | Builder.SetInsertPoint(&SI); |
4240 | Value *NewCond = Builder.CreateTrunc(V: Cond, DestTy: Ty, Name: "trunc" ); |
4241 | |
4242 | for (auto Case : SI.cases()) { |
4243 | APInt TruncatedCase = Case.getCaseValue()->getValue().trunc(width: NewWidth); |
4244 | Case.setValue(ConstantInt::get(Context&: SI.getContext(), V: TruncatedCase)); |
4245 | } |
4246 | return replaceOperand(I&: SI, OpNum: 0, V: NewCond); |
4247 | } |
4248 | |
4249 | if (isa<UndefValue>(Val: Cond)) { |
4250 | handlePotentiallyDeadSuccessors(BB: SI.getParent(), /*LiveSucc*/ nullptr); |
4251 | return nullptr; |
4252 | } |
4253 | if (auto *CI = dyn_cast<ConstantInt>(Val: Cond)) { |
4254 | handlePotentiallyDeadSuccessors(BB: SI.getParent(), |
4255 | LiveSucc: SI.findCaseValue(C: CI)->getCaseSuccessor()); |
4256 | return nullptr; |
4257 | } |
4258 | |
4259 | return nullptr; |
4260 | } |
4261 | |
Instruction *
InstCombinerImpl::foldExtractOfOverflowIntrinsic(ExtractValueInst &EV) {
4264 | auto *WO = dyn_cast<WithOverflowInst>(Val: EV.getAggregateOperand()); |
4265 | if (!WO) |
4266 | return nullptr; |
4267 | |
4268 | Intrinsic::ID OvID = WO->getIntrinsicID(); |
4269 | const APInt *C = nullptr; |
4270 | if (match(V: WO->getRHS(), P: m_APIntAllowPoison(Res&: C))) { |
4271 | if (*EV.idx_begin() == 0 && (OvID == Intrinsic::smul_with_overflow || |
4272 | OvID == Intrinsic::umul_with_overflow)) { |
4273 | // extractvalue (any_mul_with_overflow X, -1), 0 --> -X |
4274 | if (C->isAllOnes()) |
4275 | return BinaryOperator::CreateNeg(Op: WO->getLHS()); |
4276 | // extractvalue (any_mul_with_overflow X, 2^n), 0 --> X << n |
4277 | if (C->isPowerOf2()) { |
4278 | return BinaryOperator::CreateShl( |
4279 | V1: WO->getLHS(), |
4280 | V2: ConstantInt::get(Ty: WO->getLHS()->getType(), V: C->logBase2())); |
4281 | } |
4282 | } |
4283 | } |
4284 | |
  // We're extracting from an overflow intrinsic. See if we're the only user.
  // That allows us to simplify a multiple-result intrinsic into simpler
  // instructions that produce just the one value we actually use.
4288 | if (!WO->hasOneUse()) |
4289 | return nullptr; |
4290 | |
  // Check if we're grabbing only the arithmetic result (not the overflow bit)
  // of a 'with overflow' intrinsic and replace it with a plain binary
  // instruction.
4293 | if (*EV.idx_begin() == 0) { |
4294 | Instruction::BinaryOps BinOp = WO->getBinaryOp(); |
4295 | Value *LHS = WO->getLHS(), *RHS = WO->getRHS(); |
4296 | // Replace the old instruction's uses with poison. |
4297 | replaceInstUsesWith(I&: *WO, V: PoisonValue::get(T: WO->getType())); |
4298 | eraseInstFromFunction(I&: *WO); |
4299 | return BinaryOperator::Create(Op: BinOp, S1: LHS, S2: RHS); |
4300 | } |
4301 | |
4302 | assert(*EV.idx_begin() == 1 && "Unexpected extract index for overflow inst" ); |
4303 | |
4304 | // (usub LHS, RHS) overflows when LHS is unsigned-less-than RHS. |
4305 | if (OvID == Intrinsic::usub_with_overflow) |
4306 | return new ICmpInst(ICmpInst::ICMP_ULT, WO->getLHS(), WO->getRHS()); |
4307 | |
4308 | // smul with i1 types overflows when both sides are set: -1 * -1 == +1, but |
4309 | // +1 is not possible because we assume signed values. |
4310 | if (OvID == Intrinsic::smul_with_overflow && |
4311 | WO->getLHS()->getType()->isIntOrIntVectorTy(BitWidth: 1)) |
4312 | return BinaryOperator::CreateAnd(V1: WO->getLHS(), V2: WO->getRHS()); |
4313 | |
4314 | // extractvalue (umul_with_overflow X, X), 1 -> X u> 2^(N/2)-1 |
4315 | if (OvID == Intrinsic::umul_with_overflow && WO->getLHS() == WO->getRHS()) { |
4316 | unsigned BitWidth = WO->getLHS()->getType()->getScalarSizeInBits(); |
4317 | // Only handle even bitwidths for performance reasons. |
4318 | if (BitWidth % 2 == 0) |
4319 | return new ICmpInst( |
4320 | ICmpInst::ICMP_UGT, WO->getLHS(), |
4321 | ConstantInt::get(Ty: WO->getLHS()->getType(), |
4322 | V: APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: BitWidth / 2))); |
4323 | } |
4324 | |
4325 | // If only the overflow result is used, and the right hand side is a |
4326 | // constant (or constant splat), we can remove the intrinsic by directly |
4327 | // checking for overflow. |
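  // For example (illustrative), for an i8 unsigned add:
  //   extractvalue (uadd.with.overflow i8 %x, 42), 1
  // becomes an unsigned compare of %x against 255 - 42 = 213, since the
  // addition wraps exactly when %x exceeds 213.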
4328 | if (C) { |
4329 | // Compute the no-wrap range for LHS given RHS=C, then construct an |
4330 | // equivalent icmp, potentially using an offset. |
4331 | ConstantRange NWR = ConstantRange::makeExactNoWrapRegion( |
4332 | BinOp: WO->getBinaryOp(), Other: *C, NoWrapKind: WO->getNoWrapKind()); |
4333 | |
4334 | CmpInst::Predicate Pred; |
4335 | APInt NewRHSC, Offset; |
4336 | NWR.getEquivalentICmp(Pred, RHS&: NewRHSC, Offset); |
4337 | auto *OpTy = WO->getRHS()->getType(); |
4338 | auto *NewLHS = WO->getLHS(); |
4339 | if (Offset != 0) |
4340 | NewLHS = Builder.CreateAdd(LHS: NewLHS, RHS: ConstantInt::get(Ty: OpTy, V: Offset)); |
4341 | return new ICmpInst(ICmpInst::getInversePredicate(pred: Pred), NewLHS, |
4342 | ConstantInt::get(Ty: OpTy, V: NewRHSC)); |
4343 | } |
4344 | |
4345 | return nullptr; |
4346 | } |
4347 | |
static Value *foldFrexpOfSelect(ExtractValueInst &EV, IntrinsicInst *FrexpCall,
                                SelectInst *SelectInst,
                                InstCombiner::BuilderTy &Builder) {
4351 | // Helper to fold frexp of select to select of frexp. |
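  // For example (illustrative), when only the mantissa (index 0) is extracted:
  //   extractvalue (frexp (select i1 %c, double 8.0, double %x)), 0
  // can become a select between the constant arm's mantissa (frexp(8.0) is
  // 0.5 * 2^4, so 0.5) and the mantissa of a new frexp call on %x.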
4352 | |
4353 | if (!SelectInst->hasOneUse() || !FrexpCall->hasOneUse()) |
4354 | return nullptr; |
4355 | Value *Cond = SelectInst->getCondition(); |
4356 | Value *TrueVal = SelectInst->getTrueValue(); |
4357 | Value *FalseVal = SelectInst->getFalseValue(); |
4358 | |
4359 | const APFloat *ConstVal = nullptr; |
4360 | Value *VarOp = nullptr; |
4361 | bool ConstIsTrue = false; |
4362 | |
4363 | if (match(V: TrueVal, P: m_APFloat(Res&: ConstVal))) { |
4364 | VarOp = FalseVal; |
4365 | ConstIsTrue = true; |
4366 | } else if (match(V: FalseVal, P: m_APFloat(Res&: ConstVal))) { |
4367 | VarOp = TrueVal; |
4368 | ConstIsTrue = false; |
4369 | } else { |
4370 | return nullptr; |
4371 | } |
4372 | |
4373 | Builder.SetInsertPoint(&EV); |
4374 | |
4375 | CallInst *NewFrexp = |
4376 | Builder.CreateCall(Callee: FrexpCall->getCalledFunction(), Args: {VarOp}, Name: "frexp" ); |
4377 | NewFrexp->copyIRFlags(V: FrexpCall); |
4378 | |
4379 | Value *NewEV = Builder.CreateExtractValue(Agg: NewFrexp, Idxs: 0, Name: "mantissa" ); |
4380 | |
4381 | int Exp; |
4382 | APFloat Mantissa = frexp(X: *ConstVal, Exp, RM: APFloat::rmNearestTiesToEven); |
4383 | |
4384 | Constant *ConstantMantissa = ConstantFP::get(Ty: TrueVal->getType(), V: Mantissa); |
4385 | |
4386 | Value *NewSel = Builder.CreateSelectFMF( |
4387 | C: Cond, True: ConstIsTrue ? ConstantMantissa : NewEV, |
4388 | False: ConstIsTrue ? NewEV : ConstantMantissa, FMFSource: SelectInst, Name: "select.frexp" ); |
4389 | return NewSel; |
4390 | } |

Instruction *InstCombinerImpl::visitExtractValueInst(ExtractValueInst &EV) {
4392 | Value *Agg = EV.getAggregateOperand(); |
4393 | |
4394 | if (!EV.hasIndices()) |
4395 | return replaceInstUsesWith(I&: EV, V: Agg); |
4396 | |
4397 | if (Value *V = simplifyExtractValueInst(Agg, Idxs: EV.getIndices(), |
4398 | Q: SQ.getWithInstruction(I: &EV))) |
4399 | return replaceInstUsesWith(I&: EV, V); |
4400 | |
4401 | Value *Cond, *TrueVal, *FalseVal; |
4402 | if (match(V: &EV, P: m_ExtractValue<0>(V: m_Intrinsic<Intrinsic::frexp>(Op0: m_Select( |
4403 | C: m_Value(V&: Cond), L: m_Value(V&: TrueVal), R: m_Value(V&: FalseVal)))))) { |
4404 | auto *SelInst = |
4405 | cast<SelectInst>(Val: cast<IntrinsicInst>(Val: Agg)->getArgOperand(i: 0)); |
4406 | if (Value *Result = |
4407 | foldFrexpOfSelect(EV, FrexpCall: cast<IntrinsicInst>(Val: Agg), SelectInst: SelInst, Builder)) |
4408 | return replaceInstUsesWith(I&: EV, V: Result); |
4409 | } |
4410 | if (InsertValueInst *IV = dyn_cast<InsertValueInst>(Val: Agg)) { |
4411 | // We're extracting from an insertvalue instruction, compare the indices |
4412 | const unsigned *exti, *exte, *insi, *inse; |
4413 | for (exti = EV.idx_begin(), insi = IV->idx_begin(), |
4414 | exte = EV.idx_end(), inse = IV->idx_end(); |
4415 | exti != exte && insi != inse; |
4416 | ++exti, ++insi) { |
4417 | if (*insi != *exti) |
4418 | // The insert and extract both reference distinctly different elements. |
4419 | // This means the extract is not influenced by the insert, and we can |
4420 | // replace the aggregate operand of the extract with the aggregate |
4421 | // operand of the insert. i.e., replace |
4422 | // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1 |
4423 | // %E = extractvalue { i32, { i32 } } %I, 0 |
4424 | // with |
4425 | // %E = extractvalue { i32, { i32 } } %A, 0 |
4426 | return ExtractValueInst::Create(Agg: IV->getAggregateOperand(), |
4427 | Idxs: EV.getIndices()); |
4428 | } |
4429 | if (exti == exte && insi == inse) |
4430 | // Both iterators are at the end: Index lists are identical. Replace |
4431 | // %B = insertvalue { i32, { i32 } } %A, i32 42, 1, 0 |
4432 | // %C = extractvalue { i32, { i32 } } %B, 1, 0 |
4433 | // with "i32 42" |
4434 | return replaceInstUsesWith(I&: EV, V: IV->getInsertedValueOperand()); |
4435 | if (exti == exte) { |
4436 | // The extract list is a prefix of the insert list. i.e. replace |
4437 | // %I = insertvalue { i32, { i32 } } %A, i32 42, 1, 0 |
4438 | // %E = extractvalue { i32, { i32 } } %I, 1 |
4439 | // with |
4440 | // %X = extractvalue { i32, { i32 } } %A, 1 |
4441 | // %E = insertvalue { i32 } %X, i32 42, 0 |
4442 | // by switching the order of the insert and extract (though the |
4443 | // insertvalue should be left in, since it may have other uses). |
4444 | Value *NewEV = Builder.CreateExtractValue(Agg: IV->getAggregateOperand(), |
4445 | Idxs: EV.getIndices()); |
4446 | return InsertValueInst::Create(Agg: NewEV, Val: IV->getInsertedValueOperand(), |
4447 | Idxs: ArrayRef(insi, inse)); |
4448 | } |
4449 | if (insi == inse) |
4450 | // The insert list is a prefix of the extract list |
4451 | // We can simply remove the common indices from the extract and make it |
4452 | // operate on the inserted value instead of the insertvalue result. |
4453 | // i.e., replace |
4454 | // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1 |
4455 | // %E = extractvalue { i32, { i32 } } %I, 1, 0 |
4456 | // with |
      //     %E = extractvalue { i32 } { i32 42 }, 0
4458 | return ExtractValueInst::Create(Agg: IV->getInsertedValueOperand(), |
4459 | Idxs: ArrayRef(exti, exte)); |
4460 | } |
4461 | |
4462 | if (Instruction *R = foldExtractOfOverflowIntrinsic(EV)) |
4463 | return R; |
4464 | |
4465 | if (LoadInst *L = dyn_cast<LoadInst>(Val: Agg)) { |
    // Bail out if the aggregate contains a scalable vector type.
4467 | if (auto *STy = dyn_cast<StructType>(Val: Agg->getType()); |
4468 | STy && STy->isScalableTy()) |
4469 | return nullptr; |
4470 | |
4471 | // If the (non-volatile) load only has one use, we can rewrite this to a |
4472 | // load from a GEP. This reduces the size of the load. If a load is used |
4473 | // only by extractvalue instructions then this either must have been |
4474 | // optimized before, or it is a struct with padding, in which case we |
4475 | // don't want to do the transformation as it loses padding knowledge. |
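    // For example (illustrative):
    //   %agg = load { i32, i64 }, ptr %p
    //   %v = extractvalue { i32, i64 } %agg, 1
    // becomes a narrower load through a GEP to the second field:
    //   %gep = getelementptr inbounds { i32, i64 }, ptr %p, i32 0, i32 1
    //   %v = load i64, ptr %gep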
4476 | if (L->isSimple() && L->hasOneUse()) { |
4477 | // extractvalue has integer indices, getelementptr has Value*s. Convert. |
4478 | SmallVector<Value*, 4> Indices; |
4479 | // Prefix an i32 0 since we need the first element. |
4480 | Indices.push_back(Elt: Builder.getInt32(C: 0)); |
4481 | for (unsigned Idx : EV.indices()) |
4482 | Indices.push_back(Elt: Builder.getInt32(C: Idx)); |
4483 | |
4484 | // We need to insert these at the location of the old load, not at that of |
4485 | // the extractvalue. |
4486 | Builder.SetInsertPoint(L); |
4487 | Value *GEP = Builder.CreateInBoundsGEP(Ty: L->getType(), |
4488 | Ptr: L->getPointerOperand(), IdxList: Indices); |
4489 | Instruction *NL = Builder.CreateLoad(Ty: EV.getType(), Ptr: GEP); |
      // Whatever aliasing information we had for the original load must also
4491 | // hold for the smaller load, so propagate the annotations. |
4492 | NL->setAAMetadata(L->getAAMetadata()); |
4493 | // Returning the load directly will cause the main loop to insert it in |
4494 | // the wrong spot, so use replaceInstUsesWith(). |
4495 | return replaceInstUsesWith(I&: EV, V: NL); |
4496 | } |
4497 | } |
4498 | |
4499 | if (auto *PN = dyn_cast<PHINode>(Val: Agg)) |
4500 | if (Instruction *Res = foldOpIntoPhi(I&: EV, PN)) |
4501 | return Res; |
4502 | |
4503 | // Canonicalize extract (select Cond, TV, FV) |
4504 | // -> select cond, (extract TV), (extract FV) |
4505 | if (auto *SI = dyn_cast<SelectInst>(Val: Agg)) |
4506 | if (Instruction *R = FoldOpIntoSelect(Op&: EV, SI, /*FoldWithMultiUse=*/true)) |
4507 | return R; |
4508 | |
4509 | // We could simplify extracts from other values. Note that nested extracts may |
4510 | // already be simplified implicitly by the above: extract (extract (insert) ) |
4511 | // will be translated into extract ( insert ( extract ) ) first and then just |
4512 | // the value inserted, if appropriate. Similarly for extracts from single-use |
4513 | // loads: extract (extract (load)) will be translated to extract (load (gep)) |
4514 | // and if again single-use then via load (gep (gep)) to load (gep). |
4515 | // However, double extracts from e.g. function arguments or return values |
4516 | // aren't handled yet. |
4517 | return nullptr; |
4518 | } |
4519 | |
4520 | /// Return 'true' if the given typeinfo will match anything. |
4521 | static bool isCatchAll(EHPersonality Personality, Constant *TypeInfo) { |
4522 | switch (Personality) { |
4523 | case EHPersonality::GNU_C: |
4524 | case EHPersonality::GNU_C_SjLj: |
4525 | case EHPersonality::Rust: |
4526 | // The GCC C EH and Rust personality only exists to support cleanups, so |
4527 | // it's not clear what the semantics of catch clauses are. |
4528 | return false; |
4529 | case EHPersonality::Unknown: |
4530 | return false; |
4531 | case EHPersonality::GNU_Ada: |
4532 | // While __gnat_all_others_value will match any Ada exception, it doesn't |
4533 | // match foreign exceptions (or didn't, before gcc-4.7). |
4534 | return false; |
4535 | case EHPersonality::GNU_CXX: |
4536 | case EHPersonality::GNU_CXX_SjLj: |
4537 | case EHPersonality::GNU_ObjC: |
4538 | case EHPersonality::MSVC_X86SEH: |
4539 | case EHPersonality::MSVC_TableSEH: |
4540 | case EHPersonality::MSVC_CXX: |
4541 | case EHPersonality::CoreCLR: |
4542 | case EHPersonality::Wasm_CXX: |
4543 | case EHPersonality::XL_CXX: |
4544 | case EHPersonality::ZOS_CXX: |
4545 | return TypeInfo->isNullValue(); |
4546 | } |
4547 | llvm_unreachable("invalid enum" ); |
4548 | } |
4549 | |
static bool shorter_filter(const Value *LHS, const Value *RHS) {
  return cast<ArrayType>(Val: LHS->getType())->getNumElements() <
         cast<ArrayType>(Val: RHS->getType())->getNumElements();
}
4556 | |
4557 | Instruction *InstCombinerImpl::visitLandingPadInst(LandingPadInst &LI) { |
4558 | // The logic here should be correct for any real-world personality function. |
4559 | // However if that turns out not to be true, the offending logic can always |
4560 | // be conditioned on the personality function, like the catch-all logic is. |
4561 | EHPersonality Personality = |
4562 | classifyEHPersonality(Pers: LI.getParent()->getParent()->getPersonalityFn()); |
4563 | |
4564 | // Simplify the list of clauses, eg by removing repeated catch clauses |
4565 | // (these are often created by inlining). |
4566 | bool MakeNewInstruction = false; // If true, recreate using the following: |
4567 | SmallVector<Constant *, 16> NewClauses; // - Clauses for the new instruction; |
4568 | bool CleanupFlag = LI.isCleanup(); // - The new instruction is a cleanup. |
4569 | |
4570 | SmallPtrSet<Value *, 16> AlreadyCaught; // Typeinfos known caught already. |
4571 | for (unsigned i = 0, e = LI.getNumClauses(); i != e; ++i) { |
4572 | bool isLastClause = i + 1 == e; |
4573 | if (LI.isCatch(Idx: i)) { |
4574 | // A catch clause. |
4575 | Constant *CatchClause = LI.getClause(Idx: i); |
4576 | Constant *TypeInfo = CatchClause->stripPointerCasts(); |
4577 | |
4578 | // If we already saw this clause, there is no point in having a second |
4579 | // copy of it. |
4580 | if (AlreadyCaught.insert(Ptr: TypeInfo).second) { |
4581 | // This catch clause was not already seen. |
4582 | NewClauses.push_back(Elt: CatchClause); |
4583 | } else { |
4584 | // Repeated catch clause - drop the redundant copy. |
4585 | MakeNewInstruction = true; |
4586 | } |
4587 | |
4588 | // If this is a catch-all then there is no point in keeping any following |
4589 | // clauses or marking the landingpad as having a cleanup. |
4590 | if (isCatchAll(Personality, TypeInfo)) { |
4591 | if (!isLastClause) |
4592 | MakeNewInstruction = true; |
4593 | CleanupFlag = false; |
4594 | break; |
4595 | } |
4596 | } else { |
4597 | // A filter clause. If any of the filter elements were already caught |
4598 | // then they can be dropped from the filter. It is tempting to try to |
4599 | // exploit the filter further by saying that any typeinfo that does not |
4600 | // occur in the filter can't be caught later (and thus can be dropped). |
4601 | // However this would be wrong, since typeinfos can match without being |
4602 | // equal (for example if one represents a C++ class, and the other some |
4603 | // class derived from it). |
4604 | assert(LI.isFilter(i) && "Unsupported landingpad clause!" ); |
4605 | Constant *FilterClause = LI.getClause(Idx: i); |
4606 | ArrayType *FilterType = cast<ArrayType>(Val: FilterClause->getType()); |
4607 | unsigned NumTypeInfos = FilterType->getNumElements(); |
4608 | |
4609 | // An empty filter catches everything, so there is no point in keeping any |
4610 | // following clauses or marking the landingpad as having a cleanup. By |
4611 | // dealing with this case here the following code is made a bit simpler. |
4612 | if (!NumTypeInfos) { |
4613 | NewClauses.push_back(Elt: FilterClause); |
4614 | if (!isLastClause) |
4615 | MakeNewInstruction = true; |
4616 | CleanupFlag = false; |
4617 | break; |
4618 | } |
4619 | |
4620 | bool MakeNewFilter = false; // If true, make a new filter. |
4621 | SmallVector<Constant *, 16> NewFilterElts; // New elements. |
4622 | if (isa<ConstantAggregateZero>(Val: FilterClause)) { |
4623 | // Not an empty filter - it contains at least one null typeinfo. |
4624 | assert(NumTypeInfos > 0 && "Should have handled empty filter already!" ); |
4625 | Constant *TypeInfo = |
4626 | Constant::getNullValue(Ty: FilterType->getElementType()); |
4627 | // If this typeinfo is a catch-all then the filter can never match. |
4628 | if (isCatchAll(Personality, TypeInfo)) { |
4629 | // Throw the filter away. |
4630 | MakeNewInstruction = true; |
4631 | continue; |
4632 | } |
4633 | |
4634 | // There is no point in having multiple copies of this typeinfo, so |
4635 | // discard all but the first copy if there is more than one. |
4636 | NewFilterElts.push_back(Elt: TypeInfo); |
4637 | if (NumTypeInfos > 1) |
4638 | MakeNewFilter = true; |
4639 | } else { |
4640 | ConstantArray *Filter = cast<ConstantArray>(Val: FilterClause); |
4641 | SmallPtrSet<Value *, 16> SeenInFilter; // For uniquing the elements. |
4642 | NewFilterElts.reserve(N: NumTypeInfos); |
4643 | |
4644 | // Remove any filter elements that were already caught or that already |
4645 | // occurred in the filter. While there, see if any of the elements are |
4646 | // catch-alls. If so, the filter can be discarded. |
4647 | bool SawCatchAll = false; |
4648 | for (unsigned j = 0; j != NumTypeInfos; ++j) { |
4649 | Constant *Elt = Filter->getOperand(i_nocapture: j); |
4650 | Constant *TypeInfo = Elt->stripPointerCasts(); |
4651 | if (isCatchAll(Personality, TypeInfo)) { |
4652 | // This element is a catch-all. Bail out, noting this fact. |
4653 | SawCatchAll = true; |
4654 | break; |
4655 | } |
4656 | |
4657 | // Even if we've seen a type in a catch clause, we don't want to |
4658 | // remove it from the filter. An unexpected type handler may be |
4659 | // set up for a call site which throws an exception of the same |
4660 | // type caught. In order for the exception thrown by the unexpected |
4661 | // handler to propagate correctly, the filter must be correctly |
4662 | // described for the call site. |
4663 | // |
4664 | // Example: |
4665 | // |
4666 | // void unexpected() { throw 1;} |
4667 | // void foo() throw (int) { |
4668 | // std::set_unexpected(unexpected); |
4669 | // try { |
4670 | // throw 2.0; |
4671 | // } catch (int i) {} |
4672 | // } |
4673 | |
4674 | // There is no point in having multiple copies of the same typeinfo in |
4675 | // a filter, so only add it if we didn't already. |
4676 | if (SeenInFilter.insert(Ptr: TypeInfo).second) |
4677 | NewFilterElts.push_back(Elt: cast<Constant>(Val: Elt)); |
4678 | } |
4679 | // A filter containing a catch-all cannot match anything by definition. |
4680 | if (SawCatchAll) { |
4681 | // Throw the filter away. |
4682 | MakeNewInstruction = true; |
4683 | continue; |
4684 | } |
4685 | |
4686 | // If we dropped something from the filter, make a new one. |
4687 | if (NewFilterElts.size() < NumTypeInfos) |
4688 | MakeNewFilter = true; |
4689 | } |
4690 | if (MakeNewFilter) { |
4691 | FilterType = ArrayType::get(ElementType: FilterType->getElementType(), |
4692 | NumElements: NewFilterElts.size()); |
4693 | FilterClause = ConstantArray::get(T: FilterType, V: NewFilterElts); |
4694 | MakeNewInstruction = true; |
4695 | } |
4696 | |
4697 | NewClauses.push_back(Elt: FilterClause); |
4698 | |
4699 | // If the new filter is empty then it will catch everything so there is |
4700 | // no point in keeping any following clauses or marking the landingpad |
4701 | // as having a cleanup. The case of the original filter being empty was |
4702 | // already handled above. |
4703 | if (MakeNewFilter && !NewFilterElts.size()) { |
4704 | assert(MakeNewInstruction && "New filter but not a new instruction!" ); |
4705 | CleanupFlag = false; |
4706 | break; |
4707 | } |
4708 | } |
4709 | } |
4710 | |
4711 | // If several filters occur in a row then reorder them so that the shortest |
4712 | // filters come first (those with the smallest number of elements). This is |
4713 | // advantageous because shorter filters are more likely to match, speeding up |
4714 | // unwinding, but mostly because it increases the effectiveness of the other |
4715 | // filter optimizations below. |
4716 | for (unsigned i = 0, e = NewClauses.size(); i + 1 < e; ) { |
4717 | unsigned j; |
4718 | // Find the maximal 'j' s.t. the range [i, j) consists entirely of filters. |
4719 | for (j = i; j != e; ++j) |
4720 | if (!isa<ArrayType>(Val: NewClauses[j]->getType())) |
4721 | break; |
4722 | |
4723 | // Check whether the filters are already sorted by length. We need to know |
4724 | // if sorting them is actually going to do anything so that we only make a |
4725 | // new landingpad instruction if it does. |
4726 | for (unsigned k = i; k + 1 < j; ++k) |
4727 | if (shorter_filter(LHS: NewClauses[k+1], RHS: NewClauses[k])) { |
4728 | // Not sorted, so sort the filters now. Doing an unstable sort would be |
4729 | // correct too but reordering filters pointlessly might confuse users. |
4730 | std::stable_sort(first: NewClauses.begin() + i, last: NewClauses.begin() + j, |
4731 | comp: shorter_filter); |
4732 | MakeNewInstruction = true; |
4733 | break; |
4734 | } |
4735 | |
4736 | // Look for the next batch of filters. |
4737 | i = j + 1; |
4738 | } |
4739 | |
4740 | // If typeinfos matched if and only if equal, then the elements of a filter L |
4741 | // that occurs later than a filter F could be replaced by the intersection of |
4742 | // the elements of F and L. In reality two typeinfos can match without being |
4743 | // equal (for example if one represents a C++ class, and the other some class |
4744 | // derived from it) so it would be wrong to perform this transform in general. |
4745 | // However the transform is correct and useful if F is a subset of L. In that |
4746 | // case L can be replaced by F, and thus removed altogether since repeating a |
4747 | // filter is pointless. So here we look at all pairs of filters F and L where |
4748 | // L follows F in the list of clauses, and remove L if every element of F is |
4749 | // an element of L. This can occur when inlining C++ functions with exception |
4750 | // specifications. |
4751 | for (unsigned i = 0; i + 1 < NewClauses.size(); ++i) { |
4752 | // Examine each filter in turn. |
4753 | Value *Filter = NewClauses[i]; |
4754 | ArrayType *FTy = dyn_cast<ArrayType>(Val: Filter->getType()); |
4755 | if (!FTy) |
4756 | // Not a filter - skip it. |
4757 | continue; |
4758 | unsigned FElts = FTy->getNumElements(); |
4759 | // Examine each filter following this one. Doing this backwards means that |
4760 | // we don't have to worry about filters disappearing under us when removed. |
4761 | for (unsigned j = NewClauses.size() - 1; j != i; --j) { |
4762 | Value *LFilter = NewClauses[j]; |
4763 | ArrayType *LTy = dyn_cast<ArrayType>(Val: LFilter->getType()); |
4764 | if (!LTy) |
4765 | // Not a filter - skip it. |
4766 | continue; |
4767 | // If Filter is a subset of LFilter, i.e. every element of Filter is also |
4768 | // an element of LFilter, then discard LFilter. |
4769 | SmallVectorImpl<Constant *>::iterator J = NewClauses.begin() + j; |
4770 | // If Filter is empty then it is a subset of LFilter. |
4771 | if (!FElts) { |
4772 | // Discard LFilter. |
4773 | NewClauses.erase(CI: J); |
4774 | MakeNewInstruction = true; |
4775 | // Move on to the next filter. |
4776 | continue; |
4777 | } |
4778 | unsigned LElts = LTy->getNumElements(); |
4779 | // If Filter is longer than LFilter then it cannot be a subset of it. |
4780 | if (FElts > LElts) |
4781 | // Move on to the next filter. |
4782 | continue; |
4783 | // At this point we know that LFilter has at least one element. |
4784 | if (isa<ConstantAggregateZero>(Val: LFilter)) { // LFilter only contains zeros. |
4785 | // Filter is a subset of LFilter iff Filter contains only zeros (as we |
4786 | // already know that Filter is not longer than LFilter). |
4787 | if (isa<ConstantAggregateZero>(Val: Filter)) { |
4788 | assert(FElts <= LElts && "Should have handled this case earlier!" ); |
4789 | // Discard LFilter. |
4790 | NewClauses.erase(CI: J); |
4791 | MakeNewInstruction = true; |
4792 | } |
4793 | // Move on to the next filter. |
4794 | continue; |
4795 | } |
4796 | ConstantArray *LArray = cast<ConstantArray>(Val: LFilter); |
4797 | if (isa<ConstantAggregateZero>(Val: Filter)) { // Filter only contains zeros. |
4798 | // Since Filter is non-empty and contains only zeros, it is a subset of |
4799 | // LFilter iff LFilter contains a zero. |
4800 | assert(FElts > 0 && "Should have eliminated the empty filter earlier!" ); |
4801 | for (unsigned l = 0; l != LElts; ++l) |
4802 | if (LArray->getOperand(i_nocapture: l)->isNullValue()) { |
4803 | // LFilter contains a zero - discard it. |
4804 | NewClauses.erase(CI: J); |
4805 | MakeNewInstruction = true; |
4806 | break; |
4807 | } |
4808 | // Move on to the next filter. |
4809 | continue; |
4810 | } |
4811 | // At this point we know that both filters are ConstantArrays. Loop over |
4812 | // operands to see whether every element of Filter is also an element of |
4813 | // LFilter. Since filters tend to be short this is probably faster than |
4814 | // using a method that scales nicely. |
4815 | ConstantArray *FArray = cast<ConstantArray>(Val: Filter); |
4816 | bool AllFound = true; |
4817 | for (unsigned f = 0; f != FElts; ++f) { |
4818 | Value *FTypeInfo = FArray->getOperand(i_nocapture: f)->stripPointerCasts(); |
4819 | AllFound = false; |
4820 | for (unsigned l = 0; l != LElts; ++l) { |
4821 | Value *LTypeInfo = LArray->getOperand(i_nocapture: l)->stripPointerCasts(); |
4822 | if (LTypeInfo == FTypeInfo) { |
4823 | AllFound = true; |
4824 | break; |
4825 | } |
4826 | } |
4827 | if (!AllFound) |
4828 | break; |
4829 | } |
4830 | if (AllFound) { |
4831 | // Discard LFilter. |
4832 | NewClauses.erase(CI: J); |
4833 | MakeNewInstruction = true; |
4834 | } |
4835 | // Move on to the next filter. |
4836 | } |
4837 | } |
4838 | |
4839 | // If we changed any of the clauses, replace the old landingpad instruction |
4840 | // with a new one. |
4841 | if (MakeNewInstruction) { |
4842 | LandingPadInst *NLI = LandingPadInst::Create(RetTy: LI.getType(), |
4843 | NumReservedClauses: NewClauses.size()); |
4844 | for (Constant *C : NewClauses) |
4845 | NLI->addClause(ClauseVal: C); |
4846 | // A landing pad with no clauses must have the cleanup flag set. It is |
4847 | // theoretically possible, though highly unlikely, that we eliminated all |
4848 | // clauses. If so, force the cleanup flag to true. |
4849 | if (NewClauses.empty()) |
4850 | CleanupFlag = true; |
4851 | NLI->setCleanup(CleanupFlag); |
4852 | return NLI; |
4853 | } |
4854 | |
4855 | // Even if none of the clauses changed, we may nonetheless have understood |
4856 | // that the cleanup flag is pointless. Clear it if so. |
4857 | if (LI.isCleanup() != CleanupFlag) { |
4858 | assert(!CleanupFlag && "Adding a cleanup, not removing one?!" ); |
4859 | LI.setCleanup(CleanupFlag); |
4860 | return &LI; |
4861 | } |
4862 | |
4863 | return nullptr; |
4864 | } |
4865 | |
4866 | Value * |
4867 | InstCombinerImpl::pushFreezeToPreventPoisonFromPropagating(FreezeInst &OrigFI) { |
4868 | // Try to push freeze through instructions that propagate but don't produce |
  // poison as far as possible. If the operand of the freeze 1) has a single
  // use, 2) does not itself produce poison, and 3) has at most one operand
  // that is not guaranteed non-poison, then push the freeze through to that
  // operand. The actual transform is as follows.
  //   Op1 = ...                        ; Op1 may be poison
  //   Op0 = Inst(Op1, NonPoisonOps...) ; Op0 has only one use and Op1 is its
  //                                    ; only potentially-poison operand
4877 | // ... = Freeze(Op0) |
4878 | // => |
4879 | // Op1 = ... |
4880 | // Op1.fr = Freeze(Op1) |
4881 | // ... = Inst(Op1.fr, NonPoisonOps...) |
4882 | auto *OrigOp = OrigFI.getOperand(i_nocapture: 0); |
4883 | auto *OrigOpInst = dyn_cast<Instruction>(Val: OrigOp); |
4884 | |
  // While we could change the other users of OrigOp to use freeze(OrigOp),
  // that would reduce their optimization potential, so only do this if OrigOp
  // is used solely by the freeze.
4888 | if (!OrigOpInst || !OrigOpInst->hasOneUse() || isa<PHINode>(Val: OrigOp)) |
4889 | return nullptr; |
4890 | |
4891 | // We can't push the freeze through an instruction which can itself create |
4892 | // poison. If the only source of new poison is flags, we can simply |
4893 | // strip them (since we know the only use is the freeze and nothing can |
4894 | // benefit from them.) |
4895 | if (canCreateUndefOrPoison(Op: cast<Operator>(Val: OrigOp), |
4896 | /*ConsiderFlagsAndMetadata*/ false)) |
4897 | return nullptr; |
4898 | |
4899 | // If operand is guaranteed not to be poison, there is no need to add freeze |
4900 | // to the operand. So we first find the operand that is not guaranteed to be |
4901 | // poison. |
4902 | Use *MaybePoisonOperand = nullptr; |
4903 | for (Use &U : OrigOpInst->operands()) { |
4904 | if (isa<MetadataAsValue>(Val: U.get()) || |
4905 | isGuaranteedNotToBeUndefOrPoison(V: U.get())) |
4906 | continue; |
4907 | if (!MaybePoisonOperand) |
4908 | MaybePoisonOperand = &U; |
4909 | else |
4910 | return nullptr; |
4911 | } |
4912 | |
4913 | OrigOpInst->dropPoisonGeneratingAnnotations(); |
4914 | |
4915 | // If all operands are guaranteed to be non-poison, we can drop freeze. |
4916 | if (!MaybePoisonOperand) |
4917 | return OrigOp; |
4918 | |
4919 | Builder.SetInsertPoint(OrigOpInst); |
4920 | auto *FrozenMaybePoisonOperand = Builder.CreateFreeze( |
4921 | V: MaybePoisonOperand->get(), Name: MaybePoisonOperand->get()->getName() + ".fr" ); |
4922 | |
4923 | replaceUse(U&: *MaybePoisonOperand, NewValue: FrozenMaybePoisonOperand); |
4924 | return OrigOp; |
4925 | } |
4926 | |
4927 | Instruction *InstCombinerImpl::foldFreezeIntoRecurrence(FreezeInst &FI, |
4928 | PHINode *PN) { |
4929 | // Detect whether this is a recurrence with a start value and some number of |
4930 | // backedge values. We'll check whether we can push the freeze through the |
4931 | // backedge values (possibly dropping poison flags along the way) until we |
4932 | // reach the phi again. In that case, we can move the freeze to the start |
4933 | // value. |
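  // For example (illustrative):
  //   %iv = phi i64 [ %start, %entry ], [ %iv.next, %loop ]
  //   %iv.next = add nuw i64 %iv, 1
  //   %fr = freeze i64 %iv
  // The freeze can be removed by freezing %start instead and dropping the
  // nuw flag on the add, since the flag could reintroduce poison.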
4934 | Use *StartU = nullptr; |
4935 | SmallVector<Value *> Worklist; |
4936 | for (Use &U : PN->incoming_values()) { |
4937 | if (DT.dominates(A: PN->getParent(), B: PN->getIncomingBlock(U))) { |
4938 | // Add backedge value to worklist. |
4939 | Worklist.push_back(Elt: U.get()); |
4940 | continue; |
4941 | } |
4942 | |
4943 | // Don't bother handling multiple start values. |
4944 | if (StartU) |
4945 | return nullptr; |
4946 | StartU = &U; |
4947 | } |
4948 | |
4949 | if (!StartU || Worklist.empty()) |
4950 | return nullptr; // Not a recurrence. |
4951 | |
4952 | Value *StartV = StartU->get(); |
4953 | BasicBlock *StartBB = PN->getIncomingBlock(U: *StartU); |
4954 | bool StartNeedsFreeze = !isGuaranteedNotToBeUndefOrPoison(V: StartV); |
  // We can't insert a freeze if the start value is defined by that block's
  // terminator (e.g. an invoke), since there would be no insertion point for
  // the freeze after its definition within the block.
4957 | if (StartNeedsFreeze && StartBB->getTerminator() == StartV) |
4958 | return nullptr; |
4959 | |
4960 | SmallPtrSet<Value *, 32> Visited; |
4961 | SmallVector<Instruction *> DropFlags; |
4962 | while (!Worklist.empty()) { |
4963 | Value *V = Worklist.pop_back_val(); |
4964 | if (!Visited.insert(Ptr: V).second) |
4965 | continue; |
4966 | |
4967 | if (Visited.size() > 32) |
4968 | return nullptr; // Limit the total number of values we inspect. |
4969 | |
4970 | // Assume that PN is non-poison, because it will be after the transform. |
4971 | if (V == PN || isGuaranteedNotToBeUndefOrPoison(V)) |
4972 | continue; |
4973 | |
4974 | Instruction *I = dyn_cast<Instruction>(Val: V); |
4975 | if (!I || canCreateUndefOrPoison(Op: cast<Operator>(Val: I), |
4976 | /*ConsiderFlagsAndMetadata*/ false)) |
4977 | return nullptr; |
4978 | |
4979 | DropFlags.push_back(Elt: I); |
4980 | append_range(C&: Worklist, R: I->operands()); |
4981 | } |
4982 | |
4983 | for (Instruction *I : DropFlags) |
4984 | I->dropPoisonGeneratingAnnotations(); |
4985 | |
4986 | if (StartNeedsFreeze) { |
4987 | Builder.SetInsertPoint(StartBB->getTerminator()); |
4988 | Value *FrozenStartV = Builder.CreateFreeze(V: StartV, |
4989 | Name: StartV->getName() + ".fr" ); |
4990 | replaceUse(U&: *StartU, NewValue: FrozenStartV); |
4991 | } |
4992 | return replaceInstUsesWith(I&: FI, V: PN); |
4993 | } |
4994 | |
4995 | bool InstCombinerImpl::freezeOtherUses(FreezeInst &FI) { |
4996 | Value *Op = FI.getOperand(i_nocapture: 0); |
4997 | |
4998 | if (isa<Constant>(Val: Op) || Op->hasOneUse()) |
4999 | return false; |
5000 | |
5001 | // Move the freeze directly after the definition of its operand, so that |
5002 | // it dominates the maximum number of uses. Note that it may not dominate |
5003 | // *all* uses if the operand is an invoke/callbr and the use is in a phi on |
5004 | // the normal/default destination. This is why the domination check in the |
5005 | // replacement below is still necessary. |
5006 | BasicBlock::iterator MoveBefore; |
5007 | if (isa<Argument>(Val: Op)) { |
5008 | MoveBefore = |
5009 | FI.getFunction()->getEntryBlock().getFirstNonPHIOrDbgOrAlloca(); |
5010 | } else { |
5011 | auto MoveBeforeOpt = cast<Instruction>(Val: Op)->getInsertionPointAfterDef(); |
5012 | if (!MoveBeforeOpt) |
5013 | return false; |
5014 | MoveBefore = *MoveBeforeOpt; |
5015 | } |
5016 | |
5017 | // Re-point iterator to come after any debug-info records. |
5018 | MoveBefore.setHeadBit(false); |
5019 | |
5020 | bool Changed = false; |
5021 | if (&FI != &*MoveBefore) { |
5022 | FI.moveBefore(BB&: *MoveBefore->getParent(), I: MoveBefore); |
5023 | Changed = true; |
5024 | } |
5025 | |
5026 | Op->replaceUsesWithIf(New: &FI, ShouldReplace: [&](Use &U) -> bool { |
5027 | bool Dominates = DT.dominates(Def: &FI, U); |
5028 | Changed |= Dominates; |
5029 | return Dominates; |
5030 | }); |
5031 | |
5032 | return Changed; |
5033 | } |
5034 | |
5035 | // Check if any direct or bitcast user of this value is a shuffle instruction. |
5036 | static bool isUsedWithinShuffleVector(Value *V) { |
5037 | for (auto *U : V->users()) { |
5038 | if (isa<ShuffleVectorInst>(Val: U)) |
5039 | return true; |
5040 | else if (match(V: U, P: m_BitCast(Op: m_Specific(V))) && isUsedWithinShuffleVector(V: U)) |
5041 | return true; |
5042 | } |
5043 | return false; |
5044 | } |
5045 | |
5046 | Instruction *InstCombinerImpl::visitFreeze(FreezeInst &I) { |
5047 | Value *Op0 = I.getOperand(i_nocapture: 0); |
5048 | |
5049 | if (Value *V = simplifyFreezeInst(Op: Op0, Q: SQ.getWithInstruction(I: &I))) |
5050 | return replaceInstUsesWith(I, V); |
5051 | |
5052 | // freeze (phi const, x) --> phi const, (freeze x) |
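  // E.g. (illustrative only):
  //   %p  = phi i32 [ 1, %bb0 ], [ %x, %bb1 ]
  //   %fr = freeze i32 %p
  // becomes
  //   %x.fr = freeze i32 %x
  //   %p    = phi i32 [ 1, %bb0 ], [ %x.fr, %bb1 ]
  // so only the non-constant incoming value needs to be frozen.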
5053 | if (auto *PN = dyn_cast<PHINode>(Val: Op0)) { |
5054 | if (Instruction *NV = foldOpIntoPhi(I, PN)) |
5055 | return NV; |
5056 | if (Instruction *NV = foldFreezeIntoRecurrence(FI&: I, PN)) |
5057 | return NV; |
5058 | } |
5059 | |
5060 | if (Value *NI = pushFreezeToPreventPoisonFromPropagating(OrigFI&: I)) |
5061 | return replaceInstUsesWith(I, V: NI); |
5062 | |
5063 | // If I is freeze(undef), check its uses and fold it to a fixed constant. |
5064 | // - or: pick -1 |
5065 | // - select's condition: if the true value is constant, choose it by making |
5066 | // the condition true. |
5067 | // - default: pick 0 |
5068 | // |
5069 | // Note that this transform is intentionally done here rather than |
5070 | // via an analysis in InstSimplify or at individual user sites. That is |
5071 | // because we must produce the same value for all uses of the freeze - |
5072 | // it's the reason "freeze" exists! |
5073 | // |
5074 | // TODO: This could use getBinopAbsorber() / getBinopIdentity() to avoid |
5075 | // duplicating logic for binops at least. |
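  // For instance (illustrative only), if the only user is
  //   %v = or i32 %fr, %x
  // then materializing the frozen undef as -1 lets the 'or' fold away to -1;
  // when different users prefer different constants we fall back to 0.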
5076 | auto getUndefReplacement = [&](Type *Ty) { |
5077 | Value *BestValue = nullptr; |
5078 | Value *NullValue = Constant::getNullValue(Ty); |
5079 | for (const auto *U : I.users()) { |
5080 | Value *V = NullValue; |
5081 | if (match(V: U, P: m_Or(L: m_Value(), R: m_Value()))) |
5082 | V = ConstantInt::getAllOnesValue(Ty); |
5083 | else if (match(V: U, P: m_Select(C: m_Specific(V: &I), L: m_Constant(), R: m_Value()))) |
5084 | V = ConstantInt::getTrue(Ty); |
5085 | else if (match(V: U, P: m_c_Select(L: m_Specific(V: &I), R: m_Value(V)))) { |
5086 | if (!isGuaranteedNotToBeUndefOrPoison(V, AC: &AC, CtxI: &I, DT: &DT)) |
5087 | V = NullValue; |
5088 | } |
5089 | |
5090 | if (!BestValue) |
5091 | BestValue = V; |
5092 | else if (BestValue != V) |
5093 | BestValue = NullValue; |
5094 | } |
5095 | assert(BestValue && "Must have at least one use" ); |
5096 | return BestValue; |
5097 | }; |
5098 | |
5099 | if (match(V: Op0, P: m_Undef())) { |
5100 | // Don't fold freeze(undef/poison) if it's used as a vector operand in |
5101 | // a shuffle. This may improve codegen for shuffles that allow |
5102 | // unspecified inputs. |
5103 | if (isUsedWithinShuffleVector(V: &I)) |
5104 | return nullptr; |
5105 | return replaceInstUsesWith(I, V: getUndefReplacement(I.getType())); |
5106 | } |
5107 | |
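  // For a frozen vector constant, replace any undef/poison elements with the
  // first well-defined element (or zero if there is none), e.g.
  // (illustrative only):
  //   freeze <2 x i32> <i32 undef, i32 7>  -->  <i32 7, i32 7>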
5108 | auto getFreezeVectorReplacement = [](Constant *C) -> Constant * { |
5109 | Type *Ty = C->getType(); |
5110 | auto *VTy = dyn_cast<FixedVectorType>(Val: Ty); |
5111 | if (!VTy) |
5112 | return nullptr; |
5113 | unsigned NumElts = VTy->getNumElements(); |
5114 | Constant *BestValue = Constant::getNullValue(Ty: VTy->getScalarType()); |
5115 | for (unsigned i = 0; i != NumElts; ++i) { |
5116 | Constant *EltC = C->getAggregateElement(Elt: i); |
5117 | if (EltC && !match(V: EltC, P: m_Undef())) { |
5118 | BestValue = EltC; |
5119 | break; |
5120 | } |
5121 | } |
5122 | return Constant::replaceUndefsWith(C, Replacement: BestValue); |
5123 | }; |
5124 | |
5125 | Constant *C; |
5126 | if (match(V: Op0, P: m_Constant(C)) && C->containsUndefOrPoisonElement() && |
5127 | !C->containsConstantExpression()) { |
5128 | if (Constant *Repl = getFreezeVectorReplacement(C)) |
5129 | return replaceInstUsesWith(I, V: Repl); |
5130 | } |
5131 | |
5132 | // Replace uses of Op with freeze(Op). |
5133 | if (freezeOtherUses(FI&: I)) |
5134 | return &I; |
5135 | |
5136 | return nullptr; |
5137 | } |
5138 | |
5139 | /// Check for the case where the call writes to an otherwise dead alloca. This
5140 | /// shows up for unused out-params in idiomatic C/C++ code. Note that this |
5141 | /// helper *only* analyzes the write; doesn't check any other legality aspect. |
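/// A typical (illustrative) source pattern is an ignored out-parameter, e.g.
///   int exp; (void)frexp(x, &exp);
/// where the alloca backing 'exp' has no users other than the call, so the
/// write done by the call can never be observed.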
5142 | static bool SoleWriteToDeadLocal(Instruction *I, TargetLibraryInfo &TLI) { |
5143 | auto *CB = dyn_cast<CallBase>(Val: I); |
5144 | if (!CB) |
5145 | // TODO: handle e.g. store to alloca here - only worth doing if we extend |
5146 | // to allow reload along used path as described below. Otherwise, this |
5147 | // is simply a store to a dead allocation which will be removed. |
5148 | return false; |
5149 | std::optional<MemoryLocation> Dest = MemoryLocation::getForDest(CI: CB, TLI); |
5150 | if (!Dest) |
5151 | return false; |
5152 | auto *AI = dyn_cast<AllocaInst>(Val: getUnderlyingObject(V: Dest->Ptr)); |
5153 | if (!AI) |
5154 | // TODO: allow malloc? |
5155 | return false; |
5156 | // TODO: allow memory access dominated by move point? Note that since AI |
5157 | // could have a reference to itself captured by the call, we would need to |
5158 | // account for cycles in doing so. |
5159 | SmallVector<const User *> AllocaUsers; |
5160 | SmallPtrSet<const User *, 4> Visited; |
5161 | auto pushUsers = [&](const Instruction &I) { |
5162 | for (const User *U : I.users()) { |
5163 | if (Visited.insert(Ptr: U).second) |
5164 | AllocaUsers.push_back(Elt: U); |
5165 | } |
5166 | }; |
5167 | pushUsers(*AI); |
5168 | while (!AllocaUsers.empty()) { |
5169 | auto *UserI = cast<Instruction>(Val: AllocaUsers.pop_back_val()); |
5170 | if (isa<GetElementPtrInst>(Val: UserI) || isa<AddrSpaceCastInst>(Val: UserI)) { |
5171 | pushUsers(*UserI); |
5172 | continue; |
5173 | } |
5174 | if (UserI == CB) |
5175 | continue; |
5176 | // TODO: support lifetime.start/end here |
5177 | return false; |
5178 | } |
5179 | return true; |
5180 | } |
5181 | |
5182 | /// Try to move the specified instruction from its current block into the |
5183 | /// beginning of DestBlock, which can only happen if it's safe to move the |
5184 | /// instruction past all of the instructions between it and the end of its |
5185 | /// block. |
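/// For example (illustrative only):
///   bb:
///     %v = add i32 %a, %b
///     br i1 %c, label %use, label %other
///   use:                                  ; unique predecessor is %bb
///     call void @g(i32 %v)
/// Here %v can be sunk into %use so it is not computed on the path to %other.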
5186 | bool InstCombinerImpl::tryToSinkInstruction(Instruction *I, |
5187 | BasicBlock *DestBlock) { |
5188 | BasicBlock *SrcBlock = I->getParent(); |
5189 | |
5190 |   // Cannot move control-flow-involving instructions, volatile loads, vaarg, etc.
5191 | if (isa<PHINode>(Val: I) || I->isEHPad() || I->mayThrow() || !I->willReturn() || |
5192 | I->isTerminator()) |
5193 | return false; |
5194 | |
5195 | // Do not sink static or dynamic alloca instructions. Static allocas must |
5196 | // remain in the entry block, and dynamic allocas must not be sunk in between |
5197 | // a stacksave / stackrestore pair, which would incorrectly shorten its |
5198 | // lifetime. |
5199 | if (isa<AllocaInst>(Val: I)) |
5200 | return false; |
5201 | |
5202 | // Do not sink into catchswitch blocks. |
5203 | if (isa<CatchSwitchInst>(Val: DestBlock->getTerminator())) |
5204 | return false; |
5205 | |
5206 | // Do not sink convergent call instructions. |
5207 | if (auto *CI = dyn_cast<CallInst>(Val: I)) { |
5208 | if (CI->isConvergent()) |
5209 | return false; |
5210 | } |
5211 | |
5212 |   // Unless we can prove that the memory write isn't visible except on the
5213 | // path we're sinking to, we must bail. |
5214 | if (I->mayWriteToMemory()) { |
5215 | if (!SoleWriteToDeadLocal(I, TLI)) |
5216 | return false; |
5217 | } |
5218 | |
5219 |   // We can only sink load instructions if there is nothing between the load and
5220 |   // the end of the block that could change the value.
5221 | if (I->mayReadFromMemory() && |
5222 | !I->hasMetadata(KindID: LLVMContext::MD_invariant_load)) { |
5223 | // We don't want to do any sophisticated alias analysis, so we only check |
5224 | // the instructions after I in I's parent block if we try to sink to its |
5225 | // successor block. |
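    // For example (illustrative only), we must not sink %v in
    //   %v = load i32, ptr %p
    //   store i32 1, ptr %p
    //   br label %succ
    // because the intervening store may change the loaded value.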
5226 | if (DestBlock->getUniquePredecessor() != I->getParent()) |
5227 | return false; |
5228 | for (BasicBlock::iterator Scan = std::next(x: I->getIterator()), |
5229 | E = I->getParent()->end(); |
5230 | Scan != E; ++Scan) |
5231 | if (Scan->mayWriteToMemory()) |
5232 | return false; |
5233 | } |
5234 | |
5235 | I->dropDroppableUses(ShouldDrop: [&](const Use *U) { |
5236 | auto *I = dyn_cast<Instruction>(Val: U->getUser()); |
5237 | if (I && I->getParent() != DestBlock) { |
5238 | Worklist.add(I); |
5239 | return true; |
5240 | } |
5241 | return false; |
5242 | }); |
5243 | /// FIXME: We could remove droppable uses that are not dominated by |
5244 | /// the new position. |
5245 | |
5246 | BasicBlock::iterator InsertPos = DestBlock->getFirstInsertionPt(); |
5247 | I->moveBefore(BB&: *DestBlock, I: InsertPos); |
5248 | ++NumSunkInst; |
5249 | |
5250 |   // Also sink all related debug uses from the source basic block. Otherwise we
5251 |   // get a debug use before the def. Attempt to salvage debug uses first, to
5252 |   // maximise the range over which variables have a location. If we cannot
5253 |   // salvage, then mark the location undef: we know it was supposed to receive a
5254 |   // new location here, but that computation has been sunk.
5255 | SmallVector<DbgVariableIntrinsic *, 2> DbgUsers; |
5256 | SmallVector<DbgVariableRecord *, 2> DbgVariableRecords; |
5257 | findDbgUsers(DbgInsts&: DbgUsers, V: I, DbgVariableRecords: &DbgVariableRecords); |
5258 | if (!DbgUsers.empty()) |
5259 | tryToSinkInstructionDbgValues(I, InsertPos, SrcBlock, DestBlock, DbgUsers); |
5260 | if (!DbgVariableRecords.empty()) |
5261 | tryToSinkInstructionDbgVariableRecords(I, InsertPos, SrcBlock, DestBlock, |
5262 | DPUsers&: DbgVariableRecords); |
5263 | |
5264 |   // PS: there are numerous flaws with this behaviour, not least that right now
5265 |   // assignments can be re-ordered past other assignments to the same variable
5266 |   // if they use different Values. Creating more undef assignments can never be
5267 |   // undone. And salvaging all users outside of this block can unnecessarily
5268 |   // alter the lifetime of the live value that the variable refers to.
5269 |   // Some of these things could be resolved by tolerating debug use-before-defs
5270 |   // in LLVM-IR, but that depends on the instruction-referencing CodeGen backend
5271 |   // being adopted by more architectures.
5272 | |
5273 | return true; |
5274 | } |
5275 | |
5276 | void InstCombinerImpl::tryToSinkInstructionDbgValues( |
5277 | Instruction *I, BasicBlock::iterator InsertPos, BasicBlock *SrcBlock, |
5278 | BasicBlock *DestBlock, SmallVectorImpl<DbgVariableIntrinsic *> &DbgUsers) { |
5279 | // For all debug values in the destination block, the sunk instruction |
5280 | // will still be available, so they do not need to be dropped. |
5281 | SmallVector<DbgVariableIntrinsic *, 2> DbgUsersToSalvage; |
5282 | for (auto &DbgUser : DbgUsers) |
5283 | if (DbgUser->getParent() != DestBlock) |
5284 | DbgUsersToSalvage.push_back(Elt: DbgUser); |
5285 | |
5286 | // Process the sinking DbgUsersToSalvage in reverse order, as we only want |
5287 | // to clone the last appearing debug intrinsic for each given variable. |
5288 | SmallVector<DbgVariableIntrinsic *, 2> DbgUsersToSink; |
5289 | for (DbgVariableIntrinsic *DVI : DbgUsersToSalvage) |
5290 | if (DVI->getParent() == SrcBlock) |
5291 | DbgUsersToSink.push_back(Elt: DVI); |
5292 | llvm::sort(C&: DbgUsersToSink, |
5293 | Comp: [](auto *A, auto *B) { return B->comesBefore(A); }); |
5294 | |
5295 | SmallVector<DbgVariableIntrinsic *, 2> DIIClones; |
5296 | SmallSet<DebugVariable, 4> SunkVariables; |
5297 | for (auto *User : DbgUsersToSink) { |
5298 | // A dbg.declare instruction should not be cloned, since there can only be |
5299 | // one per variable fragment. It should be left in the original place |
5300 | // because the sunk instruction is not an alloca (otherwise we could not be |
5301 | // here). |
5302 | if (isa<DbgDeclareInst>(Val: User)) |
5303 | continue; |
5304 | |
5305 | DebugVariable DbgUserVariable = |
5306 | DebugVariable(User->getVariable(), User->getExpression(), |
5307 | User->getDebugLoc()->getInlinedAt()); |
5308 | |
5309 | if (!SunkVariables.insert(V: DbgUserVariable).second) |
5310 | continue; |
5311 | |
5312 | // Leave dbg.assign intrinsics in their original positions and there should |
5313 | // be no need to insert a clone. |
5314 | if (isa<DbgAssignIntrinsic>(Val: User)) |
5315 | continue; |
5316 | |
5317 | DIIClones.emplace_back(Args: cast<DbgVariableIntrinsic>(Val: User->clone())); |
5318 | if (isa<DbgDeclareInst>(Val: User) && isa<CastInst>(Val: I)) |
5319 | DIIClones.back()->replaceVariableLocationOp(OldValue: I, NewValue: I->getOperand(i: 0)); |
5320 | LLVM_DEBUG(dbgs() << "CLONE: " << *DIIClones.back() << '\n'); |
5321 | } |
5322 | |
5323 | // Perform salvaging without the clones, then sink the clones. |
5324 | if (!DIIClones.empty()) { |
5325 | salvageDebugInfoForDbgValues(I&: *I, Insns: DbgUsersToSalvage, DPInsns: {}); |
5326 | // The clones are in reverse order of original appearance, reverse again to |
5327 | // maintain the original order. |
5328 | for (auto &DIIClone : llvm::reverse(C&: DIIClones)) { |
5329 | DIIClone->insertBefore(InsertPos); |
5330 | LLVM_DEBUG(dbgs() << "SINK: " << *DIIClone << '\n'); |
5331 | } |
5332 | } |
5333 | } |
5334 | |
5335 | void InstCombinerImpl::tryToSinkInstructionDbgVariableRecords( |
5336 | Instruction *I, BasicBlock::iterator InsertPos, BasicBlock *SrcBlock, |
5337 | BasicBlock *DestBlock, |
5338 | SmallVectorImpl<DbgVariableRecord *> &DbgVariableRecords) { |
5339 | // Implementation of tryToSinkInstructionDbgValues, but for the |
5340 | // DbgVariableRecord of variable assignments rather than dbg.values. |
5341 | |
5342 | // Fetch all DbgVariableRecords not already in the destination. |
5343 | SmallVector<DbgVariableRecord *, 2> DbgVariableRecordsToSalvage; |
5344 | for (auto &DVR : DbgVariableRecords) |
5345 | if (DVR->getParent() != DestBlock) |
5346 | DbgVariableRecordsToSalvage.push_back(Elt: DVR); |
5347 | |
5348 | // Fetch a second collection, of DbgVariableRecords in the source block that |
5349 | // we're going to sink. |
5350 | SmallVector<DbgVariableRecord *> DbgVariableRecordsToSink; |
5351 | for (DbgVariableRecord *DVR : DbgVariableRecordsToSalvage) |
5352 | if (DVR->getParent() == SrcBlock) |
5353 | DbgVariableRecordsToSink.push_back(Elt: DVR); |
5354 | |
5355 | // Sort DbgVariableRecords according to their position in the block. This is a |
5356 | // partial order: DbgVariableRecords attached to different instructions will |
5357 | // be ordered by the instruction order, but DbgVariableRecords attached to the |
5358 | // same instruction won't have an order. |
5359 | auto Order = [](DbgVariableRecord *A, DbgVariableRecord *B) -> bool { |
5360 | return B->getInstruction()->comesBefore(Other: A->getInstruction()); |
5361 | }; |
5362 | llvm::stable_sort(Range&: DbgVariableRecordsToSink, C: Order); |
5363 | |
5364 | // If there are two assignments to the same variable attached to the same |
5365 | // instruction, the ordering between the two assignments is important. Scan |
5366 | // for this (rare) case and establish which is the last assignment. |
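  // For example (schematically), if the same instruction carries
  //   #dbg_value(%a, "x", ...)
  //   #dbg_value(%b, "x", ...)
  // then only the later assignment to "x" should be cloned and sunk.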
5367 | using InstVarPair = std::pair<const Instruction *, DebugVariable>; |
5368 | SmallDenseMap<InstVarPair, DbgVariableRecord *> FilterOutMap; |
5369 | if (DbgVariableRecordsToSink.size() > 1) { |
5370 | SmallDenseMap<InstVarPair, unsigned> CountMap; |
5371 | // Count how many assignments to each variable there is per instruction. |
5372 | for (DbgVariableRecord *DVR : DbgVariableRecordsToSink) { |
5373 | DebugVariable DbgUserVariable = |
5374 | DebugVariable(DVR->getVariable(), DVR->getExpression(), |
5375 | DVR->getDebugLoc()->getInlinedAt()); |
5376 | CountMap[std::make_pair(x: DVR->getInstruction(), y&: DbgUserVariable)] += 1; |
5377 | } |
5378 | |
5379 | // If there are any instructions with two assignments, add them to the |
5380 | // FilterOutMap to record that they need extra filtering. |
5381 | SmallPtrSet<const Instruction *, 4> DupSet; |
5382 | for (auto It : CountMap) { |
5383 | if (It.second > 1) { |
5384 | FilterOutMap[It.first] = nullptr; |
5385 | DupSet.insert(Ptr: It.first.first); |
5386 | } |
5387 | } |
5388 | |
5389 | // For all instruction/variable pairs needing extra filtering, find the |
5390 | // latest assignment. |
5391 | for (const Instruction *Inst : DupSet) { |
5392 | for (DbgVariableRecord &DVR : |
5393 | llvm::reverse(C: filterDbgVars(R: Inst->getDbgRecordRange()))) { |
5394 | DebugVariable DbgUserVariable = |
5395 | DebugVariable(DVR.getVariable(), DVR.getExpression(), |
5396 | DVR.getDebugLoc()->getInlinedAt()); |
5397 | auto FilterIt = |
5398 | FilterOutMap.find(Val: std::make_pair(x&: Inst, y&: DbgUserVariable)); |
5399 | if (FilterIt == FilterOutMap.end()) |
5400 | continue; |
5401 | if (FilterIt->second != nullptr) |
5402 | continue; |
5403 | FilterIt->second = &DVR; |
5404 | } |
5405 | } |
5406 | } |
5407 | |
5408 |   // Perform cloning of the DbgVariableRecords that we plan on sinking; filter
5409 |   // out any duplicate assignments identified above.
5410 | SmallVector<DbgVariableRecord *, 2> DVRClones; |
5411 | SmallSet<DebugVariable, 4> SunkVariables; |
5412 | for (DbgVariableRecord *DVR : DbgVariableRecordsToSink) { |
5413 | if (DVR->Type == DbgVariableRecord::LocationType::Declare) |
5414 | continue; |
5415 | |
5416 | DebugVariable DbgUserVariable = |
5417 | DebugVariable(DVR->getVariable(), DVR->getExpression(), |
5418 | DVR->getDebugLoc()->getInlinedAt()); |
5419 | |
5420 | // For any variable where there were multiple assignments in the same place, |
5421 | // ignore all but the last assignment. |
5422 | if (!FilterOutMap.empty()) { |
5423 | InstVarPair IVP = std::make_pair(x: DVR->getInstruction(), y&: DbgUserVariable); |
5424 | auto It = FilterOutMap.find(Val: IVP); |
5425 | |
5426 | // Filter out. |
5427 | if (It != FilterOutMap.end() && It->second != DVR) |
5428 | continue; |
5429 | } |
5430 | |
5431 | if (!SunkVariables.insert(V: DbgUserVariable).second) |
5432 | continue; |
5433 | |
5434 | if (DVR->isDbgAssign()) |
5435 | continue; |
5436 | |
5437 | DVRClones.emplace_back(Args: DVR->clone()); |
5438 | LLVM_DEBUG(dbgs() << "CLONE: " << *DVRClones.back() << '\n'); |
5439 | } |
5440 | |
5441 | // Perform salvaging without the clones, then sink the clones. |
5442 | if (DVRClones.empty()) |
5443 | return; |
5444 | |
5445 | salvageDebugInfoForDbgValues(I&: *I, Insns: {}, DPInsns: DbgVariableRecordsToSalvage); |
5446 | |
5447 | // The clones are in reverse order of original appearance. Assert that the |
5448 | // head bit is set on the iterator as we _should_ have received it via |
5449 | // getFirstInsertionPt. Inserting like this will reverse the clone order as |
5450 | // we'll repeatedly insert at the head, such as: |
5451 | // DVR-3 (third insertion goes here) |
5452 | // DVR-2 (second insertion goes here) |
5453 | // DVR-1 (first insertion goes here) |
5454 | // Any-Prior-DVRs |
5455 | // InsertPtInst |
5456 | assert(InsertPos.getHeadBit()); |
5457 | for (DbgVariableRecord *DVRClone : DVRClones) { |
5458 | InsertPos->getParent()->insertDbgRecordBefore(DR: DVRClone, Here: InsertPos); |
5459 | LLVM_DEBUG(dbgs() << "SINK: " << *DVRClone << '\n'); |
5460 | } |
5461 | } |
5462 | |
5463 | bool InstCombinerImpl::run() { |
5464 | while (!Worklist.isEmpty()) { |
5465 | // Walk deferred instructions in reverse order, and push them to the |
5466 | // worklist, which means they'll end up popped from the worklist in-order. |
5467 | while (Instruction *I = Worklist.popDeferred()) { |
5468 | // Check to see if we can DCE the instruction. We do this already here to |
5469 | // reduce the number of uses and thus allow other folds to trigger. |
5470 | // Note that eraseInstFromFunction() may push additional instructions on |
5471 | // the deferred worklist, so this will DCE whole instruction chains. |
5472 | if (isInstructionTriviallyDead(I, TLI: &TLI)) { |
5473 | eraseInstFromFunction(I&: *I); |
5474 | ++NumDeadInst; |
5475 | continue; |
5476 | } |
5477 | |
5478 | Worklist.push(I); |
5479 | } |
5480 | |
5481 | Instruction *I = Worklist.removeOne(); |
5482 | if (I == nullptr) continue; // skip null values. |
5483 | |
5484 | // Check to see if we can DCE the instruction. |
5485 | if (isInstructionTriviallyDead(I, TLI: &TLI)) { |
5486 | eraseInstFromFunction(I&: *I); |
5487 | ++NumDeadInst; |
5488 | continue; |
5489 | } |
5490 | |
5491 | if (!DebugCounter::shouldExecute(CounterName: VisitCounter)) |
5492 | continue; |
5493 | |
5494 | // See if we can trivially sink this instruction to its user if we can |
5495 | // prove that the successor is not executed more frequently than our block. |
5496 | // Return the UserBlock if successful. |
5497 | auto getOptionalSinkBlockForInst = |
5498 | [this](Instruction *I) -> std::optional<BasicBlock *> { |
5499 | if (!EnableCodeSinking) |
5500 | return std::nullopt; |
5501 | |
5502 | BasicBlock *BB = I->getParent(); |
5503 | BasicBlock *UserParent = nullptr; |
5504 | unsigned NumUsers = 0; |
5505 | |
5506 | for (Use &U : I->uses()) { |
5507 | User *User = U.getUser(); |
5508 | if (User->isDroppable()) |
5509 | continue; |
5510 | if (NumUsers > MaxSinkNumUsers) |
5511 | return std::nullopt; |
5512 | |
5513 | Instruction *UserInst = cast<Instruction>(Val: User); |
5514 | // Special handling for Phi nodes - get the block the use occurs in. |
5515 | BasicBlock *UserBB = UserInst->getParent(); |
5516 | if (PHINode *PN = dyn_cast<PHINode>(Val: UserInst)) |
5517 | UserBB = PN->getIncomingBlock(U); |
5518 | // Bail out if we have uses in different blocks. We don't do any |
5519 |         // sophisticated analysis (i.e., finding NearestCommonDominator of these
5520 | // use blocks). |
5521 | if (UserParent && UserParent != UserBB) |
5522 | return std::nullopt; |
5523 | UserParent = UserBB; |
5524 | |
5525 |         // Make sure these checks are done only once; naturally, we do them the
5526 |         // first time we get the UserParent, which saves compile time.
5527 | if (NumUsers == 0) { |
5528 | // Try sinking to another block. If that block is unreachable, then do |
5529 | // not bother. SimplifyCFG should handle it. |
5530 | if (UserParent == BB || !DT.isReachableFromEntry(A: UserParent)) |
5531 | return std::nullopt; |
5532 | |
5533 | auto *Term = UserParent->getTerminator(); |
5534 | // See if the user is one of our successors that has only one |
5535 | // predecessor, so that we don't have to split the critical edge. |
5536 | // Another option where we can sink is a block that ends with a |
5537 |           // terminator that does not pass control to another block (such as
5538 | // return or unreachable or resume). In this case: |
5539 | // - I dominates the User (by SSA form); |
5540 | // - the User will be executed at most once. |
5541 | // So sinking I down to User is always profitable or neutral. |
5542 | if (UserParent->getUniquePredecessor() != BB && !succ_empty(I: Term)) |
5543 | return std::nullopt; |
5544 | |
5545 | assert(DT.dominates(BB, UserParent) && "Dominance relation broken?" ); |
5546 | } |
5547 | |
5548 | NumUsers++; |
5549 | } |
5550 | |
5551 |       // No users, or only droppable users.
5552 | if (!UserParent) |
5553 | return std::nullopt; |
5554 | |
5555 | return UserParent; |
5556 | }; |
5557 | |
5558 | auto OptBB = getOptionalSinkBlockForInst(I); |
5559 | if (OptBB) { |
5560 | auto *UserParent = *OptBB; |
5561 | // Okay, the CFG is simple enough, try to sink this instruction. |
5562 | if (tryToSinkInstruction(I, DestBlock: UserParent)) { |
5563 | LLVM_DEBUG(dbgs() << "IC: Sink: " << *I << '\n'); |
5564 | MadeIRChange = true; |
5565 |         // We'll add uses of the sunk instruction below, but since
5566 |         // sinking can expose opportunities for its *operands*, add
5567 |         // them to the worklist.
5568 | for (Use &U : I->operands()) |
5569 | if (Instruction *OpI = dyn_cast<Instruction>(Val: U.get())) |
5570 | Worklist.push(I: OpI); |
5571 | } |
5572 | } |
5573 | |
5574 | // Now that we have an instruction, try combining it to simplify it. |
5575 | Builder.SetInsertPoint(I); |
5576 | Builder.CollectMetadataToCopy( |
5577 | Src: I, MetadataKinds: {LLVMContext::MD_dbg, LLVMContext::MD_annotation}); |
5578 | |
5579 | #ifndef NDEBUG |
5580 | std::string OrigI; |
5581 | #endif |
5582 | LLVM_DEBUG(raw_string_ostream SS(OrigI); I->print(SS);); |
5583 | LLVM_DEBUG(dbgs() << "IC: Visiting: " << OrigI << '\n'); |
5584 | |
5585 | if (Instruction *Result = visit(I&: *I)) { |
5586 | ++NumCombined; |
5587 | // Should we replace the old instruction with a new one? |
5588 | if (Result != I) { |
5589 | LLVM_DEBUG(dbgs() << "IC: Old = " << *I << '\n' |
5590 | << " New = " << *Result << '\n'); |
5591 | |
5592 | // We copy the old instruction's DebugLoc to the new instruction, unless |
5593 | // InstCombine already assigned a DebugLoc to it, in which case we |
5594 | // should trust the more specifically selected DebugLoc. |
5595 | Result->setDebugLoc(Result->getDebugLoc().orElse(Other: I->getDebugLoc())); |
5596 | // We also copy annotation metadata to the new instruction. |
5597 | Result->copyMetadata(SrcInst: *I, WL: LLVMContext::MD_annotation); |
5598 | // Everything uses the new instruction now. |
5599 | I->replaceAllUsesWith(V: Result); |
5600 | |
5601 | // Move the name to the new instruction first. |
5602 | Result->takeName(V: I); |
5603 | |
5604 | // Insert the new instruction into the basic block... |
5605 | BasicBlock *InstParent = I->getParent(); |
5606 | BasicBlock::iterator InsertPos = I->getIterator(); |
5607 | |
5608 |         // Are we replacing a PHI with something that isn't a PHI, or vice versa?
5609 | if (isa<PHINode>(Val: Result) != isa<PHINode>(Val: I)) { |
5610 | // We need to fix up the insertion point. |
5611 | if (isa<PHINode>(Val: I)) // PHI -> Non-PHI |
5612 | InsertPos = InstParent->getFirstInsertionPt(); |
5613 | else // Non-PHI -> PHI |
5614 | InsertPos = InstParent->getFirstNonPHIIt(); |
5615 | } |
5616 | |
5617 | Result->insertInto(ParentBB: InstParent, It: InsertPos); |
5618 | |
5619 | // Push the new instruction and any users onto the worklist. |
5620 | Worklist.pushUsersToWorkList(I&: *Result); |
5621 | Worklist.push(I: Result); |
5622 | |
5623 | eraseInstFromFunction(I&: *I); |
5624 | } else { |
5625 | LLVM_DEBUG(dbgs() << "IC: Mod = " << OrigI << '\n' |
5626 | << " New = " << *I << '\n'); |
5627 | |
5628 | // If the instruction was modified, it's possible that it is now dead. |
5629 |         // If so, remove it.
5630 | if (isInstructionTriviallyDead(I, TLI: &TLI)) { |
5631 | eraseInstFromFunction(I&: *I); |
5632 | } else { |
5633 | Worklist.pushUsersToWorkList(I&: *I); |
5634 | Worklist.push(I); |
5635 | } |
5636 | } |
5637 | MadeIRChange = true; |
5638 | } |
5639 | } |
5640 | |
5641 | Worklist.zap(); |
5642 | return MadeIRChange; |
5643 | } |
5644 | |
5645 | // Track the scopes used by !alias.scope and !noalias. In a function, a |
5646 | // @llvm.experimental.noalias.scope.decl is only useful if that scope is used |
5647 | // by both sets. If not, the declaration of the scope can be safely omitted. |
5648 | // The MDNode of the scope can be omitted as well for the instructions that are |
5649 | // part of this function. We do not do that at this point, as this might become |
5650 | // too time consuming to do. |
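// For example (illustrative only), a declaration such as
//   call void @llvm.experimental.noalias.scope.decl(metadata !2)
// is dead if no instruction in the function references the scope in !2 from
// its !alias.scope metadata, or none references it from its !noalias metadata.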
5651 | class AliasScopeTracker { |
5652 | SmallPtrSet<const MDNode *, 8> UsedAliasScopesAndLists; |
5653 | SmallPtrSet<const MDNode *, 8> UsedNoAliasScopesAndLists; |
5654 | |
5655 | public: |
5656 | void analyse(Instruction *I) { |
5657 | // This seems to be faster than checking 'mayReadOrWriteMemory()'. |
5658 | if (!I->hasMetadataOtherThanDebugLoc()) |
5659 | return; |
5660 | |
5661 | auto Track = [](Metadata *ScopeList, auto &Container) { |
5662 | const auto *MDScopeList = dyn_cast_or_null<MDNode>(Val: ScopeList); |
5663 | if (!MDScopeList || !Container.insert(MDScopeList).second) |
5664 | return; |
5665 | for (const auto &MDOperand : MDScopeList->operands()) |
5666 | if (auto *MDScope = dyn_cast<MDNode>(Val: MDOperand)) |
5667 | Container.insert(MDScope); |
5668 | }; |
5669 | |
5670 | Track(I->getMetadata(KindID: LLVMContext::MD_alias_scope), UsedAliasScopesAndLists); |
5671 | Track(I->getMetadata(KindID: LLVMContext::MD_noalias), UsedNoAliasScopesAndLists); |
5672 | } |
5673 | |
5674 | bool isNoAliasScopeDeclDead(Instruction *Inst) { |
5675 | NoAliasScopeDeclInst *Decl = dyn_cast<NoAliasScopeDeclInst>(Val: Inst); |
5676 | if (!Decl) |
5677 | return false; |
5678 | |
5679 | assert(Decl->use_empty() && |
5680 | "llvm.experimental.noalias.scope.decl in use ?" ); |
5681 | const MDNode *MDSL = Decl->getScopeList(); |
5682 | assert(MDSL->getNumOperands() == 1 && |
5683 | "llvm.experimental.noalias.scope should refer to a single scope" ); |
5684 | auto &MDOperand = MDSL->getOperand(I: 0); |
5685 | if (auto *MD = dyn_cast<MDNode>(Val: MDOperand)) |
5686 | return !UsedAliasScopesAndLists.contains(Ptr: MD) || |
5687 | !UsedNoAliasScopesAndLists.contains(Ptr: MD); |
5688 | |
5689 |     // Not an MDNode? Throw it away.
5690 | return true; |
5691 | } |
5692 | }; |
5693 | |
5694 | /// Populate the IC worklist from a function, by walking it in reverse |
5695 | /// post-order and adding all reachable code to the worklist. |
5696 | /// |
5697 | /// This has a couple of tricks to make the code faster and more powerful. In |
5698 | /// particular, we constant fold and DCE instructions as we go, to avoid adding |
5699 | /// them to the worklist (this significantly speeds up instcombine on code where |
5700 | /// many instructions are dead or constant). Additionally, if we find a branch |
5701 | /// whose condition is a known constant, we only visit the reachable successors. |
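/// For example (illustrative only), for a block ending in
///   br i1 true, label %live, label %dead
/// only %live is queued for visiting, the edge to %dead is recorded as dead,
/// and phi operands incoming over that edge are replaced with poison.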
5702 | bool InstCombinerImpl::prepareWorklist(Function &F) { |
5703 | bool MadeIRChange = false; |
5704 | SmallPtrSet<BasicBlock *, 32> LiveBlocks; |
5705 | SmallVector<Instruction *, 128> InstrsForInstructionWorklist; |
5706 | DenseMap<Constant *, Constant *> FoldedConstants; |
5707 | AliasScopeTracker SeenAliasScopes; |
5708 | |
5709 | auto HandleOnlyLiveSuccessor = [&](BasicBlock *BB, BasicBlock *LiveSucc) { |
5710 | for (BasicBlock *Succ : successors(BB)) |
5711 | if (Succ != LiveSucc && DeadEdges.insert(V: {BB, Succ}).second) |
5712 | for (PHINode &PN : Succ->phis()) |
5713 | for (Use &U : PN.incoming_values()) |
5714 | if (PN.getIncomingBlock(U) == BB && !isa<PoisonValue>(Val: U)) { |
5715 | U.set(PoisonValue::get(T: PN.getType())); |
5716 | MadeIRChange = true; |
5717 | } |
5718 | }; |
5719 | |
5720 | for (BasicBlock *BB : RPOT) { |
5721 | if (!BB->isEntryBlock() && all_of(Range: predecessors(BB), P: [&](BasicBlock *Pred) { |
5722 | return DeadEdges.contains(V: {Pred, BB}) || DT.dominates(A: BB, B: Pred); |
5723 | })) { |
5724 | HandleOnlyLiveSuccessor(BB, nullptr); |
5725 | continue; |
5726 | } |
5727 | LiveBlocks.insert(Ptr: BB); |
5728 | |
5729 | for (Instruction &Inst : llvm::make_early_inc_range(Range&: *BB)) { |
5730 | // ConstantProp instruction if trivially constant. |
5731 | if (!Inst.use_empty() && |
5732 | (Inst.getNumOperands() == 0 || isa<Constant>(Val: Inst.getOperand(i: 0)))) |
5733 | if (Constant *C = ConstantFoldInstruction(I: &Inst, DL, TLI: &TLI)) { |
5734 | LLVM_DEBUG(dbgs() << "IC: ConstFold to: " << *C << " from: " << Inst |
5735 | << '\n'); |
5736 | Inst.replaceAllUsesWith(V: C); |
5737 | ++NumConstProp; |
5738 | if (isInstructionTriviallyDead(I: &Inst, TLI: &TLI)) |
5739 | Inst.eraseFromParent(); |
5740 | MadeIRChange = true; |
5741 | continue; |
5742 | } |
5743 | |
5744 | // See if we can constant fold its operands. |
5745 | for (Use &U : Inst.operands()) { |
5746 | if (!isa<ConstantVector>(Val: U) && !isa<ConstantExpr>(Val: U)) |
5747 | continue; |
5748 | |
5749 | auto *C = cast<Constant>(Val&: U); |
5750 | Constant *&FoldRes = FoldedConstants[C]; |
5751 | if (!FoldRes) |
5752 | FoldRes = ConstantFoldConstant(C, DL, TLI: &TLI); |
5753 | |
5754 | if (FoldRes != C) { |
5755 | LLVM_DEBUG(dbgs() << "IC: ConstFold operand of: " << Inst |
5756 | << "\n Old = " << *C |
5757 | << "\n New = " << *FoldRes << '\n'); |
5758 | U = FoldRes; |
5759 | MadeIRChange = true; |
5760 | } |
5761 | } |
5762 | |
5763 | // Skip processing debug and pseudo intrinsics in InstCombine. Processing |
5764 | // these call instructions consumes non-trivial amount of time and |
5765 | // provides no value for the optimization. |
5766 | if (!Inst.isDebugOrPseudoInst()) { |
5767 | InstrsForInstructionWorklist.push_back(Elt: &Inst); |
5768 | SeenAliasScopes.analyse(I: &Inst); |
5769 | } |
5770 | } |
5771 | |
5772 | // If this is a branch or switch on a constant, mark only the single |
5773 | // live successor. Otherwise assume all successors are live. |
5774 | Instruction *TI = BB->getTerminator(); |
5775 | if (BranchInst *BI = dyn_cast<BranchInst>(Val: TI); BI && BI->isConditional()) { |
5776 | if (isa<UndefValue>(Val: BI->getCondition())) { |
5777 | // Branch on undef is UB. |
5778 | HandleOnlyLiveSuccessor(BB, nullptr); |
5779 | continue; |
5780 | } |
5781 | if (auto *Cond = dyn_cast<ConstantInt>(Val: BI->getCondition())) { |
5782 | bool CondVal = Cond->getZExtValue(); |
5783 | HandleOnlyLiveSuccessor(BB, BI->getSuccessor(i: !CondVal)); |
5784 | continue; |
5785 | } |
5786 | } else if (SwitchInst *SI = dyn_cast<SwitchInst>(Val: TI)) { |
5787 | if (isa<UndefValue>(Val: SI->getCondition())) { |
5788 | // Switch on undef is UB. |
5789 | HandleOnlyLiveSuccessor(BB, nullptr); |
5790 | continue; |
5791 | } |
5792 | if (auto *Cond = dyn_cast<ConstantInt>(Val: SI->getCondition())) { |
5793 | HandleOnlyLiveSuccessor(BB, |
5794 | SI->findCaseValue(C: Cond)->getCaseSuccessor()); |
5795 | continue; |
5796 | } |
5797 | } |
5798 | } |
5799 | |
5800 | // Remove instructions inside unreachable blocks. This prevents the |
5801 | // instcombine code from having to deal with some bad special cases, and |
5802 | // reduces use counts of instructions. |
5803 | for (BasicBlock &BB : F) { |
5804 | if (LiveBlocks.count(Ptr: &BB)) |
5805 | continue; |
5806 | |
5807 |     unsigned NumDeadInstInBB =
5808 |         removeAllNonTerminatorAndEHPadInstructions(BB: &BB);
5809 | |
5810 | MadeIRChange |= NumDeadInstInBB != 0; |
5811 | NumDeadInst += NumDeadInstInBB; |
5812 | } |
5813 | |
5814 | // Once we've found all of the instructions to add to instcombine's worklist, |
5815 | // add them in reverse order. This way instcombine will visit from the top |
5816 | // of the function down. This jibes well with the way that it adds all uses
5817 | // of instructions to the worklist after doing a transformation, thus avoiding |
5818 | // some N^2 behavior in pathological cases. |
5819 | Worklist.reserve(Size: InstrsForInstructionWorklist.size()); |
5820 | for (Instruction *Inst : reverse(C&: InstrsForInstructionWorklist)) { |
5821 | // DCE instruction if trivially dead. As we iterate in reverse program |
5822 | // order here, we will clean up whole chains of dead instructions. |
5823 | if (isInstructionTriviallyDead(I: Inst, TLI: &TLI) || |
5824 | SeenAliasScopes.isNoAliasScopeDeclDead(Inst)) { |
5825 | ++NumDeadInst; |
5826 | LLVM_DEBUG(dbgs() << "IC: DCE: " << *Inst << '\n'); |
5827 | salvageDebugInfo(I&: *Inst); |
5828 | Inst->eraseFromParent(); |
5829 | MadeIRChange = true; |
5830 | continue; |
5831 | } |
5832 | |
5833 | Worklist.push(I: Inst); |
5834 | } |
5835 | |
5836 | return MadeIRChange; |
5837 | } |
5838 | |
5839 | void InstCombiner::computeBackEdges() { |
5840 | // Collect backedges. |
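  // An edge whose target has already been seen in this reverse post-order walk
  // is a retreating edge, e.g. (illustrative) the latch->header edge of a loop.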
5841 | SmallPtrSet<BasicBlock *, 16> Visited; |
5842 | for (BasicBlock *BB : RPOT) { |
5843 | Visited.insert(Ptr: BB); |
5844 | for (BasicBlock *Succ : successors(BB)) |
5845 | if (Visited.contains(Ptr: Succ)) |
5846 | BackEdges.insert(V: {BB, Succ}); |
5847 | } |
5848 | ComputedBackEdges = true; |
5849 | } |
5850 | |
5851 | static bool combineInstructionsOverFunction( |
5852 | Function &F, InstructionWorklist &Worklist, AliasAnalysis *AA, |
5853 | AssumptionCache &AC, TargetLibraryInfo &TLI, TargetTransformInfo &TTI, |
5854 | DominatorTree &DT, OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI, |
5855 | BranchProbabilityInfo *BPI, ProfileSummaryInfo *PSI, |
5856 | const InstCombineOptions &Opts) { |
5857 | auto &DL = F.getDataLayout(); |
5858 | bool VerifyFixpoint = Opts.VerifyFixpoint && |
5859 | !F.hasFnAttribute(Kind: "instcombine-no-verify-fixpoint" ); |
5860 | |
5861 | /// Builder - This is an IRBuilder that automatically inserts new |
5862 | /// instructions into the worklist when they are created. |
5863 | IRBuilder<TargetFolder, IRBuilderCallbackInserter> Builder( |
5864 | F.getContext(), TargetFolder(DL), |
5865 | IRBuilderCallbackInserter([&Worklist, &AC](Instruction *I) { |
5866 | Worklist.add(I); |
5867 | if (auto *Assume = dyn_cast<AssumeInst>(Val: I)) |
5868 | AC.registerAssumption(CI: Assume); |
5869 | })); |
5870 | |
5871 | ReversePostOrderTraversal<BasicBlock *> RPOT(&F.front()); |
5872 | |
5873 |   // Lower dbg.declare intrinsics, otherwise their value may be clobbered
5874 |   // by the instcombiner.
5875 | bool MadeIRChange = false; |
5876 | if (ShouldLowerDbgDeclare) |
5877 | MadeIRChange = LowerDbgDeclare(F); |
5878 | |
5879 | // Iterate while there is work to do. |
5880 | unsigned Iteration = 0; |
5881 | while (true) { |
5882 | if (Iteration >= Opts.MaxIterations && !VerifyFixpoint) { |
5883 | LLVM_DEBUG(dbgs() << "\n\n[IC] Iteration limit #" << Opts.MaxIterations |
5884 | << " on " << F.getName() |
5885 | << " reached; stopping without verifying fixpoint\n" ); |
5886 | break; |
5887 | } |
5888 | |
5889 | ++Iteration; |
5890 | ++NumWorklistIterations; |
5891 | LLVM_DEBUG(dbgs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on " |
5892 | << F.getName() << "\n" ); |
5893 | |
5894 | InstCombinerImpl IC(Worklist, Builder, F.hasMinSize(), AA, AC, TLI, TTI, DT, |
5895 | ORE, BFI, BPI, PSI, DL, RPOT); |
5896 | IC.MaxArraySizeForCombine = MaxArraySize; |
5897 | bool MadeChangeInThisIteration = IC.prepareWorklist(F); |
5898 | MadeChangeInThisIteration |= IC.run(); |
5899 | if (!MadeChangeInThisIteration) |
5900 | break; |
5901 | |
5902 | MadeIRChange = true; |
5903 | if (Iteration > Opts.MaxIterations) { |
5904 | reportFatalUsageError( |
5905 | reason: "Instruction Combining on " + Twine(F.getName()) + |
5906 | " did not reach a fixpoint after " + Twine(Opts.MaxIterations) + |
5907 | " iterations. " + |
5908 | "Use 'instcombine<no-verify-fixpoint>' or function attribute " |
5909 | "'instcombine-no-verify-fixpoint' to suppress this error." ); |
5910 | } |
5911 | } |
5912 | |
5913 | if (Iteration == 1) |
5914 | ++NumOneIteration; |
5915 | else if (Iteration == 2) |
5916 | ++NumTwoIterations; |
5917 | else if (Iteration == 3) |
5918 | ++NumThreeIterations; |
5919 | else |
5920 | ++NumFourOrMoreIterations; |
5921 | |
5922 | return MadeIRChange; |
5923 | } |
5924 | |
5925 | InstCombinePass::InstCombinePass(InstCombineOptions Opts) : Options(Opts) {} |
5926 | |
5927 | void InstCombinePass::printPipeline( |
5928 | raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) { |
5929 | static_cast<PassInfoMixin<InstCombinePass> *>(this)->printPipeline( |
5930 | OS, MapClassName2PassName); |
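  // Produces a textual pipeline element of the form (illustrative):
  //   instcombine<max-iterations=N;verify-fixpoint>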
5931 | OS << '<'; |
5932 | OS << "max-iterations=" << Options.MaxIterations << ";" ; |
5933 | OS << (Options.VerifyFixpoint ? "" : "no-" ) << "verify-fixpoint" ; |
5934 | OS << '>'; |
5935 | } |
5936 | |
5937 | char InstCombinePass::ID = 0; |
5938 | |
5939 | PreservedAnalyses InstCombinePass::run(Function &F, |
5940 | FunctionAnalysisManager &AM) { |
5941 | auto &LRT = AM.getResult<LastRunTrackingAnalysis>(IR&: F); |
5942 | // No changes since last InstCombine pass, exit early. |
5943 | if (LRT.shouldSkip(ID: &ID)) |
5944 | return PreservedAnalyses::all(); |
5945 | |
5946 | auto &AC = AM.getResult<AssumptionAnalysis>(IR&: F); |
5947 | auto &DT = AM.getResult<DominatorTreeAnalysis>(IR&: F); |
5948 | auto &TLI = AM.getResult<TargetLibraryAnalysis>(IR&: F); |
5949 | auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(IR&: F); |
5950 | auto &TTI = AM.getResult<TargetIRAnalysis>(IR&: F); |
5951 | |
5952 | auto *AA = &AM.getResult<AAManager>(IR&: F); |
5953 | auto &MAMProxy = AM.getResult<ModuleAnalysisManagerFunctionProxy>(IR&: F); |
5954 | ProfileSummaryInfo *PSI = |
5955 | MAMProxy.getCachedResult<ProfileSummaryAnalysis>(IR&: *F.getParent()); |
5956 | auto *BFI = (PSI && PSI->hasProfileSummary()) ? |
5957 | &AM.getResult<BlockFrequencyAnalysis>(IR&: F) : nullptr; |
5958 | auto *BPI = AM.getCachedResult<BranchProbabilityAnalysis>(IR&: F); |
5959 | |
5960 | if (!combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, TTI, DT, ORE, |
5961 | BFI, BPI, PSI, Opts: Options)) { |
5962 | // No changes, all analyses are preserved. |
5963 | LRT.update(ID: &ID, /*Changed=*/false); |
5964 | return PreservedAnalyses::all(); |
5965 | } |
5966 | |
5967 | // Mark all the analyses that instcombine updates as preserved. |
5968 | PreservedAnalyses PA; |
5969 | LRT.update(ID: &ID, /*Changed=*/true); |
5970 | PA.preserve<LastRunTrackingAnalysis>(); |
5971 | PA.preserveSet<CFGAnalyses>(); |
5972 | return PA; |
5973 | } |
5974 | |
5975 | void InstructionCombiningPass::getAnalysisUsage(AnalysisUsage &AU) const { |
5976 | AU.setPreservesCFG(); |
5977 | AU.addRequired<AAResultsWrapperPass>(); |
5978 | AU.addRequired<AssumptionCacheTracker>(); |
5979 | AU.addRequired<TargetLibraryInfoWrapperPass>(); |
5980 | AU.addRequired<TargetTransformInfoWrapperPass>(); |
5981 | AU.addRequired<DominatorTreeWrapperPass>(); |
5982 | AU.addRequired<OptimizationRemarkEmitterWrapperPass>(); |
5983 | AU.addPreserved<DominatorTreeWrapperPass>(); |
5984 | AU.addPreserved<AAResultsWrapperPass>(); |
5985 | AU.addPreserved<BasicAAWrapperPass>(); |
5986 | AU.addPreserved<GlobalsAAWrapperPass>(); |
5987 | AU.addRequired<ProfileSummaryInfoWrapperPass>(); |
5988 | LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU); |
5989 | } |
5990 | |
5991 | bool InstructionCombiningPass::runOnFunction(Function &F) { |
5992 | if (skipFunction(F)) |
5993 | return false; |
5994 | |
5995 | // Required analyses. |
5996 | auto AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); |
5997 | auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); |
5998 | auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); |
5999 | auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); |
6000 | auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); |
6001 | auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE(); |
6002 | |
6003 | // Optional analyses. |
6004 | ProfileSummaryInfo *PSI = |
6005 | &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); |
6006 | BlockFrequencyInfo *BFI = |
6007 | (PSI && PSI->hasProfileSummary()) ? |
6008 | &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI() : |
6009 | nullptr; |
6010 | BranchProbabilityInfo *BPI = nullptr; |
6011 | if (auto *WrapperPass = |
6012 | getAnalysisIfAvailable<BranchProbabilityInfoWrapperPass>()) |
6013 | BPI = &WrapperPass->getBPI(); |
6014 | |
6015 | return combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, TTI, DT, ORE, |
6016 | BFI, BPI, PSI, Opts: InstCombineOptions()); |
6017 | } |
6018 | |
6019 | char InstructionCombiningPass::ID = 0; |
6020 | |
6021 | InstructionCombiningPass::InstructionCombiningPass() : FunctionPass(ID) { |
6022 | initializeInstructionCombiningPassPass(*PassRegistry::getPassRegistry()); |
6023 | } |
6024 | |
6025 | INITIALIZE_PASS_BEGIN(InstructionCombiningPass, "instcombine" , |
6026 | "Combine redundant instructions" , false, false) |
6027 | INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) |
6028 | INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) |
6029 | INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) |
6030 | INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) |
6031 | INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) |
6032 | INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) |
6033 | INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass) |
6034 | INITIALIZE_PASS_DEPENDENCY(LazyBlockFrequencyInfoPass) |
6035 | INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) |
6036 | INITIALIZE_PASS_END(InstructionCombiningPass, "instcombine" , |
6037 | "Combine redundant instructions" , false, false) |
6038 | |
6039 | // Initialization Routines |
6040 | void llvm::initializeInstCombine(PassRegistry &Registry) { |
6041 | initializeInstructionCombiningPassPass(Registry); |
6042 | } |
6043 | |
6044 | FunctionPass *llvm::createInstructionCombiningPass() { |
6045 | return new InstructionCombiningPass(); |
6046 | } |
6047 | |