InstCombineVectorOps.cpp source code [llvm_projects/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp]

1	//===- InstCombineVectorOps.cpp -------------------------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This file implements instcombine for ExtractElement, InsertElement and
10	// ShuffleVector.
11	//
12	//===----------------------------------------------------------------------===//
13
14	#include "InstCombineInternal.h"
15	#include "llvm/ADT/APInt.h"
16	#include "llvm/ADT/ArrayRef.h"
17	#include "llvm/ADT/DenseMap.h"
18	#include "llvm/ADT/STLExtras.h"
19	#include "llvm/ADT/SmallBitVector.h"
20	#include "llvm/ADT/SmallVector.h"
21	#include "llvm/ADT/Statistic.h"
22	#include "llvm/Analysis/InstructionSimplify.h"
23	#include "llvm/Analysis/VectorUtils.h"
24	#include "llvm/IR/BasicBlock.h"
25	#include "llvm/IR/Constant.h"
26	#include "llvm/IR/Constants.h"
27	#include "llvm/IR/DerivedTypes.h"
28	#include "llvm/IR/InstrTypes.h"
29	#include "llvm/IR/Instruction.h"
30	#include "llvm/IR/Instructions.h"
31	#include "llvm/IR/Operator.h"
32	#include "llvm/IR/PatternMatch.h"
33	#include "llvm/IR/Type.h"
34	#include "llvm/IR/User.h"
35	#include "llvm/IR/Value.h"
36	#include "llvm/Support/Casting.h"
37	#include "llvm/Support/ErrorHandling.h"
38	#include "llvm/Transforms/InstCombine/InstCombiner.h"
39	#include <cassert>
40	#include <cstdint>
41	#include <iterator>
42	#include <utility>
43
44	#define DEBUG_TYPE "instcombine"
45
46	using namespace llvm;
47	using namespace PatternMatch;
48
49	STATISTIC(NumAggregateReconstructionsSimplified,
50	"Number of aggregate reconstructions turned into reuse of the "
51	"original aggregate");
52
53	/// Return true if the value is cheaper to scalarize than it is to leave as a
54	/// vector operation. If the extract index \p EI is a constant integer then
55	/// some operations may be cheap to scalarize.
56	///
57	/// FIXME: It's possible to create more instructions than previously existed.
58	static bool cheapToScalarize(Value V, Value EI) {
59	ConstantInt *CEI = dyn_cast<ConstantInt>(Val: EI);
60
61	// If we can pick a scalar constant value out of a vector, that is free.
62	if (auto *C = dyn_cast<Constant>(Val: V))
63	return CEI \|\| C->getSplatValue();
64
65	if (CEI && match(V, P: m_Intrinsic<Intrinsic::stepvector>())) {
66	ElementCount EC = cast<VectorType>(Val: V->getType())->getElementCount();
67	// Index needs to be lower than the minimum size of the vector, because
68	// for scalable vector, the vector size is known at run time.
69	return CEI->getValue().ult(RHS: EC.getKnownMinValue());
70	}
71
72	// An insertelement to the same constant index as our extract will simplify
73	// to the scalar inserted element. An insertelement to a different constant
74	// index is irrelevant to our extract.
75	if (match(V, P: m_InsertElt(Val: m_Value(), Elt: m_Value(), Idx: m_ConstantInt())))
76	return CEI;
77
78	if (match(V, P: m_OneUse(SubPattern: m_Load(Op: m_Value()))))
79	return true;
80
81	if (match(V, P: m_OneUse(SubPattern: m_UnOp())))
82	return true;
83
84	Value V0, V1;
85	if (match(V, P: m_OneUse(SubPattern: m_BinOp(L: m_Value(V&: V0), R: m_Value(V&: V1)))))
86	if (cheapToScalarize(V: V0, EI) \|\| cheapToScalarize(V: V1, EI))
87	return true;
88
89	CmpPredicate UnusedPred;
90	if (match(V, P: m_OneUse(SubPattern: m_Cmp(Pred&: UnusedPred, L: m_Value(V&: V0), R: m_Value(V&: V1)))))
91	if (cheapToScalarize(V: V0, EI) \|\| cheapToScalarize(V: V1, EI))
92	return true;
93
94	return false;
95	}
96
97	// If we have a PHI node with a vector type that is only used to feed
98	// itself and be an operand of extractelement at a constant location,
99	// try to replace the PHI of the vector type with a PHI of a scalar type.
100	Instruction *InstCombinerImpl::scalarizePHI(ExtractElementInst &EI,
101	PHINode *PN) {
102	SmallVector<Instruction *, `2`> Extracts;
103	// The users we want the PHI to have are:
104	// 1) The EI ExtractElement (we already know this)
105	// 2) Possibly more ExtractElements with the same index.
106	// 3) Another operand, which will feed back into the PHI.
107	Instruction PHIUser = nullptr*;
108	for (auto *U : PN->users()) {
109	if (ExtractElementInst *EU = dyn_cast<ExtractElementInst>(Val: U)) {
110	if (EI.getIndexOperand() == EU->getIndexOperand())
111	Extracts.push_back(Elt: EU);
112	else
113	return nullptr;
114	} else if (!PHIUser) {
115	PHIUser = cast<Instruction>(Val: U);
116	} else {
117	return nullptr;
118	}
119	}
120
121	if (!PHIUser)
122	return nullptr;
123
124	// Verify that this PHI user has one use, which is the PHI itself,
125	// and that it is a binary operation which is cheap to scalarize.
126	// otherwise return nullptr.
127	if (!PHIUser->hasOneUse() \|\| !(PHIUser->user_back() == PN) \|\|
128	!(isa<BinaryOperator>(Val: PHIUser)) \|\|
129	!cheapToScalarize(V: PHIUser, EI: EI.getIndexOperand()))
130	return nullptr;
131
132	// Create a scalar PHI node that will replace the vector PHI node
133	// just before the current PHI node.
134	PHINode *scalarPHI = cast<PHINode>(Val: InsertNewInstWith(
135	New: PHINode::Create(Ty: EI.getType(), NumReservedValues: PN->getNumIncomingValues(), NameStr: ""), Old: PN->getIterator()));
136	// Scalarize each PHI operand.
137	for (unsigned i = `0`; i < PN->getNumIncomingValues(); i++) {
138	Value *PHIInVal = PN->getIncomingValue(i);
139	BasicBlock *inBB = PN->getIncomingBlock(i);
140	Value *Elt = EI.getIndexOperand();
141	// If the operand is the PHI induction variable:
142	if (PHIInVal == PHIUser) {
143	// Scalarize the binary operation. One operand is the
144	// scalar PHI, and the other is extracted from the other
145	// vector operand.
146	BinaryOperator *B0 = cast<BinaryOperator>(Val: PHIUser);
147	unsigned opId = (B0->getOperand(i_nocapture: `0`) == PN) ? `1` : `0`;
148	Value *Op = InsertNewInstWith(
149	New: ExtractElementInst::Create(Vec: B0->getOperand(i_nocapture: opId), Idx: Elt,
150	NameStr: B0->getOperand(i_nocapture: opId)->getName() + ".Elt"),
151	Old: B0->getIterator());
152	// Preserve operand order for binary operation to preserve semantics of
153	// non-commutative operations.
154	Value *FirstOp = (B0->getOperand(i_nocapture: `0`) == PN) ? scalarPHI : Op;
155	Value *SecondOp = (B0->getOperand(i_nocapture: `0`) == PN) ? Op : scalarPHI;
156	Value *newPHIUser =
157	InsertNewInstWith(New: BinaryOperator::CreateWithCopiedFlags(
158	Opc: B0->getOpcode(), V1: FirstOp, V2: SecondOp, CopyO: B0),
159	Old: B0->getIterator());
160	scalarPHI->addIncoming(V: newPHIUser, BB: inBB);
161	} else {
162	// Scalarize PHI input:
163	Instruction *newEI = ExtractElementInst::Create(Vec: PHIInVal, Idx: Elt, NameStr: "");
164	// Insert the new instruction into the predecessor basic block.
165	Instruction *pos = dyn_cast<Instruction>(Val: PHIInVal);
166	BasicBlock::iterator InsertPos;
167	if (pos && !isa<PHINode>(Val: pos)) {
168	InsertPos = ++pos->getIterator();
169	} else {
170	InsertPos = inBB->getFirstInsertionPt();
171	}
172
173	InsertNewInstWith(New: newEI, Old: InsertPos);
174
175	scalarPHI->addIncoming(V: newEI, BB: inBB);
176	}
177	}
178
179	for (auto *E : Extracts) {
180	replaceInstUsesWith(I&: *E, V: scalarPHI);
181	// Add old extract to worklist for DCE.
182	addToWorklist(I: E);
183	}
184
185	return &EI;
186	}
187
188	Instruction *InstCombinerImpl::foldBitcastExtElt(ExtractElementInst &Ext) {
189	Value *X;
190	uint64_t ExtIndexC;
191	if (!match(V: Ext.getVectorOperand(), P: m_BitCast(Op: m_Value(V&: X))) \|\|
192	!match(V: Ext.getIndexOperand(), P: m_ConstantInt(V&: ExtIndexC)))
193	return nullptr;
194
195	ElementCount NumElts =
196	cast<VectorType>(Val: Ext.getVectorOperandType())->getElementCount();
197	Type *DestTy = Ext.getType();
198	unsigned DestWidth = DestTy->getPrimitiveSizeInBits();
199	bool IsBigEndian = DL.isBigEndian();
200
201	// If we are casting an integer to vector and extracting a portion, that is
202	// a shift-right and truncate.
203	if (X->getType()->isIntegerTy()) {
204	assert(isa<FixedVectorType>(Ext.getVectorOperand()->getType()) &&
205	"Expected fixed vector type for bitcast from scalar integer");
206
207	// Big endian requires adjusting the extract index since MSB is at index 0.
208	// LittleEndian: extelt (bitcast i32 X to v4i8), 0 -> trunc i32 X to i8
209	// BigEndian: extelt (bitcast i32 X to v4i8), 0 -> trunc i32 (X >> 24) to i8
210	if (IsBigEndian)
211	ExtIndexC = NumElts.getKnownMinValue() - `1` - ExtIndexC;
212	unsigned ShiftAmountC = ExtIndexC * DestWidth;
213	if ((!ShiftAmountC \|\|
214	isDesirableIntType(BitWidth: X->getType()->getPrimitiveSizeInBits())) &&
215	Ext.getVectorOperand()->hasOneUse()) {
216	if (ShiftAmountC)
217	X = Builder.CreateLShr(LHS: X, RHS: ShiftAmountC, Name: "extelt.offset");
218	if (DestTy->isFloatingPointTy()) {
219	Type *DstIntTy = IntegerType::getIntNTy(C&: X->getContext(), N: DestWidth);
220	Value *Trunc = Builder.CreateTrunc(V: X, DestTy: DstIntTy);
221	return new BitCastInst (Trunc, DestTy);
222	}
223	return new TruncInst (X, DestTy);
224	}
225	}
226
227	if (!X->getType()->isVectorTy())
228	return nullptr;
229
230	// If this extractelement is using a bitcast from a vector of the same number
231	// of elements, see if we can find the source element from the source vector:
232	// extelt (bitcast VecX), IndexC --> bitcast X[IndexC]
233	auto *SrcTy = cast<VectorType>(Val: X->getType());
234	ElementCount NumSrcElts = SrcTy->getElementCount();
235	if (NumSrcElts == NumElts)
236	if (Value *Elt = findScalarElement(V: X, EltNo: ExtIndexC))
237	return new BitCastInst (Elt, DestTy);
238
239	assert(NumSrcElts.isScalable() == NumElts.isScalable() &&
240	"Src and Dst must be the same sort of vector type");
241
242	// If the source elements are wider than the destination, try to shift and
243	// truncate a subset of scalar bits of an insert op.
244	if (NumSrcElts.getKnownMinValue() < NumElts.getKnownMinValue()) {
245	Value *Scalar;
246	Value *Vec;
247	uint64_t InsIndexC;
248	if (!match(V: X, P: m_InsertElt(Val: m_Value(V&: Vec), Elt: m_Value(V&: Scalar),
249	Idx: m_ConstantInt(V&: InsIndexC))))
250	return nullptr;
251
252	// The extract must be from the subset of vector elements that we inserted
253	// into. Example: if we inserted element 1 of a <2 x i64> and we are
254	// extracting an i16 (narrowing ratio = 4), then this extract must be from 1
255	// of elements 4-7 of the bitcasted vector.
256	unsigned NarrowingRatio =
257	NumElts.getKnownMinValue() / NumSrcElts.getKnownMinValue();
258
259	if (ExtIndexC / NarrowingRatio != InsIndexC) {
260	// Remove insertelement, if we don't use the inserted element.
261	// extractelement (bitcast (insertelement (Vec, b)), a) ->
262	// extractelement (bitcast (Vec), a)
263	// FIXME: this should be removed to SimplifyDemandedVectorElts,
264	// once scale vectors are supported.
265	if (X->hasOneUse() && Ext.getVectorOperand()->hasOneUse()) {
266	Value *NewBC = Builder.CreateBitCast(V: Vec, DestTy: Ext.getVectorOperandType());
267	return ExtractElementInst::Create(Vec: NewBC, Idx: Ext.getIndexOperand());
268	}
269	return nullptr;
270	}
271
272	// We are extracting part of the original scalar. How that scalar is
273	// inserted into the vector depends on the endian-ness. Example:
274	// Vector Byte Elt Index: 0 1 2 3 4 5 6 7
275	// +--+--+--+--+--+--+--+--+
276	// inselt <2 x i32> V, <i32> S, 1: \|V0\|V1\|V2\|V3\|S0\|S1\|S2\|S3\|
277	// extelt <4 x i16> V', 3: \| \|S2\|S3\|
278	// +--+--+--+--+--+--+--+--+
279	// If this is little-endian, S2\|S3 are the MSB of the 32-bit 'S' value.
280	// If this is big-endian, S2\|S3 are the LSB of the 32-bit 'S' value.
281	// In this example, we must right-shift little-endian. Big-endian is just a
282	// truncate.
283	unsigned Chunk = ExtIndexC % NarrowingRatio;
284	if (IsBigEndian)
285	Chunk = NarrowingRatio - `1` - Chunk;
286
287	// Bail out if this is an FP vector to FP vector sequence. That would take
288	// more instructions than we started with unless there is no shift, and it
289	// may not be handled as well in the backend.
290	bool NeedSrcBitcast = SrcTy->getScalarType()->isFloatingPointTy();
291	bool NeedDestBitcast = DestTy->isFloatingPointTy();
292	if (NeedSrcBitcast && NeedDestBitcast)
293	return nullptr;
294
295	unsigned SrcWidth = SrcTy->getScalarSizeInBits();
296	unsigned ShAmt = Chunk * DestWidth;
297
298	// TODO: This limitation is more strict than necessary. We could sum the
299	// number of new instructions and subtract the number eliminated to know if
300	// we can proceed.
301	if (!X->hasOneUse() \|\| !Ext.getVectorOperand()->hasOneUse())
302	if (NeedSrcBitcast \|\| NeedDestBitcast)
303	return nullptr;
304
305	if (NeedSrcBitcast) {
306	Type *SrcIntTy = IntegerType::getIntNTy(C&: Scalar->getContext(), N: SrcWidth);
307	Scalar = Builder.CreateBitCast(V: Scalar, DestTy: SrcIntTy);
308	}
309
310	if (ShAmt) {
311	// Bail out if we could end with more instructions than we started with.
312	if (!Ext.getVectorOperand()->hasOneUse())
313	return nullptr;
314	Scalar = Builder.CreateLShr(LHS: Scalar, RHS: ShAmt);
315	}
316
317	if (NeedDestBitcast) {
318	Type *DestIntTy = IntegerType::getIntNTy(C&: Scalar->getContext(), N: DestWidth);
319	return new BitCastInst (Builder.CreateTrunc(V: Scalar, DestTy: DestIntTy), DestTy);
320	}
321	return new TruncInst (Scalar, DestTy);
322	}
323
324	return nullptr;
325	}
326
327	/// Find elements of V demanded by UserInstr. If returns false, we were not able
328	/// to determine all elements.
329	static bool findDemandedEltsBySingleUser(Value V, Instruction UserInstr,
330	APInt &UnionUsedElts) {
331	unsigned VWidth = cast<FixedVectorType>(Val: V->getType())->getNumElements();
332
333	switch (UserInstr->getOpcode()) {
334	case Instruction::ExtractElement: {
335	ExtractElementInst *EEI = cast<ExtractElementInst>(Val: UserInstr);
336	assert(EEI->getVectorOperand() == V);
337	ConstantInt *EEIIndexC = dyn_cast<ConstantInt>(Val: EEI->getIndexOperand());
338	if (EEIIndexC && EEIIndexC->getValue().ult(RHS: VWidth)) {
339	UnionUsedElts.setBit(EEIIndexC->getZExtValue());
340	return true;
341	}
342	break;
343	}
344	case Instruction::ShuffleVector: {
345	ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(Val: UserInstr);
346	unsigned MaskNumElts =
347	cast<FixedVectorType>(Val: UserInstr->getType())->getNumElements();
348
349	for (auto I : llvm::seq(Size: MaskNumElts)) {
350	unsigned MaskVal = Shuffle->getMaskValue(Elt: I);
351	if (MaskVal == -`1u` \|\| MaskVal >= `2` * VWidth)
352	continue;
353	if (Shuffle->getOperand(i_nocapture: `0`) == V && (MaskVal < VWidth))
354	UnionUsedElts.setBit(MaskVal);
355	if (Shuffle->getOperand(i_nocapture: `1`) == V &&
356	((MaskVal >= VWidth) && (MaskVal < `2` * VWidth)))
357	UnionUsedElts.setBit(MaskVal - VWidth);
358	}
359	return true;
360	}
361	default:
362	break;
363	}
364
365	return false;
366	}
367
368	/// Find union of elements of V demanded by all its users.
369	/// If it is known by querying findDemandedEltsBySingleUser that
370	/// no user demands an element of V, then the corresponding bit
371	/// remains unset in the returned value.
372	static APInt findDemandedEltsByAllUsers(Value *V) {
373	unsigned VWidth = cast<FixedVectorType>(Val: V->getType())->getNumElements();
374
375	APInt UnionUsedElts(VWidth, `0`);
376	for (const Use &U : V->uses()) {
377	if (Instruction *I = dyn_cast<Instruction>(Val: U.getUser())) {
378	if (!findDemandedEltsBySingleUser(V, UserInstr: I, UnionUsedElts))
379	return APInt::getAllOnes(numBits: VWidth);
380	} else {
381	UnionUsedElts = APInt::getAllOnes(numBits: VWidth);
382	break;
383	}
384
385	if (UnionUsedElts.isAllOnes())
386	break;
387	}
388
389	return UnionUsedElts;
390	}
391
392	/// Given a constant index for a extractelement or insertelement instruction,
393	/// return it with the canonical type if it isn't already canonical. We
394	/// arbitrarily pick 64 bit as our canonical type. The actual bitwidth doesn't
395	/// matter, we just want a consistent type to simplify CSE.
396	static ConstantInt getPreferredVectorIndex(ConstantInt IndexC) {
397	const unsigned IndexBW = IndexC->getBitWidth();
398	if (IndexBW == `64` \|\| IndexC->getValue().getActiveBits() > `64`)
399	return nullptr;
400	return ConstantInt::get(Context&: IndexC->getContext(),
401	V: IndexC->getValue().zextOrTrunc(width: `64`));
402	}
403
404	Instruction *InstCombinerImpl::visitExtractElementInst(ExtractElementInst &EI) {
405	Value *SrcVec = EI.getVectorOperand();
406	Value *Index = EI.getIndexOperand();
407	if (Value *V = simplifyExtractElementInst(Vec: SrcVec, Idx: Index,
408	Q: SQ.getWithInstruction(I: &EI)))
409	return replaceInstUsesWith(I&: EI, V);
410
411	// extractelt (select %x, %vec1, %vec2), %const ->
412	// select %x, %vec1[%const], %vec2[%const]
413	// TODO: Support constant folding of multiple select operands:
414	// extractelt (select %x, %vec1, %vec2), (select %x, %c1, %c2)
415	// If the extractelement will for instance try to do out of bounds accesses
416	// because of the values of %c1 and/or %c2, the sequence could be optimized
417	// early. This is currently not possible because constant folding will reach
418	// an unreachable assertion if it doesn't find a constant operand.
419	if (SelectInst *SI = dyn_cast<SelectInst>(Val: EI.getVectorOperand()))
420	if (SI->getCondition()->getType()->isIntegerTy() &&
421	isa<Constant>(Val: EI.getIndexOperand()))
422	if (Instruction *R = FoldOpIntoSelect(Op&: EI, SI))
423	return R;
424
425	// If extracting a specified index from the vector, see if we can recursively
426	// find a previously computed scalar that was inserted into the vector.
427	auto *IndexC = dyn_cast<ConstantInt>(Val: Index);
428	bool HasKnownValidIndex = false;
429	if (IndexC) {
430	// Canonicalize type of constant indices to i64 to simplify CSE
431	if (auto *NewIdx = getPreferredVectorIndex(IndexC))
432	return replaceOperand(I&: EI, OpNum: `1`, V: NewIdx);
433
434	ElementCount EC = EI.getVectorOperandType()->getElementCount();
435	unsigned NumElts = EC.getKnownMinValue();
436	HasKnownValidIndex = IndexC->getValue().ult(RHS: NumElts);
437
438	if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: SrcVec)) {
439	Intrinsic::ID IID = II->getIntrinsicID();
440	// Index needs to be lower than the minimum size of the vector, because
441	// for scalable vector, the vector size is known at run time.
442	if (IID == Intrinsic::stepvector && IndexC->getValue().ult(RHS: NumElts)) {
443	Type *Ty = EI.getType();
444	unsigned BitWidth = Ty->getIntegerBitWidth();
445	Value *Idx;
446	// Return index when its value does not exceed the allowed limit
447	// for the element type of the vector.
448	// TODO: Truncate out-of-range values.
449	if (IndexC->getValue().getActiveBits() <= BitWidth)
450	Idx = ConstantInt::get(Ty, V: IndexC->getValue().zextOrTrunc(width: BitWidth));
451	else
452	return nullptr;
453	return replaceInstUsesWith(I&: EI, V: Idx);
454	}
455	}
456
457	// InstSimplify should handle cases where the index is invalid.
458	// For fixed-length vector, it's invalid to extract out-of-range element.
459	if (!EC.isScalable() && IndexC->getValue().uge(RHS: NumElts))
460	return nullptr;
461
462	if (Instruction *I = foldBitcastExtElt(Ext&: EI))
463	return I;
464
465	// If there's a vector PHI feeding a scalar use through this extractelement
466	// instruction, try to scalarize the PHI.
467	if (auto *Phi = dyn_cast<PHINode>(Val: SrcVec))
468	if (Instruction *ScalarPHI = scalarizePHI(EI, PN: Phi))
469	return ScalarPHI;
470	}
471
472	// If SrcVec is a subvector starting at index 0, extract from the
473	// wider source vector
474	Value *V;
475	if (match(V: SrcVec,
476	P: m_Intrinsic<Intrinsic::vector_extract>(Op0: m_Value(V), Op1: m_Zero())))
477	return ExtractElementInst::Create(Vec: V, Idx: Index);
478
479	// TODO come up with a n-ary matcher that subsumes both unary and
480	// binary matchers.
481	UnaryOperator *UO;
482	if (match(V: SrcVec, P: m_UnOp(I&: UO)) && cheapToScalarize(V: SrcVec, EI: Index)) {
483	// extelt (unop X), Index --> unop (extelt X, Index)
484	Value *X = UO->getOperand(i_nocapture: `0`);
485	Value *E = Builder.CreateExtractElement(Vec: X, Idx: Index);
486	return UnaryOperator::CreateWithCopiedFlags(Opc: UO->getOpcode(), V: E, CopyO: UO);
487	}
488
489	// If the binop is not speculatable, we cannot hoist the extractelement if
490	// it may make the operand poison.
491	BinaryOperator *BO;
492	if (match(V: SrcVec, P: m_BinOp(I&: BO)) && cheapToScalarize(V: SrcVec, EI: Index) &&
493	(HasKnownValidIndex \|\|
494	isSafeToSpeculativelyExecuteWithVariableReplaced(I: BO))) {
495	// extelt (binop X, Y), Index --> binop (extelt X, Index), (extelt Y, Index)
496	Value X = BO->getOperand(i_nocapture: `0`), Y = BO->getOperand(i_nocapture: `1`);
497	Value *E0 = Builder.CreateExtractElement(Vec: X, Idx: Index);
498	Value *E1 = Builder.CreateExtractElement(Vec: Y, Idx: Index);
499	return BinaryOperator::CreateWithCopiedFlags(Opc: BO->getOpcode(), V1: E0, V2: E1, CopyO: BO);
500	}
501
502	Value X, Y;
503	CmpPredicate Pred;
504	if (match(V: SrcVec, P: m_Cmp(Pred, L: m_Value(V&: X), R: m_Value(V&: Y))) &&
505	cheapToScalarize(V: SrcVec, EI: Index)) {
506	// extelt (cmp X, Y), Index --> cmp (extelt X, Index), (extelt Y, Index)
507	Value *E0 = Builder.CreateExtractElement(Vec: X, Idx: Index);
508	Value *E1 = Builder.CreateExtractElement(Vec: Y, Idx: Index);
509	CmpInst *SrcCmpInst = cast<CmpInst>(Val: SrcVec);
510	return CmpInst::CreateWithCopiedFlags(Op: SrcCmpInst->getOpcode(), Pred, S1: E0, S2: E1,
511	FlagsSource: SrcCmpInst);
512	}
513
514	if (auto *I = dyn_cast<Instruction>(Val: SrcVec)) {
515	if (auto *IE = dyn_cast<InsertElementInst>(Val: I)) {
516	// instsimplify already handled the case where the indices are constants
517	// and equal by value, if both are constants, they must not be the same
518	// value, extract from the pre-inserted value instead.
519	if (isa<Constant>(Val: IE->getOperand(i_nocapture: `2`)) && IndexC)
520	return replaceOperand(I&: EI, OpNum: `0`, V: IE->getOperand(i_nocapture: `0`));
521	} else if (auto *GEP = dyn_cast<GetElementPtrInst>(Val: I)) {
522	auto *VecType = cast<VectorType>(Val: GEP->getType());
523	ElementCount EC = VecType->getElementCount();
524	uint64_t IdxVal = IndexC ? IndexC->getZExtValue() : `0`;
525	if (IndexC && IdxVal < EC.getKnownMinValue() && GEP->hasOneUse()) {
526	// Find out why we have a vector result - these are a few examples:
527	// 1. We have a scalar pointer and a vector of indices, or
528	// 2. We have a vector of pointers and a scalar index, or
529	// 3. We have a vector of pointers and a vector of indices, etc.
530	// Here we only consider combining when there is exactly one vector
531	// operand, since the optimization is less obviously a win due to
532	// needing more than one extractelements.
533
534	unsigned VectorOps =
535	llvm::count_if(Range: GEP->operands(), P: [](const Value *V) {
536	return isa<VectorType>(Val: V->getType());
537	});
538	if (VectorOps == `1`) {
539	Value *NewPtr = GEP->getPointerOperand();
540	if (isa<VectorType>(Val: NewPtr->getType()))
541	NewPtr = Builder.CreateExtractElement(Vec: NewPtr, Idx: IndexC);
542
543	SmallVector<Value *> NewOps;
544	for (unsigned I = `1`; I != GEP->getNumOperands(); ++I) {
545	Value *Op = GEP->getOperand(i_nocapture: I);
546	if (isa<VectorType>(Val: Op->getType()))
547	NewOps.push_back(Elt: Builder.CreateExtractElement(Vec: Op, Idx: IndexC));
548	else
549	NewOps.push_back(Elt: Op);
550	}
551
552	GetElementPtrInst *NewGEP = GetElementPtrInst::Create(
553	PointeeType: GEP->getSourceElementType(), Ptr: NewPtr, IdxList: NewOps);
554	NewGEP->setNoWrapFlags(GEP->getNoWrapFlags());
555	return NewGEP;
556	}
557	}
558	} else if (auto *SVI = dyn_cast<ShuffleVectorInst>(Val: I)) {
559	int SplatIndex = getSplatIndex(Mask: SVI->getShuffleMask());
560	// We know the all-0 splat must be reading from the first operand, even
561	// in the case of scalable vectors (vscale is always > 0).
562	if (SplatIndex == `0`)
563	return ExtractElementInst::Create(Vec: SVI->getOperand(i_nocapture: `0`),
564	Idx: Builder.getInt64(C: `0`));
565
566	if (isa<FixedVectorType>(Val: SVI->getType())) {
567	std::optional<int> SrcIdx;
568	// getSplatIndex returns -1 to mean not-found.
569	if (SplatIndex != -`1`)
570	SrcIdx = SplatIndex;
571	else if (ConstantInt *CI = dyn_cast<ConstantInt>(Val: Index))
572	SrcIdx = SVI->getMaskValue(Elt: CI->getZExtValue());
573
574	if (SrcIdx) {
575	Value *Src;
576	unsigned LHSWidth =
577	cast<FixedVectorType>(Val: SVI->getOperand(i_nocapture: `0`)->getType())
578	->getNumElements();
579
580	if (*SrcIdx < `0`)
581	return replaceInstUsesWith(I&: EI, V: PoisonValue::get(T: EI.getType()));
582	if (SrcIdx < (int*)LHSWidth)
583	Src = SVI->getOperand(i_nocapture: `0`);
584	else {
585	*SrcIdx -= LHSWidth;
586	Src = SVI->getOperand(i_nocapture: `1`);
587	}
588	Type *Int64Ty = Type::getInt64Ty(C&: EI.getContext());
589	return ExtractElementInst::Create(
590	Vec: Src, Idx: ConstantInt::get(Ty: Int64Ty, V: SrcIdx, IsSigned: false*));
591	}
592	}
593	} else if (auto *CI = dyn_cast<CastInst>(Val: I)) {
594	// Canonicalize extractelement(cast) -> cast(extractelement).
595	// Bitcasts can change the number of vector elements, and they cost
596	// nothing.
597	// If the CI has only one use, but that use is inside a loop, this
598	// canonicalization is not profitable because it would turn a vector
599	// operation into scalar operations inside the loop. Apply the transform
600	// when:
601	// - the index is constant and CI has one use, or
602	// - the CI and EI are in the same basic block, so the cast won't be sunk
603	// into a loop.
604	if (CI->hasOneUse() && (CI->getOpcode() != Instruction::BitCast) &&
605	(EI.getParent() == CI->getParent() \|\| isa<ConstantInt>(Val: Index))) {
606	Value *EE = Builder.CreateExtractElement(Vec: CI->getOperand(i_nocapture: `0`), Idx: Index);
607	return CastInst::Create(CI->getOpcode(), S: EE, Ty: EI.getType());
608	}
609	}
610	}
611
612	// Run demanded elements after other transforms as this can drop flags on
613	// binops. If there's two paths to the same final result, we prefer the
614	// one which doesn't force us to drop flags.
615	if (IndexC) {
616	ElementCount EC = EI.getVectorOperandType()->getElementCount();
617	unsigned NumElts = EC.getKnownMinValue();
618	// This instruction only demands the single element from the input vector.
619	// Skip for scalable type, the number of elements is unknown at
620	// compile-time.
621	if (!EC.isScalable() && NumElts != `1`) {
622	// If the input vector has a single use, simplify it based on this use
623	// property.
624	if (SrcVec->hasOneUse()) {
625	APInt PoisonElts(NumElts, `0`);
626	APInt DemandedElts(NumElts, `0`);
627	DemandedElts.setBit(IndexC->getZExtValue());
628	if (Value *V =
629	SimplifyDemandedVectorElts(V: SrcVec, DemandedElts, PoisonElts))
630	return replaceOperand(I&: EI, OpNum: `0`, V);
631	} else {
632	// If the input vector has multiple uses, simplify it based on a union
633	// of all elements used.
634	APInt DemandedElts = findDemandedEltsByAllUsers(V: SrcVec);
635	if (!DemandedElts.isAllOnes()) {
636	APInt PoisonElts(NumElts, `0`);
637	if (Value *V = SimplifyDemandedVectorElts(
638	V: SrcVec, DemandedElts, PoisonElts, Depth: `0` / Depth /,
639	AllowMultipleUsers: true / AllowMultipleUsers /)) {
640	if (V != SrcVec) {
641	Worklist.addValue(V: SrcVec);
642	SrcVec->replaceAllUsesWith(V);
643	return &EI;
644	}
645	}
646	}
647	}
648	}
649	}
650	return nullptr;
651	}
652
653	/// If V is a shuffle of values that ONLY returns elements from either LHS or
654	/// RHS, return the shuffle mask and true. Otherwise, return false.
655	static bool collectSingleShuffleElements(Value V, Value LHS, Value *RHS,
656	SmallVectorImpl<int> &Mask) {
657	assert(LHS->getType() == RHS->getType() &&
658	"Invalid CollectSingleShuffleElements");
659	unsigned NumElts = cast<FixedVectorType>(Val: V->getType())->getNumElements();
660
661	if (match(V, P: m_Poison())) {
662	Mask.assign(NumElts, Elt: -`1`);
663	return true;
664	}
665
666	if (V == LHS) {
667	for (unsigned i = `0`; i != NumElts; ++i)
668	Mask.push_back(Elt: i);
669	return true;
670	}
671
672	if (V == RHS) {
673	for (unsigned i = `0`; i != NumElts; ++i)
674	Mask.push_back(Elt: i + NumElts);
675	return true;
676	}
677
678	if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(Val: V)) {
679	// If this is an insert of an extract from some other vector, include it.
680	Value *VecOp = IEI->getOperand(i_nocapture: `0`);
681	Value *ScalarOp = IEI->getOperand(i_nocapture: `1`);
682	Value *IdxOp = IEI->getOperand(i_nocapture: `2`);
683
684	if (!isa<ConstantInt>(Val: IdxOp))
685	return false;
686	unsigned InsertedIdx = cast<ConstantInt>(Val: IdxOp)->getZExtValue();
687
688	if (isa<PoisonValue>(Val: ScalarOp)) { // inserting poison into vector.
689	// We can handle this if the vector we are inserting into is
690	// transitively ok.
691	if (collectSingleShuffleElements(V: VecOp, LHS, RHS, Mask)) {
692	// If so, update the mask to reflect the inserted poison.
693	Mask [InsertedIdx] = -`1`;
694	return true;
695	}
696	} else if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(Val: ScalarOp)){
697	if (isa<ConstantInt>(Val: EI->getOperand(i_nocapture: `1`))) {
698	unsigned ExtractedIdx =
699	cast<ConstantInt>(Val: EI->getOperand(i_nocapture: `1`))->getZExtValue();
700	unsigned NumLHSElts =
701	cast<FixedVectorType>(Val: LHS->getType())->getNumElements();
702
703	// This must be extracting from either LHS or RHS.
704	if (EI->getOperand(i_nocapture: `0`) == LHS \|\| EI->getOperand(i_nocapture: `0`) == RHS) {
705	// We can handle this if the vector we are inserting into is
706	// transitively ok.
707	if (collectSingleShuffleElements(V: VecOp, LHS, RHS, Mask)) {
708	// If so, update the mask to reflect the inserted value.
709	if (EI->getOperand(i_nocapture: `0`) == LHS) {
710	Mask [InsertedIdx % NumElts] = ExtractedIdx;
711	} else {
712	assert(EI->getOperand(`0`) == RHS);
713	Mask [InsertedIdx % NumElts] = ExtractedIdx + NumLHSElts;
714	}
715	return true;
716	}
717	}
718	}
719	}
720	}
721
722	return false;
723	}
724
725	/// If we have insertion into a vector that is wider than the vector that we
726	/// are extracting from, try to widen the source vector to allow a single
727	/// shufflevector to replace one or more insert/extract pairs.
728	static bool replaceExtractElements(InsertElementInst *InsElt,
729	ExtractElementInst *ExtElt,
730	InstCombinerImpl &IC) {
731	auto *InsVecType = cast<FixedVectorType>(Val: InsElt->getType());
732	auto *ExtVecType = cast<FixedVectorType>(Val: ExtElt->getVectorOperandType());
733	unsigned NumInsElts = InsVecType->getNumElements();
734	unsigned NumExtElts = ExtVecType->getNumElements();
735
736	// The inserted-to vector must be wider than the extracted-from vector.
737	if (InsVecType->getElementType() != ExtVecType->getElementType() \|\|
738	NumExtElts >= NumInsElts)
739	return false;
740
741	Value *ExtVecOp = ExtElt->getVectorOperand();
742	// Bail out on constant vectors.
743	if (isa<ConstantData>(Val: ExtVecOp))
744	return false;
745
746	// Create a shuffle mask to widen the extended-from vector using poison
747	// values. The mask selects all of the values of the original vector followed
748	// by as many poison values as needed to create a vector of the same length
749	// as the inserted-to vector.
750	SmallVector<int, `16`> ExtendMask;
751	for (unsigned i = `0`; i < NumExtElts; ++i)
752	ExtendMask.push_back(Elt: i);
753	for (unsigned i = NumExtElts; i < NumInsElts; ++i)
754	ExtendMask.push_back(Elt: -`1`);
755
756	auto *ExtVecOpInst = dyn_cast<Instruction>(Val: ExtVecOp);
757	BasicBlock *InsertionBlock = (ExtVecOpInst && !isa<PHINode>(Val: ExtVecOpInst))
758	? ExtVecOpInst->getParent()
759	: ExtElt->getParent();
760
761	// TODO: This restriction matches the basic block check below when creating
762	// new extractelement instructions. If that limitation is removed, this one
763	// could also be removed. But for now, we just bail out to ensure that we
764	// will replace the extractelement instruction that is feeding our
765	// insertelement instruction. This allows the insertelement to then be
766	// replaced by a shufflevector. If the insertelement is not replaced, we can
767	// induce infinite looping because there's an optimization for extractelement
768	// that will delete our widening shuffle. This would trigger another attempt
769	// here to create that shuffle, and we spin forever.
770	if (InsertionBlock != InsElt->getParent())
771	return false;
772
773	// TODO: This restriction matches the check in visitInsertElementInst() and
774	// prevents an infinite loop caused by not turning the extract/insert pair
775	// into a shuffle. We really should not need either check, but we're lacking
776	// folds for shufflevectors because we're afraid to generate shuffle masks
777	// that the backend can't handle.
778	if (InsElt->hasOneUse() && isa<InsertElementInst>(Val: InsElt->user_back()))
779	return false;
780
781	auto WideVec = new* ShuffleVectorInst (ExtVecOp, ExtendMask);
782
783	// Insert the new shuffle after the vector operand of the extract is defined
784	// (as long as it's not a PHI) or at the start of the basic block of the
785	// extract, so any subsequent extracts in the same basic block can use it.
786	// TODO: Insert before the earliest ExtractElementInst that is replaced.
787	if (ExtVecOpInst && !isa<PHINode>(Val: ExtVecOpInst))
788	WideVec->insertAfter(InsertPos: ExtVecOpInst->getIterator());
789	else
790	IC.InsertNewInstWith(New: WideVec, Old: ExtElt->getParent()->getFirstInsertionPt());
791
792	// Replace extracts from the original narrow vector with extracts from the new
793	// wide vector.
794	for (User *U : ExtVecOp->users()) {
795	ExtractElementInst *OldExt = dyn_cast<ExtractElementInst>(Val: U);
796	if (!OldExt \|\| OldExt->getParent() != WideVec->getParent())
797	continue;
798	auto *NewExt = ExtractElementInst::Create(Vec: WideVec, Idx: OldExt->getOperand(i_nocapture: `1`));
799	IC.InsertNewInstWith(New: NewExt, Old: OldExt->getIterator());
800	IC.replaceInstUsesWith(I&: *OldExt, V: NewExt);
801	// Add the old extracts to the worklist for DCE. We can't remove the
802	// extracts directly, because they may still be used by the calling code.
803	IC.addToWorklist(I: OldExt);
804	}
805
806	return true;
807	}
808
809	/// We are building a shuffle to create V, which is a sequence of insertelement,
810	/// extractelement pairs. If PermittedRHS is set, then we must either use it or
811	/// not rely on the second vector source. Return a std::pair containing the
812	/// left and right vectors of the proposed shuffle (or 0), and set the Mask
813	/// parameter as required.
814	///
815	/// Note: we intentionally don't try to fold earlier shuffles since they have
816	/// often been chosen carefully to be efficiently implementable on the target.
817	using ShuffleOps = std::pair<Value , Value >;
818
819	static ShuffleOps collectShuffleElements(Value V, SmallVectorImpl<int*> &Mask,
820	Value *PermittedRHS,
821	InstCombinerImpl &IC, bool &Rerun) {
822	assert(V->getType()->isVectorTy() && "Invalid shuffle!");
823	unsigned NumElts = cast<FixedVectorType>(Val: V->getType())->getNumElements();
824
825	if (match(V, P: m_Poison())) {
826	Mask.assign(NumElts, Elt: -`1`);
827	return std::make_pair(
828	x: PermittedRHS ? PoisonValue::get(T: PermittedRHS->getType()) : V, y: nullptr);
829	}
830
831	if (isa<ConstantAggregateZero>(Val: V)) {
832	Mask.assign(NumElts, Elt: `0`);
833	return std::make_pair(x&: V, y: nullptr);
834	}
835
836	if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(Val: V)) {
837	// If this is an insert of an extract from some other vector, include it.
838	Value *VecOp = IEI->getOperand(i_nocapture: `0`);
839	Value *ScalarOp = IEI->getOperand(i_nocapture: `1`);
840	Value *IdxOp = IEI->getOperand(i_nocapture: `2`);
841
842	if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(Val: ScalarOp)) {
843	if (isa<ConstantInt>(Val: EI->getOperand(i_nocapture: `1`)) && isa<ConstantInt>(Val: IdxOp)) {
844	unsigned ExtractedIdx =
845	cast<ConstantInt>(Val: EI->getOperand(i_nocapture: `1`))->getZExtValue();
846	unsigned InsertedIdx = cast<ConstantInt>(Val: IdxOp)->getZExtValue();
847
848	// Either the extracted from or inserted into vector must be RHSVec,
849	// otherwise we'd end up with a shuffle of three inputs.
850	if (EI->getOperand(i_nocapture: `0`) == PermittedRHS \|\| PermittedRHS == nullptr) {
851	Value *RHS = EI->getOperand(i_nocapture: `0`);
852	ShuffleOps LR = collectShuffleElements(V: VecOp, Mask, PermittedRHS: RHS, IC, Rerun);
853	assert(LR.second == nullptr \|\| LR.second == RHS);
854
855	if (LR.first->getType() != RHS->getType()) {
856	// Although we are giving up for now, see if we can create extracts
857	// that match the inserts for another round of combining.
858	if (replaceExtractElements(InsElt: IEI, ExtElt: EI, IC))
859	Rerun = true;
860
861	// We tried our best, but we can't find anything compatible with RHS
862	// further up the chain. Return a trivial shuffle.
863	for (unsigned i = `0`; i < NumElts; ++i)
864	Mask [i] = i;
865	return std::make_pair(x&: V, y: nullptr);
866	}
867
868	unsigned NumLHSElts =
869	cast<FixedVectorType>(Val: RHS->getType())->getNumElements();
870	Mask [InsertedIdx % NumElts] = NumLHSElts + ExtractedIdx;
871	return std::make_pair(x&: LR.first, y&: RHS);
872	}
873
874	if (VecOp == PermittedRHS) {
875	// We've gone as far as we can: anything on the other side of the
876	// extractelement will already have been converted into a shuffle.
877	unsigned NumLHSElts =
878	cast<FixedVectorType>(Val: EI->getOperand(i_nocapture: `0`)->getType())
879	->getNumElements();
880	for (unsigned i = `0`; i != NumElts; ++i)
881	Mask.push_back(Elt: i == InsertedIdx ? ExtractedIdx : NumLHSElts + i);
882	return std::make_pair(x: EI->getOperand(i_nocapture: `0`), y&: PermittedRHS);
883	}
884
885	// If this insertelement is a chain that comes from exactly these two
886	// vectors, return the vector and the effective shuffle.
887	if (EI->getOperand(i_nocapture: `0`)->getType() == PermittedRHS->getType() &&
888	collectSingleShuffleElements(V: IEI, LHS: EI->getOperand(i_nocapture: `0`), RHS: PermittedRHS,
889	Mask))
890	return std::make_pair(x: EI->getOperand(i_nocapture: `0`), y&: PermittedRHS);
891	}
892	}
893	}
894
895	// Otherwise, we can't do anything fancy. Return an identity vector.
896	for (unsigned i = `0`; i != NumElts; ++i)
897	Mask.push_back(Elt: i);
898	return std::make_pair(x&: V, y: nullptr);
899	}
900
901	/// Look for chain of insertvalue's that fully define an aggregate, and trace
902	/// back the values inserted, see if they are all were extractvalue'd from
903	/// the same source aggregate from the exact same element indexes.
904	/// If they were, just reuse the source aggregate.
905	/// This potentially deals with PHI indirections.
906	Instruction *InstCombinerImpl::foldAggregateConstructionIntoAggregateReuse(
907	InsertValueInst &OrigIVI) {
908	Type *AggTy = OrigIVI.getType();
909	unsigned NumAggElts;
910	switch (AggTy->getTypeID()) {
911	case Type::StructTyID:
912	NumAggElts = AggTy->getStructNumElements();
913	break;
914	case Type::ArrayTyID:
915	NumAggElts = AggTy->getArrayNumElements();
916	break;
917	default:
918	llvm_unreachable("Unhandled aggregate type?");
919	}
920
921	// Arbitrary aggregate size cut-off. Motivation for limit of 2 is to be able
922	// to handle clang C++ exception struct (which is hardcoded as {i8, i32}),*
923	// FIXME: any interesting patterns to be caught with larger limit?
924	assert(NumAggElts > `0` && "Aggregate should have elements.");
925	if (NumAggElts > `2`)
926	return nullptr;
927
928	static constexpr auto NotFound = std::nullopt;
929	static constexpr auto FoundMismatch = nullptr;
930
931	// Try to find a value of each element of an aggregate.
932	// FIXME: deal with more complex, not one-dimensional, aggregate types
933	SmallVector<std::optional<Instruction *>, `2`> AggElts(NumAggElts, NotFound);
934
935	// Do we know values for each element of the aggregate?
936	auto KnowAllElts = [&AggElts]() {
937	return !llvm::is_contained(Range&: AggElts, Element: NotFound);
938	};
939
940	int Depth = `0`;
941
942	// Arbitrary `insertvalue` visitation depth limit. Let's be okay with
943	// every element being overwritten twice, which should never happen.
944	static const int DepthLimit = `2` * NumAggElts;
945
946	// Recurse up the chain of `insertvalue` aggregate operands until either we've
947	// reconstructed full initializer or can't visit any more `insertvalue`'s.
948	for (InsertValueInst *CurrIVI = &OrigIVI;
949	Depth < DepthLimit && CurrIVI && !KnowAllElts ();
950	CurrIVI = dyn_cast<InsertValueInst>(Val: CurrIVI->getAggregateOperand()),
951	++Depth) {
952	auto *InsertedValue =
953	dyn_cast<Instruction>(Val: CurrIVI->getInsertedValueOperand());
954	if (!InsertedValue)
955	return nullptr; // Inserted value must be produced by an instruction.
956
957	ArrayRef<unsigned int> Indices = CurrIVI->getIndices();
958
959	// Don't bother with more than single-level aggregates.
960	if (Indices.size() != `1`)
961	return nullptr; // FIXME: deal with more complex aggregates?
962
963	// Now, we may have already previously recorded the value for this element
964	// of an aggregate. If we did, that means the CurrIVI will later be
965	// overwritten with the already-recorded value. But if not, let's record it!
966	std::optional<Instruction *> &Elt = AggElts [Indices.front()];
967	Elt = Elt.value_or(u&: InsertedValue);
968
969	// FIXME: should we handle chain-terminating undef base operand?
970	}
971
972	// Was that sufficient to deduce the full initializer for the aggregate?
973	if (!KnowAllElts ())
974	return nullptr; // Give up then.
975
976	// We now want to find the source[s] of the aggregate elements we've found.
977	// And with "source" we mean the original aggregate[s] from which
978	// the inserted elements were extracted. This may require PHI translation.
979
980	enum class AggregateDescription {
981	/// When analyzing the value that was inserted into an aggregate, we did
982	/// not manage to find defining `extractvalue` instruction to analyze.
983	NotFound,
984	/// When analyzing the value that was inserted into an aggregate, we did
985	/// manage to find defining `extractvalue` instruction[s], and everything
986	/// matched perfectly - aggregate type, element insertion/extraction index.
987	Found,
988	/// When analyzing the value that was inserted into an aggregate, we did
989	/// manage to find defining `extractvalue` instruction, but there was
990	/// a mismatch: either the source type from which the extraction was didn't
991	/// match the aggregate type into which the insertion was,
992	/// or the extraction/insertion channels mismatched,
993	/// or different elements had different source aggregates.
994	FoundMismatch
995	};
996	auto Describe = [](std::optional<Value *> SourceAggregate) {
997	if (SourceAggregate == NotFound)
998	return AggregateDescription::NotFound;
999	if (*SourceAggregate == FoundMismatch)
1000	return AggregateDescription::FoundMismatch;
1001	return AggregateDescription::Found;
1002	};
1003
1004	// If an aggregate element is defined in UseBB, we can't use it in PredBB.
1005	bool EltDefinedInUseBB = false;
1006
1007	// Given the value \p Elt that was being inserted into element \p EltIdx of an
1008	// aggregate AggTy, see if \p Elt was originally defined by an
1009	// appropriate extractvalue (same element index, same aggregate type).
1010	// If found, return the source aggregate from which the extraction was.
1011	// If \p PredBB is provided, does PHI translation of an \p Elt first.
1012	auto FindSourceAggregate =
1013	[&](Instruction Elt, unsigned* EltIdx, std::optional<BasicBlock *> UseBB,
1014	std::optional<BasicBlock > PredBB) -> std::optional<Value > {
1015	// For now(?), only deal with, at most, a single level of PHI indirection.
1016	if (UseBB && PredBB) {
1017	Elt = dyn_cast<Instruction>(Val: Elt->DoPHITranslation(CurBB: UseBB, PredBB: PredBB));
1018	if (Elt && Elt->getParent() == *UseBB)
1019	EltDefinedInUseBB = true;
1020	}
1021	// FIXME: deal with multiple levels of PHI indirection?
1022
1023	// Did we find an extraction?
1024	auto *EVI = dyn_cast_or_null<ExtractValueInst>(Val: Elt);
1025	if (!EVI)
1026	return NotFound;
1027
1028	Value *SourceAggregate = EVI->getAggregateOperand();
1029
1030	// Is the extraction from the same type into which the insertion was?
1031	if (SourceAggregate->getType() != AggTy)
1032	return FoundMismatch;
1033	// And the element index doesn't change between extraction and insertion?
1034	if (EVI->getNumIndices() != `1` \|\| EltIdx != EVI->getIndices().front())
1035	return FoundMismatch;
1036
1037	return SourceAggregate; // AggregateDescription::Found
1038	};
1039
1040	// Given elements AggElts that were constructing an aggregate OrigIVI,
1041	// see if we can find appropriate source aggregate for each of the elements,
1042	// and see it's the same aggregate for each element. If so, return it.
1043	auto FindCommonSourceAggregate =
1044	[&](std::optional<BasicBlock *> UseBB,
1045	std::optional<BasicBlock > PredBB) -> std::optional<Value > {
1046	std::optional<Value *> SourceAggregate;
1047
1048	for (auto I : enumerate(First&: AggElts)) {
1049	assert(Describe(SourceAggregate) != AggregateDescription::FoundMismatch &&
1050	"We don't store nullptr in SourceAggregate!");
1051	assert((Describe(SourceAggregate) == AggregateDescription::Found) ==
1052	(I.index() != `0`) &&
1053	"SourceAggregate should be valid after the first element,");
1054
1055	// For this element, is there a plausible source aggregate?
1056	// FIXME: we could special-case undef element, IFF we know that in the
1057	// source aggregate said element isn't poison.
1058	std::optional<Value *> SourceAggregateForElement =
1059	FindSourceAggregate (*I.value(), I.index(), UseBB, PredBB);
1060
1061	// Okay, what have we found? Does that correlate with previous findings?
1062
1063	// Regardless of whether or not we have previously found source
1064	// aggregate for previous elements (if any), if we didn't find one for
1065	// this element, passthrough whatever we have just found.
1066	if (Describe (SourceAggregateForElement) != AggregateDescription::Found)
1067	return SourceAggregateForElement;
1068
1069	// Okay, we have found source aggregate for this element.
1070	// Let's see what we already know from previous elements, if any.
1071	switch (Describe (SourceAggregate)) {
1072	case AggregateDescription::NotFound:
1073	// This is apparently the first element that we have examined.
1074	SourceAggregate = SourceAggregateForElement; // Record the aggregate!
1075	continue; // Great, now look at next element.
1076	case AggregateDescription::Found:
1077	// We have previously already successfully examined other elements.
1078	// Is this the same source aggregate we've found for other elements?
1079	if (SourceAggregateForElement != SourceAggregate)
1080	return FoundMismatch;
1081	continue; // Still the same aggregate, look at next element.
1082	case AggregateDescription::FoundMismatch:
1083	llvm_unreachable("Can't happen. We would have early-exited then.");
1084	};
1085	}
1086
1087	assert(Describe(SourceAggregate) == AggregateDescription::Found &&
1088	"Must be a valid Value");
1089	return *SourceAggregate;
1090	};
1091
1092	std::optional<Value *> SourceAggregate;
1093
1094	// Can we find the source aggregate without looking at predecessors?
1095	SourceAggregate = FindCommonSourceAggregate (/UseBB=/std::nullopt,
1096	/PredBB=/std::nullopt);
1097	if (Describe (SourceAggregate) != AggregateDescription::NotFound) {
1098	if (Describe (SourceAggregate) == AggregateDescription::FoundMismatch)
1099	return nullptr; // Conflicting source aggregates!
1100	++NumAggregateReconstructionsSimplified;
1101	return replaceInstUsesWith(I&: OrigIVI, V: *SourceAggregate);
1102	}
1103
1104	// Okay, apparently we need to look at predecessors.
1105
1106	// We should be smart about picking the "use" basic block, which will be the
1107	// merge point for aggregate, where we'll insert the final PHI that will be
1108	// used instead of OrigIVI. Basic block of OrigIVI is not* the right choice.*
1109	// We should look in which blocks each of the AggElts is being defined,
1110	// they all should be defined in the same basic block.
1111	BasicBlock UseBB = nullptr*;
1112
1113	for (const std::optional<Instruction *> &I : AggElts) {
1114	BasicBlock BB = (I)->getParent();
1115	// If it's the first instruction we've encountered, record the basic block.
1116	if (!UseBB) {
1117	UseBB = BB;
1118	continue;
1119	}
1120	// Otherwise, this must be the same basic block we've seen previously.
1121	if (UseBB != BB)
1122	return nullptr;
1123	}
1124
1125	// If all* of the elements are basic-block-independent, meaning they are*
1126	// either function arguments, or constant expressions, then if we didn't
1127	// handle them without predecessor-aware handling, we won't handle them now.
1128	if (!UseBB)
1129	return nullptr;
1130
1131	// If we didn't manage to find source aggregate without looking at
1132	// predecessors, and there are no predecessors to look at, then we're done.
1133	if (pred_empty(BB: UseBB))
1134	return nullptr;
1135
1136	// Arbitrary predecessor count limit.
1137	static const int PredCountLimit = `64`;
1138
1139	// Cache the (non-uniqified!) list of predecessors in a vector,
1140	// checking the limit at the same time for efficiency.
1141	SmallVector<BasicBlock , `4`> Preds; // May have duplicates!*
1142	for (BasicBlock *Pred : predecessors(BB: UseBB)) {
1143	// Don't bother if there are too many predecessors.
1144	if (Preds.size() >= PredCountLimit) // FIXME: only count duplicates once?
1145	return nullptr;
1146	Preds.emplace_back(Args&: Pred);
1147	}
1148
1149	// For each predecessor, what is the source aggregate,
1150	// from which all the elements were originally extracted from?
1151	// Note that we want for the map to have stable iteration order!
1152	SmallMapVector<BasicBlock , Value , `4`> SourceAggregates;
1153	bool FoundSrcAgg = false;
1154	for (BasicBlock *Pred : Preds) {
1155	std::pair<decltype(SourceAggregates)::iterator, bool> IV =
1156	SourceAggregates.try_emplace(Key: Pred);
1157	// Did we already evaluate this predecessor?
1158	if (!IV.second)
1159	continue;
1160
1161	// Let's hope that when coming from predecessor Pred, all elements of the
1162	// aggregate produced by OrigIVI must have been originally extracted from
1163	// the same aggregate. Is that so? Can we find said original aggregate?
1164	SourceAggregate = FindCommonSourceAggregate (UseBB, Pred);
1165	if (Describe (SourceAggregate) == AggregateDescription::Found) {
1166	FoundSrcAgg = true;
1167	IV.first->second = *SourceAggregate;
1168	} else {
1169	// If UseBB is the single successor of Pred, we can add InsertValue to
1170	// Pred.
1171	auto *BI = dyn_cast<UncondBrInst>(Val: Pred->getTerminator());
1172	if (!BI)
1173	return nullptr;
1174	}
1175	}
1176
1177	if (!FoundSrcAgg)
1178	return nullptr;
1179
1180	// Do some sanity check if we need to add insertvalue into predecessors.
1181	auto OrigBB = OrigIVI.getParent();
1182	for (auto &It : SourceAggregates) {
1183	if (Describe (It.second) == AggregateDescription::Found)
1184	continue;
1185
1186	// Element is defined in UseBB, so it can't be used in predecessors.
1187	if (EltDefinedInUseBB)
1188	return nullptr;
1189
1190	// Do this transformation cross loop boundary may cause dead loop. So we
1191	// should avoid this situation. But LoopInfo is not generally available, we
1192	// must be conservative here.
1193	// If OrigIVI is in UseBB and it's the only successor of PredBB, PredBB
1194	// can't be in inner loop.
1195	if (UseBB != OrigBB)
1196	return nullptr;
1197
1198	// Avoid constructing constant aggregate because constant value may expose
1199	// more optimizations.
1200	bool ConstAgg = true;
1201	for (auto Val : AggElts) {
1202	Value Elt = (Val)->DoPHITranslation(CurBB: UseBB, PredBB: It.first);
1203	if (!isa<Constant>(Val: Elt)) {
1204	ConstAgg = false;
1205	break;
1206	}
1207	}
1208	if (ConstAgg)
1209	return nullptr;
1210	}
1211
1212	// For predecessors without appropriate source aggregate, create one in the
1213	// predecessor.
1214	for (auto &It : SourceAggregates) {
1215	if (Describe (It.second) == AggregateDescription::Found)
1216	continue;
1217
1218	BasicBlock *Pred = It.first;
1219	Builder.SetInsertPoint(Pred->getTerminator());
1220	Value *V = PoisonValue::get(T: AggTy);
1221	for (auto [Idx, Val] : enumerate(First&: AggElts)) {
1222	Value Elt = (Val)->DoPHITranslation(CurBB: UseBB, PredBB: Pred);
1223	V = Builder.CreateInsertValue(Agg: V, Val: Elt, Idxs: Idx);
1224	}
1225
1226	It.second = V;
1227	}
1228
1229	// All good! Now we just need to thread the source aggregates here.
1230	// Note that we have to insert the new PHI here, ourselves, because we can't
1231	// rely on InstCombinerImpl::run() inserting it into the right basic block.
1232	// Note that the same block can be a predecessor more than once,
1233	// and we need to preserve that invariant for the PHI node.
1234	BuilderTy::InsertPointGuard Guard(Builder);
1235	Builder.SetInsertPoint(TheBB: UseBB, IP: UseBB->getFirstNonPHIIt());
1236	auto *PHI =
1237	Builder.CreatePHI(Ty: AggTy, NumReservedValues: Preds.size(), Name: OrigIVI.getName() + ".merged");
1238	for (BasicBlock *Pred : Preds)
1239	PHI->addIncoming(V: SourceAggregates [Pred], BB: Pred);
1240
1241	++NumAggregateReconstructionsSimplified;
1242	return replaceInstUsesWith(I&: OrigIVI, V: PHI);
1243	}
1244
1245	/// Try to find redundant insertvalue instructions, like the following ones:
1246	/// %0 = insertvalue { i8, i32 } undef, i8 %x, 0
1247	/// %1 = insertvalue { i8, i32 } %0, i8 %y, 0
1248	/// Here the second instruction inserts values at the same indices, as the
1249	/// first one, making the first one redundant.
1250	/// It should be transformed to:
1251	/// %0 = insertvalue { i8, i32 } undef, i8 %y, 0
1252	Instruction *InstCombinerImpl::visitInsertValueInst(InsertValueInst &I) {
1253	if (Value *V = simplifyInsertValueInst(
1254	Agg: I.getAggregateOperand(), Val: I.getInsertedValueOperand(), Idxs: I.getIndices(),
1255	Q: SQ.getWithInstruction(I: &I)))
1256	return replaceInstUsesWith(I, V);
1257
1258	bool IsRedundant = false;
1259	ArrayRef<unsigned int> FirstIndices = I.getIndices();
1260
1261	// If there is a chain of insertvalue instructions (each of them except the
1262	// last one has only one use and it's another insertvalue insn from this
1263	// chain), check if any of the 'children' uses the same indices as the first
1264	// instruction. In this case, the first one is redundant.
1265	Value *V = &I;
1266	unsigned Depth = `0`;
1267	while (V->hasOneUse() && Depth < `10`) {
1268	User *U = V->user_back();
1269	auto UserInsInst = dyn_cast<InsertValueInst>(Val: U);
1270	if (!UserInsInst \|\| U->getOperand(i: `0`) != V)
1271	break;
1272	if (UserInsInst->getIndices() == FirstIndices) {
1273	IsRedundant = true;
1274	break;
1275	}
1276	V = UserInsInst;
1277	Depth++;
1278	}
1279
1280	if (IsRedundant)
1281	return replaceInstUsesWith(I, V: I.getOperand(i_nocapture: `0`));
1282
1283	if (Instruction *NewI = foldAggregateConstructionIntoAggregateReuse(OrigIVI&: I))
1284	return NewI;
1285
1286	return nullptr;
1287	}
1288
1289	static bool isShuffleEquivalentToSelect(ShuffleVectorInst &Shuf) {
1290	// Can not analyze scalable type, the number of elements is not a compile-time
1291	// constant.
1292	if (isa<ScalableVectorType>(Val: Shuf.getOperand(i_nocapture: `0`)->getType()))
1293	return false;
1294
1295	int MaskSize = Shuf.getShuffleMask().size();
1296	int VecSize =
1297	cast<FixedVectorType>(Val: Shuf.getOperand(i_nocapture: `0`)->getType())->getNumElements();
1298
1299	// A vector select does not change the size of the operands.
1300	if (MaskSize != VecSize)
1301	return false;
1302
1303	// Each mask element must be undefined or choose a vector element from one of
1304	// the source operands without crossing vector lanes.
1305	for (int i = `0`; i != MaskSize; ++i) {
1306	int Elt = Shuf.getMaskValue(Elt: i);
1307	if (Elt != -`1` && Elt != i && Elt != i + VecSize)
1308	return false;
1309	}
1310
1311	return true;
1312	}
1313
1314	/// Turn a chain of inserts that splats a value into an insert + shuffle:
1315	/// insertelt(insertelt(insertelt(insertelt X, %k, 0), %k, 1), %k, 2) ... ->
1316	/// shufflevector(insertelt(X, %k, 0), poison, zero)
1317	static Instruction *foldInsSequenceIntoSplat(InsertElementInst &InsElt) {
1318	// We are interested in the last insert in a chain. So if this insert has a
1319	// single user and that user is an insert, bail.
1320	if (InsElt.hasOneUse() && isa<InsertElementInst>(Val: InsElt.user_back()))
1321	return nullptr;
1322
1323	VectorType *VecTy = InsElt.getType();
1324	// Can not handle scalable type, the number of elements is not a compile-time
1325	// constant.
1326	if (isa<ScalableVectorType>(Val: VecTy))
1327	return nullptr;
1328	unsigned NumElements = cast<FixedVectorType>(Val: VecTy)->getNumElements();
1329
1330	// Do not try to do this for a one-element vector, since that's a nop,
1331	// and will cause an inf-loop.
1332	if (NumElements == `1`)
1333	return nullptr;
1334
1335	Value *SplatVal = InsElt.getOperand(i_nocapture: `1`);
1336	InsertElementInst *CurrIE = &InsElt;
1337	SmallBitVector ElementPresent(NumElements, false);
1338	InsertElementInst FirstIE = nullptr*;
1339
1340	// Walk the chain backwards, keeping track of which indices we inserted into,
1341	// until we hit something that isn't an insert of the splatted value.
1342	while (CurrIE) {
1343	auto *Idx = dyn_cast<ConstantInt>(Val: CurrIE->getOperand(i_nocapture: `2`));
1344	if (!Idx \|\| CurrIE->getOperand(i_nocapture: `1`) != SplatVal)
1345	return nullptr;
1346
1347	auto *NextIE = dyn_cast<InsertElementInst>(Val: CurrIE->getOperand(i_nocapture: `0`));
1348	// Check none of the intermediate steps have any additional uses, except
1349	// for the root insertelement instruction, which can be re-used, if it
1350	// inserts at position 0.
1351	if (CurrIE != &InsElt &&
1352	(!CurrIE->hasOneUse() && (NextIE != nullptr \|\| !Idx->isZero())))
1353	return nullptr;
1354
1355	ElementPresent [Idx->getZExtValue()] = true;
1356	FirstIE = CurrIE;
1357	CurrIE = NextIE;
1358	}
1359
1360	// If this is just a single insertelement (not a sequence), we are done.
1361	if (FirstIE == &InsElt)
1362	return nullptr;
1363
1364	// If we are not inserting into a poison vector, make sure we've seen an
1365	// insert into every element.
1366	// TODO: If the base vector is not undef, it might be better to create a splat
1367	// and then a select-shuffle (blend) with the base vector.
1368	if (!match(V: FirstIE->getOperand(i_nocapture: `0`), P: m_Poison()))
1369	if (!ElementPresent.all())
1370	return nullptr;
1371
1372	// Create the insert + shuffle.
1373	Type *Int64Ty = Type::getInt64Ty(C&: InsElt.getContext());
1374	PoisonValue *PoisonVec = PoisonValue::get(T: VecTy);
1375	Constant *Zero = ConstantInt::get(Ty: Int64Ty, V: `0`);
1376	if (!cast<ConstantInt>(Val: FirstIE->getOperand(i_nocapture: `2`))->isZero())
1377	FirstIE = InsertElementInst::Create(Vec: PoisonVec, NewElt: SplatVal, Idx: Zero, NameStr: "",
1378	InsertBefore: InsElt.getIterator());
1379
1380	// Splat from element 0, but replace absent elements with poison in the mask.
1381	SmallVector<int, `16`> Mask(NumElements, `0`);
1382	for (unsigned i = `0`; i != NumElements; ++i)
1383	if (!ElementPresent [i])
1384	Mask [i] = -`1`;
1385
1386	return new ShuffleVectorInst (FirstIE, Mask);
1387	}
1388
1389	/// Try to fold an insert element into an existing splat shuffle by changing
1390	/// the shuffle's mask to include the index of this insert element.
1391	static Instruction *foldInsEltIntoSplat(InsertElementInst &InsElt) {
1392	// Check if the vector operand of this insert is a canonical splat shuffle.
1393	auto *Shuf = dyn_cast<ShuffleVectorInst>(Val: InsElt.getOperand(i_nocapture: `0`));
1394	if (!Shuf \|\| !Shuf->isZeroEltSplat())
1395	return nullptr;
1396
1397	// Bail out early if shuffle is scalable type. The number of elements in
1398	// shuffle mask is unknown at compile-time.
1399	if (isa<ScalableVectorType>(Val: Shuf->getType()))
1400	return nullptr;
1401
1402	// Check for a constant insertion index.
1403	uint64_t IdxC;
1404	if (!match(V: InsElt.getOperand(i_nocapture: `2`), P: m_ConstantInt(V&: IdxC)))
1405	return nullptr;
1406
1407	// Check if the splat shuffle's input is the same as this insert's scalar op.
1408	Value *X = InsElt.getOperand(i_nocapture: `1`);
1409	Value *Op0 = Shuf->getOperand(i_nocapture: `0`);
1410	if (!match(V: Op0, P: m_InsertElt(Val: m_Undef(), Elt: m_Specific(V: X), Idx: m_ZeroInt())))
1411	return nullptr;
1412
1413	// Replace the shuffle mask element at the index of this insert with a zero.
1414	// For example:
1415	// inselt (shuf (inselt undef, X, 0), _, <0,undef,0,undef>), X, 1
1416	// --> shuf (inselt undef, X, 0), poison, <0,0,0,undef>
1417	unsigned NumMaskElts =
1418	cast<FixedVectorType>(Val: Shuf->getType())->getNumElements();
1419	SmallVector<int, `16`> NewMask(NumMaskElts);
1420	for (unsigned i = `0`; i != NumMaskElts; ++i)
1421	NewMask [i] = i == IdxC ? `0` : Shuf->getMaskValue(Elt: i);
1422
1423	return new ShuffleVectorInst (Op0, NewMask);
1424	}
1425
1426	/// Try to fold an extract+insert element into an existing identity shuffle by
1427	/// changing the shuffle's mask to include the index of this insert element.
1428	static Instruction *foldInsEltIntoIdentityShuffle(InsertElementInst &InsElt) {
1429	// Check if the vector operand of this insert is an identity shuffle.
1430	auto *Shuf = dyn_cast<ShuffleVectorInst>(Val: InsElt.getOperand(i_nocapture: `0`));
1431	if (!Shuf \|\| !match(V: Shuf->getOperand(i_nocapture: `1`), P: m_Poison()) \|\|
1432	!(Shuf->isIdentityWithExtract() \|\| Shuf->isIdentityWithPadding()))
1433	return nullptr;
1434
1435	// Bail out early if shuffle is scalable type. The number of elements in
1436	// shuffle mask is unknown at compile-time.
1437	if (isa<ScalableVectorType>(Val: Shuf->getType()))
1438	return nullptr;
1439
1440	// Check for a constant insertion index.
1441	uint64_t IdxC;
1442	if (!match(V: InsElt.getOperand(i_nocapture: `2`), P: m_ConstantInt(V&: IdxC)))
1443	return nullptr;
1444
1445	// Check if this insert's scalar op is extracted from the identity shuffle's
1446	// input vector.
1447	Value *Scalar = InsElt.getOperand(i_nocapture: `1`);
1448	Value *X = Shuf->getOperand(i_nocapture: `0`);
1449	if (!match(V: Scalar, P: m_ExtractElt(Val: m_Specific(V: X), Idx: m_SpecificInt(V: IdxC))))
1450	return nullptr;
1451
1452	// Replace the shuffle mask element at the index of this extract+insert with
1453	// that same index value.
1454	// For example:
1455	// inselt (shuf X, IdMask), (extelt X, IdxC), IdxC --> shuf X, IdMask'
1456	unsigned NumMaskElts =
1457	cast<FixedVectorType>(Val: Shuf->getType())->getNumElements();
1458	SmallVector<int, `16`> NewMask(NumMaskElts);
1459	ArrayRef<int> OldMask = Shuf->getShuffleMask();
1460	for (unsigned i = `0`; i != NumMaskElts; ++i) {
1461	if (i != IdxC) {
1462	// All mask elements besides the inserted element remain the same.
1463	NewMask [i] = OldMask [i];
1464	} else if (OldMask [i] == (int)IdxC) {
1465	// If the mask element was already set, there's nothing to do
1466	// (demanded elements analysis may unset it later).
1467	return nullptr;
1468	} else {
1469	assert(OldMask[i] == PoisonMaskElem &&
1470	"Unexpected shuffle mask element for identity shuffle");
1471	NewMask [i] = IdxC;
1472	}
1473	}
1474
1475	return new ShuffleVectorInst (X, Shuf->getOperand(i_nocapture: `1`), NewMask);
1476	}
1477
1478	/// If we have an insertelement instruction feeding into another insertelement
1479	/// and the 2nd is inserting a constant into the vector, canonicalize that
1480	/// constant insertion before the insertion of a variable:
1481	///
1482	/// insertelement (insertelement X, Y, IdxC1), ScalarC, IdxC2 -->
1483	/// insertelement (insertelement X, ScalarC, IdxC2), Y, IdxC1
1484	///
1485	/// This has the potential of eliminating the 2nd insertelement instruction
1486	/// via constant folding of the scalar constant into a vector constant.
1487	static Instruction *hoistInsEltConst(InsertElementInst &InsElt2,
1488	InstCombiner::BuilderTy &Builder) {
1489	auto *InsElt1 = dyn_cast<InsertElementInst>(Val: InsElt2.getOperand(i_nocapture: `0`));
1490	if (!InsElt1 \|\| !InsElt1->hasOneUse())
1491	return nullptr;
1492
1493	Value X, Y;
1494	Constant *ScalarC;
1495	ConstantInt IdxC1, IdxC2;
1496	if (match(V: InsElt1->getOperand(i_nocapture: `0`), P: m_Value(V&: X)) &&
1497	match(V: InsElt1->getOperand(i_nocapture: `1`), P: m_Value(V&: Y)) && !isa<Constant>(Val: Y) &&
1498	match(V: InsElt1->getOperand(i_nocapture: `2`), P: m_ConstantInt(CI&: IdxC1)) &&
1499	match(V: InsElt2.getOperand(i_nocapture: `1`), P: m_Constant(C&: ScalarC)) &&
1500	match(V: InsElt2.getOperand(i_nocapture: `2`), P: m_ConstantInt(CI&: IdxC2)) && IdxC1 != IdxC2) {
1501	Value *NewInsElt1 = Builder.CreateInsertElement(Vec: X, NewElt: ScalarC, Idx: IdxC2);
1502	return InsertElementInst::Create(Vec: NewInsElt1, NewElt: Y, Idx: IdxC1);
1503	}
1504
1505	return nullptr;
1506	}
1507
1508	/// insertelt (shufflevector X, CVec, Mask\|insertelt X, C1, CIndex1), C, CIndex
1509	/// --> shufflevector X, CVec', Mask'
1510	static Instruction *foldConstantInsEltIntoShuffle(InsertElementInst &InsElt) {
1511	auto *Inst = dyn_cast<Instruction>(Val: InsElt.getOperand(i_nocapture: `0`));
1512	// Bail out if the parent has more than one use. In that case, we'd be
1513	// replacing the insertelt with a shuffle, and that's not a clear win.
1514	if (!Inst \|\| !Inst->hasOneUse())
1515	return nullptr;
1516	if (auto *Shuf = dyn_cast<ShuffleVectorInst>(Val: InsElt.getOperand(i_nocapture: `0`))) {
1517	// The shuffle must have a constant vector operand. The insertelt must have
1518	// a constant scalar being inserted at a constant position in the vector.
1519	Constant ShufConstVec, InsEltScalar;
1520	uint64_t InsEltIndex;
1521	if (!match(V: Shuf->getOperand(i_nocapture: `1`), P: m_Constant(C&: ShufConstVec)) \|\|
1522	!match(V: InsElt.getOperand(i_nocapture: `1`), P: m_Constant(C&: InsEltScalar)) \|\|
1523	!match(V: InsElt.getOperand(i_nocapture: `2`), P: m_ConstantInt(V&: InsEltIndex)))
1524	return nullptr;
1525
1526	// Adding an element to an arbitrary shuffle could be expensive, but a
1527	// shuffle that selects elements from vectors without crossing lanes is
1528	// assumed cheap.
1529	// If we're just adding a constant into that shuffle, it will still be
1530	// cheap.
1531	if (!isShuffleEquivalentToSelect(Shuf&: *Shuf))
1532	return nullptr;
1533
1534	// From the above 'select' check, we know that the mask has the same number
1535	// of elements as the vector input operands. We also know that each constant
1536	// input element is used in its lane and can not be used more than once by
1537	// the shuffle. Therefore, replace the constant in the shuffle's constant
1538	// vector with the insertelt constant. Replace the constant in the shuffle's
1539	// mask vector with the insertelt index plus the length of the vector
1540	// (because the constant vector operand of a shuffle is always the 2nd
1541	// operand).
1542	ArrayRef<int> Mask = Shuf->getShuffleMask();
1543	unsigned NumElts = Mask.size();
1544	SmallVector<Constant *, `16`> NewShufElts(NumElts);
1545	SmallVector<int, `16`> NewMaskElts(NumElts);
1546	for (unsigned I = `0`; I != NumElts; ++I) {
1547	if (I == InsEltIndex) {
1548	NewShufElts [I] = InsEltScalar;
1549	NewMaskElts [I] = InsEltIndex + NumElts;
1550	} else {
1551	// Copy over the existing values.
1552	NewShufElts [I] = ShufConstVec->getAggregateElement(Elt: I);
1553	NewMaskElts [I] = Mask [I];
1554	}
1555
1556	// Bail if we failed to find an element.
1557	if (!NewShufElts [I])
1558	return nullptr;
1559	}
1560
1561	// Create new operands for a shuffle that includes the constant of the
1562	// original insertelt. The old shuffle will be dead now.
1563	return new ShuffleVectorInst (Shuf->getOperand(i_nocapture: `0`),
1564	ConstantVector::get(V: NewShufElts), NewMaskElts);
1565	} else if (auto *IEI = dyn_cast<InsertElementInst>(Val: Inst)) {
1566	// Transform sequences of insertelements ops with constant data/indexes into
1567	// a single shuffle op.
1568	// Can not handle scalable type, the number of elements needed to create
1569	// shuffle mask is not a compile-time constant.
1570	if (isa<ScalableVectorType>(Val: InsElt.getType()))
1571	return nullptr;
1572	unsigned NumElts =
1573	cast<FixedVectorType>(Val: InsElt.getType())->getNumElements();
1574
1575	uint64_t InsertIdx[`2`];
1576	Constant *Val[`2`];
1577	if (!match(V: InsElt.getOperand(i_nocapture: `2`), P: m_ConstantInt(V&: InsertIdx[`0`])) \|\|
1578	!match(V: InsElt.getOperand(i_nocapture: `1`), P: m_Constant(C&: Val[`0`])) \|\|
1579	!match(V: IEI->getOperand(i_nocapture: `2`), P: m_ConstantInt(V&: InsertIdx[`1`])) \|\|
1580	!match(V: IEI->getOperand(i_nocapture: `1`), P: m_Constant(C&: Val[`1`])))
1581	return nullptr;
1582	SmallVector<Constant *, `16`> Values(NumElts);
1583	SmallVector<int, `16`> Mask(NumElts);
1584	auto ValI = std::begin(arr&: Val);
1585	// Generate new constant vector and mask.
1586	// We have 2 values/masks from the insertelements instructions. Insert them
1587	// into new value/mask vectors.
1588	for (uint64_t I : InsertIdx) {
1589	if (!Values [I]) {
1590	Values [I] = *ValI;
1591	Mask [I] = NumElts + I;
1592	}
1593	++ValI;
1594	}
1595	// Remaining values are filled with 'poison' values.
1596	for (unsigned I = `0`; I < NumElts; ++I) {
1597	if (!Values [I]) {
1598	Values [I] = PoisonValue::get(T: InsElt.getType()->getElementType());
1599	Mask [I] = I;
1600	}
1601	}
1602	// Create new operands for a shuffle that includes the constant of the
1603	// original insertelt.
1604	return new ShuffleVectorInst (IEI->getOperand(i_nocapture: `0`),
1605	ConstantVector::get(V: Values), Mask);
1606	}
1607	return nullptr;
1608	}
1609
1610	/// If both the base vector and the inserted element are extended from the same
1611	/// type, do the insert element in the narrow source type followed by extend.
1612	/// TODO: This can be extended to include other cast opcodes, but particularly
1613	/// if we create a wider insertelement, make sure codegen is not harmed.
1614	static Instruction *narrowInsElt(InsertElementInst &InsElt,
1615	InstCombiner::BuilderTy &Builder) {
1616	// We are creating a vector extend. If the original vector extend has another
1617	// use, that would mean we end up with 2 vector extends, so avoid that.
1618	// TODO: We could ease the use-clause to "if at least one op has one use"
1619	// (assuming that the source types match - see next TODO comment).
1620	Value *Vec = InsElt.getOperand(i_nocapture: `0`);
1621	if (!Vec->hasOneUse())
1622	return nullptr;
1623
1624	Value *Scalar = InsElt.getOperand(i_nocapture: `1`);
1625	Value X, Y;
1626	CastInst::CastOps CastOpcode;
1627	if (match(V: Vec, P: m_FPExt(Op: m_Value(V&: X))) && match(V: Scalar, P: m_FPExt(Op: m_Value(V&: Y))))
1628	CastOpcode = Instruction::FPExt;
1629	else if (match(V: Vec, P: m_SExt(Op: m_Value(V&: X))) && match(V: Scalar, P: m_SExt(Op: m_Value(V&: Y))))
1630	CastOpcode = Instruction::SExt;
1631	else if (match(V: Vec, P: m_ZExt(Op: m_Value(V&: X))) && match(V: Scalar, P: m_ZExt(Op: m_Value(V&: Y))))
1632	CastOpcode = Instruction::ZExt;
1633	else
1634	return nullptr;
1635
1636	// TODO: We can allow mismatched types by creating an intermediate cast.
1637	if (X->getType()->getScalarType() != Y->getType())
1638	return nullptr;
1639
1640	// inselt (ext X), (ext Y), Index --> ext (inselt X, Y, Index)
1641	Value *NewInsElt = Builder.CreateInsertElement(Vec: X, NewElt: Y, Idx: InsElt.getOperand(i_nocapture: `2`));
1642	return CastInst::Create(CastOpcode, S: NewInsElt, Ty: InsElt.getType());
1643	}
1644
1645	/// If we are inserting 2 halves of a value into adjacent elements of a vector,
1646	/// try to convert to a single insert with appropriate bitcasts.
1647	static Instruction *foldTruncInsEltPair(InsertElementInst &InsElt,
1648	bool IsBigEndian,
1649	InstCombiner::BuilderTy &Builder) {
1650	Value *VecOp = InsElt.getOperand(i_nocapture: `0`);
1651	Value *ScalarOp = InsElt.getOperand(i_nocapture: `1`);
1652	Value *IndexOp = InsElt.getOperand(i_nocapture: `2`);
1653
1654	// Pattern depends on endian because we expect lower index is inserted first.
1655	// Big endian:
1656	// inselt (inselt BaseVec, (trunc (lshr X, BW/2), Index0), (trunc X), Index1
1657	// Little endian:
1658	// inselt (inselt BaseVec, (trunc X), Index0), (trunc (lshr X, BW/2)), Index1
1659	// Note: It is not safe to do this transform with an arbitrary base vector
1660	// because the bitcast of that vector to fewer/larger elements could
1661	// allow poison to spill into an element that was not poison before.
1662	// TODO: Detect smaller fractions of the scalar.
1663	// TODO: One-use checks are conservative.
1664	auto *VTy = dyn_cast<FixedVectorType>(Val: InsElt.getType());
1665	Value Scalar0, BaseVec;
1666	uint64_t Index0, Index1;
1667	if (!VTy \|\| (VTy->getNumElements() & `1`) \|\|
1668	!match(V: IndexOp, P: m_ConstantInt(V&: Index1)) \|\|
1669	!match(V: VecOp, P: m_InsertElt(Val: m_Value(V&: BaseVec), Elt: m_Value(V&: Scalar0),
1670	Idx: m_ConstantInt(V&: Index0))) \|\|
1671	!match(V: BaseVec, P: m_Undef()))
1672	return nullptr;
1673
1674	// The first insert must be to the index one less than this one, and
1675	// the first insert must be to an even index.
1676	if (Index0 + `1` != Index1 \|\| Index0 & `1`)
1677	return nullptr;
1678
1679	// For big endian, the high half of the value should be inserted first.
1680	// For little endian, the low half of the value should be inserted first.
1681	Value *X;
1682	uint64_t ShAmt;
1683	if (IsBigEndian) {
1684	if (!match(V: ScalarOp, P: m_Trunc(Op: m_Value(V&: X))) \|\|
1685	!match(V: Scalar0, P: m_Trunc(Op: m_LShr(L: m_Specific(V: X), R: m_ConstantInt(V&: ShAmt)))))
1686	return nullptr;
1687	} else {
1688	if (!match(V: Scalar0, P: m_Trunc(Op: m_Value(V&: X))) \|\|
1689	!match(V: ScalarOp, P: m_Trunc(Op: m_LShr(L: m_Specific(V: X), R: m_ConstantInt(V&: ShAmt)))))
1690	return nullptr;
1691	}
1692
1693	Type *SrcTy = X->getType();
1694	unsigned ScalarWidth = SrcTy->getScalarSizeInBits();
1695	unsigned VecEltWidth = VTy->getScalarSizeInBits();
1696	if (ScalarWidth != VecEltWidth * `2` \|\| ShAmt != VecEltWidth)
1697	return nullptr;
1698
1699	// Bitcast the base vector to a vector type with the source element type.
1700	Type *CastTy = FixedVectorType::get(ElementType: SrcTy, NumElts: VTy->getNumElements() / `2`);
1701	Value *CastBaseVec = Builder.CreateBitCast(V: BaseVec, DestTy: CastTy);
1702
1703	// Scale the insert index for a vector with half as many elements.
1704	// bitcast (inselt (bitcast BaseVec), X, NewIndex)
1705	uint64_t NewIndex = IsBigEndian ? Index1 / `2` : Index0 / `2`;
1706	Value *NewInsert = Builder.CreateInsertElement(Vec: CastBaseVec, NewElt: X, Idx: NewIndex);
1707	return new BitCastInst (NewInsert, VTy);
1708	}
1709
1710	Instruction *InstCombinerImpl::visitInsertElementInst(InsertElementInst &IE) {
1711	Value *VecOp = IE.getOperand(i_nocapture: `0`);
1712	Value *ScalarOp = IE.getOperand(i_nocapture: `1`);
1713	Value *IdxOp = IE.getOperand(i_nocapture: `2`);
1714
1715	if (auto *V = simplifyInsertElementInst(
1716	Vec: VecOp, Elt: ScalarOp, Idx: IdxOp, Q: SQ.getWithInstruction(I: &IE)))
1717	return replaceInstUsesWith(I&: IE, V);
1718
1719	// Canonicalize type of constant indices to i64 to simplify CSE
1720	if (auto *IndexC = dyn_cast<ConstantInt>(Val: IdxOp)) {
1721	if (auto *NewIdx = getPreferredVectorIndex(IndexC))
1722	return replaceOperand(I&: IE, OpNum: `2`, V: NewIdx);
1723
1724	Value BaseVec, OtherScalar;
1725	uint64_t OtherIndexVal;
1726	if (match(V: VecOp, P: m_OneUse(SubPattern: m_InsertElt(Val: m_Value(V&: BaseVec),
1727	Elt: m_Value(V&: OtherScalar),
1728	Idx: m_ConstantInt(V&: OtherIndexVal)))) &&
1729	!isa<Constant>(Val: OtherScalar) && OtherIndexVal > IndexC->getZExtValue()) {
1730	Value *NewIns = Builder.CreateInsertElement(Vec: BaseVec, NewElt: ScalarOp, Idx: IdxOp);
1731	return InsertElementInst::Create(Vec: NewIns, NewElt: OtherScalar,
1732	Idx: Builder.getInt64(C: OtherIndexVal));
1733	}
1734	}
1735
1736	// If the scalar is bitcast and inserted into undef, do the insert in the
1737	// source type followed by bitcast.
1738	// TODO: Generalize for insert into any constant, not just undef?
1739	Value *ScalarSrc;
1740	if (match(V: VecOp, P: m_Undef()) &&
1741	match(V: ScalarOp, P: m_OneUse(SubPattern: m_BitCast(Op: m_Value(V&: ScalarSrc)))) &&
1742	(ScalarSrc->getType()->isIntegerTy() \|\|
1743	ScalarSrc->getType()->isFloatingPointTy())) {
1744	// inselt undef, (bitcast ScalarSrc), IdxOp -->
1745	// bitcast (inselt undef, ScalarSrc, IdxOp)
1746	Type *ScalarTy = ScalarSrc->getType();
1747	Type *VecTy = VectorType::get(ElementType: ScalarTy, EC: IE.getType()->getElementCount());
1748	Constant *NewUndef = isa<PoisonValue>(Val: VecOp) ? PoisonValue::get(T: VecTy)
1749	: UndefValue::get(T: VecTy);
1750	Value *NewInsElt = Builder.CreateInsertElement(Vec: NewUndef, NewElt: ScalarSrc, Idx: IdxOp);
1751	return new BitCastInst (NewInsElt, IE.getType());
1752	}
1753
1754	// If the vector and scalar are both bitcast from the same element type, do
1755	// the insert in that source type followed by bitcast.
1756	Value *VecSrc;
1757	if (match(V: VecOp, P: m_BitCast(Op: m_Value(V&: VecSrc))) &&
1758	match(V: ScalarOp, P: m_BitCast(Op: m_Value(V&: ScalarSrc))) &&
1759	(VecOp->hasOneUse() \|\| ScalarOp->hasOneUse()) &&
1760	VecSrc->getType()->isVectorTy() && !ScalarSrc->getType()->isVectorTy() &&
1761	cast<VectorType>(Val: VecSrc->getType())->getElementType() ==
1762	ScalarSrc->getType()) {
1763	// inselt (bitcast VecSrc), (bitcast ScalarSrc), IdxOp -->
1764	// bitcast (inselt VecSrc, ScalarSrc, IdxOp)
1765	Value *NewInsElt = Builder.CreateInsertElement(Vec: VecSrc, NewElt: ScalarSrc, Idx: IdxOp);
1766	return new BitCastInst (NewInsElt, IE.getType());
1767	}
1768
1769	// If the inserted element was extracted from some other fixed-length vector
1770	// and both indexes are valid constants, try to turn this into a shuffle.
1771	// Can not handle scalable vector type, the number of elements needed to
1772	// create shuffle mask is not a compile-time constant.
1773	uint64_t InsertedIdx, ExtractedIdx;
1774	Value *ExtVecOp;
1775	if (isa<FixedVectorType>(Val: IE.getType()) &&
1776	match(V: IdxOp, P: m_ConstantInt(V&: InsertedIdx)) &&
1777	match(V: ScalarOp,
1778	P: m_ExtractElt(Val: m_Value(V&: ExtVecOp), Idx: m_ConstantInt(V&: ExtractedIdx))) &&
1779	isa<FixedVectorType>(Val: ExtVecOp->getType()) &&
1780	ExtractedIdx <
1781	cast<FixedVectorType>(Val: ExtVecOp->getType())->getNumElements()) {
1782	// TODO: Looking at the user(s) to determine if this insert is a
1783	// fold-to-shuffle opportunity does not match the usual instcombine
1784	// constraints. We should decide if the transform is worthy based only
1785	// on this instruction and its operands, but that may not work currently.
1786	//
1787	// Here, we are trying to avoid creating shuffles before reaching
1788	// the end of a chain of extract-insert pairs. This is complicated because
1789	// we do not generally form arbitrary shuffle masks in instcombine
1790	// (because those may codegen poorly), but collectShuffleElements() does
1791	// exactly that.
1792	//
1793	// The rules for determining what is an acceptable target-independent
1794	// shuffle mask are fuzzy because they evolve based on the backend's
1795	// capabilities and real-world impact.
1796	auto isShuffleRootCandidate = [](InsertElementInst &Insert) {
1797	if (!Insert.hasOneUse())
1798	return true;
1799	auto *InsertUser = dyn_cast<InsertElementInst>(Val: Insert.user_back());
1800	if (!InsertUser)
1801	return true;
1802	return false;
1803	};
1804
1805	// Try to form a shuffle from a chain of extract-insert ops.
1806	if (isShuffleRootCandidate (IE)) {
1807	bool Rerun = true;
1808	while (Rerun) {
1809	Rerun = false;
1810
1811	SmallVector<int, `16`> Mask;
1812	ShuffleOps LR =
1813	collectShuffleElements(V: &IE, Mask, PermittedRHS: nullptr, IC&: *this, Rerun);
1814
1815	// The proposed shuffle may be trivial, in which case we shouldn't
1816	// perform the combine.
1817	if (LR.first != &IE && LR.second != &IE) {
1818	// We now have a shuffle of LHS, RHS, Mask.
1819	if (LR.second == nullptr)
1820	LR.second = PoisonValue::get(T: LR.first->getType());
1821	return new ShuffleVectorInst (LR.first, LR.second, Mask);
1822	}
1823	}
1824	}
1825	}
1826
1827	if (auto VecTy = dyn_cast<FixedVectorType>(Val: VecOp->getType())) {
1828	unsigned VWidth = VecTy->getNumElements();
1829	APInt PoisonElts(VWidth, `0`);
1830	APInt AllOnesEltMask(APInt::getAllOnes(numBits: VWidth));
1831	if (Value *V = SimplifyDemandedVectorElts(V: &IE, DemandedElts: AllOnesEltMask,
1832	PoisonElts)) {
1833	if (V != &IE)
1834	return replaceInstUsesWith(I&: IE, V);
1835	return &IE;
1836	}
1837	}
1838
1839	if (Instruction *Shuf = foldConstantInsEltIntoShuffle(InsElt&: IE))
1840	return Shuf;
1841
1842	if (Instruction *NewInsElt = hoistInsEltConst(InsElt2&: IE, Builder))
1843	return NewInsElt;
1844
1845	if (Instruction *Broadcast = foldInsSequenceIntoSplat(InsElt&: IE))
1846	return Broadcast;
1847
1848	if (Instruction *Splat = foldInsEltIntoSplat(InsElt&: IE))
1849	return Splat;
1850
1851	if (Instruction *IdentityShuf = foldInsEltIntoIdentityShuffle(InsElt&: IE))
1852	return IdentityShuf;
1853
1854	if (Instruction *Ext = narrowInsElt(InsElt&: IE, Builder))
1855	return Ext;
1856
1857	if (Instruction *Ext = foldTruncInsEltPair(InsElt&: IE, IsBigEndian: DL.isBigEndian(), Builder))
1858	return Ext;
1859
1860	return nullptr;
1861	}
1862
1863	/// Return true if we can evaluate the specified expression tree if the vector
1864	/// elements were shuffled in a different order.
1865	static bool canEvaluateShuffled(Value V, ArrayRef<int*> Mask,
1866	unsigned Depth = `5`) {
1867	// We can always reorder the elements of a constant.
1868	if (isa<Constant>(Val: V))
1869	return true;
1870
1871	// We won't reorder vector arguments. No IPO here.
1872	Instruction *I = dyn_cast<Instruction>(Val: V);
1873	if (!I) return false;
1874
1875	// Two users may expect different orders of the elements. Don't try it.
1876	if (!I->hasOneUse())
1877	return false;
1878
1879	if (Depth == `0`) return false;
1880
1881	switch (I->getOpcode()) {
1882	case Instruction::UDiv:
1883	case Instruction::SDiv:
1884	case Instruction::URem:
1885	case Instruction::SRem:
1886	// Propagating an undefined shuffle mask element to integer div/rem is not
1887	// allowed because those opcodes can create immediate undefined behavior
1888	// from an undefined element in an operand.
1889	if (llvm::is_contained(Range&: Mask, Element: -`1`))
1890	return false;
1891	[[fallthrough]];
1892	case Instruction::Add:
1893	case Instruction::FAdd:
1894	case Instruction::Sub:
1895	case Instruction::FSub:
1896	case Instruction::Mul:
1897	case Instruction::FMul:
1898	case Instruction::FDiv:
1899	case Instruction::FRem:
1900	case Instruction::Shl:
1901	case Instruction::LShr:
1902	case Instruction::AShr:
1903	case Instruction::And:
1904	case Instruction::Or:
1905	case Instruction::Xor:
1906	case Instruction::ICmp:
1907	case Instruction::FCmp:
1908	case Instruction::Trunc:
1909	case Instruction::ZExt:
1910	case Instruction::SExt:
1911	case Instruction::FPToUI:
1912	case Instruction::FPToSI:
1913	case Instruction::UIToFP:
1914	case Instruction::SIToFP:
1915	case Instruction::FPTrunc:
1916	case Instruction::FPExt:
1917	case Instruction::GetElementPtr: {
1918	// Bail out if we would create longer vector ops. We could allow creating
1919	// longer vector ops, but that may result in more expensive codegen.
1920	Type *ITy = I->getType();
1921	if (ITy->isVectorTy() &&
1922	Mask.size() > cast<FixedVectorType>(Val: ITy)->getNumElements())
1923	return false;
1924	for (Value *Operand : I->operands()) {
1925	if (!canEvaluateShuffled(V: Operand, Mask, Depth: Depth - `1`))
1926	return false;
1927	}
1928	return true;
1929	}
1930	case Instruction::InsertElement: {
1931	ConstantInt *CI = dyn_cast<ConstantInt>(Val: I->getOperand(i: `2`));
1932	if (!CI) return false;
1933	int ElementNumber = CI->getLimitedValue();
1934
1935	// Verify that 'CI' does not occur twice in Mask. A single 'insertelement'
1936	// can't put an element into multiple indices.
1937	bool SeenOnce = false;
1938	for (int I : Mask) {
1939	if (I == ElementNumber) {
1940	if (SeenOnce)
1941	return false;
1942	SeenOnce = true;
1943	}
1944	}
1945	return canEvaluateShuffled(V: I->getOperand(i: `0`), Mask, Depth: Depth - `1`);
1946	}
1947	}
1948	return false;
1949	}
1950
1951	/// Rebuild a new instruction just like 'I' but with the new operands given.
1952	/// In the event of type mismatch, the type of the operands is correct.
1953	static Value buildNew(Instruction I, ArrayRef<Value*> NewOps,
1954	IRBuilderBase &Builder) {
1955	Builder.SetInsertPoint(I);
1956	switch (I->getOpcode()) {
1957	case Instruction::Add:
1958	case Instruction::FAdd:
1959	case Instruction::Sub:
1960	case Instruction::FSub:
1961	case Instruction::Mul:
1962	case Instruction::FMul:
1963	case Instruction::UDiv:
1964	case Instruction::SDiv:
1965	case Instruction::FDiv:
1966	case Instruction::URem:
1967	case Instruction::SRem:
1968	case Instruction::FRem:
1969	case Instruction::Shl:
1970	case Instruction::LShr:
1971	case Instruction::AShr:
1972	case Instruction::And:
1973	case Instruction::Or:
1974	case Instruction::Xor: {
1975	BinaryOperator *BO = cast<BinaryOperator>(Val: I);
1976	assert(NewOps.size() == `2` && "binary operator with #ops != 2");
1977	Value *New = Builder.CreateBinOp(Opc: cast<BinaryOperator>(Val: I)->getOpcode(),
1978	LHS: NewOps [`0`], RHS: NewOps [`1`]);
1979	if (auto *NewI = dyn_cast<Instruction>(Val: New)) {
1980	if (isa<OverflowingBinaryOperator>(Val: BO)) {
1981	NewI->setHasNoUnsignedWrap(BO->hasNoUnsignedWrap());
1982	NewI->setHasNoSignedWrap(BO->hasNoSignedWrap());
1983	}
1984	if (isa<PossiblyExactOperator>(Val: BO)) {
1985	NewI->setIsExact(BO->isExact());
1986	}
1987	if (isa<FPMathOperator>(Val: BO))
1988	NewI->copyFastMathFlags(I);
1989	}
1990	return New;
1991	}
1992	case Instruction::ICmp:
1993	assert(NewOps.size() == `2` && "icmp with #ops != 2");
1994	return Builder.CreateICmp(P: cast<ICmpInst>(Val: I)->getPredicate(), LHS: NewOps [`0`],
1995	RHS: NewOps [`1`]);
1996	case Instruction::FCmp:
1997	assert(NewOps.size() == `2` && "fcmp with #ops != 2");
1998	return Builder.CreateFCmp(P: cast<FCmpInst>(Val: I)->getPredicate(), LHS: NewOps [`0`],
1999	RHS: NewOps [`1`]);
2000	case Instruction::Trunc:
2001	case Instruction::ZExt:
2002	case Instruction::SExt:
2003	case Instruction::FPToUI:
2004	case Instruction::FPToSI:
2005	case Instruction::UIToFP:
2006	case Instruction::SIToFP:
2007	case Instruction::FPTrunc:
2008	case Instruction::FPExt: {
2009	// It's possible that the mask has a different number of elements from
2010	// the original cast. We recompute the destination type to match the mask.
2011	Type *DestTy = VectorType::get(
2012	ElementType: I->getType()->getScalarType(),
2013	EC: cast<VectorType>(Val: NewOps [`0`]->getType())->getElementCount());
2014	assert(NewOps.size() == `1` && "cast with #ops != 1");
2015	return Builder.CreateCast(Op: cast<CastInst>(Val: I)->getOpcode(), V: NewOps [`0`],
2016	DestTy);
2017	}
2018	case Instruction::GetElementPtr: {
2019	Value *Ptr = NewOps [`0`];
2020	ArrayRef<Value*> Idx = NewOps.slice(N: `1`);
2021	return Builder.CreateGEP(Ty: cast<GEPOperator>(Val: I)->getSourceElementType(),
2022	Ptr, IdxList: Idx, Name: "",
2023	NW: cast<GEPOperator>(Val: I)->getNoWrapFlags());
2024	}
2025	}
2026	llvm_unreachable("failed to rebuild vector instructions");
2027	}
2028
2029	static Value evaluateInDifferentElementOrder(Value V, ArrayRef<int> Mask,
2030	IRBuilderBase &Builder) {
2031	// Mask.size() does not need to be equal to the number of vector elements.
2032
2033	assert(V->getType()->isVectorTy() && "can't reorder non-vector elements");
2034	Type *EltTy = V->getType()->getScalarType();
2035
2036	if (isa<PoisonValue>(Val: V))
2037	return PoisonValue::get(T: FixedVectorType::get(ElementType: EltTy, NumElts: Mask.size()));
2038
2039	if (match(V, P: m_Undef()))
2040	return UndefValue::get(T: FixedVectorType::get(ElementType: EltTy, NumElts: Mask.size()));
2041
2042	if (isa<ConstantAggregateZero>(Val: V))
2043	return ConstantAggregateZero::get(Ty: FixedVectorType::get(ElementType: EltTy, NumElts: Mask.size()));
2044
2045	if (Constant *C = dyn_cast<Constant>(Val: V))
2046	return ConstantExpr::getShuffleVector(V1: C, V2: PoisonValue::get(T: C->getType()),
2047	Mask);
2048
2049	Instruction *I = cast<Instruction>(Val: V);
2050	switch (I->getOpcode()) {
2051	case Instruction::Add:
2052	case Instruction::FAdd:
2053	case Instruction::Sub:
2054	case Instruction::FSub:
2055	case Instruction::Mul:
2056	case Instruction::FMul:
2057	case Instruction::UDiv:
2058	case Instruction::SDiv:
2059	case Instruction::FDiv:
2060	case Instruction::URem:
2061	case Instruction::SRem:
2062	case Instruction::FRem:
2063	case Instruction::Shl:
2064	case Instruction::LShr:
2065	case Instruction::AShr:
2066	case Instruction::And:
2067	case Instruction::Or:
2068	case Instruction::Xor:
2069	case Instruction::ICmp:
2070	case Instruction::FCmp:
2071	case Instruction::Trunc:
2072	case Instruction::ZExt:
2073	case Instruction::SExt:
2074	case Instruction::FPToUI:
2075	case Instruction::FPToSI:
2076	case Instruction::UIToFP:
2077	case Instruction::SIToFP:
2078	case Instruction::FPTrunc:
2079	case Instruction::FPExt:
2080	case Instruction::Select:
2081	case Instruction::GetElementPtr: {
2082	SmallVector<Value*, `8`> NewOps;
2083	bool NeedsRebuild =
2084	(Mask.size() !=
2085	cast<FixedVectorType>(Val: I->getType())->getNumElements());
2086	for (int i = `0`, e = I->getNumOperands(); i != e; ++i) {
2087	Value *V;
2088	// Recursively call evaluateInDifferentElementOrder on vector arguments
2089	// as well. E.g. GetElementPtr may have scalar operands even if the
2090	// return value is a vector, so we need to examine the operand type.
2091	if (I->getOperand(i)->getType()->isVectorTy())
2092	V = evaluateInDifferentElementOrder(V: I->getOperand(i), Mask, Builder);
2093	else
2094	V = I->getOperand(i);
2095	NewOps.push_back(Elt: V);
2096	NeedsRebuild \|= (V != I->getOperand(i));
2097	}
2098	if (NeedsRebuild)
2099	return buildNew(I, NewOps, Builder);
2100	return I;
2101	}
2102	case Instruction::InsertElement: {
2103	int Element = cast<ConstantInt>(Val: I->getOperand(i: `2`))->getLimitedValue();
2104
2105	// The insertelement was inserting at Element. Figure out which element
2106	// that becomes after shuffling. The answer is guaranteed to be unique
2107	// by CanEvaluateShuffled.
2108	bool Found = false;
2109	int Index = `0`;
2110	for (int e = Mask.size(); Index != e; ++Index) {
2111	if (Mask [Index] == Element) {
2112	Found = true;
2113	break;
2114	}
2115	}
2116
2117	// If element is not in Mask, no need to handle the operand 1 (element to
2118	// be inserted). Just evaluate values in operand 0 according to Mask.
2119	if (!Found)
2120	return evaluateInDifferentElementOrder(V: I->getOperand(i: `0`), Mask, Builder);
2121
2122	Value *V = evaluateInDifferentElementOrder(V: I->getOperand(i: `0`), Mask,
2123	Builder);
2124	Builder.SetInsertPoint(I);
2125	return Builder.CreateInsertElement(Vec: V, NewElt: I->getOperand(i: `1`), Idx: Index);
2126	}
2127	}
2128	llvm_unreachable("failed to reorder elements of vector instruction!");
2129	}
2130
2131	// Returns true if the shuffle is extracting a contiguous range of values from
2132	// LHS, for example:
2133	// +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
2134	// Input: \|AA\|BB\|CC\|DD\|EE\|FF\|GG\|HH\|II\|JJ\|KK\|LL\|MM\|NN\|OO\|PP\|
2135	// Shuffles to: \|EE\|FF\|GG\|HH\|
2136	// +--+--+--+--+
2137	static bool isShuffleExtractingFromLHS(ShuffleVectorInst &SVI,
2138	ArrayRef<int> Mask) {
2139	unsigned LHSElems =
2140	cast<FixedVectorType>(Val: SVI.getOperand(i_nocapture: `0`)->getType())->getNumElements();
2141	unsigned MaskElems = Mask.size();
2142	unsigned BegIdx = Mask.front();
2143	unsigned EndIdx = Mask.back();
2144	if (BegIdx > EndIdx \|\| EndIdx >= LHSElems \|\| EndIdx - BegIdx != MaskElems - `1`)
2145	return false;
2146	for (unsigned I = `0`; I != MaskElems; ++I)
2147	if (static_cast<unsigned>(Mask [I]) != BegIdx + I)
2148	return false;
2149	return true;
2150	}
2151
2152	/// These are the ingredients in an alternate form binary operator as described
2153	/// below.
2154	struct BinopElts {
2155	BinaryOperator::BinaryOps Opcode;
2156	Value *Op0;
2157	Value *Op1;
2158	BinopElts(BinaryOperator::BinaryOps Opc = (BinaryOperator::BinaryOps)`0`,
2159	Value V0 = nullptr, Value V1 = nullptr) :
2160	Opcode(Opc), Op0(V0), Op1(V1) {}
2161	operator bool() const { return Opcode != `0`; }
2162	};
2163
2164	/// Binops may be transformed into binops with different opcodes and operands.
2165	/// Reverse the usual canonicalization to enable folds with the non-canonical
2166	/// form of the binop. If a transform is possible, return the elements of the
2167	/// new binop. If not, return invalid elements.
2168	static BinopElts getAlternateBinop(BinaryOperator BO, const* DataLayout &DL) {
2169	Value BO0 = BO->getOperand(i_nocapture: `0`), BO1 = BO->getOperand(i_nocapture: `1`);
2170	Type *Ty = BO->getType();
2171	switch (BO->getOpcode()) {
2172	case Instruction::Shl: {
2173	// shl X, C --> mul X, (1 << C)
2174	Constant *C;
2175	if (match(V: BO1, P: m_ImmConstant(C))) {
2176	Constant *ShlOne = ConstantFoldBinaryOpOperands(
2177	Opcode: Instruction::Shl, LHS: ConstantInt::get(Ty, V: `1`), RHS: C, DL);
2178	assert(ShlOne && "Constant folding of immediate constants failed");
2179	return {Instruction::Mul, BO0, ShlOne};
2180	}
2181	break;
2182	}
2183	case Instruction::Or: {
2184	// or disjoin X, C --> add X, C
2185	if (cast<PossiblyDisjointInst>(Val: BO)->isDisjoint())
2186	return {Instruction::Add, BO0, BO1};
2187	break;
2188	}
2189	case Instruction::Sub:
2190	// sub 0, X --> mul X, -1
2191	if (match(V: BO0, P: m_ZeroInt()))
2192	return {Instruction::Mul, BO1, ConstantInt::getAllOnesValue(Ty)};
2193	break;
2194	default:
2195	break;
2196	}
2197	return {};
2198	}
2199
2200	/// A select shuffle of a select shuffle with a shared operand can be reduced
2201	/// to a single select shuffle. This is an obvious improvement in IR, and the
2202	/// backend is expected to lower select shuffles efficiently.
2203	static Instruction *foldSelectShuffleOfSelectShuffle(ShuffleVectorInst &Shuf) {
2204	assert(Shuf.isSelect() && "Must have select-equivalent shuffle");
2205
2206	Value Op0 = Shuf.getOperand(i_nocapture: `0`), Op1 = Shuf.getOperand(i_nocapture: `1`);
2207	SmallVector<int, `16`> Mask;
2208	Shuf.getShuffleMask(Result&: Mask);
2209	unsigned NumElts = Mask.size();
2210
2211	// Canonicalize a select shuffle with common operand as Op1.
2212	auto *ShufOp = dyn_cast<ShuffleVectorInst>(Val: Op0);
2213	if (ShufOp && ShufOp->isSelect() &&
2214	(ShufOp->getOperand(i_nocapture: `0`) == Op1 \|\| ShufOp->getOperand(i_nocapture: `1`) == Op1)) {
2215	std::swap(a&: Op0, b&: Op1);
2216	ShuffleVectorInst::commuteShuffleMask(Mask, InVecNumElts: NumElts);
2217	}
2218
2219	ShufOp = dyn_cast<ShuffleVectorInst>(Val: Op1);
2220	if (!ShufOp \|\| !ShufOp->isSelect() \|\|
2221	(ShufOp->getOperand(i_nocapture: `0`) != Op0 && ShufOp->getOperand(i_nocapture: `1`) != Op0))
2222	return nullptr;
2223
2224	Value X = ShufOp->getOperand(i_nocapture: `0`), Y = ShufOp->getOperand(i_nocapture: `1`);
2225	SmallVector<int, `16`> Mask1;
2226	ShufOp->getShuffleMask(Result&: Mask1);
2227	assert(Mask1.size() == NumElts && "Vector size changed with select shuffle");
2228
2229	// Canonicalize common operand (Op0) as X (first operand of first shuffle).
2230	if (Y == Op0) {
2231	std::swap(a&: X, b&: Y);
2232	ShuffleVectorInst::commuteShuffleMask(Mask: Mask1, InVecNumElts: NumElts);
2233	}
2234
2235	// If the mask chooses from X (operand 0), it stays the same.
2236	// If the mask chooses from the earlier shuffle, the other mask value is
2237	// transferred to the combined select shuffle:
2238	// shuf X, (shuf X, Y, M1), M --> shuf X, Y, M'
2239	SmallVector<int, `16`> NewMask(NumElts);
2240	for (unsigned i = `0`; i != NumElts; ++i)
2241	NewMask [i] = Mask [i] < (signed)NumElts ? Mask [i] : Mask1 [i];
2242
2243	// A select mask with undef elements might look like an identity mask.
2244	assert((ShuffleVectorInst::isSelectMask(NewMask, NumElts) \|\|
2245	ShuffleVectorInst::isIdentityMask(NewMask, NumElts)) &&
2246	"Unexpected shuffle mask");
2247	return new ShuffleVectorInst (X, Y, NewMask);
2248	}
2249
2250	static Instruction *foldSelectShuffleWith1Binop(ShuffleVectorInst &Shuf,
2251	const SimplifyQuery &SQ) {
2252	assert(Shuf.isSelect() && "Must have select-equivalent shuffle");
2253
2254	// Are we shuffling together some value and that same value after it has been
2255	// modified by a binop with a constant?
2256	Value Op0 = Shuf.getOperand(i_nocapture: `0`), Op1 = Shuf.getOperand(i_nocapture: `1`);
2257	Constant *C;
2258	bool Op0IsBinop;
2259	if (match(V: Op0, P: m_BinOp(L: m_Specific(V: Op1), R: m_Constant(C))))
2260	Op0IsBinop = true;
2261	else if (match(V: Op1, P: m_BinOp(L: m_Specific(V: Op0), R: m_Constant(C))))
2262	Op0IsBinop = false;
2263	else
2264	return nullptr;
2265
2266	// The identity constant for a binop leaves a variable operand unchanged. For
2267	// a vector, this is a splat of something like 0, -1, or 1.
2268	// If there's no identity constant for this binop, we're done.
2269	auto *BO = cast<BinaryOperator>(Val: Op0IsBinop ? Op0 : Op1);
2270	BinaryOperator::BinaryOps BOpcode = BO->getOpcode();
2271	Constant IdC = ConstantExpr::getBinOpIdentity(Opcode: BOpcode, Ty: Shuf.getType(), AllowRHSConstant: true*);
2272	if (!IdC)
2273	return nullptr;
2274
2275	Value *X = Op0IsBinop ? Op1 : Op0;
2276
2277	// Prevent folding in the case the non-binop operand might have NaN values.
2278	// If X can have NaN elements then we have that the floating point math
2279	// operation in the transformed code may not preserve the exact NaN
2280	// bit-pattern -- e.g. `fadd sNaN, 0.0 -> qNaN`.
2281	// This makes the transformation incorrect since the original program would
2282	// have preserved the exact NaN bit-pattern.
2283	// Avoid the folding if X can have NaN elements.
2284	if (Shuf.getType()->getElementType()->isFloatingPointTy() &&
2285	!isKnownNeverNaN(V: X, SQ))
2286	return nullptr;
2287
2288	// Shuffle identity constants into the lanes that return the original value.
2289	// Example: shuf (mul X, {-1,-2,-3,-4}), X, {0,5,6,3} --> mul X, {-1,1,1,-4}
2290	// Example: shuf X, (add X, {-1,-2,-3,-4}), {0,1,6,7} --> add X, {0,0,-3,-4}
2291	// The existing binop constant vector remains in the same operand position.
2292	ArrayRef<int> Mask = Shuf.getShuffleMask();
2293	Constant *NewC = Op0IsBinop ? ConstantExpr::getShuffleVector(V1: C, V2: IdC, Mask) :
2294	ConstantExpr::getShuffleVector(V1: IdC, V2: C, Mask);
2295
2296	bool MightCreatePoisonOrUB =
2297	is_contained(Range&: Mask, Element: PoisonMaskElem) &&
2298	(Instruction::isIntDivRem(Opcode: BOpcode) \|\| Instruction::isShift(Opcode: BOpcode));
2299	if (MightCreatePoisonOrUB)
2300	NewC = InstCombiner::getSafeVectorConstantForBinop(Opcode: BOpcode, In: NewC, IsRHSConstant: true);
2301
2302	// shuf (bop X, C), X, M --> bop X, C'
2303	// shuf X, (bop X, C), M --> bop X, C'
2304	Instruction *NewBO = BinaryOperator::Create(Op: BOpcode, S1: X, S2: NewC);
2305	NewBO->copyIRFlags(V: BO);
2306
2307	// An undef shuffle mask element may propagate as an undef constant element in
2308	// the new binop. That would produce poison where the original code might not.
2309	// If we already made a safe constant, then there's no danger.
2310	if (is_contained(Range&: Mask, Element: PoisonMaskElem) && !MightCreatePoisonOrUB)
2311	NewBO->dropPoisonGeneratingFlags();
2312	return NewBO;
2313	}
2314
2315	/// If we have an insert of a scalar to a non-zero element of an undefined
2316	/// vector and then shuffle that value, that's the same as inserting to the zero
2317	/// element and shuffling. Splatting from the zero element is recognized as the
2318	/// canonical form of splat.
2319	static Instruction *canonicalizeInsertSplat(ShuffleVectorInst &Shuf,
2320	InstCombiner::BuilderTy &Builder) {
2321	Value Op0 = Shuf.getOperand(i_nocapture: `0`), Op1 = Shuf.getOperand(i_nocapture: `1`);
2322	ArrayRef<int> Mask = Shuf.getShuffleMask();
2323	Value *X;
2324	uint64_t IndexC;
2325
2326	// Match a shuffle that is a splat to a non-zero element.
2327	if (!match(V: Op0, P: m_OneUse(SubPattern: m_InsertElt(Val: m_Poison(), Elt: m_Value(V&: X),
2328	Idx: m_ConstantInt(V&: IndexC)))) \|\|
2329	!match(V: Op1, P: m_Poison()) \|\| match(Mask, P: m_ZeroMask ()) \|\| IndexC == `0`)
2330	return nullptr;
2331
2332	// Insert into element 0 of a poison vector.
2333	PoisonValue *PoisonVec = PoisonValue::get(T: Shuf.getType());
2334	Value *NewIns = Builder.CreateInsertElement(Vec: PoisonVec, NewElt: X, Idx: (uint64_t)`0`);
2335
2336	// Splat from element 0. Any mask element that is poison remains poison.
2337	// For example:
2338	// shuf (inselt poison, X, 2), _, <2,2,undef>
2339	// --> shuf (inselt poison, X, 0), poison, <0,0,undef>
2340	unsigned NumMaskElts =
2341	cast<FixedVectorType>(Val: Shuf.getType())->getNumElements();
2342	SmallVector<int, `16`> NewMask(NumMaskElts, `0`);
2343	for (unsigned i = `0`; i != NumMaskElts; ++i)
2344	if (Mask [i] == PoisonMaskElem)
2345	NewMask [i] = Mask [i];
2346
2347	return new ShuffleVectorInst (NewIns, NewMask);
2348	}
2349
2350	/// Try to fold shuffles that are the equivalent of a vector select.
2351	Instruction *InstCombinerImpl::foldSelectShuffle(ShuffleVectorInst &Shuf) {
2352	if (!Shuf.isSelect())
2353	return nullptr;
2354
2355	// Canonicalize to choose from operand 0 first unless operand 1 is undefined.
2356	// Commuting undef to operand 0 conflicts with another canonicalization.
2357	unsigned NumElts = cast<FixedVectorType>(Val: Shuf.getType())->getNumElements();
2358	if (!match(V: Shuf.getOperand(i_nocapture: `1`), P: m_Undef()) &&
2359	Shuf.getMaskValue(Elt: `0`) >= (int)NumElts) {
2360	// TODO: Can we assert that both operands of a shuffle-select are not undef
2361	// (otherwise, it would have been folded by instsimplify?
2362	Shuf.commute();
2363	return &Shuf;
2364	}
2365
2366	if (Instruction *I = foldSelectShuffleOfSelectShuffle(Shuf))
2367	return I;
2368
2369	if (Instruction *I = foldSelectShuffleWith1Binop(
2370	Shuf, SQ: getSimplifyQuery().getWithInstruction(I: &Shuf)))
2371	return I;
2372
2373	BinaryOperator B0, B1;
2374	if (!match(V: Shuf.getOperand(i_nocapture: `0`), P: m_BinOp(I&: B0)) \|\|
2375	!match(V: Shuf.getOperand(i_nocapture: `1`), P: m_BinOp(I&: B1)))
2376	return nullptr;
2377
2378	// If one operand is "0 - X", allow that to be viewed as "X -1"*
2379	// (ConstantsAreOp1) by getAlternateBinop below. If the neg is not paired
2380	// with a multiply, we will exit because C0/C1 will not be set.
2381	Value X, Y;
2382	Constant C0 = nullptr, C1 = nullptr;
2383	bool ConstantsAreOp1;
2384	if (match(V: B0, P: m_BinOp(L: m_Constant(C&: C0), R: m_Value(V&: X))) &&
2385	match(V: B1, P: m_BinOp(L: m_Constant(C&: C1), R: m_Value(V&: Y))))
2386	ConstantsAreOp1 = false;
2387	else if (match(V: B0, P: m_CombineOr(L: m_BinOp(L: m_Value(V&: X), R: m_Constant(C&: C0)),
2388	R: m_Neg(V: m_Value(V&: X)))) &&
2389	match(V: B1, P: m_CombineOr(L: m_BinOp(L: m_Value(V&: Y), R: m_Constant(C&: C1)),
2390	R: m_Neg(V: m_Value(V&: Y)))))
2391	ConstantsAreOp1 = true;
2392	else
2393	return nullptr;
2394
2395	// We need matching binops to fold the lanes together.
2396	BinaryOperator::BinaryOps Opc0 = B0->getOpcode();
2397	BinaryOperator::BinaryOps Opc1 = B1->getOpcode();
2398	bool DropNSW = false;
2399	if (ConstantsAreOp1 && Opc0 != Opc1) {
2400	// TODO: We drop "nsw" if shift is converted into multiply because it may
2401	// not be correct when the shift amount is BitWidth - 1. We could examine
2402	// each vector element to determine if it is safe to keep that flag.
2403	if (Opc0 == Instruction::Shl \|\| Opc1 == Instruction::Shl)
2404	DropNSW = true;
2405	if (BinopElts AltB0 = getAlternateBinop(BO: B0, DL)) {
2406	assert(isa<Constant>(AltB0.Op1) && "Expecting constant with alt binop");
2407	Opc0 = AltB0.Opcode;
2408	C0 = cast<Constant>(Val: AltB0.Op1);
2409	} else if (BinopElts AltB1 = getAlternateBinop(BO: B1, DL)) {
2410	assert(isa<Constant>(AltB1.Op1) && "Expecting constant with alt binop");
2411	Opc1 = AltB1.Opcode;
2412	C1 = cast<Constant>(Val: AltB1.Op1);
2413	}
2414	}
2415
2416	if (Opc0 != Opc1 \|\| !C0 \|\| !C1)
2417	return nullptr;
2418
2419	// The opcodes must be the same. Use a new name to make that clear.
2420	BinaryOperator::BinaryOps BOpc = Opc0;
2421
2422	// Select the constant elements needed for the single binop.
2423	ArrayRef<int> Mask = Shuf.getShuffleMask();
2424	Constant *NewC = ConstantExpr::getShuffleVector(V1: C0, V2: C1, Mask);
2425
2426	// We are moving a binop after a shuffle. When a shuffle has an undefined
2427	// mask element, the result is undefined, but it is not poison or undefined
2428	// behavior. That is not necessarily true for div/rem/shift.
2429	bool MightCreatePoisonOrUB =
2430	is_contained(Range&: Mask, Element: PoisonMaskElem) &&
2431	(Instruction::isIntDivRem(Opcode: BOpc) \|\| Instruction::isShift(Opcode: BOpc));
2432	if (MightCreatePoisonOrUB)
2433	NewC = InstCombiner::getSafeVectorConstantForBinop(Opcode: BOpc, In: NewC,
2434	IsRHSConstant: ConstantsAreOp1);
2435
2436	Value *V;
2437	if (X == Y) {
2438	// Remove a binop and the shuffle by rearranging the constant:
2439	// shuffle (op V, C0), (op V, C1), M --> op V, C'
2440	// shuffle (op C0, V), (op C1, V), M --> op C', V
2441	V = X;
2442	} else {
2443	// If there are 2 different variable operands, we must create a new shuffle
2444	// (select) first, so check uses to ensure that we don't end up with more
2445	// instructions than we started with.
2446	if (!B0->hasOneUse() && !B1->hasOneUse())
2447	return nullptr;
2448
2449	// If we use the original shuffle mask and op1 is variable, we would be
2450	// putting an undef into operand 1 of div/rem/shift. This is either UB or
2451	// poison. We do not have to guard against UB when constants* are op1*
2452	// because safe constants guarantee that we do not overflow sdiv/srem (and
2453	// there's no danger for other opcodes).
2454	// TODO: To allow this case, create a new shuffle mask with no undefs.
2455	if (MightCreatePoisonOrUB && !ConstantsAreOp1)
2456	return nullptr;
2457
2458	// Note: In general, we do not create new shuffles in InstCombine because we
2459	// do not know if a target can lower an arbitrary shuffle optimally. In this
2460	// case, the shuffle uses the existing mask, so there is no additional risk.
2461
2462	// Select the variable vectors first, then perform the binop:
2463	// shuffle (op X, C0), (op Y, C1), M --> op (shuffle X, Y, M), C'
2464	// shuffle (op C0, X), (op C1, Y), M --> op C', (shuffle X, Y, M)
2465	V = Builder.CreateShuffleVector(V1: X, V2: Y, Mask);
2466	}
2467
2468	Value *NewBO = ConstantsAreOp1 ? Builder.CreateBinOp(Opc: BOpc, LHS: V, RHS: NewC) :
2469	Builder.CreateBinOp(Opc: BOpc, LHS: NewC, RHS: V);
2470
2471	// Flags are intersected from the 2 source binops. But there are 2 exceptions:
2472	// 1. If we changed an opcode, poison conditions might have changed.
2473	// 2. If the shuffle had undef mask elements, the new binop might have undefs
2474	// where the original code did not. But if we already made a safe constant,
2475	// then there's no danger.
2476	if (auto *NewI = dyn_cast<Instruction>(Val: NewBO)) {
2477	NewI->copyIRFlags(V: B0);
2478	NewI->andIRFlags(V: B1);
2479	if (DropNSW)
2480	NewI->setHasNoSignedWrap(false);
2481	if (is_contained(Range&: Mask, Element: PoisonMaskElem) && !MightCreatePoisonOrUB)
2482	NewI->dropPoisonGeneratingFlags();
2483	}
2484	return replaceInstUsesWith(I&: Shuf, V: NewBO);
2485	}
2486
2487	/// Convert a narrowing shuffle of a bitcasted vector into a vector truncate.
2488	/// Example (little endian):
2489	/// shuf (bitcast <4 x i16> X to <8 x i8>), <0, 2, 4, 6> --> trunc X to <4 x i8>
2490	static Instruction *foldTruncShuffle(ShuffleVectorInst &Shuf,
2491	bool IsBigEndian) {
2492	// This must be a bitcasted shuffle of 1 vector integer operand.
2493	Type *DestType = Shuf.getType();
2494	Value *X;
2495	if (!match(V: Shuf.getOperand(i_nocapture: `0`), P: m_BitCast(Op: m_Value(V&: X))) \|\|
2496	!match(V: Shuf.getOperand(i_nocapture: `1`), P: m_Poison()) \|\| !DestType->isIntOrIntVectorTy())
2497	return nullptr;
2498
2499	// The source type must have the same number of elements as the shuffle,
2500	// and the source element type must be larger than the shuffle element type.
2501	Type *SrcType = X->getType();
2502	if (!SrcType->isVectorTy() \|\| !SrcType->isIntOrIntVectorTy() \|\|
2503	cast<FixedVectorType>(Val: SrcType)->getNumElements() !=
2504	cast<FixedVectorType>(Val: DestType)->getNumElements() \|\|
2505	SrcType->getScalarSizeInBits() % DestType->getScalarSizeInBits() != `0`)
2506	return nullptr;
2507
2508	assert(Shuf.changesLength() && !Shuf.increasesLength() &&
2509	"Expected a shuffle that decreases length");
2510
2511	// Last, check that the mask chooses the correct low bits for each narrow
2512	// element in the result.
2513	uint64_t TruncRatio =
2514	SrcType->getScalarSizeInBits() / DestType->getScalarSizeInBits();
2515	ArrayRef<int> Mask = Shuf.getShuffleMask();
2516	for (unsigned i = `0`, e = Mask.size(); i != e; ++i) {
2517	if (Mask [i] == PoisonMaskElem)
2518	continue;
2519	uint64_t LSBIndex = IsBigEndian ? (i + `1`) * TruncRatio - `1` : i * TruncRatio;
2520	assert(LSBIndex <= INT32_MAX && "Overflowed 32-bits");
2521	if (Mask [i] != (int)LSBIndex)
2522	return nullptr;
2523	}
2524
2525	return new TruncInst (X, DestType);
2526	}
2527
2528	/// Match a shuffle-select-shuffle pattern where the shuffles are widening and
2529	/// narrowing (concatenating with poison and extracting back to the original
2530	/// length). This allows replacing the wide select with a narrow select.
2531	static Instruction *narrowVectorSelect(ShuffleVectorInst &Shuf,
2532	InstCombiner::BuilderTy &Builder) {
2533	// This must be a narrowing identity shuffle. It extracts the 1st N elements
2534	// of the 1st vector operand of a shuffle.
2535	if (!match(V: Shuf.getOperand(i_nocapture: `1`), P: m_Poison()) \|\| !Shuf.isIdentityWithExtract())
2536	return nullptr;
2537
2538	// The vector being shuffled must be a vector select that we can eliminate.
2539	// TODO: The one-use requirement could be eased if X and/or Y are constants.
2540	Value Cond, X, *Y;
2541	if (!match(V: Shuf.getOperand(i_nocapture: `0`),
2542	P: m_OneUse(SubPattern: m_Select(C: m_Value(V&: Cond), L: m_Value(V&: X), R: m_Value(V&: Y)))))
2543	return nullptr;
2544
2545	// We need a narrow condition value. It must be extended with poison elements
2546	// and have the same number of elements as this shuffle.
2547	unsigned NarrowNumElts =
2548	cast<FixedVectorType>(Val: Shuf.getType())->getNumElements();
2549	Value *NarrowCond;
2550	if (!match(V: Cond, P: m_OneUse(SubPattern: m_Shuffle(v1: m_Value(V&: NarrowCond), v2: m_Poison()))) \|\|
2551	cast<FixedVectorType>(Val: NarrowCond->getType())->getNumElements() !=
2552	NarrowNumElts \|\|
2553	!cast<ShuffleVectorInst>(Val: Cond)->isIdentityWithPadding())
2554	return nullptr;
2555
2556	// shuf (sel (shuf NarrowCond, poison, WideMask), X, Y), poison, NarrowMask)
2557	// -->
2558	// sel NarrowCond, (shuf X, poison, NarrowMask), (shuf Y, poison, NarrowMask)
2559	Value *NarrowX = Builder.CreateShuffleVector(V: X, Mask: Shuf.getShuffleMask());
2560	Value *NarrowY = Builder.CreateShuffleVector(V: Y, Mask: Shuf.getShuffleMask());
2561	return SelectInst::Create(C: NarrowCond, S1: NarrowX, S2: NarrowY);
2562	}
2563
2564	/// Canonicalize FP negate/abs after shuffle.
2565	static Instruction *foldShuffleOfUnaryOps(ShuffleVectorInst &Shuf,
2566	InstCombiner::BuilderTy &Builder) {
2567	auto *S0 = dyn_cast<Instruction>(Val: Shuf.getOperand(i_nocapture: `0`));
2568	Value *X;
2569	if (!S0 \|\| !match(V: S0, P: m_CombineOr(L: m_FNeg(X: m_Value(V&: X)), R: m_FAbs(Op0: m_Value(V&: X)))))
2570	return nullptr;
2571
2572	bool IsFNeg = S0->getOpcode() == Instruction::FNeg;
2573
2574	// Match 2-input (binary) shuffle.
2575	auto *S1 = dyn_cast<Instruction>(Val: Shuf.getOperand(i_nocapture: `1`));
2576	Value *Y;
2577	if (!S1 \|\| !match(V: S1, P: m_CombineOr(L: m_FNeg(X: m_Value(V&: Y)), R: m_FAbs(Op0: m_Value(V&: Y)))) \|\|
2578	S0->getOpcode() != S1->getOpcode() \|\|
2579	(!S0->hasOneUse() && !S1->hasOneUse()))
2580	return nullptr;
2581
2582	// shuf (fneg/fabs X), (fneg/fabs Y), Mask --> fneg/fabs (shuf X, Y, Mask)
2583	Value *NewShuf = Builder.CreateShuffleVector(V1: X, V2: Y, Mask: Shuf.getShuffleMask());
2584	Instruction *NewF;
2585	if (IsFNeg) {
2586	NewF = UnaryOperator::CreateFNeg(V: NewShuf);
2587	} else {
2588	Function *FAbs = Intrinsic::getOrInsertDeclaration(
2589	M: Shuf.getModule(), id: Intrinsic::fabs, Tys: Shuf.getType());
2590	NewF = CallInst::Create(Func: FAbs, Args: {NewShuf});
2591	}
2592	NewF->copyIRFlags(V: S0);
2593	NewF->andIRFlags(V: S1);
2594	return NewF;
2595	}
2596
2597	/// Canonicalize casts after shuffle.
2598	static Instruction *foldCastShuffle(ShuffleVectorInst &Shuf,
2599	InstCombiner::BuilderTy &Builder) {
2600	auto *Cast0 = dyn_cast<CastInst>(Val: Shuf.getOperand(i_nocapture: `0`));
2601	if (!Cast0)
2602	return nullptr;
2603
2604	// TODO: Allow other opcodes? That would require easing the type restrictions
2605	// below here.
2606	CastInst::CastOps CastOpcode = Cast0->getOpcode();
2607	switch (CastOpcode) {
2608	case Instruction::SExt:
2609	case Instruction::ZExt:
2610	case Instruction::FPToSI:
2611	case Instruction::FPToUI:
2612	case Instruction::SIToFP:
2613	case Instruction::UIToFP:
2614	break;
2615	default:
2616	return nullptr;
2617	}
2618
2619	VectorType *CastSrcTy = cast<VectorType>(Val: Cast0->getSrcTy());
2620	VectorType *ShufTy = Shuf.getType();
2621	VectorType *ShufOpTy = cast<VectorType>(Val: Shuf.getOperand(i_nocapture: `0`)->getType());
2622
2623	// TODO: Allow length-increasing shuffles?
2624	if (ShufTy->getElementCount().getKnownMinValue() >
2625	ShufOpTy->getElementCount().getKnownMinValue())
2626	return nullptr;
2627
2628	// shuffle (cast X), Poison, identity-with-extract-mask -->
2629	// cast (shuffle X, Poison, identity-with-extract-mask).
2630	if (isa<PoisonValue>(Val: Shuf.getOperand(i_nocapture: `1`)) && Cast0->hasOneUse() &&
2631	Shuf.isIdentityWithExtract()) {
2632	auto *NewIns = Builder.CreateShuffleVector(V1: Cast0->getOperand(i_nocapture: `0`),
2633	V2: PoisonValue::get(T: CastSrcTy),
2634	Mask: Shuf.getShuffleMask());
2635	return CastInst::Create(Cast0->getOpcode(), S: NewIns, Ty: Shuf.getType());
2636	}
2637
2638	auto *Cast1 = dyn_cast<CastInst>(Val: Shuf.getOperand(i_nocapture: `1`));
2639	// Do we have 2 matching cast operands?
2640	if (!Cast1 \|\| Cast0->getOpcode() != Cast1->getOpcode() \|\|
2641	Cast0->getSrcTy() != Cast1->getSrcTy())
2642	return nullptr;
2643
2644	// TODO: Allow element-size-decreasing casts (ex: fptosi float to i8)?
2645	assert(isa<FixedVectorType>(CastSrcTy) && isa<FixedVectorType>(ShufOpTy) &&
2646	"Expected fixed vector operands for casts and binary shuffle");
2647	if (CastSrcTy->getPrimitiveSizeInBits() > ShufOpTy->getPrimitiveSizeInBits())
2648	return nullptr;
2649
2650	// At least one of the operands must have only one use (the shuffle).
2651	if (!Cast0->hasOneUse() && !Cast1->hasOneUse())
2652	return nullptr;
2653
2654	// shuffle (cast X), (cast Y), Mask --> cast (shuffle X, Y, Mask)
2655	Value *X = Cast0->getOperand(i_nocapture: `0`);
2656	Value *Y = Cast1->getOperand(i_nocapture: `0`);
2657	Value *NewShuf = Builder.CreateShuffleVector(V1: X, V2: Y, Mask: Shuf.getShuffleMask());
2658	return CastInst::Create(CastOpcode, S: NewShuf, Ty: ShufTy);
2659	}
2660
2661	/// Try to fold an extract subvector operation.
2662	static Instruction *foldIdentityExtractShuffle(ShuffleVectorInst &Shuf) {
2663	Value Op0 = Shuf.getOperand(i_nocapture: `0`), Op1 = Shuf.getOperand(i_nocapture: `1`);
2664	if (!Shuf.isIdentityWithExtract() \|\| !match(V: Op1, P: m_Poison()))
2665	return nullptr;
2666
2667	// Check if we are extracting all bits of an inserted scalar:
2668	// extract-subvec (bitcast (inselt ?, X, 0) --> bitcast X to subvec type
2669	Value *X;
2670	if (match(V: Op0, P: m_BitCast(Op: m_InsertElt(Val: m_Value(), Elt: m_Value(V&: X), Idx: m_Zero()))) &&
2671	X->getType()->getPrimitiveSizeInBits() ==
2672	Shuf.getType()->getPrimitiveSizeInBits())
2673	return new BitCastInst (X, Shuf.getType());
2674
2675	// Try to combine 2 shuffles into 1 shuffle by concatenating a shuffle mask.
2676	Value *Y;
2677	ArrayRef<int> Mask;
2678	if (!match(V: Op0, P: m_Shuffle(v1: m_Value(V&: X), v2: m_Value(V&: Y), mask: m_Mask (Mask))))
2679	return nullptr;
2680
2681	// Be conservative with shuffle transforms. If we can't kill the 1st shuffle,
2682	// then combining may result in worse codegen.
2683	if (!Op0->hasOneUse())
2684	return nullptr;
2685
2686	// We are extracting a subvector from a shuffle. Remove excess elements from
2687	// the 1st shuffle mask to eliminate the extract.
2688	//
2689	// This transform is conservatively limited to identity extracts because we do
2690	// not allow arbitrary shuffle mask creation as a target-independent transform
2691	// (because we can't guarantee that will lower efficiently).
2692	//
2693	// If the extracting shuffle has an poison mask element, it transfers to the
2694	// new shuffle mask. Otherwise, copy the original mask element. Example:
2695	// shuf (shuf X, Y, <C0, C1, C2, poison, C4>), poison, <0, poison, 2, 3> -->
2696	// shuf X, Y, <C0, poison, C2, poison>
2697	unsigned NumElts = cast<FixedVectorType>(Val: Shuf.getType())->getNumElements();
2698	SmallVector<int, `16`> NewMask(NumElts);
2699	assert(NumElts < Mask.size() &&
2700	"Identity with extract must have less elements than its inputs");
2701
2702	for (unsigned i = `0`; i != NumElts; ++i) {
2703	int ExtractMaskElt = Shuf.getMaskValue(Elt: i);
2704	int MaskElt = Mask [i];
2705	NewMask [i] = ExtractMaskElt == PoisonMaskElem ? ExtractMaskElt : MaskElt;
2706	}
2707	return new ShuffleVectorInst (X, Y, NewMask);
2708	}
2709
2710	/// Try to replace a shuffle with an insertelement or try to replace a shuffle
2711	/// operand with the operand of an insertelement.
2712	static Instruction *foldShuffleWithInsert(ShuffleVectorInst &Shuf,
2713	InstCombinerImpl &IC) {
2714	Value V0 = Shuf.getOperand(i_nocapture: `0`), V1 = Shuf.getOperand(i_nocapture: `1`);
2715	SmallVector<int, `16`> Mask;
2716	Shuf.getShuffleMask(Result&: Mask);
2717
2718	int NumElts = Mask.size();
2719	int InpNumElts = cast<FixedVectorType>(Val: V0->getType())->getNumElements();
2720
2721	// This is a specialization of a fold in SimplifyDemandedVectorElts. We may
2722	// not be able to handle it there if the insertelement has >1 use.
2723	// If the shuffle has an insertelement operand but does not choose the
2724	// inserted scalar element from that value, then we can replace that shuffle
2725	// operand with the source vector of the insertelement.
2726	Value *X;
2727	uint64_t IdxC;
2728	if (match(V: V0, P: m_InsertElt(Val: m_Value(V&: X), Elt: m_Value(), Idx: m_ConstantInt(V&: IdxC)))) {
2729	// shuf (inselt X, ?, IdxC), ?, Mask --> shuf X, ?, Mask
2730	if (!is_contained(Range&: Mask, Element: (int)IdxC))
2731	return IC.replaceOperand(I&: Shuf, OpNum: `0`, V: X);
2732	}
2733	if (match(V: V1, P: m_InsertElt(Val: m_Value(V&: X), Elt: m_Value(), Idx: m_ConstantInt(V&: IdxC)))) {
2734	// Offset the index constant by the vector width because we are checking for
2735	// accesses to the 2nd vector input of the shuffle.
2736	IdxC += InpNumElts;
2737	// shuf ?, (inselt X, ?, IdxC), Mask --> shuf ?, X, Mask
2738	if (!is_contained(Range&: Mask, Element: (int)IdxC))
2739	return IC.replaceOperand(I&: Shuf, OpNum: `1`, V: X);
2740	}
2741	// For the rest of the transform, the shuffle must not change vector sizes.
2742	// TODO: This restriction could be removed if the insert has only one use
2743	// (because the transform would require a new length-changing shuffle).
2744	if (NumElts != InpNumElts)
2745	return nullptr;
2746
2747	// shuffle (insert ?, Scalar, IndexC), V1, Mask --> insert V1, Scalar, IndexC'
2748	auto isShufflingScalarIntoOp1 = [&](Value &Scalar, ConstantInt &IndexC) {
2749	// We need an insertelement with a constant index.
2750	if (!match(V: V0, P: m_InsertElt(Val: m_Value(), Elt: m_Value(V&: Scalar),
2751	Idx: m_ConstantInt(CI&: IndexC))))
2752	return false;
2753
2754	// Test the shuffle mask to see if it splices the inserted scalar into the
2755	// operand 1 vector of the shuffle.
2756	int NewInsIndex = -`1`;
2757	for (int i = `0`; i != NumElts; ++i) {
2758	// Ignore undef mask elements.
2759	if (Mask [i] == -`1`)
2760	continue;
2761
2762	// The shuffle takes elements of operand 1 without lane changes.
2763	if (Mask [i] == NumElts + i)
2764	continue;
2765
2766	// The shuffle must choose the inserted scalar exactly once.
2767	if (NewInsIndex != -`1` \|\| Mask [i] != IndexC->getSExtValue())
2768	return false;
2769
2770	// The shuffle is placing the inserted scalar into element i.
2771	NewInsIndex = i;
2772	}
2773
2774	assert(NewInsIndex != -`1` && "Did not fold shuffle with unused operand?");
2775
2776	// Index is updated to the potentially translated insertion lane.
2777	IndexC = ConstantInt::get(Ty: IndexC->getIntegerType(), V: NewInsIndex);
2778	return true;
2779	};
2780
2781	// If the shuffle is unnecessary, insert the scalar operand directly into
2782	// operand 1 of the shuffle. Example:
2783	// shuffle (insert ?, S, 1), V1, <1, 5, 6, 7> --> insert V1, S, 0
2784	Value *Scalar;
2785	ConstantInt *IndexC;
2786	if (isShufflingScalarIntoOp1 (Scalar, IndexC))
2787	return InsertElementInst::Create(Vec: V1, NewElt: Scalar, Idx: IndexC);
2788
2789	// Try again after commuting shuffle. Example:
2790	// shuffle V0, (insert ?, S, 0), <0, 1, 2, 4> -->
2791	// shuffle (insert ?, S, 0), V0, <4, 5, 6, 0> --> insert V0, S, 3
2792	std::swap(a&: V0, b&: V1);
2793	ShuffleVectorInst::commuteShuffleMask(Mask, InVecNumElts: NumElts);
2794	if (isShufflingScalarIntoOp1 (Scalar, IndexC))
2795	return InsertElementInst::Create(Vec: V1, NewElt: Scalar, Idx: IndexC);
2796
2797	return nullptr;
2798	}
2799
2800	static Instruction *foldIdentityPaddedShuffles(ShuffleVectorInst &Shuf) {
2801	// Match the operands as identity with padding (also known as concatenation
2802	// with undef) shuffles of the same source type. The backend is expected to
2803	// recreate these concatenations from a shuffle of narrow operands.
2804	auto *Shuffle0 = dyn_cast<ShuffleVectorInst>(Val: Shuf.getOperand(i_nocapture: `0`));
2805	auto *Shuffle1 = dyn_cast<ShuffleVectorInst>(Val: Shuf.getOperand(i_nocapture: `1`));
2806	if (!Shuffle0 \|\| !Shuffle0->isIdentityWithPadding() \|\|
2807	!Shuffle1 \|\| !Shuffle1->isIdentityWithPadding())
2808	return nullptr;
2809
2810	// We limit this transform to power-of-2 types because we expect that the
2811	// backend can convert the simplified IR patterns to identical nodes as the
2812	// original IR.
2813	// TODO: If we can verify the same behavior for arbitrary types, the
2814	// power-of-2 checks can be removed.
2815	Value *X = Shuffle0->getOperand(i_nocapture: `0`);
2816	Value *Y = Shuffle1->getOperand(i_nocapture: `0`);
2817	if (X->getType() != Y->getType() \|\|
2818	!isPowerOf2_32(Value: cast<FixedVectorType>(Val: Shuf.getType())->getNumElements()) \|\|
2819	!isPowerOf2_32(
2820	Value: cast<FixedVectorType>(Val: Shuffle0->getType())->getNumElements()) \|\|
2821	!isPowerOf2_32(Value: cast<FixedVectorType>(Val: X->getType())->getNumElements()) \|\|
2822	match(V: X, P: m_Undef()) \|\| match(V: Y, P: m_Undef()))
2823	return nullptr;
2824	assert(match(Shuffle0->getOperand(`1`), m_Undef()) &&
2825	match(Shuffle1->getOperand(`1`), m_Undef()) &&
2826	"Unexpected operand for identity shuffle");
2827
2828	// This is a shuffle of 2 widening shuffles. We can shuffle the narrow source
2829	// operands directly by adjusting the shuffle mask to account for the narrower
2830	// types:
2831	// shuf (widen X), (widen Y), Mask --> shuf X, Y, Mask'
2832	int NarrowElts = cast<FixedVectorType>(Val: X->getType())->getNumElements();
2833	int WideElts = cast<FixedVectorType>(Val: Shuffle0->getType())->getNumElements();
2834	assert(WideElts > NarrowElts && "Unexpected types for identity with padding");
2835
2836	ArrayRef<int> Mask = Shuf.getShuffleMask();
2837	SmallVector<int, `16`> NewMask(Mask.size(), -`1`);
2838	for (int i = `0`, e = Mask.size(); i != e; ++i) {
2839	if (Mask [i] == -`1`)
2840	continue;
2841
2842	// If this shuffle is choosing an undef element from 1 of the sources, that
2843	// element is undef.
2844	if (Mask [i] < WideElts) {
2845	if (Shuffle0->getMaskValue(Elt: Mask [i]) == -`1`)
2846	continue;
2847	} else {
2848	if (Shuffle1->getMaskValue(Elt: Mask [i] - WideElts) == -`1`)
2849	continue;
2850	}
2851
2852	// If this shuffle is choosing from the 1st narrow op, the mask element is
2853	// the same. If this shuffle is choosing from the 2nd narrow op, the mask
2854	// element is offset down to adjust for the narrow vector widths.
2855	if (Mask [i] < WideElts) {
2856	assert(Mask[i] < NarrowElts && "Unexpected shuffle mask");
2857	NewMask [i] = Mask [i];
2858	} else {
2859	assert(Mask[i] < (WideElts + NarrowElts) && "Unexpected shuffle mask");
2860	NewMask [i] = Mask [i] - (WideElts - NarrowElts);
2861	}
2862	}
2863	return new ShuffleVectorInst (X, Y, NewMask);
2864	}
2865
2866	// Splatting the first element of the result of a BinOp, where any of the
2867	// BinOp's operands are the result of a first element splat can be simplified to
2868	// splatting the first element of the result of the BinOp
2869	Instruction *InstCombinerImpl::simplifyBinOpSplats(ShuffleVectorInst &SVI) {
2870	if (!match(V: SVI.getOperand(i_nocapture: `1`), P: m_Poison()) \|\|
2871	!match(Mask: SVI.getShuffleMask(), P: m_ZeroMask ()) \|\|
2872	!SVI.getOperand(i_nocapture: `0`)->hasOneUse())
2873	return nullptr;
2874
2875	Value *Op0 = SVI.getOperand(i_nocapture: `0`);
2876	Value X, Y;
2877	if (!match(V: Op0, P: m_BinOp(L: m_Shuffle(v1: m_Value(V&: X), v2: m_Poison(), mask: m_ZeroMask ()),
2878	R: m_Value(V&: Y))) &&
2879	!match(V: Op0, P: m_BinOp(L: m_Value(V&: X),
2880	R: m_Shuffle(v1: m_Value(V&: Y), v2: m_Poison(), mask: m_ZeroMask ()))))
2881	return nullptr;
2882	if (X->getType() != Y->getType())
2883	return nullptr;
2884
2885	auto *BinOp = cast<BinaryOperator>(Val: Op0);
2886	if (!isSafeToSpeculativelyExecuteWithVariableReplaced(I: BinOp))
2887	return nullptr;
2888
2889	Value *NewBO = Builder.CreateBinOp(Opc: BinOp->getOpcode(), LHS: X, RHS: Y);
2890	if (auto NewBOI = dyn_cast<Instruction>(Val: NewBO))
2891	NewBOI->copyIRFlags(V: BinOp);
2892
2893	return new ShuffleVectorInst (NewBO, SVI.getShuffleMask());
2894	}
2895
2896	Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
2897	Value *LHS = SVI.getOperand(i_nocapture: `0`);
2898	Value *RHS = SVI.getOperand(i_nocapture: `1`);
2899	SimplifyQuery ShufQuery = SQ.getWithInstruction(I: &SVI);
2900	if (auto *V = simplifyShuffleVectorInst(Op0: LHS, Op1: RHS, Mask: SVI.getShuffleMask(),
2901	RetTy: SVI.getType(), Q: ShufQuery))
2902	return replaceInstUsesWith(I&: SVI, V);
2903
2904	if (Instruction *I = simplifyBinOpSplats(SVI))
2905	return I;
2906
2907	// Canonicalize splat shuffle to use poison RHS. Handle this explicitly in
2908	// order to support scalable vectors.
2909	if (match(Mask: SVI.getShuffleMask(), P: m_ZeroMask ()) && !isa<PoisonValue>(Val: RHS))
2910	return replaceOperand(I&: SVI, OpNum: `1`, V: PoisonValue::get(T: RHS->getType()));
2911
2912	if (isa<ScalableVectorType>(Val: LHS->getType()))
2913	return nullptr;
2914
2915	unsigned VWidth = cast<FixedVectorType>(Val: SVI.getType())->getNumElements();
2916	unsigned LHSWidth = cast<FixedVectorType>(Val: LHS->getType())->getNumElements();
2917
2918	// shuffle (bitcast X), (bitcast Y), Mask --> bitcast (shuffle X, Y, Mask)
2919	//
2920	// if X and Y are of the same (vector) type, and the element size is not
2921	// changed by the bitcasts, we can distribute the bitcasts through the
2922	// shuffle, hopefully reducing the number of instructions. We make sure that
2923	// at least one bitcast only has one use, so we don't increase* the number of*
2924	// instructions here.
2925	Value X, Y;
2926	if (match(V: LHS, P: m_BitCast(Op: m_Value(V&: X))) && match(V: RHS, P: m_BitCast(Op: m_Value(V&: Y))) &&
2927	X->getType()->isVectorTy() && X->getType() == Y->getType() &&
2928	X->getType()->getScalarSizeInBits() ==
2929	SVI.getType()->getScalarSizeInBits() &&
2930	(LHS->hasOneUse() \|\| RHS->hasOneUse())) {
2931	Value *V = Builder.CreateShuffleVector(V1: X, V2: Y, Mask: SVI.getShuffleMask(),
2932	Name: SVI.getName() + ".uncasted");
2933	return new BitCastInst (V, SVI.getType());
2934	}
2935
2936	ArrayRef<int> Mask = SVI.getShuffleMask();
2937
2938	// Peek through a bitcasted shuffle operand by scaling the mask. If the
2939	// simulated shuffle can simplify, then this shuffle is unnecessary:
2940	// shuf (bitcast X), undef, Mask --> bitcast X'
2941	// TODO: This could be extended to allow length-changing shuffles.
2942	// The transform might also be obsoleted if we allowed canonicalization
2943	// of bitcasted shuffles.
2944	if (match(V: LHS, P: m_BitCast(Op: m_Value(V&: X))) && match(V: RHS, P: m_Undef()) &&
2945	X->getType()->isVectorTy() && VWidth == LHSWidth) {
2946	// Try to create a scaled mask constant.
2947	auto *XType = cast<FixedVectorType>(Val: X->getType());
2948	unsigned XNumElts = XType->getNumElements();
2949	SmallVector<int, `16`> ScaledMask;
2950	if (scaleShuffleMaskElts(NumDstElts: XNumElts, Mask, ScaledMask)) {
2951	// If the shuffled source vector simplifies, cast that value to this
2952	// shuffle's type.
2953	if (auto *V = simplifyShuffleVectorInst(Op0: X, Op1: UndefValue::get(T: XType),
2954	Mask: ScaledMask, RetTy: XType, Q: ShufQuery))
2955	return BitCastInst::Create(Instruction::BitCast, S: V, Ty: SVI.getType());
2956	}
2957	}
2958
2959	// shuffle x, x, mask --> shuffle x, undef, mask'
2960	if (LHS == RHS) {
2961	assert(!match(RHS, m_Undef()) &&
2962	"Shuffle with 2 undef ops not simplified?");
2963	return new ShuffleVectorInst (LHS, createUnaryMask(Mask, NumElts: LHSWidth));
2964	}
2965
2966	// shuffle undef, x, mask --> shuffle x, undef, mask'
2967	if (match(V: LHS, P: m_Undef())) {
2968	SVI.commute();
2969	return &SVI;
2970	}
2971
2972	if (Instruction *I = canonicalizeInsertSplat(Shuf&: SVI, Builder))
2973	return I;
2974
2975	if (Instruction *I = foldSelectShuffle(Shuf&: SVI))
2976	return I;
2977
2978	if (Instruction *I = foldTruncShuffle(Shuf&: SVI, IsBigEndian: DL.isBigEndian()))
2979	return I;
2980
2981	if (Instruction *I = narrowVectorSelect(Shuf&: SVI, Builder))
2982	return I;
2983
2984	if (Instruction *I = foldShuffleOfUnaryOps(Shuf&: SVI, Builder))
2985	return I;
2986
2987	if (Instruction *I = foldCastShuffle(Shuf&: SVI, Builder))
2988	return I;
2989
2990	APInt PoisonElts(VWidth, `0`);
2991	APInt AllOnesEltMask(APInt::getAllOnes(numBits: VWidth));
2992	if (Value *V = SimplifyDemandedVectorElts(V: &SVI, DemandedElts: AllOnesEltMask, PoisonElts)) {
2993	if (V != &SVI)
2994	return replaceInstUsesWith(I&: SVI, V);
2995	return &SVI;
2996	}
2997
2998	if (Instruction *I = foldIdentityExtractShuffle(Shuf&: SVI))
2999	return I;
3000
3001	// These transforms have the potential to lose undef knowledge, so they are
3002	// intentionally placed after SimplifyDemandedVectorElts().
3003	if (Instruction I = foldShuffleWithInsert(Shuf&: SVI, IC&: this))
3004	return I;
3005	if (Instruction *I = foldIdentityPaddedShuffles(Shuf&: SVI))
3006	return I;
3007
3008	if (match(V: RHS, P: m_Constant())) {
3009	if (auto *SI = dyn_cast<SelectInst>(Val: LHS)) {
3010	// We cannot do this fold for elementwise select since ShuffleVector is
3011	// not elementwise.
3012	if (SI->getCondition()->getType()->isIntegerTy() &&
3013	(isa<PoisonValue>(Val: RHS) \|\|
3014	isGuaranteedNotToBePoison(V: SI->getCondition()))) {
3015	if (Instruction *I = FoldOpIntoSelect(Op&: SVI, SI))
3016	return I;
3017	}
3018	}
3019	if (auto *PN = dyn_cast<PHINode>(Val: LHS)) {
3020	if (Instruction I = foldOpIntoPhi(I&: SVI, PN, /AllowMultipleUses=/*true))
3021	return I;
3022	}
3023	}
3024
3025	if (match(V: RHS, P: m_Poison()) && canEvaluateShuffled(V: LHS, Mask)) {
3026	Value *V = evaluateInDifferentElementOrder(V: LHS, Mask, Builder);
3027	return replaceInstUsesWith(I&: SVI, V);
3028	}
3029
3030	// SROA generates shuffle+bitcast when the extracted sub-vector is bitcast to
3031	// a non-vector type. We can instead bitcast the original vector followed by
3032	// an extract of the desired element:
3033	//
3034	// %sroa = shufflevector <16 x i8> %in, <16 x i8> undef,
3035	// <4 x i32> <i32 0, i32 1, i32 2, i32 3>
3036	// %1 = bitcast <4 x i8> %sroa to i32
3037	// Becomes:
3038	// %bc = bitcast <16 x i8> %in to <4 x i32>
3039	// %ext = extractelement <4 x i32> %bc, i32 0
3040	//
3041	// If the shuffle is extracting a contiguous range of values from the input
3042	// vector then each use which is a bitcast of the extracted size can be
3043	// replaced. This will work if the vector types are compatible, and the begin
3044	// index is aligned to a value in the casted vector type. If the begin index
3045	// isn't aligned then we can shuffle the original vector (keeping the same
3046	// vector type) before extracting.
3047	//
3048	// This code will bail out if the target type is fundamentally incompatible
3049	// with vectors of the source type.
3050	//
3051	// Example of <16 x i8>, target type i32:
3052	// Index range [4,8): v-----------v Will work.
3053	// +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
3054	// <16 x i8>: \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \|
3055	// <4 x i32>: \| \| \| \| \|
3056	// +-----------+-----------+-----------+-----------+
3057	// Index range [6,10): ^-----------^ Needs an extra shuffle.
3058	// Target type i40: ^--------------^ Won't work, bail.
3059	bool MadeChange = false;
3060	if (isShuffleExtractingFromLHS(SVI, Mask)) {
3061	Value *V = LHS;
3062	unsigned MaskElems = Mask.size();
3063	auto *SrcTy = cast<FixedVectorType>(Val: V->getType());
3064	unsigned VecBitWidth = DL.getTypeSizeInBits(Ty: SrcTy);
3065	unsigned SrcElemBitWidth = DL.getTypeSizeInBits(Ty: SrcTy->getElementType());
3066	assert(SrcElemBitWidth && "vector elements must have a bitwidth");
3067	unsigned SrcNumElems = SrcTy->getNumElements();
3068	SmallVector<BitCastInst *, `8`> BCs;
3069	DenseMap<Type , Value > NewBCs;
3070	for (User *U : SVI.users())
3071	if (BitCastInst *BC = dyn_cast<BitCastInst>(Val: U)) {
3072	// Only visit bitcasts that weren't previously handled.
3073	if (BC->use_empty())
3074	continue;
3075	// Prefer to combine bitcasts of bitcasts before attempting this fold.
3076	if (BC->hasOneUse()) {
3077	auto *BC2 = dyn_cast<BitCastInst>(Val: BC->user_back());
3078	if (BC2 && isEliminableCastPair(CI1: BC, CI2: BC2))
3079	continue;
3080	}
3081	BCs.push_back(Elt: BC);
3082	}
3083	for (BitCastInst *BC : BCs) {
3084	unsigned BegIdx = Mask.front();
3085	Type *TgtTy = BC->getDestTy();
3086	unsigned TgtElemBitWidth = DL.getTypeSizeInBits(Ty: TgtTy);
3087	if (!TgtElemBitWidth)
3088	continue;
3089	unsigned TgtNumElems = VecBitWidth / TgtElemBitWidth;
3090	bool VecBitWidthsEqual = VecBitWidth == TgtNumElems * TgtElemBitWidth;
3091	bool BegIsAligned = `0` == ((SrcElemBitWidth * BegIdx) % TgtElemBitWidth);
3092	if (!VecBitWidthsEqual)
3093	continue;
3094	if (!VectorType::isValidElementType(ElemTy: TgtTy))
3095	continue;
3096	auto *CastSrcTy = FixedVectorType::get(ElementType: TgtTy, NumElts: TgtNumElems);
3097	if (!BegIsAligned) {
3098	// Shuffle the input so [0,NumElements) contains the output, and
3099	// [NumElems,SrcNumElems) is undef.
3100	SmallVector<int, `16`> ShuffleMask(SrcNumElems, -`1`);
3101	for (unsigned I = `0`, E = MaskElems, Idx = BegIdx; I != E; ++Idx, ++I)
3102	ShuffleMask [I] = Idx;
3103	V = Builder.CreateShuffleVector(V, Mask: ShuffleMask,
3104	Name: SVI.getName() + ".extract");
3105	BegIdx = `0`;
3106	}
3107	unsigned SrcElemsPerTgtElem = TgtElemBitWidth / SrcElemBitWidth;
3108	assert(SrcElemsPerTgtElem);
3109	BegIdx /= SrcElemsPerTgtElem;
3110	auto [It, Inserted] = NewBCs.try_emplace(Key: CastSrcTy);
3111	if (Inserted)
3112	It ->second = Builder.CreateBitCast(V, DestTy: CastSrcTy, Name: SVI.getName() + ".bc");
3113	auto *Ext = Builder.CreateExtractElement(Vec: It ->second, Idx: BegIdx,
3114	Name: SVI.getName() + ".extract");
3115	// The shufflevector isn't being replaced: the bitcast that used it
3116	// is. InstCombine will visit the newly-created instructions.
3117	replaceInstUsesWith(I&: *BC, V: Ext);
3118	MadeChange = true;
3119	}
3120	}
3121
3122	// If the LHS is a shufflevector itself, see if we can combine it with this
3123	// one without producing an unusual shuffle.
3124	// Cases that might be simplified:
3125	// 1.
3126	// x1=shuffle(v1,v2,mask1)
3127	// x=shuffle(x1,undef,mask)
3128	// ==>
3129	// x=shuffle(v1,undef,newMask)
3130	// newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : -1
3131	// 2.
3132	// x1=shuffle(v1,undef,mask1)
3133	// x=shuffle(x1,x2,mask)
3134	// where v1.size() == mask1.size()
3135	// ==>
3136	// x=shuffle(v1,x2,newMask)
3137	// newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : mask[i]
3138	// 3.
3139	// x2=shuffle(v2,undef,mask2)
3140	// x=shuffle(x1,x2,mask)
3141	// where v2.size() == mask2.size()
3142	// ==>
3143	// x=shuffle(x1,v2,newMask)
3144	// newMask[i] = (mask[i] < x1.size())
3145	// ? mask[i] : mask2[mask[i]-x1.size()]+x1.size()
3146	// 4.
3147	// x1=shuffle(v1,undef,mask1)
3148	// x2=shuffle(v2,undef,mask2)
3149	// x=shuffle(x1,x2,mask)
3150	// where v1.size() == v2.size()
3151	// ==>
3152	// x=shuffle(v1,v2,newMask)
3153	// newMask[i] = (mask[i] < x1.size())
3154	// ? mask1[mask[i]] : mask2[mask[i]-x1.size()]+v1.size()
3155	//
3156	// Here we are really conservative:
3157	// we are absolutely afraid of producing a shuffle mask not in the input
3158	// program, because the code gen may not be smart enough to turn a merged
3159	// shuffle into two specific shuffles: it may produce worse code. As such,
3160	// we only merge two shuffles if the result is either a splat or one of the
3161	// input shuffle masks. In this case, merging the shuffles just removes
3162	// one instruction, which we know is safe. This is good for things like
3163	// turning: (splat(splat)) -> splat, or
3164	// merge(V[0..n], V[n+1..2n]) -> V[0..2n]
3165	ShuffleVectorInst* LHSShuffle = dyn_cast<ShuffleVectorInst>(Val: LHS);
3166	ShuffleVectorInst* RHSShuffle = dyn_cast<ShuffleVectorInst>(Val: RHS);
3167	if (LHSShuffle)
3168	if (!match(V: LHSShuffle->getOperand(i_nocapture: `1`), P: m_Poison()) &&
3169	!match(V: RHS, P: m_Poison()))
3170	LHSShuffle = nullptr;
3171	if (RHSShuffle)
3172	if (!match(V: RHSShuffle->getOperand(i_nocapture: `1`), P: m_Poison()))
3173	RHSShuffle = nullptr;
3174	if (!LHSShuffle && !RHSShuffle)
3175	return MadeChange ? &SVI : nullptr;
3176
3177	Value* LHSOp0 = nullptr;
3178	Value* LHSOp1 = nullptr;
3179	Value* RHSOp0 = nullptr;
3180	unsigned LHSOp0Width = `0`;
3181	unsigned RHSOp0Width = `0`;
3182	if (LHSShuffle) {
3183	LHSOp0 = LHSShuffle->getOperand(i_nocapture: `0`);
3184	LHSOp1 = LHSShuffle->getOperand(i_nocapture: `1`);
3185	LHSOp0Width = cast<FixedVectorType>(Val: LHSOp0->getType())->getNumElements();
3186	}
3187	if (RHSShuffle) {
3188	RHSOp0 = RHSShuffle->getOperand(i_nocapture: `0`);
3189	RHSOp0Width = cast<FixedVectorType>(Val: RHSOp0->getType())->getNumElements();
3190	}
3191	Value* newLHS = LHS;
3192	Value* newRHS = RHS;
3193	if (LHSShuffle) {
3194	// case 1
3195	if (match(V: RHS, P: m_Poison())) {
3196	newLHS = LHSOp0;
3197	newRHS = LHSOp1;
3198	}
3199	// case 2 or 4
3200	else if (LHSOp0Width == LHSWidth) {
3201	newLHS = LHSOp0;
3202	}
3203	}
3204	// case 3 or 4
3205	if (RHSShuffle && RHSOp0Width == LHSWidth) {
3206	newRHS = RHSOp0;
3207	}
3208	// case 4
3209	if (LHSOp0 == RHSOp0) {
3210	newLHS = LHSOp0;
3211	newRHS = nullptr;
3212	}
3213
3214	if (newLHS == LHS && newRHS == RHS)
3215	return MadeChange ? &SVI : nullptr;
3216
3217	ArrayRef<int> LHSMask;
3218	ArrayRef<int> RHSMask;
3219	if (newLHS != LHS)
3220	LHSMask = LHSShuffle->getShuffleMask();
3221	if (RHSShuffle && newRHS != RHS)
3222	RHSMask = RHSShuffle->getShuffleMask();
3223
3224	unsigned newLHSWidth = (newLHS != LHS) ? LHSOp0Width : LHSWidth;
3225	SmallVector<int, `16`> newMask;
3226	bool isSplat = true;
3227	int SplatElt = -`1`;
3228	// Create a new mask for the new ShuffleVectorInst so that the new
3229	// ShuffleVectorInst is equivalent to the original one.
3230	for (unsigned i = `0`; i < VWidth; ++i) {
3231	int eltMask;
3232	if (Mask [i] < `0`) {
3233	// This element is a poison value.
3234	eltMask = -`1`;
3235	} else if (Mask [i] < (int)LHSWidth) {
3236	// This element is from left hand side vector operand.
3237	//
3238	// If LHS is going to be replaced (case 1, 2, or 4), calculate the
3239	// new mask value for the element.
3240	if (newLHS != LHS) {
3241	eltMask = LHSMask [Mask [i]];
3242	// If the value selected is an poison value, explicitly specify it
3243	// with a -1 mask value.
3244	if (eltMask >= (int)LHSOp0Width && isa<PoisonValue>(Val: LHSOp1))
3245	eltMask = -`1`;
3246	} else
3247	eltMask = Mask [i];
3248	} else {
3249	// This element is from right hand side vector operand
3250	//
3251	// If the value selected is a poison value, explicitly specify it
3252	// with a -1 mask value. (case 1)
3253	if (match(V: RHS, P: m_Poison()))
3254	eltMask = -`1`;
3255	// If RHS is going to be replaced (case 3 or 4), calculate the
3256	// new mask value for the element.
3257	else if (newRHS != RHS) {
3258	eltMask = RHSMask [Mask [i]-LHSWidth];
3259	// If the value selected is an poison value, explicitly specify it
3260	// with a -1 mask value.
3261	if (eltMask >= (int)RHSOp0Width) {
3262	assert(match(RHSShuffle->getOperand(`1`), m_Poison()) &&
3263	"should have been check above");
3264	eltMask = -`1`;
3265	}
3266	} else
3267	eltMask = Mask [i]-LHSWidth;
3268
3269	// If LHS's width is changed, shift the mask value accordingly.
3270	// If newRHS == nullptr, i.e. LHSOp0 == RHSOp0, we want to remap any
3271	// references from RHSOp0 to LHSOp0, so we don't need to shift the mask.
3272	// If newRHS == newLHS, we want to remap any references from newRHS to
3273	// newLHS so that we can properly identify splats that may occur due to
3274	// obfuscation across the two vectors.
3275	if (eltMask >= `0` && newRHS != nullptr && newLHS != newRHS)
3276	eltMask += newLHSWidth;
3277	}
3278
3279	// Check if this could still be a splat.
3280	if (eltMask >= `0`) {
3281	if (SplatElt >= `0` && SplatElt != eltMask)
3282	isSplat = false;
3283	SplatElt = eltMask;
3284	}
3285
3286	newMask.push_back(Elt: eltMask);
3287	}
3288
3289	// If the result mask is equal to one of the original shuffle masks,
3290	// or is a splat, do the replacement.
3291	if (isSplat \|\| newMask == LHSMask \|\| newMask == RHSMask \|\| newMask == Mask) {
3292	if (!newRHS)
3293	newRHS = PoisonValue::get(T: newLHS->getType());
3294	return new ShuffleVectorInst (newLHS, newRHS, newMask);
3295	}
3296
3297	return MadeChange ? &SVI : nullptr;
3298	}
3299

Browse the source code of llvm_projects/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp