InstCombineVectorOps.cpp source code [llvm_projects/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp]

1	//===- InstCombineVectorOps.cpp -------------------------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This file implements instcombine for ExtractElement, InsertElement and
10	// ShuffleVector.
11	//
12	//===----------------------------------------------------------------------===//
13
14	#include "InstCombineInternal.h"
15	#include "llvm/ADT/APInt.h"
16	#include "llvm/ADT/ArrayRef.h"
17	#include "llvm/ADT/DenseMap.h"
18	#include "llvm/ADT/STLExtras.h"
19	#include "llvm/ADT/SmallBitVector.h"
20	#include "llvm/ADT/SmallVector.h"
21	#include "llvm/ADT/Statistic.h"
22	#include "llvm/Analysis/InstructionSimplify.h"
23	#include "llvm/Analysis/VectorUtils.h"
24	#include "llvm/IR/BasicBlock.h"
25	#include "llvm/IR/Constant.h"
26	#include "llvm/IR/Constants.h"
27	#include "llvm/IR/DerivedTypes.h"
28	#include "llvm/IR/InstrTypes.h"
29	#include "llvm/IR/Instruction.h"
30	#include "llvm/IR/Instructions.h"
31	#include "llvm/IR/Operator.h"
32	#include "llvm/IR/PatternMatch.h"
33	#include "llvm/IR/Type.h"
34	#include "llvm/IR/User.h"
35	#include "llvm/IR/Value.h"
36	#include "llvm/Support/Casting.h"
37	#include "llvm/Support/ErrorHandling.h"
38	#include "llvm/Transforms/InstCombine/InstCombiner.h"
39	#include <cassert>
40	#include <cstdint>
41	#include <iterator>
42	#include <utility>
43
44	#define DEBUG_TYPE "instcombine"
45
46	using namespace llvm;
47	using namespace PatternMatch;
48
49	STATISTIC(NumAggregateReconstructionsSimplified,
50	"Number of aggregate reconstructions turned into reuse of the "
51	"original aggregate");
52
53	/// Return true if the value is cheaper to scalarize than it is to leave as a
54	/// vector operation. If the extract index \p EI is a constant integer then
55	/// some operations may be cheap to scalarize.
56	///
57	/// FIXME: It's possible to create more instructions than previously existed.
58	static bool cheapToScalarize(Value V, Value EI) {
59	ConstantInt *CEI = dyn_cast<ConstantInt>(Val: EI);
60
61	// If we can pick a scalar constant value out of a vector, that is free.
62	if (auto *C = dyn_cast<Constant>(Val: V))
63	return CEI \|\| C->getSplatValue();
64
65	if (CEI && match(V, P: m_Intrinsic<Intrinsic::experimental_stepvector>())) {
66	ElementCount EC = cast<VectorType>(Val: V->getType())->getElementCount();
67	// Index needs to be lower than the minimum size of the vector, because
68	// for scalable vector, the vector size is known at run time.
69	return CEI->getValue().ult(RHS: EC.getKnownMinValue());
70	}
71
72	// An insertelement to the same constant index as our extract will simplify
73	// to the scalar inserted element. An insertelement to a different constant
74	// index is irrelevant to our extract.
75	if (match(V, P: m_InsertElt(Val: m_Value(), Elt: m_Value(), Idx: m_ConstantInt())))
76	return CEI;
77
78	if (match(V, P: m_OneUse(SubPattern: m_Load(Op: m_Value()))))
79	return true;
80
81	if (match(V, P: m_OneUse(SubPattern: m_UnOp())))
82	return true;
83
84	Value V0, V1;
85	if (match(V, P: m_OneUse(SubPattern: m_BinOp(L: m_Value(V&: V0), R: m_Value(V&: V1)))))
86	if (cheapToScalarize(V: V0, EI) \|\| cheapToScalarize(V: V1, EI))
87	return true;
88
89	CmpInst::Predicate UnusedPred;
90	if (match(V, P: m_OneUse(SubPattern: m_Cmp(Pred&: UnusedPred, L: m_Value(V&: V0), R: m_Value(V&: V1)))))
91	if (cheapToScalarize(V: V0, EI) \|\| cheapToScalarize(V: V1, EI))
92	return true;
93
94	return false;
95	}
96
97	// If we have a PHI node with a vector type that is only used to feed
98	// itself and be an operand of extractelement at a constant location,
99	// try to replace the PHI of the vector type with a PHI of a scalar type.
100	Instruction *InstCombinerImpl::scalarizePHI(ExtractElementInst &EI,
101	PHINode *PN) {
102	SmallVector<Instruction *, `2`> Extracts;
103	// The users we want the PHI to have are:
104	// 1) The EI ExtractElement (we already know this)
105	// 2) Possibly more ExtractElements with the same index.
106	// 3) Another operand, which will feed back into the PHI.
107	Instruction PHIUser = nullptr*;
108	for (auto *U : PN->users()) {
109	if (ExtractElementInst *EU = dyn_cast<ExtractElementInst>(Val: U)) {
110	if (EI.getIndexOperand() == EU->getIndexOperand())
111	Extracts.push_back(Elt: EU);
112	else
113	return nullptr;
114	} else if (!PHIUser) {
115	PHIUser = cast<Instruction>(Val: U);
116	} else {
117	return nullptr;
118	}
119	}
120
121	if (!PHIUser)
122	return nullptr;
123
124	// Verify that this PHI user has one use, which is the PHI itself,
125	// and that it is a binary operation which is cheap to scalarize.
126	// otherwise return nullptr.
127	if (!PHIUser->hasOneUse() \|\| !(PHIUser->user_back() == PN) \|\|
128	!(isa<BinaryOperator>(Val: PHIUser)) \|\|
129	!cheapToScalarize(V: PHIUser, EI: EI.getIndexOperand()))
130	return nullptr;
131
132	// Create a scalar PHI node that will replace the vector PHI node
133	// just before the current PHI node.
134	PHINode *scalarPHI = cast<PHINode>(Val: InsertNewInstWith(
135	New: PHINode::Create(Ty: EI.getType(), NumReservedValues: PN->getNumIncomingValues(), NameStr: ""), Old: PN->getIterator()));
136	// Scalarize each PHI operand.
137	for (unsigned i = `0`; i < PN->getNumIncomingValues(); i++) {
138	Value *PHIInVal = PN->getIncomingValue(i);
139	BasicBlock *inBB = PN->getIncomingBlock(i);
140	Value *Elt = EI.getIndexOperand();
141	// If the operand is the PHI induction variable:
142	if (PHIInVal == PHIUser) {
143	// Scalarize the binary operation. Its first operand is the
144	// scalar PHI, and the second operand is extracted from the other
145	// vector operand.
146	BinaryOperator *B0 = cast<BinaryOperator>(Val: PHIUser);
147	unsigned opId = (B0->getOperand(i_nocapture: `0`) == PN) ? `1` : `0`;
148	Value *Op = InsertNewInstWith(
149	New: ExtractElementInst::Create(Vec: B0->getOperand(i_nocapture: opId), Idx: Elt,
150	NameStr: B0->getOperand(i_nocapture: opId)->getName() + ".Elt"),
151	Old: B0->getIterator());
152	Value *newPHIUser = InsertNewInstWith(
153	New: BinaryOperator::CreateWithCopiedFlags(Opc: B0->getOpcode(),
154	V1: scalarPHI, V2: Op, CopyO: B0), Old: B0->getIterator());
155	scalarPHI->addIncoming(V: newPHIUser, BB: inBB);
156	} else {
157	// Scalarize PHI input:
158	Instruction *newEI = ExtractElementInst::Create(Vec: PHIInVal, Idx: Elt, NameStr: "");
159	// Insert the new instruction into the predecessor basic block.
160	Instruction *pos = dyn_cast<Instruction>(Val: PHIInVal);
161	BasicBlock::iterator InsertPos;
162	if (pos && !isa<PHINode>(Val: pos)) {
163	InsertPos = ++pos->getIterator();
164	} else {
165	InsertPos = inBB->getFirstInsertionPt();
166	}
167
168	InsertNewInstWith(New: newEI, Old: InsertPos);
169
170	scalarPHI->addIncoming(V: newEI, BB: inBB);
171	}
172	}
173
174	for (auto *E : Extracts) {
175	replaceInstUsesWith(I&: *E, V: scalarPHI);
176	// Add old extract to worklist for DCE.
177	addToWorklist(I: E);
178	}
179
180	return &EI;
181	}
182
183	Instruction *InstCombinerImpl::foldBitcastExtElt(ExtractElementInst &Ext) {
184	Value *X;
185	uint64_t ExtIndexC;
186	if (!match(V: Ext.getVectorOperand(), P: m_BitCast(Op: m_Value(V&: X))) \|\|
187	!match(V: Ext.getIndexOperand(), P: m_ConstantInt(V&: ExtIndexC)))
188	return nullptr;
189
190	ElementCount NumElts =
191	cast<VectorType>(Val: Ext.getVectorOperandType())->getElementCount();
192	Type *DestTy = Ext.getType();
193	unsigned DestWidth = DestTy->getPrimitiveSizeInBits();
194	bool IsBigEndian = DL.isBigEndian();
195
196	// If we are casting an integer to vector and extracting a portion, that is
197	// a shift-right and truncate.
198	if (X->getType()->isIntegerTy()) {
199	assert(isa<FixedVectorType>(Ext.getVectorOperand()->getType()) &&
200	"Expected fixed vector type for bitcast from scalar integer");
201
202	// Big endian requires adjusting the extract index since MSB is at index 0.
203	// LittleEndian: extelt (bitcast i32 X to v4i8), 0 -> trunc i32 X to i8
204	// BigEndian: extelt (bitcast i32 X to v4i8), 0 -> trunc i32 (X >> 24) to i8
205	if (IsBigEndian)
206	ExtIndexC = NumElts.getKnownMinValue() - `1` - ExtIndexC;
207	unsigned ShiftAmountC = ExtIndexC * DestWidth;
208	if (!ShiftAmountC \|\|
209	(isDesirableIntType(BitWidth: X->getType()->getPrimitiveSizeInBits()) &&
210	Ext.getVectorOperand()->hasOneUse())) {
211	if (ShiftAmountC)
212	X = Builder.CreateLShr(LHS: X, RHS: ShiftAmountC, Name: "extelt.offset");
213	if (DestTy->isFloatingPointTy()) {
214	Type *DstIntTy = IntegerType::getIntNTy(C&: X->getContext(), N: DestWidth);
215	Value *Trunc = Builder.CreateTrunc(V: X, DestTy: DstIntTy);
216	return new BitCastInst (Trunc, DestTy);
217	}
218	return new TruncInst (X, DestTy);
219	}
220	}
221
222	if (!X->getType()->isVectorTy())
223	return nullptr;
224
225	// If this extractelement is using a bitcast from a vector of the same number
226	// of elements, see if we can find the source element from the source vector:
227	// extelt (bitcast VecX), IndexC --> bitcast X[IndexC]
228	auto *SrcTy = cast<VectorType>(Val: X->getType());
229	ElementCount NumSrcElts = SrcTy->getElementCount();
230	if (NumSrcElts == NumElts)
231	if (Value *Elt = findScalarElement(V: X, EltNo: ExtIndexC))
232	return new BitCastInst (Elt, DestTy);
233
234	assert(NumSrcElts.isScalable() == NumElts.isScalable() &&
235	"Src and Dst must be the same sort of vector type");
236
237	// If the source elements are wider than the destination, try to shift and
238	// truncate a subset of scalar bits of an insert op.
239	if (NumSrcElts.getKnownMinValue() < NumElts.getKnownMinValue()) {
240	Value *Scalar;
241	Value *Vec;
242	uint64_t InsIndexC;
243	if (!match(V: X, P: m_InsertElt(Val: m_Value(V&: Vec), Elt: m_Value(V&: Scalar),
244	Idx: m_ConstantInt(V&: InsIndexC))))
245	return nullptr;
246
247	// The extract must be from the subset of vector elements that we inserted
248	// into. Example: if we inserted element 1 of a <2 x i64> and we are
249	// extracting an i16 (narrowing ratio = 4), then this extract must be from 1
250	// of elements 4-7 of the bitcasted vector.
251	unsigned NarrowingRatio =
252	NumElts.getKnownMinValue() / NumSrcElts.getKnownMinValue();
253
254	if (ExtIndexC / NarrowingRatio != InsIndexC) {
255	// Remove insertelement, if we don't use the inserted element.
256	// extractelement (bitcast (insertelement (Vec, b)), a) ->
257	// extractelement (bitcast (Vec), a)
258	// FIXME: this should be removed to SimplifyDemandedVectorElts,
259	// once scale vectors are supported.
260	if (X->hasOneUse() && Ext.getVectorOperand()->hasOneUse()) {
261	Value *NewBC = Builder.CreateBitCast(V: Vec, DestTy: Ext.getVectorOperandType());
262	return ExtractElementInst::Create(Vec: NewBC, Idx: Ext.getIndexOperand());
263	}
264	return nullptr;
265	}
266
267	// We are extracting part of the original scalar. How that scalar is
268	// inserted into the vector depends on the endian-ness. Example:
269	// Vector Byte Elt Index: 0 1 2 3 4 5 6 7
270	// +--+--+--+--+--+--+--+--+
271	// inselt <2 x i32> V, <i32> S, 1: \|V0\|V1\|V2\|V3\|S0\|S1\|S2\|S3\|
272	// extelt <4 x i16> V', 3: \| \|S2\|S3\|
273	// +--+--+--+--+--+--+--+--+
274	// If this is little-endian, S2\|S3 are the MSB of the 32-bit 'S' value.
275	// If this is big-endian, S2\|S3 are the LSB of the 32-bit 'S' value.
276	// In this example, we must right-shift little-endian. Big-endian is just a
277	// truncate.
278	unsigned Chunk = ExtIndexC % NarrowingRatio;
279	if (IsBigEndian)
280	Chunk = NarrowingRatio - `1` - Chunk;
281
282	// Bail out if this is an FP vector to FP vector sequence. That would take
283	// more instructions than we started with unless there is no shift, and it
284	// may not be handled as well in the backend.
285	bool NeedSrcBitcast = SrcTy->getScalarType()->isFloatingPointTy();
286	bool NeedDestBitcast = DestTy->isFloatingPointTy();
287	if (NeedSrcBitcast && NeedDestBitcast)
288	return nullptr;
289
290	unsigned SrcWidth = SrcTy->getScalarSizeInBits();
291	unsigned ShAmt = Chunk * DestWidth;
292
293	// TODO: This limitation is more strict than necessary. We could sum the
294	// number of new instructions and subtract the number eliminated to know if
295	// we can proceed.
296	if (!X->hasOneUse() \|\| !Ext.getVectorOperand()->hasOneUse())
297	if (NeedSrcBitcast \|\| NeedDestBitcast)
298	return nullptr;
299
300	if (NeedSrcBitcast) {
301	Type *SrcIntTy = IntegerType::getIntNTy(C&: Scalar->getContext(), N: SrcWidth);
302	Scalar = Builder.CreateBitCast(V: Scalar, DestTy: SrcIntTy);
303	}
304
305	if (ShAmt) {
306	// Bail out if we could end with more instructions than we started with.
307	if (!Ext.getVectorOperand()->hasOneUse())
308	return nullptr;
309	Scalar = Builder.CreateLShr(LHS: Scalar, RHS: ShAmt);
310	}
311
312	if (NeedDestBitcast) {
313	Type *DestIntTy = IntegerType::getIntNTy(C&: Scalar->getContext(), N: DestWidth);
314	return new BitCastInst (Builder.CreateTrunc(V: Scalar, DestTy: DestIntTy), DestTy);
315	}
316	return new TruncInst (Scalar, DestTy);
317	}
318
319	return nullptr;
320	}
321
322	/// Find elements of V demanded by UserInstr.
323	static APInt findDemandedEltsBySingleUser(Value V, Instruction UserInstr) {
324	unsigned VWidth = cast<FixedVectorType>(Val: V->getType())->getNumElements();
325
326	// Conservatively assume that all elements are needed.
327	APInt UsedElts(APInt::getAllOnes(numBits: VWidth));
328
329	switch (UserInstr->getOpcode()) {
330	case Instruction::ExtractElement: {
331	ExtractElementInst *EEI = cast<ExtractElementInst>(Val: UserInstr);
332	assert(EEI->getVectorOperand() == V);
333	ConstantInt *EEIIndexC = dyn_cast<ConstantInt>(Val: EEI->getIndexOperand());
334	if (EEIIndexC && EEIIndexC->getValue().ult(RHS: VWidth)) {
335	UsedElts = APInt::getOneBitSet(numBits: VWidth, BitNo: EEIIndexC->getZExtValue());
336	}
337	break;
338	}
339	case Instruction::ShuffleVector: {
340	ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(Val: UserInstr);
341	unsigned MaskNumElts =
342	cast<FixedVectorType>(Val: UserInstr->getType())->getNumElements();
343
344	UsedElts = APInt (VWidth, `0`);
345	for (unsigned i = `0`; i < MaskNumElts; i++) {
346	unsigned MaskVal = Shuffle->getMaskValue(Elt: i);
347	if (MaskVal == -`1u` \|\| MaskVal >= `2` * VWidth)
348	continue;
349	if (Shuffle->getOperand(i_nocapture: `0`) == V && (MaskVal < VWidth))
350	UsedElts.setBit(MaskVal);
351	if (Shuffle->getOperand(i_nocapture: `1`) == V &&
352	((MaskVal >= VWidth) && (MaskVal < `2` * VWidth)))
353	UsedElts.setBit(MaskVal - VWidth);
354	}
355	break;
356	}
357	default:
358	break;
359	}
360	return UsedElts;
361	}
362
363	/// Find union of elements of V demanded by all its users.
364	/// If it is known by querying findDemandedEltsBySingleUser that
365	/// no user demands an element of V, then the corresponding bit
366	/// remains unset in the returned value.
367	static APInt findDemandedEltsByAllUsers(Value *V) {
368	unsigned VWidth = cast<FixedVectorType>(Val: V->getType())->getNumElements();
369
370	APInt UnionUsedElts(VWidth, `0`);
371	for (const Use &U : V->uses()) {
372	if (Instruction *I = dyn_cast<Instruction>(Val: U.getUser())) {
373	UnionUsedElts \|= findDemandedEltsBySingleUser(V, UserInstr: I);
374	} else {
375	UnionUsedElts = APInt::getAllOnes(numBits: VWidth);
376	break;
377	}
378
379	if (UnionUsedElts.isAllOnes())
380	break;
381	}
382
383	return UnionUsedElts;
384	}
385
386	/// Given a constant index for a extractelement or insertelement instruction,
387	/// return it with the canonical type if it isn't already canonical. We
388	/// arbitrarily pick 64 bit as our canonical type. The actual bitwidth doesn't
389	/// matter, we just want a consistent type to simplify CSE.
390	static ConstantInt getPreferredVectorIndex(ConstantInt IndexC) {
391	const unsigned IndexBW = IndexC->getBitWidth();
392	if (IndexBW == `64` \|\| IndexC->getValue().getActiveBits() > `64`)
393	return nullptr;
394	return ConstantInt::get(Context&: IndexC->getContext(),
395	V: IndexC->getValue().zextOrTrunc(width: `64`));
396	}
397
398	Instruction *InstCombinerImpl::visitExtractElementInst(ExtractElementInst &EI) {
399	Value *SrcVec = EI.getVectorOperand();
400	Value *Index = EI.getIndexOperand();
401	if (Value *V = simplifyExtractElementInst(Vec: SrcVec, Idx: Index,
402	Q: SQ.getWithInstruction(I: &EI)))
403	return replaceInstUsesWith(I&: EI, V);
404
405	// extractelt (select %x, %vec1, %vec2), %const ->
406	// select %x, %vec1[%const], %vec2[%const]
407	// TODO: Support constant folding of multiple select operands:
408	// extractelt (select %x, %vec1, %vec2), (select %x, %c1, %c2)
409	// If the extractelement will for instance try to do out of bounds accesses
410	// because of the values of %c1 and/or %c2, the sequence could be optimized
411	// early. This is currently not possible because constant folding will reach
412	// an unreachable assertion if it doesn't find a constant operand.
413	if (SelectInst *SI = dyn_cast<SelectInst>(Val: EI.getVectorOperand()))
414	if (SI->getCondition()->getType()->isIntegerTy() &&
415	isa<Constant>(Val: EI.getIndexOperand()))
416	if (Instruction *R = FoldOpIntoSelect(Op&: EI, SI))
417	return R;
418
419	// If extracting a specified index from the vector, see if we can recursively
420	// find a previously computed scalar that was inserted into the vector.
421	auto *IndexC = dyn_cast<ConstantInt>(Val: Index);
422	bool HasKnownValidIndex = false;
423	if (IndexC) {
424	// Canonicalize type of constant indices to i64 to simplify CSE
425	if (auto *NewIdx = getPreferredVectorIndex(IndexC))
426	return replaceOperand(I&: EI, OpNum: `1`, V: NewIdx);
427
428	ElementCount EC = EI.getVectorOperandType()->getElementCount();
429	unsigned NumElts = EC.getKnownMinValue();
430	HasKnownValidIndex = IndexC->getValue().ult(RHS: NumElts);
431
432	if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: SrcVec)) {
433	Intrinsic::ID IID = II->getIntrinsicID();
434	// Index needs to be lower than the minimum size of the vector, because
435	// for scalable vector, the vector size is known at run time.
436	if (IID == Intrinsic::experimental_stepvector &&
437	IndexC->getValue().ult(RHS: NumElts)) {
438	Type *Ty = EI.getType();
439	unsigned BitWidth = Ty->getIntegerBitWidth();
440	Value *Idx;
441	// Return index when its value does not exceed the allowed limit
442	// for the element type of the vector, otherwise return undefined.
443	if (IndexC->getValue().getActiveBits() <= BitWidth)
444	Idx = ConstantInt::get(Ty, V: IndexC->getValue().zextOrTrunc(width: BitWidth));
445	else
446	Idx = PoisonValue::get(T: Ty);
447	return replaceInstUsesWith(I&: EI, V: Idx);
448	}
449	}
450
451	// InstSimplify should handle cases where the index is invalid.
452	// For fixed-length vector, it's invalid to extract out-of-range element.
453	if (!EC.isScalable() && IndexC->getValue().uge(RHS: NumElts))
454	return nullptr;
455
456	if (Instruction *I = foldBitcastExtElt(Ext&: EI))
457	return I;
458
459	// If there's a vector PHI feeding a scalar use through this extractelement
460	// instruction, try to scalarize the PHI.
461	if (auto *Phi = dyn_cast<PHINode>(Val: SrcVec))
462	if (Instruction *ScalarPHI = scalarizePHI(EI, PN: Phi))
463	return ScalarPHI;
464	}
465
466	// TODO come up with a n-ary matcher that subsumes both unary and
467	// binary matchers.
468	UnaryOperator *UO;
469	if (match(V: SrcVec, P: m_UnOp(I&: UO)) && cheapToScalarize(V: SrcVec, EI: Index)) {
470	// extelt (unop X), Index --> unop (extelt X, Index)
471	Value *X = UO->getOperand(i_nocapture: `0`);
472	Value *E = Builder.CreateExtractElement(Vec: X, Idx: Index);
473	return UnaryOperator::CreateWithCopiedFlags(Opc: UO->getOpcode(), V: E, CopyO: UO);
474	}
475
476	// If the binop is not speculatable, we cannot hoist the extractelement if
477	// it may make the operand poison.
478	BinaryOperator *BO;
479	if (match(V: SrcVec, P: m_BinOp(I&: BO)) && cheapToScalarize(V: SrcVec, EI: Index) &&
480	(HasKnownValidIndex \|\| isSafeToSpeculativelyExecute(I: BO))) {
481	// extelt (binop X, Y), Index --> binop (extelt X, Index), (extelt Y, Index)
482	Value X = BO->getOperand(i_nocapture: `0`), Y = BO->getOperand(i_nocapture: `1`);
483	Value *E0 = Builder.CreateExtractElement(Vec: X, Idx: Index);
484	Value *E1 = Builder.CreateExtractElement(Vec: Y, Idx: Index);
485	return BinaryOperator::CreateWithCopiedFlags(Opc: BO->getOpcode(), V1: E0, V2: E1, CopyO: BO);
486	}
487
488	Value X, Y;
489	CmpInst::Predicate Pred;
490	if (match(V: SrcVec, P: m_Cmp(Pred, L: m_Value(V&: X), R: m_Value(V&: Y))) &&
491	cheapToScalarize(V: SrcVec, EI: Index)) {
492	// extelt (cmp X, Y), Index --> cmp (extelt X, Index), (extelt Y, Index)
493	Value *E0 = Builder.CreateExtractElement(Vec: X, Idx: Index);
494	Value *E1 = Builder.CreateExtractElement(Vec: Y, Idx: Index);
495	CmpInst *SrcCmpInst = cast<CmpInst>(Val: SrcVec);
496	return CmpInst::CreateWithCopiedFlags(Op: SrcCmpInst->getOpcode(), Pred, S1: E0, S2: E1,
497	FlagsSource: SrcCmpInst);
498	}
499
500	if (auto *I = dyn_cast<Instruction>(Val: SrcVec)) {
501	if (auto *IE = dyn_cast<InsertElementInst>(Val: I)) {
502	// instsimplify already handled the case where the indices are constants
503	// and equal by value, if both are constants, they must not be the same
504	// value, extract from the pre-inserted value instead.
505	if (isa<Constant>(Val: IE->getOperand(i_nocapture: `2`)) && IndexC)
506	return replaceOperand(I&: EI, OpNum: `0`, V: IE->getOperand(i_nocapture: `0`));
507	} else if (auto *GEP = dyn_cast<GetElementPtrInst>(Val: I)) {
508	auto *VecType = cast<VectorType>(Val: GEP->getType());
509	ElementCount EC = VecType->getElementCount();
510	uint64_t IdxVal = IndexC ? IndexC->getZExtValue() : `0`;
511	if (IndexC && IdxVal < EC.getKnownMinValue() && GEP->hasOneUse()) {
512	// Find out why we have a vector result - these are a few examples:
513	// 1. We have a scalar pointer and a vector of indices, or
514	// 2. We have a vector of pointers and a scalar index, or
515	// 3. We have a vector of pointers and a vector of indices, etc.
516	// Here we only consider combining when there is exactly one vector
517	// operand, since the optimization is less obviously a win due to
518	// needing more than one extractelements.
519
520	unsigned VectorOps =
521	llvm::count_if(Range: GEP->operands(), P: [](const Value *V) {
522	return isa<VectorType>(Val: V->getType());
523	});
524	if (VectorOps == `1`) {
525	Value *NewPtr = GEP->getPointerOperand();
526	if (isa<VectorType>(Val: NewPtr->getType()))
527	NewPtr = Builder.CreateExtractElement(Vec: NewPtr, Idx: IndexC);
528
529	SmallVector<Value *> NewOps;
530	for (unsigned I = `1`; I != GEP->getNumOperands(); ++I) {
531	Value *Op = GEP->getOperand(i_nocapture: I);
532	if (isa<VectorType>(Val: Op->getType()))
533	NewOps.push_back(Elt: Builder.CreateExtractElement(Vec: Op, Idx: IndexC));
534	else
535	NewOps.push_back(Elt: Op);
536	}
537
538	GetElementPtrInst *NewGEP = GetElementPtrInst::Create(
539	PointeeType: GEP->getSourceElementType(), Ptr: NewPtr, IdxList: NewOps);
540	NewGEP->setIsInBounds(GEP->isInBounds());
541	return NewGEP;
542	}
543	}
544	} else if (auto *SVI = dyn_cast<ShuffleVectorInst>(Val: I)) {
545	// If this is extracting an element from a shufflevector, figure out where
546	// it came from and extract from the appropriate input element instead.
547	// Restrict the following transformation to fixed-length vector.
548	if (isa<FixedVectorType>(Val: SVI->getType()) && isa<ConstantInt>(Val: Index)) {
549	int SrcIdx =
550	SVI->getMaskValue(Elt: cast<ConstantInt>(Val: Index)->getZExtValue());
551	Value *Src;
552	unsigned LHSWidth = cast<FixedVectorType>(Val: SVI->getOperand(i_nocapture: `0`)->getType())
553	->getNumElements();
554
555	if (SrcIdx < `0`)
556	return replaceInstUsesWith(I&: EI, V: PoisonValue::get(T: EI.getType()));
557	if (SrcIdx < (int)LHSWidth)
558	Src = SVI->getOperand(i_nocapture: `0`);
559	else {
560	SrcIdx -= LHSWidth;
561	Src = SVI->getOperand(i_nocapture: `1`);
562	}
563	Type *Int64Ty = Type::getInt64Ty(C&: EI.getContext());
564	return ExtractElementInst::Create(
565	Vec: Src, Idx: ConstantInt::get(Ty: Int64Ty, V: SrcIdx, IsSigned: false));
566	}
567	} else if (auto *CI = dyn_cast<CastInst>(Val: I)) {
568	// Canonicalize extractelement(cast) -> cast(extractelement).
569	// Bitcasts can change the number of vector elements, and they cost
570	// nothing.
571	if (CI->hasOneUse() && (CI->getOpcode() != Instruction::BitCast)) {
572	Value *EE = Builder.CreateExtractElement(Vec: CI->getOperand(i_nocapture: `0`), Idx: Index);
573	return CastInst::Create(CI->getOpcode(), S: EE, Ty: EI.getType());
574	}
575	}
576	}
577
578	// Run demanded elements after other transforms as this can drop flags on
579	// binops. If there's two paths to the same final result, we prefer the
580	// one which doesn't force us to drop flags.
581	if (IndexC) {
582	ElementCount EC = EI.getVectorOperandType()->getElementCount();
583	unsigned NumElts = EC.getKnownMinValue();
584	// This instruction only demands the single element from the input vector.
585	// Skip for scalable type, the number of elements is unknown at
586	// compile-time.
587	if (!EC.isScalable() && NumElts != `1`) {
588	// If the input vector has a single use, simplify it based on this use
589	// property.
590	if (SrcVec->hasOneUse()) {
591	APInt PoisonElts(NumElts, `0`);
592	APInt DemandedElts(NumElts, `0`);
593	DemandedElts.setBit(IndexC->getZExtValue());
594	if (Value *V =
595	SimplifyDemandedVectorElts(V: SrcVec, DemandedElts, PoisonElts))
596	return replaceOperand(I&: EI, OpNum: `0`, V);
597	} else {
598	// If the input vector has multiple uses, simplify it based on a union
599	// of all elements used.
600	APInt DemandedElts = findDemandedEltsByAllUsers(V: SrcVec);
601	if (!DemandedElts.isAllOnes()) {
602	APInt PoisonElts(NumElts, `0`);
603	if (Value *V = SimplifyDemandedVectorElts(
604	V: SrcVec, DemandedElts, PoisonElts, Depth: `0` / Depth /,
605	AllowMultipleUsers: true / AllowMultipleUsers /)) {
606	if (V != SrcVec) {
607	Worklist.addValue(V: SrcVec);
608	SrcVec->replaceAllUsesWith(V);
609	return &EI;
610	}
611	}
612	}
613	}
614	}
615	}
616	return nullptr;
617	}
618
619	/// If V is a shuffle of values that ONLY returns elements from either LHS or
620	/// RHS, return the shuffle mask and true. Otherwise, return false.
621	static bool collectSingleShuffleElements(Value V, Value LHS, Value *RHS,
622	SmallVectorImpl<int> &Mask) {
623	assert(LHS->getType() == RHS->getType() &&
624	"Invalid CollectSingleShuffleElements");
625	unsigned NumElts = cast<FixedVectorType>(Val: V->getType())->getNumElements();
626
627	if (match(V, P: m_Poison())) {
628	Mask.assign(NumElts, Elt: -`1`);
629	return true;
630	}
631
632	if (V == LHS) {
633	for (unsigned i = `0`; i != NumElts; ++i)
634	Mask.push_back(Elt: i);
635	return true;
636	}
637
638	if (V == RHS) {
639	for (unsigned i = `0`; i != NumElts; ++i)
640	Mask.push_back(Elt: i + NumElts);
641	return true;
642	}
643
644	if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(Val: V)) {
645	// If this is an insert of an extract from some other vector, include it.
646	Value *VecOp = IEI->getOperand(i_nocapture: `0`);
647	Value *ScalarOp = IEI->getOperand(i_nocapture: `1`);
648	Value *IdxOp = IEI->getOperand(i_nocapture: `2`);
649
650	if (!isa<ConstantInt>(Val: IdxOp))
651	return false;
652	unsigned InsertedIdx = cast<ConstantInt>(Val: IdxOp)->getZExtValue();
653
654	if (isa<PoisonValue>(Val: ScalarOp)) { // inserting poison into vector.
655	// We can handle this if the vector we are inserting into is
656	// transitively ok.
657	if (collectSingleShuffleElements(V: VecOp, LHS, RHS, Mask)) {
658	// If so, update the mask to reflect the inserted poison.
659	Mask [InsertedIdx] = -`1`;
660	return true;
661	}
662	} else if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(Val: ScalarOp)){
663	if (isa<ConstantInt>(Val: EI->getOperand(i_nocapture: `1`))) {
664	unsigned ExtractedIdx =
665	cast<ConstantInt>(Val: EI->getOperand(i_nocapture: `1`))->getZExtValue();
666	unsigned NumLHSElts =
667	cast<FixedVectorType>(Val: LHS->getType())->getNumElements();
668
669	// This must be extracting from either LHS or RHS.
670	if (EI->getOperand(i_nocapture: `0`) == LHS \|\| EI->getOperand(i_nocapture: `0`) == RHS) {
671	// We can handle this if the vector we are inserting into is
672	// transitively ok.
673	if (collectSingleShuffleElements(V: VecOp, LHS, RHS, Mask)) {
674	// If so, update the mask to reflect the inserted value.
675	if (EI->getOperand(i_nocapture: `0`) == LHS) {
676	Mask [InsertedIdx % NumElts] = ExtractedIdx;
677	} else {
678	assert(EI->getOperand(`0`) == RHS);
679	Mask [InsertedIdx % NumElts] = ExtractedIdx + NumLHSElts;
680	}
681	return true;
682	}
683	}
684	}
685	}
686	}
687
688	return false;
689	}
690
691	/// If we have insertion into a vector that is wider than the vector that we
692	/// are extracting from, try to widen the source vector to allow a single
693	/// shufflevector to replace one or more insert/extract pairs.
694	static bool replaceExtractElements(InsertElementInst *InsElt,
695	ExtractElementInst *ExtElt,
696	InstCombinerImpl &IC) {
697	auto *InsVecType = cast<FixedVectorType>(Val: InsElt->getType());
698	auto *ExtVecType = cast<FixedVectorType>(Val: ExtElt->getVectorOperandType());
699	unsigned NumInsElts = InsVecType->getNumElements();
700	unsigned NumExtElts = ExtVecType->getNumElements();
701
702	// The inserted-to vector must be wider than the extracted-from vector.
703	if (InsVecType->getElementType() != ExtVecType->getElementType() \|\|
704	NumExtElts >= NumInsElts)
705	return false;
706
707	// Create a shuffle mask to widen the extended-from vector using poison
708	// values. The mask selects all of the values of the original vector followed
709	// by as many poison values as needed to create a vector of the same length
710	// as the inserted-to vector.
711	SmallVector<int, `16`> ExtendMask;
712	for (unsigned i = `0`; i < NumExtElts; ++i)
713	ExtendMask.push_back(Elt: i);
714	for (unsigned i = NumExtElts; i < NumInsElts; ++i)
715	ExtendMask.push_back(Elt: -`1`);
716
717	Value *ExtVecOp = ExtElt->getVectorOperand();
718	auto *ExtVecOpInst = dyn_cast<Instruction>(Val: ExtVecOp);
719	BasicBlock *InsertionBlock = (ExtVecOpInst && !isa<PHINode>(Val: ExtVecOpInst))
720	? ExtVecOpInst->getParent()
721	: ExtElt->getParent();
722
723	// TODO: This restriction matches the basic block check below when creating
724	// new extractelement instructions. If that limitation is removed, this one
725	// could also be removed. But for now, we just bail out to ensure that we
726	// will replace the extractelement instruction that is feeding our
727	// insertelement instruction. This allows the insertelement to then be
728	// replaced by a shufflevector. If the insertelement is not replaced, we can
729	// induce infinite looping because there's an optimization for extractelement
730	// that will delete our widening shuffle. This would trigger another attempt
731	// here to create that shuffle, and we spin forever.
732	if (InsertionBlock != InsElt->getParent())
733	return false;
734
735	// TODO: This restriction matches the check in visitInsertElementInst() and
736	// prevents an infinite loop caused by not turning the extract/insert pair
737	// into a shuffle. We really should not need either check, but we're lacking
738	// folds for shufflevectors because we're afraid to generate shuffle masks
739	// that the backend can't handle.
740	if (InsElt->hasOneUse() && isa<InsertElementInst>(Val: InsElt->user_back()))
741	return false;
742
743	auto WideVec = new* ShuffleVectorInst (ExtVecOp, ExtendMask);
744
745	// Insert the new shuffle after the vector operand of the extract is defined
746	// (as long as it's not a PHI) or at the start of the basic block of the
747	// extract, so any subsequent extracts in the same basic block can use it.
748	// TODO: Insert before the earliest ExtractElementInst that is replaced.
749	if (ExtVecOpInst && !isa<PHINode>(Val: ExtVecOpInst))
750	WideVec->insertAfter(InsertPos: ExtVecOpInst);
751	else
752	IC.InsertNewInstWith(New: WideVec, Old: ExtElt->getParent()->getFirstInsertionPt());
753
754	// Replace extracts from the original narrow vector with extracts from the new
755	// wide vector.
756	for (User *U : ExtVecOp->users()) {
757	ExtractElementInst *OldExt = dyn_cast<ExtractElementInst>(Val: U);
758	if (!OldExt \|\| OldExt->getParent() != WideVec->getParent())
759	continue;
760	auto *NewExt = ExtractElementInst::Create(Vec: WideVec, Idx: OldExt->getOperand(i_nocapture: `1`));
761	IC.InsertNewInstWith(New: NewExt, Old: OldExt->getIterator());
762	IC.replaceInstUsesWith(I&: *OldExt, V: NewExt);
763	// Add the old extracts to the worklist for DCE. We can't remove the
764	// extracts directly, because they may still be used by the calling code.
765	IC.addToWorklist(I: OldExt);
766	}
767
768	return true;
769	}
770
771	/// We are building a shuffle to create V, which is a sequence of insertelement,
772	/// extractelement pairs. If PermittedRHS is set, then we must either use it or
773	/// not rely on the second vector source. Return a std::pair containing the
774	/// left and right vectors of the proposed shuffle (or 0), and set the Mask
775	/// parameter as required.
776	///
777	/// Note: we intentionally don't try to fold earlier shuffles since they have
778	/// often been chosen carefully to be efficiently implementable on the target.
779	using ShuffleOps = std::pair<Value , Value >;
780
781	static ShuffleOps collectShuffleElements(Value V, SmallVectorImpl<int*> &Mask,
782	Value *PermittedRHS,
783	InstCombinerImpl &IC, bool &Rerun) {
784	assert(V->getType()->isVectorTy() && "Invalid shuffle!");
785	unsigned NumElts = cast<FixedVectorType>(Val: V->getType())->getNumElements();
786
787	if (match(V, P: m_Poison())) {
788	Mask.assign(NumElts, Elt: -`1`);
789	return std::make_pair(
790	x: PermittedRHS ? PoisonValue::get(T: PermittedRHS->getType()) : V, y: nullptr);
791	}
792
793	if (isa<ConstantAggregateZero>(Val: V)) {
794	Mask.assign(NumElts, Elt: `0`);
795	return std::make_pair(x&: V, y: nullptr);
796	}
797
798	if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(Val: V)) {
799	// If this is an insert of an extract from some other vector, include it.
800	Value *VecOp = IEI->getOperand(i_nocapture: `0`);
801	Value *ScalarOp = IEI->getOperand(i_nocapture: `1`);
802	Value *IdxOp = IEI->getOperand(i_nocapture: `2`);
803
804	if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(Val: ScalarOp)) {
805	if (isa<ConstantInt>(Val: EI->getOperand(i_nocapture: `1`)) && isa<ConstantInt>(Val: IdxOp)) {
806	unsigned ExtractedIdx =
807	cast<ConstantInt>(Val: EI->getOperand(i_nocapture: `1`))->getZExtValue();
808	unsigned InsertedIdx = cast<ConstantInt>(Val: IdxOp)->getZExtValue();
809
810	// Either the extracted from or inserted into vector must be RHSVec,
811	// otherwise we'd end up with a shuffle of three inputs.
812	if (EI->getOperand(i_nocapture: `0`) == PermittedRHS \|\| PermittedRHS == nullptr) {
813	Value *RHS = EI->getOperand(i_nocapture: `0`);
814	ShuffleOps LR = collectShuffleElements(V: VecOp, Mask, PermittedRHS: RHS, IC, Rerun);
815	assert(LR.second == nullptr \|\| LR.second == RHS);
816
817	if (LR.first->getType() != RHS->getType()) {
818	// Although we are giving up for now, see if we can create extracts
819	// that match the inserts for another round of combining.
820	if (replaceExtractElements(InsElt: IEI, ExtElt: EI, IC))
821	Rerun = true;
822
823	// We tried our best, but we can't find anything compatible with RHS
824	// further up the chain. Return a trivial shuffle.
825	for (unsigned i = `0`; i < NumElts; ++i)
826	Mask [i] = i;
827	return std::make_pair(x&: V, y: nullptr);
828	}
829
830	unsigned NumLHSElts =
831	cast<FixedVectorType>(Val: RHS->getType())->getNumElements();
832	Mask [InsertedIdx % NumElts] = NumLHSElts + ExtractedIdx;
833	return std::make_pair(x&: LR.first, y&: RHS);
834	}
835
836	if (VecOp == PermittedRHS) {
837	// We've gone as far as we can: anything on the other side of the
838	// extractelement will already have been converted into a shuffle.
839	unsigned NumLHSElts =
840	cast<FixedVectorType>(Val: EI->getOperand(i_nocapture: `0`)->getType())
841	->getNumElements();
842	for (unsigned i = `0`; i != NumElts; ++i)
843	Mask.push_back(Elt: i == InsertedIdx ? ExtractedIdx : NumLHSElts + i);
844	return std::make_pair(x: EI->getOperand(i_nocapture: `0`), y&: PermittedRHS);
845	}
846
847	// If this insertelement is a chain that comes from exactly these two
848	// vectors, return the vector and the effective shuffle.
849	if (EI->getOperand(i_nocapture: `0`)->getType() == PermittedRHS->getType() &&
850	collectSingleShuffleElements(V: IEI, LHS: EI->getOperand(i_nocapture: `0`), RHS: PermittedRHS,
851	Mask))
852	return std::make_pair(x: EI->getOperand(i_nocapture: `0`), y&: PermittedRHS);
853	}
854	}
855	}
856
857	// Otherwise, we can't do anything fancy. Return an identity vector.
858	for (unsigned i = `0`; i != NumElts; ++i)
859	Mask.push_back(Elt: i);
860	return std::make_pair(x&: V, y: nullptr);
861	}
862
863	/// Look for chain of insertvalue's that fully define an aggregate, and trace
864	/// back the values inserted, see if they are all were extractvalue'd from
865	/// the same source aggregate from the exact same element indexes.
866	/// If they were, just reuse the source aggregate.
867	/// This potentially deals with PHI indirections.
868	Instruction *InstCombinerImpl::foldAggregateConstructionIntoAggregateReuse(
869	InsertValueInst &OrigIVI) {
870	Type *AggTy = OrigIVI.getType();
871	unsigned NumAggElts;
872	switch (AggTy->getTypeID()) {
873	case Type::StructTyID:
874	NumAggElts = AggTy->getStructNumElements();
875	break;
876	case Type::ArrayTyID:
877	NumAggElts = AggTy->getArrayNumElements();
878	break;
879	default:
880	llvm_unreachable("Unhandled aggregate type?");
881	}
882
883	// Arbitrary aggregate size cut-off. Motivation for limit of 2 is to be able
884	// to handle clang C++ exception struct (which is hardcoded as {i8, i32}),*
885	// FIXME: any interesting patterns to be caught with larger limit?
886	assert(NumAggElts > `0` && "Aggregate should have elements.");
887	if (NumAggElts > `2`)
888	return nullptr;
889
890	static constexpr auto NotFound = std::nullopt;
891	static constexpr auto FoundMismatch = nullptr;
892
893	// Try to find a value of each element of an aggregate.
894	// FIXME: deal with more complex, not one-dimensional, aggregate types
895	SmallVector<std::optional<Instruction *>, `2`> AggElts(NumAggElts, NotFound);
896
897	// Do we know values for each element of the aggregate?
898	auto KnowAllElts = [&AggElts]() {
899	return !llvm::is_contained(Range&: AggElts, Element: NotFound);
900	};
901
902	int Depth = `0`;
903
904	// Arbitrary `insertvalue` visitation depth limit. Let's be okay with
905	// every element being overwritten twice, which should never happen.
906	static const int DepthLimit = `2` * NumAggElts;
907
908	// Recurse up the chain of `insertvalue` aggregate operands until either we've
909	// reconstructed full initializer or can't visit any more `insertvalue`'s.
910	for (InsertValueInst *CurrIVI = &OrigIVI;
911	Depth < DepthLimit && CurrIVI && !KnowAllElts ();
912	CurrIVI = dyn_cast<InsertValueInst>(Val: CurrIVI->getAggregateOperand()),
913	++Depth) {
914	auto *InsertedValue =
915	dyn_cast<Instruction>(Val: CurrIVI->getInsertedValueOperand());
916	if (!InsertedValue)
917	return nullptr; // Inserted value must be produced by an instruction.
918
919	ArrayRef<unsigned int> Indices = CurrIVI->getIndices();
920
921	// Don't bother with more than single-level aggregates.
922	if (Indices.size() != `1`)
923	return nullptr; // FIXME: deal with more complex aggregates?
924
925	// Now, we may have already previously recorded the value for this element
926	// of an aggregate. If we did, that means the CurrIVI will later be
927	// overwritten with the already-recorded value. But if not, let's record it!
928	std::optional<Instruction *> &Elt = AggElts [Indices.front()];
929	Elt = Elt.value_or(u&: InsertedValue);
930
931	// FIXME: should we handle chain-terminating undef base operand?
932	}
933
934	// Was that sufficient to deduce the full initializer for the aggregate?
935	if (!KnowAllElts ())
936	return nullptr; // Give up then.
937
938	// We now want to find the source[s] of the aggregate elements we've found.
939	// And with "source" we mean the original aggregate[s] from which
940	// the inserted elements were extracted. This may require PHI translation.
941
942	enum class AggregateDescription {
943	/// When analyzing the value that was inserted into an aggregate, we did
944	/// not manage to find defining `extractvalue` instruction to analyze.
945	NotFound,
946	/// When analyzing the value that was inserted into an aggregate, we did
947	/// manage to find defining `extractvalue` instruction[s], and everything
948	/// matched perfectly - aggregate type, element insertion/extraction index.
949	Found,
950	/// When analyzing the value that was inserted into an aggregate, we did
951	/// manage to find defining `extractvalue` instruction, but there was
952	/// a mismatch: either the source type from which the extraction was didn't
953	/// match the aggregate type into which the insertion was,
954	/// or the extraction/insertion channels mismatched,
955	/// or different elements had different source aggregates.
956	FoundMismatch
957	};
958	auto Describe = [](std::optional<Value *> SourceAggregate) {
959	if (SourceAggregate == NotFound)
960	return AggregateDescription::NotFound;
961	if (*SourceAggregate == FoundMismatch)
962	return AggregateDescription::FoundMismatch;
963	return AggregateDescription::Found;
964	};
965
966	// Given the value \p Elt that was being inserted into element \p EltIdx of an
967	// aggregate AggTy, see if \p Elt was originally defined by an
968	// appropriate extractvalue (same element index, same aggregate type).
969	// If found, return the source aggregate from which the extraction was.
970	// If \p PredBB is provided, does PHI translation of an \p Elt first.
971	auto FindSourceAggregate =
972	[&](Instruction Elt, unsigned* EltIdx, std::optional<BasicBlock *> UseBB,
973	std::optional<BasicBlock > PredBB) -> std::optional<Value > {
974	// For now(?), only deal with, at most, a single level of PHI indirection.
975	if (UseBB && PredBB)
976	Elt = dyn_cast<Instruction>(Val: Elt->DoPHITranslation(CurBB: UseBB, PredBB: PredBB));
977	// FIXME: deal with multiple levels of PHI indirection?
978
979	// Did we find an extraction?
980	auto *EVI = dyn_cast_or_null<ExtractValueInst>(Val: Elt);
981	if (!EVI)
982	return NotFound;
983
984	Value *SourceAggregate = EVI->getAggregateOperand();
985
986	// Is the extraction from the same type into which the insertion was?
987	if (SourceAggregate->getType() != AggTy)
988	return FoundMismatch;
989	// And the element index doesn't change between extraction and insertion?
990	if (EVI->getNumIndices() != `1` \|\| EltIdx != EVI->getIndices().front())
991	return FoundMismatch;
992
993	return SourceAggregate; // AggregateDescription::Found
994	};
995
996	// Given elements AggElts that were constructing an aggregate OrigIVI,
997	// see if we can find appropriate source aggregate for each of the elements,
998	// and see it's the same aggregate for each element. If so, return it.
999	auto FindCommonSourceAggregate =
1000	[&](std::optional<BasicBlock *> UseBB,
1001	std::optional<BasicBlock > PredBB) -> std::optional<Value > {
1002	std::optional<Value *> SourceAggregate;
1003
1004	for (auto I : enumerate(First&: AggElts)) {
1005	assert(Describe(SourceAggregate) != AggregateDescription::FoundMismatch &&
1006	"We don't store nullptr in SourceAggregate!");
1007	assert((Describe(SourceAggregate) == AggregateDescription::Found) ==
1008	(I.index() != `0`) &&
1009	"SourceAggregate should be valid after the first element,");
1010
1011	// For this element, is there a plausible source aggregate?
1012	// FIXME: we could special-case undef element, IFF we know that in the
1013	// source aggregate said element isn't poison.
1014	std::optional<Value *> SourceAggregateForElement =
1015	FindSourceAggregate (*I.value(), I.index(), UseBB, PredBB);
1016
1017	// Okay, what have we found? Does that correlate with previous findings?
1018
1019	// Regardless of whether or not we have previously found source
1020	// aggregate for previous elements (if any), if we didn't find one for
1021	// this element, passthrough whatever we have just found.
1022	if (Describe (SourceAggregateForElement) != AggregateDescription::Found)
1023	return SourceAggregateForElement;
1024
1025	// Okay, we have found source aggregate for this element.
1026	// Let's see what we already know from previous elements, if any.
1027	switch (Describe (SourceAggregate)) {
1028	case AggregateDescription::NotFound:
1029	// This is apparently the first element that we have examined.
1030	SourceAggregate = SourceAggregateForElement; // Record the aggregate!
1031	continue; // Great, now look at next element.
1032	case AggregateDescription::Found:
1033	// We have previously already successfully examined other elements.
1034	// Is this the same source aggregate we've found for other elements?
1035	if (SourceAggregateForElement != SourceAggregate)
1036	return FoundMismatch;
1037	continue; // Still the same aggregate, look at next element.
1038	case AggregateDescription::FoundMismatch:
1039	llvm_unreachable("Can't happen. We would have early-exited then.");
1040	};
1041	}
1042
1043	assert(Describe(SourceAggregate) == AggregateDescription::Found &&
1044	"Must be a valid Value");
1045	return *SourceAggregate;
1046	};
1047
1048	std::optional<Value *> SourceAggregate;
1049
1050	// Can we find the source aggregate without looking at predecessors?
1051	SourceAggregate = FindCommonSourceAggregate (/UseBB=/std::nullopt,
1052	/PredBB=/std::nullopt);
1053	if (Describe (SourceAggregate) != AggregateDescription::NotFound) {
1054	if (Describe (SourceAggregate) == AggregateDescription::FoundMismatch)
1055	return nullptr; // Conflicting source aggregates!
1056	++NumAggregateReconstructionsSimplified;
1057	return replaceInstUsesWith(I&: OrigIVI, V: *SourceAggregate);
1058	}
1059
1060	// Okay, apparently we need to look at predecessors.
1061
1062	// We should be smart about picking the "use" basic block, which will be the
1063	// merge point for aggregate, where we'll insert the final PHI that will be
1064	// used instead of OrigIVI. Basic block of OrigIVI is not* the right choice.*
1065	// We should look in which blocks each of the AggElts is being defined,
1066	// they all should be defined in the same basic block.
1067	BasicBlock UseBB = nullptr*;
1068
1069	for (const std::optional<Instruction *> &I : AggElts) {
1070	BasicBlock BB = (I)->getParent();
1071	// If it's the first instruction we've encountered, record the basic block.
1072	if (!UseBB) {
1073	UseBB = BB;
1074	continue;
1075	}
1076	// Otherwise, this must be the same basic block we've seen previously.
1077	if (UseBB != BB)
1078	return nullptr;
1079	}
1080
1081	// If all* of the elements are basic-block-independent, meaning they are*
1082	// either function arguments, or constant expressions, then if we didn't
1083	// handle them without predecessor-aware handling, we won't handle them now.
1084	if (!UseBB)
1085	return nullptr;
1086
1087	// If we didn't manage to find source aggregate without looking at
1088	// predecessors, and there are no predecessors to look at, then we're done.
1089	if (pred_empty(BB: UseBB))
1090	return nullptr;
1091
1092	// Arbitrary predecessor count limit.
1093	static const int PredCountLimit = `64`;
1094
1095	// Cache the (non-uniqified!) list of predecessors in a vector,
1096	// checking the limit at the same time for efficiency.
1097	SmallVector<BasicBlock , `4`> Preds; // May have duplicates!*
1098	for (BasicBlock *Pred : predecessors(BB: UseBB)) {
1099	// Don't bother if there are too many predecessors.
1100	if (Preds.size() >= PredCountLimit) // FIXME: only count duplicates once?
1101	return nullptr;
1102	Preds.emplace_back(Args&: Pred);
1103	}
1104
1105	// For each predecessor, what is the source aggregate,
1106	// from which all the elements were originally extracted from?
1107	// Note that we want for the map to have stable iteration order!
1108	SmallDenseMap<BasicBlock , Value , `4`> SourceAggregates;
1109	for (BasicBlock *Pred : Preds) {
1110	std::pair<decltype(SourceAggregates)::iterator, bool> IV =
1111	SourceAggregates.insert(KV: {Pred, nullptr});
1112	// Did we already evaluate this predecessor?
1113	if (!IV.second)
1114	continue;
1115
1116	// Let's hope that when coming from predecessor Pred, all elements of the
1117	// aggregate produced by OrigIVI must have been originally extracted from
1118	// the same aggregate. Is that so? Can we find said original aggregate?
1119	SourceAggregate = FindCommonSourceAggregate (UseBB, Pred);
1120	if (Describe (SourceAggregate) != AggregateDescription::Found)
1121	return nullptr; // Give up.
1122	IV.first ->second = *SourceAggregate;
1123	}
1124
1125	// All good! Now we just need to thread the source aggregates here.
1126	// Note that we have to insert the new PHI here, ourselves, because we can't
1127	// rely on InstCombinerImpl::run() inserting it into the right basic block.
1128	// Note that the same block can be a predecessor more than once,
1129	// and we need to preserve that invariant for the PHI node.
1130	BuilderTy::InsertPointGuard Guard(Builder);
1131	Builder.SetInsertPoint(TheBB: UseBB, IP: UseBB->getFirstNonPHIIt());
1132	auto *PHI =
1133	Builder.CreatePHI(Ty: AggTy, NumReservedValues: Preds.size(), Name: OrigIVI.getName() + ".merged");
1134	for (BasicBlock *Pred : Preds)
1135	PHI->addIncoming(V: SourceAggregates [Pred], BB: Pred);
1136
1137	++NumAggregateReconstructionsSimplified;
1138	return replaceInstUsesWith(I&: OrigIVI, V: PHI);
1139	}
1140
1141	/// Try to find redundant insertvalue instructions, like the following ones:
1142	/// %0 = insertvalue { i8, i32 } undef, i8 %x, 0
1143	/// %1 = insertvalue { i8, i32 } %0, i8 %y, 0
1144	/// Here the second instruction inserts values at the same indices, as the
1145	/// first one, making the first one redundant.
1146	/// It should be transformed to:
1147	/// %0 = insertvalue { i8, i32 } undef, i8 %y, 0
1148	Instruction *InstCombinerImpl::visitInsertValueInst(InsertValueInst &I) {
1149	if (Value *V = simplifyInsertValueInst(
1150	Agg: I.getAggregateOperand(), Val: I.getInsertedValueOperand(), Idxs: I.getIndices(),
1151	Q: SQ.getWithInstruction(I: &I)))
1152	return replaceInstUsesWith(I, V);
1153
1154	bool IsRedundant = false;
1155	ArrayRef<unsigned int> FirstIndices = I.getIndices();
1156
1157	// If there is a chain of insertvalue instructions (each of them except the
1158	// last one has only one use and it's another insertvalue insn from this
1159	// chain), check if any of the 'children' uses the same indices as the first
1160	// instruction. In this case, the first one is redundant.
1161	Value *V = &I;
1162	unsigned Depth = `0`;
1163	while (V->hasOneUse() && Depth < `10`) {
1164	User *U = V->user_back();
1165	auto UserInsInst = dyn_cast<InsertValueInst>(Val: U);
1166	if (!UserInsInst \|\| U->getOperand(i: `0`) != V)
1167	break;
1168	if (UserInsInst->getIndices() == FirstIndices) {
1169	IsRedundant = true;
1170	break;
1171	}
1172	V = UserInsInst;
1173	Depth++;
1174	}
1175
1176	if (IsRedundant)
1177	return replaceInstUsesWith(I, V: I.getOperand(i_nocapture: `0`));
1178
1179	if (Instruction *NewI = foldAggregateConstructionIntoAggregateReuse(OrigIVI&: I))
1180	return NewI;
1181
1182	return nullptr;
1183	}
1184
1185	static bool isShuffleEquivalentToSelect(ShuffleVectorInst &Shuf) {
1186	// Can not analyze scalable type, the number of elements is not a compile-time
1187	// constant.
1188	if (isa<ScalableVectorType>(Val: Shuf.getOperand(i_nocapture: `0`)->getType()))
1189	return false;
1190
1191	int MaskSize = Shuf.getShuffleMask().size();
1192	int VecSize =
1193	cast<FixedVectorType>(Val: Shuf.getOperand(i_nocapture: `0`)->getType())->getNumElements();
1194
1195	// A vector select does not change the size of the operands.
1196	if (MaskSize != VecSize)
1197	return false;
1198
1199	// Each mask element must be undefined or choose a vector element from one of
1200	// the source operands without crossing vector lanes.
1201	for (int i = `0`; i != MaskSize; ++i) {
1202	int Elt = Shuf.getMaskValue(Elt: i);
1203	if (Elt != -`1` && Elt != i && Elt != i + VecSize)
1204	return false;
1205	}
1206
1207	return true;
1208	}
1209
1210	/// Turn a chain of inserts that splats a value into an insert + shuffle:
1211	/// insertelt(insertelt(insertelt(insertelt X, %k, 0), %k, 1), %k, 2) ... ->
1212	/// shufflevector(insertelt(X, %k, 0), poison, zero)
1213	static Instruction *foldInsSequenceIntoSplat(InsertElementInst &InsElt) {
1214	// We are interested in the last insert in a chain. So if this insert has a
1215	// single user and that user is an insert, bail.
1216	if (InsElt.hasOneUse() && isa<InsertElementInst>(Val: InsElt.user_back()))
1217	return nullptr;
1218
1219	VectorType *VecTy = InsElt.getType();
1220	// Can not handle scalable type, the number of elements is not a compile-time
1221	// constant.
1222	if (isa<ScalableVectorType>(Val: VecTy))
1223	return nullptr;
1224	unsigned NumElements = cast<FixedVectorType>(Val: VecTy)->getNumElements();
1225
1226	// Do not try to do this for a one-element vector, since that's a nop,
1227	// and will cause an inf-loop.
1228	if (NumElements == `1`)
1229	return nullptr;
1230
1231	Value *SplatVal = InsElt.getOperand(i_nocapture: `1`);
1232	InsertElementInst *CurrIE = &InsElt;
1233	SmallBitVector ElementPresent(NumElements, false);
1234	InsertElementInst FirstIE = nullptr*;
1235
1236	// Walk the chain backwards, keeping track of which indices we inserted into,
1237	// until we hit something that isn't an insert of the splatted value.
1238	while (CurrIE) {
1239	auto *Idx = dyn_cast<ConstantInt>(Val: CurrIE->getOperand(i_nocapture: `2`));
1240	if (!Idx \|\| CurrIE->getOperand(i_nocapture: `1`) != SplatVal)
1241	return nullptr;
1242
1243	auto *NextIE = dyn_cast<InsertElementInst>(Val: CurrIE->getOperand(i_nocapture: `0`));
1244	// Check none of the intermediate steps have any additional uses, except
1245	// for the root insertelement instruction, which can be re-used, if it
1246	// inserts at position 0.
1247	if (CurrIE != &InsElt &&
1248	(!CurrIE->hasOneUse() && (NextIE != nullptr \|\| !Idx->isZero())))
1249	return nullptr;
1250
1251	ElementPresent [Idx->getZExtValue()] = true;
1252	FirstIE = CurrIE;
1253	CurrIE = NextIE;
1254	}
1255
1256	// If this is just a single insertelement (not a sequence), we are done.
1257	if (FirstIE == &InsElt)
1258	return nullptr;
1259
1260	// If we are not inserting into a poison vector, make sure we've seen an
1261	// insert into every element.
1262	// TODO: If the base vector is not undef, it might be better to create a splat
1263	// and then a select-shuffle (blend) with the base vector.
1264	if (!match(V: FirstIE->getOperand(i_nocapture: `0`), P: m_Poison()))
1265	if (!ElementPresent.all())
1266	return nullptr;
1267
1268	// Create the insert + shuffle.
1269	Type *Int64Ty = Type::getInt64Ty(C&: InsElt.getContext());
1270	PoisonValue *PoisonVec = PoisonValue::get(T: VecTy);
1271	Constant *Zero = ConstantInt::get(Ty: Int64Ty, V: `0`);
1272	if (!cast<ConstantInt>(Val: FirstIE->getOperand(i_nocapture: `2`))->isZero())
1273	FirstIE = InsertElementInst::Create(Vec: PoisonVec, NewElt: SplatVal, Idx: Zero, NameStr: "",
1274	InsertBefore: InsElt.getIterator());
1275
1276	// Splat from element 0, but replace absent elements with poison in the mask.
1277	SmallVector<int, `16`> Mask(NumElements, `0`);
1278	for (unsigned i = `0`; i != NumElements; ++i)
1279	if (!ElementPresent [i])
1280	Mask [i] = -`1`;
1281
1282	return new ShuffleVectorInst (FirstIE, Mask);
1283	}
1284
1285	/// Try to fold an insert element into an existing splat shuffle by changing
1286	/// the shuffle's mask to include the index of this insert element.
1287	static Instruction *foldInsEltIntoSplat(InsertElementInst &InsElt) {
1288	// Check if the vector operand of this insert is a canonical splat shuffle.
1289	auto *Shuf = dyn_cast<ShuffleVectorInst>(Val: InsElt.getOperand(i_nocapture: `0`));
1290	if (!Shuf \|\| !Shuf->isZeroEltSplat())
1291	return nullptr;
1292
1293	// Bail out early if shuffle is scalable type. The number of elements in
1294	// shuffle mask is unknown at compile-time.
1295	if (isa<ScalableVectorType>(Val: Shuf->getType()))
1296	return nullptr;
1297
1298	// Check for a constant insertion index.
1299	uint64_t IdxC;
1300	if (!match(V: InsElt.getOperand(i_nocapture: `2`), P: m_ConstantInt(V&: IdxC)))
1301	return nullptr;
1302
1303	// Check if the splat shuffle's input is the same as this insert's scalar op.
1304	Value *X = InsElt.getOperand(i_nocapture: `1`);
1305	Value *Op0 = Shuf->getOperand(i_nocapture: `0`);
1306	if (!match(V: Op0, P: m_InsertElt(Val: m_Undef(), Elt: m_Specific(V: X), Idx: m_ZeroInt())))
1307	return nullptr;
1308
1309	// Replace the shuffle mask element at the index of this insert with a zero.
1310	// For example:
1311	// inselt (shuf (inselt undef, X, 0), _, <0,undef,0,undef>), X, 1
1312	// --> shuf (inselt undef, X, 0), poison, <0,0,0,undef>
1313	unsigned NumMaskElts =
1314	cast<FixedVectorType>(Val: Shuf->getType())->getNumElements();
1315	SmallVector<int, `16`> NewMask(NumMaskElts);
1316	for (unsigned i = `0`; i != NumMaskElts; ++i)
1317	NewMask [i] = i == IdxC ? `0` : Shuf->getMaskValue(Elt: i);
1318
1319	return new ShuffleVectorInst (Op0, NewMask);
1320	}
1321
1322	/// Try to fold an extract+insert element into an existing identity shuffle by
1323	/// changing the shuffle's mask to include the index of this insert element.
1324	static Instruction *foldInsEltIntoIdentityShuffle(InsertElementInst &InsElt) {
1325	// Check if the vector operand of this insert is an identity shuffle.
1326	auto *Shuf = dyn_cast<ShuffleVectorInst>(Val: InsElt.getOperand(i_nocapture: `0`));
1327	if (!Shuf \|\| !match(V: Shuf->getOperand(i_nocapture: `1`), P: m_Poison()) \|\|
1328	!(Shuf->isIdentityWithExtract() \|\| Shuf->isIdentityWithPadding()))
1329	return nullptr;
1330
1331	// Bail out early if shuffle is scalable type. The number of elements in
1332	// shuffle mask is unknown at compile-time.
1333	if (isa<ScalableVectorType>(Val: Shuf->getType()))
1334	return nullptr;
1335
1336	// Check for a constant insertion index.
1337	uint64_t IdxC;
1338	if (!match(V: InsElt.getOperand(i_nocapture: `2`), P: m_ConstantInt(V&: IdxC)))
1339	return nullptr;
1340
1341	// Check if this insert's scalar op is extracted from the identity shuffle's
1342	// input vector.
1343	Value *Scalar = InsElt.getOperand(i_nocapture: `1`);
1344	Value *X = Shuf->getOperand(i_nocapture: `0`);
1345	if (!match(V: Scalar, P: m_ExtractElt(Val: m_Specific(V: X), Idx: m_SpecificInt(V: IdxC))))
1346	return nullptr;
1347
1348	// Replace the shuffle mask element at the index of this extract+insert with
1349	// that same index value.
1350	// For example:
1351	// inselt (shuf X, IdMask), (extelt X, IdxC), IdxC --> shuf X, IdMask'
1352	unsigned NumMaskElts =
1353	cast<FixedVectorType>(Val: Shuf->getType())->getNumElements();
1354	SmallVector<int, `16`> NewMask(NumMaskElts);
1355	ArrayRef<int> OldMask = Shuf->getShuffleMask();
1356	for (unsigned i = `0`; i != NumMaskElts; ++i) {
1357	if (i != IdxC) {
1358	// All mask elements besides the inserted element remain the same.
1359	NewMask [i] = OldMask [i];
1360	} else if (OldMask [i] == (int)IdxC) {
1361	// If the mask element was already set, there's nothing to do
1362	// (demanded elements analysis may unset it later).
1363	return nullptr;
1364	} else {
1365	assert(OldMask[i] == PoisonMaskElem &&
1366	"Unexpected shuffle mask element for identity shuffle");
1367	NewMask [i] = IdxC;
1368	}
1369	}
1370
1371	return new ShuffleVectorInst (X, Shuf->getOperand(i_nocapture: `1`), NewMask);
1372	}
1373
1374	/// If we have an insertelement instruction feeding into another insertelement
1375	/// and the 2nd is inserting a constant into the vector, canonicalize that
1376	/// constant insertion before the insertion of a variable:
1377	///
1378	/// insertelement (insertelement X, Y, IdxC1), ScalarC, IdxC2 -->
1379	/// insertelement (insertelement X, ScalarC, IdxC2), Y, IdxC1
1380	///
1381	/// This has the potential of eliminating the 2nd insertelement instruction
1382	/// via constant folding of the scalar constant into a vector constant.
1383	static Instruction *hoistInsEltConst(InsertElementInst &InsElt2,
1384	InstCombiner::BuilderTy &Builder) {
1385	auto *InsElt1 = dyn_cast<InsertElementInst>(Val: InsElt2.getOperand(i_nocapture: `0`));
1386	if (!InsElt1 \|\| !InsElt1->hasOneUse())
1387	return nullptr;
1388
1389	Value X, Y;
1390	Constant *ScalarC;
1391	ConstantInt IdxC1, IdxC2;
1392	if (match(V: InsElt1->getOperand(i_nocapture: `0`), P: m_Value(V&: X)) &&
1393	match(V: InsElt1->getOperand(i_nocapture: `1`), P: m_Value(V&: Y)) && !isa<Constant>(Val: Y) &&
1394	match(V: InsElt1->getOperand(i_nocapture: `2`), P: m_ConstantInt(CI&: IdxC1)) &&
1395	match(V: InsElt2.getOperand(i_nocapture: `1`), P: m_Constant(C&: ScalarC)) &&
1396	match(V: InsElt2.getOperand(i_nocapture: `2`), P: m_ConstantInt(CI&: IdxC2)) && IdxC1 != IdxC2) {
1397	Value *NewInsElt1 = Builder.CreateInsertElement(Vec: X, NewElt: ScalarC, Idx: IdxC2);
1398	return InsertElementInst::Create(Vec: NewInsElt1, NewElt: Y, Idx: IdxC1);
1399	}
1400
1401	return nullptr;
1402	}
1403
1404	/// insertelt (shufflevector X, CVec, Mask\|insertelt X, C1, CIndex1), C, CIndex
1405	/// --> shufflevector X, CVec', Mask'
1406	static Instruction *foldConstantInsEltIntoShuffle(InsertElementInst &InsElt) {
1407	auto *Inst = dyn_cast<Instruction>(Val: InsElt.getOperand(i_nocapture: `0`));
1408	// Bail out if the parent has more than one use. In that case, we'd be
1409	// replacing the insertelt with a shuffle, and that's not a clear win.
1410	if (!Inst \|\| !Inst->hasOneUse())
1411	return nullptr;
1412	if (auto *Shuf = dyn_cast<ShuffleVectorInst>(Val: InsElt.getOperand(i_nocapture: `0`))) {
1413	// The shuffle must have a constant vector operand. The insertelt must have
1414	// a constant scalar being inserted at a constant position in the vector.
1415	Constant ShufConstVec, InsEltScalar;
1416	uint64_t InsEltIndex;
1417	if (!match(V: Shuf->getOperand(i_nocapture: `1`), P: m_Constant(C&: ShufConstVec)) \|\|
1418	!match(V: InsElt.getOperand(i_nocapture: `1`), P: m_Constant(C&: InsEltScalar)) \|\|
1419	!match(V: InsElt.getOperand(i_nocapture: `2`), P: m_ConstantInt(V&: InsEltIndex)))
1420	return nullptr;
1421
1422	// Adding an element to an arbitrary shuffle could be expensive, but a
1423	// shuffle that selects elements from vectors without crossing lanes is
1424	// assumed cheap.
1425	// If we're just adding a constant into that shuffle, it will still be
1426	// cheap.
1427	if (!isShuffleEquivalentToSelect(Shuf&: *Shuf))
1428	return nullptr;
1429
1430	// From the above 'select' check, we know that the mask has the same number
1431	// of elements as the vector input operands. We also know that each constant
1432	// input element is used in its lane and can not be used more than once by
1433	// the shuffle. Therefore, replace the constant in the shuffle's constant
1434	// vector with the insertelt constant. Replace the constant in the shuffle's
1435	// mask vector with the insertelt index plus the length of the vector
1436	// (because the constant vector operand of a shuffle is always the 2nd
1437	// operand).
1438	ArrayRef<int> Mask = Shuf->getShuffleMask();
1439	unsigned NumElts = Mask.size();
1440	SmallVector<Constant *, `16`> NewShufElts(NumElts);
1441	SmallVector<int, `16`> NewMaskElts(NumElts);
1442	for (unsigned I = `0`; I != NumElts; ++I) {
1443	if (I == InsEltIndex) {
1444	NewShufElts [I] = InsEltScalar;
1445	NewMaskElts [I] = InsEltIndex + NumElts;
1446	} else {
1447	// Copy over the existing values.
1448	NewShufElts [I] = ShufConstVec->getAggregateElement(Elt: I);
1449	NewMaskElts [I] = Mask [I];
1450	}
1451
1452	// Bail if we failed to find an element.
1453	if (!NewShufElts [I])
1454	return nullptr;
1455	}
1456
1457	// Create new operands for a shuffle that includes the constant of the
1458	// original insertelt. The old shuffle will be dead now.
1459	return new ShuffleVectorInst (Shuf->getOperand(i_nocapture: `0`),
1460	ConstantVector::get(V: NewShufElts), NewMaskElts);
1461	} else if (auto *IEI = dyn_cast<InsertElementInst>(Val: Inst)) {
1462	// Transform sequences of insertelements ops with constant data/indexes into
1463	// a single shuffle op.
1464	// Can not handle scalable type, the number of elements needed to create
1465	// shuffle mask is not a compile-time constant.
1466	if (isa<ScalableVectorType>(Val: InsElt.getType()))
1467	return nullptr;
1468	unsigned NumElts =
1469	cast<FixedVectorType>(Val: InsElt.getType())->getNumElements();
1470
1471	uint64_t InsertIdx[`2`];
1472	Constant *Val[`2`];
1473	if (!match(V: InsElt.getOperand(i_nocapture: `2`), P: m_ConstantInt(V&: InsertIdx[`0`])) \|\|
1474	!match(V: InsElt.getOperand(i_nocapture: `1`), P: m_Constant(C&: Val[`0`])) \|\|
1475	!match(V: IEI->getOperand(i_nocapture: `2`), P: m_ConstantInt(V&: InsertIdx[`1`])) \|\|
1476	!match(V: IEI->getOperand(i_nocapture: `1`), P: m_Constant(C&: Val[`1`])))
1477	return nullptr;
1478	SmallVector<Constant *, `16`> Values(NumElts);
1479	SmallVector<int, `16`> Mask(NumElts);
1480	auto ValI = std::begin(arr&: Val);
1481	// Generate new constant vector and mask.
1482	// We have 2 values/masks from the insertelements instructions. Insert them
1483	// into new value/mask vectors.
1484	for (uint64_t I : InsertIdx) {
1485	if (!Values [I]) {
1486	Values [I] = *ValI;
1487	Mask [I] = NumElts + I;
1488	}
1489	++ValI;
1490	}
1491	// Remaining values are filled with 'poison' values.
1492	for (unsigned I = `0`; I < NumElts; ++I) {
1493	if (!Values [I]) {
1494	Values [I] = PoisonValue::get(T: InsElt.getType()->getElementType());
1495	Mask [I] = I;
1496	}
1497	}
1498	// Create new operands for a shuffle that includes the constant of the
1499	// original insertelt.
1500	return new ShuffleVectorInst (IEI->getOperand(i_nocapture: `0`),
1501	ConstantVector::get(V: Values), Mask);
1502	}
1503	return nullptr;
1504	}
1505
1506	/// If both the base vector and the inserted element are extended from the same
1507	/// type, do the insert element in the narrow source type followed by extend.
1508	/// TODO: This can be extended to include other cast opcodes, but particularly
1509	/// if we create a wider insertelement, make sure codegen is not harmed.
1510	static Instruction *narrowInsElt(InsertElementInst &InsElt,
1511	InstCombiner::BuilderTy &Builder) {
1512	// We are creating a vector extend. If the original vector extend has another
1513	// use, that would mean we end up with 2 vector extends, so avoid that.
1514	// TODO: We could ease the use-clause to "if at least one op has one use"
1515	// (assuming that the source types match - see next TODO comment).
1516	Value *Vec = InsElt.getOperand(i_nocapture: `0`);
1517	if (!Vec->hasOneUse())
1518	return nullptr;
1519
1520	Value *Scalar = InsElt.getOperand(i_nocapture: `1`);
1521	Value X, Y;
1522	CastInst::CastOps CastOpcode;
1523	if (match(V: Vec, P: m_FPExt(Op: m_Value(V&: X))) && match(V: Scalar, P: m_FPExt(Op: m_Value(V&: Y))))
1524	CastOpcode = Instruction::FPExt;
1525	else if (match(V: Vec, P: m_SExt(Op: m_Value(V&: X))) && match(V: Scalar, P: m_SExt(Op: m_Value(V&: Y))))
1526	CastOpcode = Instruction::SExt;
1527	else if (match(V: Vec, P: m_ZExt(Op: m_Value(V&: X))) && match(V: Scalar, P: m_ZExt(Op: m_Value(V&: Y))))
1528	CastOpcode = Instruction::ZExt;
1529	else
1530	return nullptr;
1531
1532	// TODO: We can allow mismatched types by creating an intermediate cast.
1533	if (X->getType()->getScalarType() != Y->getType())
1534	return nullptr;
1535
1536	// inselt (ext X), (ext Y), Index --> ext (inselt X, Y, Index)
1537	Value *NewInsElt = Builder.CreateInsertElement(Vec: X, NewElt: Y, Idx: InsElt.getOperand(i_nocapture: `2`));
1538	return CastInst::Create(CastOpcode, S: NewInsElt, Ty: InsElt.getType());
1539	}
1540
1541	/// If we are inserting 2 halves of a value into adjacent elements of a vector,
1542	/// try to convert to a single insert with appropriate bitcasts.
1543	static Instruction *foldTruncInsEltPair(InsertElementInst &InsElt,
1544	bool IsBigEndian,
1545	InstCombiner::BuilderTy &Builder) {
1546	Value *VecOp = InsElt.getOperand(i_nocapture: `0`);
1547	Value *ScalarOp = InsElt.getOperand(i_nocapture: `1`);
1548	Value *IndexOp = InsElt.getOperand(i_nocapture: `2`);
1549
1550	// Pattern depends on endian because we expect lower index is inserted first.
1551	// Big endian:
1552	// inselt (inselt BaseVec, (trunc (lshr X, BW/2), Index0), (trunc X), Index1
1553	// Little endian:
1554	// inselt (inselt BaseVec, (trunc X), Index0), (trunc (lshr X, BW/2)), Index1
1555	// Note: It is not safe to do this transform with an arbitrary base vector
1556	// because the bitcast of that vector to fewer/larger elements could
1557	// allow poison to spill into an element that was not poison before.
1558	// TODO: Detect smaller fractions of the scalar.
1559	// TODO: One-use checks are conservative.
1560	auto *VTy = dyn_cast<FixedVectorType>(Val: InsElt.getType());
1561	Value Scalar0, BaseVec;
1562	uint64_t Index0, Index1;
1563	if (!VTy \|\| (VTy->getNumElements() & `1`) \|\|
1564	!match(V: IndexOp, P: m_ConstantInt(V&: Index1)) \|\|
1565	!match(V: VecOp, P: m_InsertElt(Val: m_Value(V&: BaseVec), Elt: m_Value(V&: Scalar0),
1566	Idx: m_ConstantInt(V&: Index0))) \|\|
1567	!match(V: BaseVec, P: m_Undef()))
1568	return nullptr;
1569
1570	// The first insert must be to the index one less than this one, and
1571	// the first insert must be to an even index.
1572	if (Index0 + `1` != Index1 \|\| Index0 & `1`)
1573	return nullptr;
1574
1575	// For big endian, the high half of the value should be inserted first.
1576	// For little endian, the low half of the value should be inserted first.
1577	Value *X;
1578	uint64_t ShAmt;
1579	if (IsBigEndian) {
1580	if (!match(V: ScalarOp, P: m_Trunc(Op: m_Value(V&: X))) \|\|
1581	!match(V: Scalar0, P: m_Trunc(Op: m_LShr(L: m_Specific(V: X), R: m_ConstantInt(V&: ShAmt)))))
1582	return nullptr;
1583	} else {
1584	if (!match(V: Scalar0, P: m_Trunc(Op: m_Value(V&: X))) \|\|
1585	!match(V: ScalarOp, P: m_Trunc(Op: m_LShr(L: m_Specific(V: X), R: m_ConstantInt(V&: ShAmt)))))
1586	return nullptr;
1587	}
1588
1589	Type *SrcTy = X->getType();
1590	unsigned ScalarWidth = SrcTy->getScalarSizeInBits();
1591	unsigned VecEltWidth = VTy->getScalarSizeInBits();
1592	if (ScalarWidth != VecEltWidth * `2` \|\| ShAmt != VecEltWidth)
1593	return nullptr;
1594
1595	// Bitcast the base vector to a vector type with the source element type.
1596	Type *CastTy = FixedVectorType::get(ElementType: SrcTy, NumElts: VTy->getNumElements() / `2`);
1597	Value *CastBaseVec = Builder.CreateBitCast(V: BaseVec, DestTy: CastTy);
1598
1599	// Scale the insert index for a vector with half as many elements.
1600	// bitcast (inselt (bitcast BaseVec), X, NewIndex)
1601	uint64_t NewIndex = IsBigEndian ? Index1 / `2` : Index0 / `2`;
1602	Value *NewInsert = Builder.CreateInsertElement(Vec: CastBaseVec, NewElt: X, Idx: NewIndex);
1603	return new BitCastInst (NewInsert, VTy);
1604	}
1605
1606	Instruction *InstCombinerImpl::visitInsertElementInst(InsertElementInst &IE) {
1607	Value *VecOp = IE.getOperand(i_nocapture: `0`);
1608	Value *ScalarOp = IE.getOperand(i_nocapture: `1`);
1609	Value *IdxOp = IE.getOperand(i_nocapture: `2`);
1610
1611	if (auto *V = simplifyInsertElementInst(
1612	Vec: VecOp, Elt: ScalarOp, Idx: IdxOp, Q: SQ.getWithInstruction(I: &IE)))
1613	return replaceInstUsesWith(I&: IE, V);
1614
1615	// Canonicalize type of constant indices to i64 to simplify CSE
1616	if (auto *IndexC = dyn_cast<ConstantInt>(Val: IdxOp)) {
1617	if (auto *NewIdx = getPreferredVectorIndex(IndexC))
1618	return replaceOperand(I&: IE, OpNum: `2`, V: NewIdx);
1619
1620	Value BaseVec, OtherScalar;
1621	uint64_t OtherIndexVal;
1622	if (match(V: VecOp, P: m_OneUse(SubPattern: m_InsertElt(Val: m_Value(V&: BaseVec),
1623	Elt: m_Value(V&: OtherScalar),
1624	Idx: m_ConstantInt(V&: OtherIndexVal)))) &&
1625	!isa<Constant>(Val: OtherScalar) && OtherIndexVal > IndexC->getZExtValue()) {
1626	Value *NewIns = Builder.CreateInsertElement(Vec: BaseVec, NewElt: ScalarOp, Idx: IdxOp);
1627	return InsertElementInst::Create(Vec: NewIns, NewElt: OtherScalar,
1628	Idx: Builder.getInt64(C: OtherIndexVal));
1629	}
1630	}
1631
1632	// If the scalar is bitcast and inserted into undef, do the insert in the
1633	// source type followed by bitcast.
1634	// TODO: Generalize for insert into any constant, not just undef?
1635	Value *ScalarSrc;
1636	if (match(V: VecOp, P: m_Undef()) &&
1637	match(V: ScalarOp, P: m_OneUse(SubPattern: m_BitCast(Op: m_Value(V&: ScalarSrc)))) &&
1638	(ScalarSrc->getType()->isIntegerTy() \|\|
1639	ScalarSrc->getType()->isFloatingPointTy())) {
1640	// inselt undef, (bitcast ScalarSrc), IdxOp -->
1641	// bitcast (inselt undef, ScalarSrc, IdxOp)
1642	Type *ScalarTy = ScalarSrc->getType();
1643	Type *VecTy = VectorType::get(ElementType: ScalarTy, EC: IE.getType()->getElementCount());
1644	Constant *NewUndef = isa<PoisonValue>(Val: VecOp) ? PoisonValue::get(T: VecTy)
1645	: UndefValue::get(T: VecTy);
1646	Value *NewInsElt = Builder.CreateInsertElement(Vec: NewUndef, NewElt: ScalarSrc, Idx: IdxOp);
1647	return new BitCastInst (NewInsElt, IE.getType());
1648	}
1649
1650	// If the vector and scalar are both bitcast from the same element type, do
1651	// the insert in that source type followed by bitcast.
1652	Value *VecSrc;
1653	if (match(V: VecOp, P: m_BitCast(Op: m_Value(V&: VecSrc))) &&
1654	match(V: ScalarOp, P: m_BitCast(Op: m_Value(V&: ScalarSrc))) &&
1655	(VecOp->hasOneUse() \|\| ScalarOp->hasOneUse()) &&
1656	VecSrc->getType()->isVectorTy() && !ScalarSrc->getType()->isVectorTy() &&
1657	cast<VectorType>(Val: VecSrc->getType())->getElementType() ==
1658	ScalarSrc->getType()) {
1659	// inselt (bitcast VecSrc), (bitcast ScalarSrc), IdxOp -->
1660	// bitcast (inselt VecSrc, ScalarSrc, IdxOp)
1661	Value *NewInsElt = Builder.CreateInsertElement(Vec: VecSrc, NewElt: ScalarSrc, Idx: IdxOp);
1662	return new BitCastInst (NewInsElt, IE.getType());
1663	}
1664
1665	// If the inserted element was extracted from some other fixed-length vector
1666	// and both indexes are valid constants, try to turn this into a shuffle.
1667	// Can not handle scalable vector type, the number of elements needed to
1668	// create shuffle mask is not a compile-time constant.
1669	uint64_t InsertedIdx, ExtractedIdx;
1670	Value *ExtVecOp;
1671	if (isa<FixedVectorType>(Val: IE.getType()) &&
1672	match(V: IdxOp, P: m_ConstantInt(V&: InsertedIdx)) &&
1673	match(V: ScalarOp,
1674	P: m_ExtractElt(Val: m_Value(V&: ExtVecOp), Idx: m_ConstantInt(V&: ExtractedIdx))) &&
1675	isa<FixedVectorType>(Val: ExtVecOp->getType()) &&
1676	ExtractedIdx <
1677	cast<FixedVectorType>(Val: ExtVecOp->getType())->getNumElements()) {
1678	// TODO: Looking at the user(s) to determine if this insert is a
1679	// fold-to-shuffle opportunity does not match the usual instcombine
1680	// constraints. We should decide if the transform is worthy based only
1681	// on this instruction and its operands, but that may not work currently.
1682	//
1683	// Here, we are trying to avoid creating shuffles before reaching
1684	// the end of a chain of extract-insert pairs. This is complicated because
1685	// we do not generally form arbitrary shuffle masks in instcombine
1686	// (because those may codegen poorly), but collectShuffleElements() does
1687	// exactly that.
1688	//
1689	// The rules for determining what is an acceptable target-independent
1690	// shuffle mask are fuzzy because they evolve based on the backend's
1691	// capabilities and real-world impact.
1692	auto isShuffleRootCandidate = [](InsertElementInst &Insert) {
1693	if (!Insert.hasOneUse())
1694	return true;
1695	auto *InsertUser = dyn_cast<InsertElementInst>(Val: Insert.user_back());
1696	if (!InsertUser)
1697	return true;
1698	return false;
1699	};
1700
1701	// Try to form a shuffle from a chain of extract-insert ops.
1702	if (isShuffleRootCandidate (IE)) {
1703	bool Rerun = true;
1704	while (Rerun) {
1705	Rerun = false;
1706
1707	SmallVector<int, `16`> Mask;
1708	ShuffleOps LR =
1709	collectShuffleElements(V: &IE, Mask, PermittedRHS: nullptr, IC&: *this, Rerun);
1710
1711	// The proposed shuffle may be trivial, in which case we shouldn't
1712	// perform the combine.
1713	if (LR.first != &IE && LR.second != &IE) {
1714	// We now have a shuffle of LHS, RHS, Mask.
1715	if (LR.second == nullptr)
1716	LR.second = PoisonValue::get(T: LR.first->getType());
1717	return new ShuffleVectorInst (LR.first, LR.second, Mask);
1718	}
1719	}
1720	}
1721	}
1722
1723	if (auto VecTy = dyn_cast<FixedVectorType>(Val: VecOp->getType())) {
1724	unsigned VWidth = VecTy->getNumElements();
1725	APInt PoisonElts(VWidth, `0`);
1726	APInt AllOnesEltMask(APInt::getAllOnes(numBits: VWidth));
1727	if (Value *V = SimplifyDemandedVectorElts(V: &IE, DemandedElts: AllOnesEltMask,
1728	PoisonElts)) {
1729	if (V != &IE)
1730	return replaceInstUsesWith(I&: IE, V);
1731	return &IE;
1732	}
1733	}
1734
1735	if (Instruction *Shuf = foldConstantInsEltIntoShuffle(InsElt&: IE))
1736	return Shuf;
1737
1738	if (Instruction *NewInsElt = hoistInsEltConst(InsElt2&: IE, Builder))
1739	return NewInsElt;
1740
1741	if (Instruction *Broadcast = foldInsSequenceIntoSplat(InsElt&: IE))
1742	return Broadcast;
1743
1744	if (Instruction *Splat = foldInsEltIntoSplat(InsElt&: IE))
1745	return Splat;
1746
1747	if (Instruction *IdentityShuf = foldInsEltIntoIdentityShuffle(InsElt&: IE))
1748	return IdentityShuf;
1749
1750	if (Instruction *Ext = narrowInsElt(InsElt&: IE, Builder))
1751	return Ext;
1752
1753	if (Instruction *Ext = foldTruncInsEltPair(InsElt&: IE, IsBigEndian: DL.isBigEndian(), Builder))
1754	return Ext;
1755
1756	return nullptr;
1757	}
1758
1759	/// Return true if we can evaluate the specified expression tree if the vector
1760	/// elements were shuffled in a different order.
1761	static bool canEvaluateShuffled(Value V, ArrayRef<int*> Mask,
1762	unsigned Depth = `5`) {
1763	// We can always reorder the elements of a constant.
1764	if (isa<Constant>(Val: V))
1765	return true;
1766
1767	// We won't reorder vector arguments. No IPO here.
1768	Instruction *I = dyn_cast<Instruction>(Val: V);
1769	if (!I) return false;
1770
1771	// Two users may expect different orders of the elements. Don't try it.
1772	if (!I->hasOneUse())
1773	return false;
1774
1775	if (Depth == `0`) return false;
1776
1777	switch (I->getOpcode()) {
1778	case Instruction::UDiv:
1779	case Instruction::SDiv:
1780	case Instruction::URem:
1781	case Instruction::SRem:
1782	// Propagating an undefined shuffle mask element to integer div/rem is not
1783	// allowed because those opcodes can create immediate undefined behavior
1784	// from an undefined element in an operand.
1785	if (llvm::is_contained(Range&: Mask, Element: -`1`))
1786	return false;
1787	[[fallthrough]];
1788	case Instruction::Add:
1789	case Instruction::FAdd:
1790	case Instruction::Sub:
1791	case Instruction::FSub:
1792	case Instruction::Mul:
1793	case Instruction::FMul:
1794	case Instruction::FDiv:
1795	case Instruction::FRem:
1796	case Instruction::Shl:
1797	case Instruction::LShr:
1798	case Instruction::AShr:
1799	case Instruction::And:
1800	case Instruction::Or:
1801	case Instruction::Xor:
1802	case Instruction::ICmp:
1803	case Instruction::FCmp:
1804	case Instruction::Trunc:
1805	case Instruction::ZExt:
1806	case Instruction::SExt:
1807	case Instruction::FPToUI:
1808	case Instruction::FPToSI:
1809	case Instruction::UIToFP:
1810	case Instruction::SIToFP:
1811	case Instruction::FPTrunc:
1812	case Instruction::FPExt:
1813	case Instruction::GetElementPtr: {
1814	// Bail out if we would create longer vector ops. We could allow creating
1815	// longer vector ops, but that may result in more expensive codegen.
1816	Type *ITy = I->getType();
1817	if (ITy->isVectorTy() &&
1818	Mask.size() > cast<FixedVectorType>(Val: ITy)->getNumElements())
1819	return false;
1820	for (Value *Operand : I->operands()) {
1821	if (!canEvaluateShuffled(V: Operand, Mask, Depth: Depth - `1`))
1822	return false;
1823	}
1824	return true;
1825	}
1826	case Instruction::InsertElement: {
1827	ConstantInt *CI = dyn_cast<ConstantInt>(Val: I->getOperand(i: `2`));
1828	if (!CI) return false;
1829	int ElementNumber = CI->getLimitedValue();
1830
1831	// Verify that 'CI' does not occur twice in Mask. A single 'insertelement'
1832	// can't put an element into multiple indices.
1833	bool SeenOnce = false;
1834	for (int I : Mask) {
1835	if (I == ElementNumber) {
1836	if (SeenOnce)
1837	return false;
1838	SeenOnce = true;
1839	}
1840	}
1841	return canEvaluateShuffled(V: I->getOperand(i: `0`), Mask, Depth: Depth - `1`);
1842	}
1843	}
1844	return false;
1845	}
1846
1847	/// Rebuild a new instruction just like 'I' but with the new operands given.
1848	/// In the event of type mismatch, the type of the operands is correct.
1849	static Value buildNew(Instruction I, ArrayRef<Value*> NewOps,
1850	IRBuilderBase &Builder) {
1851	Builder.SetInsertPoint(I);
1852	switch (I->getOpcode()) {
1853	case Instruction::Add:
1854	case Instruction::FAdd:
1855	case Instruction::Sub:
1856	case Instruction::FSub:
1857	case Instruction::Mul:
1858	case Instruction::FMul:
1859	case Instruction::UDiv:
1860	case Instruction::SDiv:
1861	case Instruction::FDiv:
1862	case Instruction::URem:
1863	case Instruction::SRem:
1864	case Instruction::FRem:
1865	case Instruction::Shl:
1866	case Instruction::LShr:
1867	case Instruction::AShr:
1868	case Instruction::And:
1869	case Instruction::Or:
1870	case Instruction::Xor: {
1871	BinaryOperator *BO = cast<BinaryOperator>(Val: I);
1872	assert(NewOps.size() == `2` && "binary operator with #ops != 2");
1873	Value *New = Builder.CreateBinOp(Opc: cast<BinaryOperator>(Val: I)->getOpcode(),
1874	LHS: NewOps [`0`], RHS: NewOps [`1`]);
1875	if (auto *NewI = dyn_cast<Instruction>(Val: New)) {
1876	if (isa<OverflowingBinaryOperator>(Val: BO)) {
1877	NewI->setHasNoUnsignedWrap(BO->hasNoUnsignedWrap());
1878	NewI->setHasNoSignedWrap(BO->hasNoSignedWrap());
1879	}
1880	if (isa<PossiblyExactOperator>(Val: BO)) {
1881	NewI->setIsExact(BO->isExact());
1882	}
1883	if (isa<FPMathOperator>(Val: BO))
1884	NewI->copyFastMathFlags(I);
1885	}
1886	return New;
1887	}
1888	case Instruction::ICmp:
1889	assert(NewOps.size() == `2` && "icmp with #ops != 2");
1890	return Builder.CreateICmp(P: cast<ICmpInst>(Val: I)->getPredicate(), LHS: NewOps [`0`],
1891	RHS: NewOps [`1`]);
1892	case Instruction::FCmp:
1893	assert(NewOps.size() == `2` && "fcmp with #ops != 2");
1894	return Builder.CreateFCmp(P: cast<FCmpInst>(Val: I)->getPredicate(), LHS: NewOps [`0`],
1895	RHS: NewOps [`1`]);
1896	case Instruction::Trunc:
1897	case Instruction::ZExt:
1898	case Instruction::SExt:
1899	case Instruction::FPToUI:
1900	case Instruction::FPToSI:
1901	case Instruction::UIToFP:
1902	case Instruction::SIToFP:
1903	case Instruction::FPTrunc:
1904	case Instruction::FPExt: {
1905	// It's possible that the mask has a different number of elements from
1906	// the original cast. We recompute the destination type to match the mask.
1907	Type *DestTy = VectorType::get(
1908	ElementType: I->getType()->getScalarType(),
1909	EC: cast<VectorType>(Val: NewOps [`0`]->getType())->getElementCount());
1910	assert(NewOps.size() == `1` && "cast with #ops != 1");
1911	return Builder.CreateCast(Op: cast<CastInst>(Val: I)->getOpcode(), V: NewOps [`0`],
1912	DestTy);
1913	}
1914	case Instruction::GetElementPtr: {
1915	Value *Ptr = NewOps [`0`];
1916	ArrayRef<Value*> Idx = NewOps.slice(N: `1`);
1917	return Builder.CreateGEP(Ty: cast<GEPOperator>(Val: I)->getSourceElementType(),
1918	Ptr, IdxList: Idx, Name: "",
1919	NW: cast<GEPOperator>(Val: I)->isInBounds());
1920	}
1921	}
1922	llvm_unreachable("failed to rebuild vector instructions");
1923	}
1924
1925	static Value evaluateInDifferentElementOrder(Value V, ArrayRef<int> Mask,
1926	IRBuilderBase &Builder) {
1927	// Mask.size() does not need to be equal to the number of vector elements.
1928
1929	assert(V->getType()->isVectorTy() && "can't reorder non-vector elements");
1930	Type *EltTy = V->getType()->getScalarType();
1931
1932	if (isa<PoisonValue>(Val: V))
1933	return PoisonValue::get(T: FixedVectorType::get(ElementType: EltTy, NumElts: Mask.size()));
1934
1935	if (match(V, P: m_Undef()))
1936	return UndefValue::get(T: FixedVectorType::get(ElementType: EltTy, NumElts: Mask.size()));
1937
1938	if (isa<ConstantAggregateZero>(Val: V))
1939	return ConstantAggregateZero::get(Ty: FixedVectorType::get(ElementType: EltTy, NumElts: Mask.size()));
1940
1941	if (Constant *C = dyn_cast<Constant>(Val: V))
1942	return ConstantExpr::getShuffleVector(V1: C, V2: PoisonValue::get(T: C->getType()),
1943	Mask);
1944
1945	Instruction *I = cast<Instruction>(Val: V);
1946	switch (I->getOpcode()) {
1947	case Instruction::Add:
1948	case Instruction::FAdd:
1949	case Instruction::Sub:
1950	case Instruction::FSub:
1951	case Instruction::Mul:
1952	case Instruction::FMul:
1953	case Instruction::UDiv:
1954	case Instruction::SDiv:
1955	case Instruction::FDiv:
1956	case Instruction::URem:
1957	case Instruction::SRem:
1958	case Instruction::FRem:
1959	case Instruction::Shl:
1960	case Instruction::LShr:
1961	case Instruction::AShr:
1962	case Instruction::And:
1963	case Instruction::Or:
1964	case Instruction::Xor:
1965	case Instruction::ICmp:
1966	case Instruction::FCmp:
1967	case Instruction::Trunc:
1968	case Instruction::ZExt:
1969	case Instruction::SExt:
1970	case Instruction::FPToUI:
1971	case Instruction::FPToSI:
1972	case Instruction::UIToFP:
1973	case Instruction::SIToFP:
1974	case Instruction::FPTrunc:
1975	case Instruction::FPExt:
1976	case Instruction::Select:
1977	case Instruction::GetElementPtr: {
1978	SmallVector<Value*, `8`> NewOps;
1979	bool NeedsRebuild =
1980	(Mask.size() !=
1981	cast<FixedVectorType>(Val: I->getType())->getNumElements());
1982	for (int i = `0`, e = I->getNumOperands(); i != e; ++i) {
1983	Value *V;
1984	// Recursively call evaluateInDifferentElementOrder on vector arguments
1985	// as well. E.g. GetElementPtr may have scalar operands even if the
1986	// return value is a vector, so we need to examine the operand type.
1987	if (I->getOperand(i)->getType()->isVectorTy())
1988	V = evaluateInDifferentElementOrder(V: I->getOperand(i), Mask, Builder);
1989	else
1990	V = I->getOperand(i);
1991	NewOps.push_back(Elt: V);
1992	NeedsRebuild \|= (V != I->getOperand(i));
1993	}
1994	if (NeedsRebuild)
1995	return buildNew(I, NewOps, Builder);
1996	return I;
1997	}
1998	case Instruction::InsertElement: {
1999	int Element = cast<ConstantInt>(Val: I->getOperand(i: `2`))->getLimitedValue();
2000
2001	// The insertelement was inserting at Element. Figure out which element
2002	// that becomes after shuffling. The answer is guaranteed to be unique
2003	// by CanEvaluateShuffled.
2004	bool Found = false;
2005	int Index = `0`;
2006	for (int e = Mask.size(); Index != e; ++Index) {
2007	if (Mask [Index] == Element) {
2008	Found = true;
2009	break;
2010	}
2011	}
2012
2013	// If element is not in Mask, no need to handle the operand 1 (element to
2014	// be inserted). Just evaluate values in operand 0 according to Mask.
2015	if (!Found)
2016	return evaluateInDifferentElementOrder(V: I->getOperand(i: `0`), Mask, Builder);
2017
2018	Value *V = evaluateInDifferentElementOrder(V: I->getOperand(i: `0`), Mask,
2019	Builder);
2020	Builder.SetInsertPoint(I);
2021	return Builder.CreateInsertElement(Vec: V, NewElt: I->getOperand(i: `1`), Idx: Index);
2022	}
2023	}
2024	llvm_unreachable("failed to reorder elements of vector instruction!");
2025	}
2026
2027	// Returns true if the shuffle is extracting a contiguous range of values from
2028	// LHS, for example:
2029	// +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
2030	// Input: \|AA\|BB\|CC\|DD\|EE\|FF\|GG\|HH\|II\|JJ\|KK\|LL\|MM\|NN\|OO\|PP\|
2031	// Shuffles to: \|EE\|FF\|GG\|HH\|
2032	// +--+--+--+--+
2033	static bool isShuffleExtractingFromLHS(ShuffleVectorInst &SVI,
2034	ArrayRef<int> Mask) {
2035	unsigned LHSElems =
2036	cast<FixedVectorType>(Val: SVI.getOperand(i_nocapture: `0`)->getType())->getNumElements();
2037	unsigned MaskElems = Mask.size();
2038	unsigned BegIdx = Mask.front();
2039	unsigned EndIdx = Mask.back();
2040	if (BegIdx > EndIdx \|\| EndIdx >= LHSElems \|\| EndIdx - BegIdx != MaskElems - `1`)
2041	return false;
2042	for (unsigned I = `0`; I != MaskElems; ++I)
2043	if (static_cast<unsigned>(Mask [I]) != BegIdx + I)
2044	return false;
2045	return true;
2046	}
2047
2048	/// These are the ingredients in an alternate form binary operator as described
2049	/// below.
2050	struct BinopElts {
2051	BinaryOperator::BinaryOps Opcode;
2052	Value *Op0;
2053	Value *Op1;
2054	BinopElts(BinaryOperator::BinaryOps Opc = (BinaryOperator::BinaryOps)`0`,
2055	Value V0 = nullptr, Value V1 = nullptr) :
2056	Opcode(Opc), Op0(V0), Op1(V1) {}
2057	operator bool() const { return Opcode != `0`; }
2058	};
2059
2060	/// Binops may be transformed into binops with different opcodes and operands.
2061	/// Reverse the usual canonicalization to enable folds with the non-canonical
2062	/// form of the binop. If a transform is possible, return the elements of the
2063	/// new binop. If not, return invalid elements.
2064	static BinopElts getAlternateBinop(BinaryOperator BO, const* DataLayout &DL) {
2065	Value BO0 = BO->getOperand(i_nocapture: `0`), BO1 = BO->getOperand(i_nocapture: `1`);
2066	Type *Ty = BO->getType();
2067	switch (BO->getOpcode()) {
2068	case Instruction::Shl: {
2069	// shl X, C --> mul X, (1 << C)
2070	Constant *C;
2071	if (match(V: BO1, P: m_ImmConstant(C))) {
2072	Constant *ShlOne = ConstantFoldBinaryOpOperands(
2073	Opcode: Instruction::Shl, LHS: ConstantInt::get(Ty, V: `1`), RHS: C, DL);
2074	assert(ShlOne && "Constant folding of immediate constants failed");
2075	return {Instruction::Mul, BO0, ShlOne};
2076	}
2077	break;
2078	}
2079	case Instruction::Or: {
2080	// or disjoin X, C --> add X, C
2081	if (cast<PossiblyDisjointInst>(Val: BO)->isDisjoint())
2082	return {Instruction::Add, BO0, BO1};
2083	break;
2084	}
2085	case Instruction::Sub:
2086	// sub 0, X --> mul X, -1
2087	if (match(V: BO0, P: m_ZeroInt()))
2088	return {Instruction::Mul, BO1, ConstantInt::getAllOnesValue(Ty)};
2089	break;
2090	default:
2091	break;
2092	}
2093	return {};
2094	}
2095
2096	/// A select shuffle of a select shuffle with a shared operand can be reduced
2097	/// to a single select shuffle. This is an obvious improvement in IR, and the
2098	/// backend is expected to lower select shuffles efficiently.
2099	static Instruction *foldSelectShuffleOfSelectShuffle(ShuffleVectorInst &Shuf) {
2100	assert(Shuf.isSelect() && "Must have select-equivalent shuffle");
2101
2102	Value Op0 = Shuf.getOperand(i_nocapture: `0`), Op1 = Shuf.getOperand(i_nocapture: `1`);
2103	SmallVector<int, `16`> Mask;
2104	Shuf.getShuffleMask(Result&: Mask);
2105	unsigned NumElts = Mask.size();
2106
2107	// Canonicalize a select shuffle with common operand as Op1.
2108	auto *ShufOp = dyn_cast<ShuffleVectorInst>(Val: Op0);
2109	if (ShufOp && ShufOp->isSelect() &&
2110	(ShufOp->getOperand(i_nocapture: `0`) == Op1 \|\| ShufOp->getOperand(i_nocapture: `1`) == Op1)) {
2111	std::swap(a&: Op0, b&: Op1);
2112	ShuffleVectorInst::commuteShuffleMask(Mask, InVecNumElts: NumElts);
2113	}
2114
2115	ShufOp = dyn_cast<ShuffleVectorInst>(Val: Op1);
2116	if (!ShufOp \|\| !ShufOp->isSelect() \|\|
2117	(ShufOp->getOperand(i_nocapture: `0`) != Op0 && ShufOp->getOperand(i_nocapture: `1`) != Op0))
2118	return nullptr;
2119
2120	Value X = ShufOp->getOperand(i_nocapture: `0`), Y = ShufOp->getOperand(i_nocapture: `1`);
2121	SmallVector<int, `16`> Mask1;
2122	ShufOp->getShuffleMask(Result&: Mask1);
2123	assert(Mask1.size() == NumElts && "Vector size changed with select shuffle");
2124
2125	// Canonicalize common operand (Op0) as X (first operand of first shuffle).
2126	if (Y == Op0) {
2127	std::swap(a&: X, b&: Y);
2128	ShuffleVectorInst::commuteShuffleMask(Mask: Mask1, InVecNumElts: NumElts);
2129	}
2130
2131	// If the mask chooses from X (operand 0), it stays the same.
2132	// If the mask chooses from the earlier shuffle, the other mask value is
2133	// transferred to the combined select shuffle:
2134	// shuf X, (shuf X, Y, M1), M --> shuf X, Y, M'
2135	SmallVector<int, `16`> NewMask(NumElts);
2136	for (unsigned i = `0`; i != NumElts; ++i)
2137	NewMask [i] = Mask [i] < (signed)NumElts ? Mask [i] : Mask1 [i];
2138
2139	// A select mask with undef elements might look like an identity mask.
2140	assert((ShuffleVectorInst::isSelectMask(NewMask, NumElts) \|\|
2141	ShuffleVectorInst::isIdentityMask(NewMask, NumElts)) &&
2142	"Unexpected shuffle mask");
2143	return new ShuffleVectorInst (X, Y, NewMask);
2144	}
2145
2146	static Instruction *foldSelectShuffleWith1Binop(ShuffleVectorInst &Shuf,
2147	const SimplifyQuery &SQ) {
2148	assert(Shuf.isSelect() && "Must have select-equivalent shuffle");
2149
2150	// Are we shuffling together some value and that same value after it has been
2151	// modified by a binop with a constant?
2152	Value Op0 = Shuf.getOperand(i_nocapture: `0`), Op1 = Shuf.getOperand(i_nocapture: `1`);
2153	Constant *C;
2154	bool Op0IsBinop;
2155	if (match(V: Op0, P: m_BinOp(L: m_Specific(V: Op1), R: m_Constant(C))))
2156	Op0IsBinop = true;
2157	else if (match(V: Op1, P: m_BinOp(L: m_Specific(V: Op0), R: m_Constant(C))))
2158	Op0IsBinop = false;
2159	else
2160	return nullptr;
2161
2162	// The identity constant for a binop leaves a variable operand unchanged. For
2163	// a vector, this is a splat of something like 0, -1, or 1.
2164	// If there's no identity constant for this binop, we're done.
2165	auto *BO = cast<BinaryOperator>(Val: Op0IsBinop ? Op0 : Op1);
2166	BinaryOperator::BinaryOps BOpcode = BO->getOpcode();
2167	Constant IdC = ConstantExpr::getBinOpIdentity(Opcode: BOpcode, Ty: Shuf.getType(), AllowRHSConstant: true*);
2168	if (!IdC)
2169	return nullptr;
2170
2171	Value *X = Op0IsBinop ? Op1 : Op0;
2172
2173	// Prevent folding in the case the non-binop operand might have NaN values.
2174	// If X can have NaN elements then we have that the floating point math
2175	// operation in the transformed code may not preserve the exact NaN
2176	// bit-pattern -- e.g. `fadd sNaN, 0.0 -> qNaN`.
2177	// This makes the transformation incorrect since the original program would
2178	// have preserved the exact NaN bit-pattern.
2179	// Avoid the folding if X can have NaN elements.
2180	if (Shuf.getType()->getElementType()->isFloatingPointTy() &&
2181	!isKnownNeverNaN(V: X, Depth: `0`, SQ))
2182	return nullptr;
2183
2184	// Shuffle identity constants into the lanes that return the original value.
2185	// Example: shuf (mul X, {-1,-2,-3,-4}), X, {0,5,6,3} --> mul X, {-1,1,1,-4}
2186	// Example: shuf X, (add X, {-1,-2,-3,-4}), {0,1,6,7} --> add X, {0,0,-3,-4}
2187	// The existing binop constant vector remains in the same operand position.
2188	ArrayRef<int> Mask = Shuf.getShuffleMask();
2189	Constant *NewC = Op0IsBinop ? ConstantExpr::getShuffleVector(V1: C, V2: IdC, Mask) :
2190	ConstantExpr::getShuffleVector(V1: IdC, V2: C, Mask);
2191
2192	bool MightCreatePoisonOrUB =
2193	is_contained(Range&: Mask, Element: PoisonMaskElem) &&
2194	(Instruction::isIntDivRem(Opcode: BOpcode) \|\| Instruction::isShift(Opcode: BOpcode));
2195	if (MightCreatePoisonOrUB)
2196	NewC = InstCombiner::getSafeVectorConstantForBinop(Opcode: BOpcode, In: NewC, IsRHSConstant: true);
2197
2198	// shuf (bop X, C), X, M --> bop X, C'
2199	// shuf X, (bop X, C), M --> bop X, C'
2200	Instruction *NewBO = BinaryOperator::Create(Op: BOpcode, S1: X, S2: NewC);
2201	NewBO->copyIRFlags(V: BO);
2202
2203	// An undef shuffle mask element may propagate as an undef constant element in
2204	// the new binop. That would produce poison where the original code might not.
2205	// If we already made a safe constant, then there's no danger.
2206	if (is_contained(Range&: Mask, Element: PoisonMaskElem) && !MightCreatePoisonOrUB)
2207	NewBO->dropPoisonGeneratingFlags();
2208	return NewBO;
2209	}
2210
2211	/// If we have an insert of a scalar to a non-zero element of an undefined
2212	/// vector and then shuffle that value, that's the same as inserting to the zero
2213	/// element and shuffling. Splatting from the zero element is recognized as the
2214	/// canonical form of splat.
2215	static Instruction *canonicalizeInsertSplat(ShuffleVectorInst &Shuf,
2216	InstCombiner::BuilderTy &Builder) {
2217	Value Op0 = Shuf.getOperand(i_nocapture: `0`), Op1 = Shuf.getOperand(i_nocapture: `1`);
2218	ArrayRef<int> Mask = Shuf.getShuffleMask();
2219	Value *X;
2220	uint64_t IndexC;
2221
2222	// Match a shuffle that is a splat to a non-zero element.
2223	if (!match(V: Op0, P: m_OneUse(SubPattern: m_InsertElt(Val: m_Poison(), Elt: m_Value(V&: X),
2224	Idx: m_ConstantInt(V&: IndexC)))) \|\|
2225	!match(V: Op1, P: m_Poison()) \|\| match(Mask, P: m_ZeroMask ()) \|\| IndexC == `0`)
2226	return nullptr;
2227
2228	// Insert into element 0 of a poison vector.
2229	PoisonValue *PoisonVec = PoisonValue::get(T: Shuf.getType());
2230	Value *NewIns = Builder.CreateInsertElement(Vec: PoisonVec, NewElt: X, Idx: (uint64_t)`0`);
2231
2232	// Splat from element 0. Any mask element that is poison remains poison.
2233	// For example:
2234	// shuf (inselt poison, X, 2), _, <2,2,undef>
2235	// --> shuf (inselt poison, X, 0), poison, <0,0,undef>
2236	unsigned NumMaskElts =
2237	cast<FixedVectorType>(Val: Shuf.getType())->getNumElements();
2238	SmallVector<int, `16`> NewMask(NumMaskElts, `0`);
2239	for (unsigned i = `0`; i != NumMaskElts; ++i)
2240	if (Mask [i] == PoisonMaskElem)
2241	NewMask [i] = Mask [i];
2242
2243	return new ShuffleVectorInst (NewIns, NewMask);
2244	}
2245
2246	/// Try to fold shuffles that are the equivalent of a vector select.
2247	Instruction *InstCombinerImpl::foldSelectShuffle(ShuffleVectorInst &Shuf) {
2248	if (!Shuf.isSelect())
2249	return nullptr;
2250
2251	// Canonicalize to choose from operand 0 first unless operand 1 is undefined.
2252	// Commuting undef to operand 0 conflicts with another canonicalization.
2253	unsigned NumElts = cast<FixedVectorType>(Val: Shuf.getType())->getNumElements();
2254	if (!match(V: Shuf.getOperand(i_nocapture: `1`), P: m_Undef()) &&
2255	Shuf.getMaskValue(Elt: `0`) >= (int)NumElts) {
2256	// TODO: Can we assert that both operands of a shuffle-select are not undef
2257	// (otherwise, it would have been folded by instsimplify?
2258	Shuf.commute();
2259	return &Shuf;
2260	}
2261
2262	if (Instruction *I = foldSelectShuffleOfSelectShuffle(Shuf))
2263	return I;
2264
2265	if (Instruction *I = foldSelectShuffleWith1Binop(
2266	Shuf, SQ: getSimplifyQuery().getWithInstruction(I: &Shuf)))
2267	return I;
2268
2269	BinaryOperator B0, B1;
2270	if (!match(V: Shuf.getOperand(i_nocapture: `0`), P: m_BinOp(I&: B0)) \|\|
2271	!match(V: Shuf.getOperand(i_nocapture: `1`), P: m_BinOp(I&: B1)))
2272	return nullptr;
2273
2274	// If one operand is "0 - X", allow that to be viewed as "X -1"*
2275	// (ConstantsAreOp1) by getAlternateBinop below. If the neg is not paired
2276	// with a multiply, we will exit because C0/C1 will not be set.
2277	Value X, Y;
2278	Constant C0 = nullptr, C1 = nullptr;
2279	bool ConstantsAreOp1;
2280	if (match(V: B0, P: m_BinOp(L: m_Constant(C&: C0), R: m_Value(V&: X))) &&
2281	match(V: B1, P: m_BinOp(L: m_Constant(C&: C1), R: m_Value(V&: Y))))
2282	ConstantsAreOp1 = false;
2283	else if (match(V: B0, P: m_CombineOr(L: m_BinOp(L: m_Value(V&: X), R: m_Constant(C&: C0)),
2284	R: m_Neg(V: m_Value(V&: X)))) &&
2285	match(V: B1, P: m_CombineOr(L: m_BinOp(L: m_Value(V&: Y), R: m_Constant(C&: C1)),
2286	R: m_Neg(V: m_Value(V&: Y)))))
2287	ConstantsAreOp1 = true;
2288	else
2289	return nullptr;
2290
2291	// We need matching binops to fold the lanes together.
2292	BinaryOperator::BinaryOps Opc0 = B0->getOpcode();
2293	BinaryOperator::BinaryOps Opc1 = B1->getOpcode();
2294	bool DropNSW = false;
2295	if (ConstantsAreOp1 && Opc0 != Opc1) {
2296	// TODO: We drop "nsw" if shift is converted into multiply because it may
2297	// not be correct when the shift amount is BitWidth - 1. We could examine
2298	// each vector element to determine if it is safe to keep that flag.
2299	if (Opc0 == Instruction::Shl \|\| Opc1 == Instruction::Shl)
2300	DropNSW = true;
2301	if (BinopElts AltB0 = getAlternateBinop(BO: B0, DL)) {
2302	assert(isa<Constant>(AltB0.Op1) && "Expecting constant with alt binop");
2303	Opc0 = AltB0.Opcode;
2304	C0 = cast<Constant>(Val: AltB0.Op1);
2305	} else if (BinopElts AltB1 = getAlternateBinop(BO: B1, DL)) {
2306	assert(isa<Constant>(AltB1.Op1) && "Expecting constant with alt binop");
2307	Opc1 = AltB1.Opcode;
2308	C1 = cast<Constant>(Val: AltB1.Op1);
2309	}
2310	}
2311
2312	if (Opc0 != Opc1 \|\| !C0 \|\| !C1)
2313	return nullptr;
2314
2315	// The opcodes must be the same. Use a new name to make that clear.
2316	BinaryOperator::BinaryOps BOpc = Opc0;
2317
2318	// Select the constant elements needed for the single binop.
2319	ArrayRef<int> Mask = Shuf.getShuffleMask();
2320	Constant *NewC = ConstantExpr::getShuffleVector(V1: C0, V2: C1, Mask);
2321
2322	// We are moving a binop after a shuffle. When a shuffle has an undefined
2323	// mask element, the result is undefined, but it is not poison or undefined
2324	// behavior. That is not necessarily true for div/rem/shift.
2325	bool MightCreatePoisonOrUB =
2326	is_contained(Range&: Mask, Element: PoisonMaskElem) &&
2327	(Instruction::isIntDivRem(Opcode: BOpc) \|\| Instruction::isShift(Opcode: BOpc));
2328	if (MightCreatePoisonOrUB)
2329	NewC = InstCombiner::getSafeVectorConstantForBinop(Opcode: BOpc, In: NewC,
2330	IsRHSConstant: ConstantsAreOp1);
2331
2332	Value *V;
2333	if (X == Y) {
2334	// Remove a binop and the shuffle by rearranging the constant:
2335	// shuffle (op V, C0), (op V, C1), M --> op V, C'
2336	// shuffle (op C0, V), (op C1, V), M --> op C', V
2337	V = X;
2338	} else {
2339	// If there are 2 different variable operands, we must create a new shuffle
2340	// (select) first, so check uses to ensure that we don't end up with more
2341	// instructions than we started with.
2342	if (!B0->hasOneUse() && !B1->hasOneUse())
2343	return nullptr;
2344
2345	// If we use the original shuffle mask and op1 is variable, we would be
2346	// putting an undef into operand 1 of div/rem/shift. This is either UB or
2347	// poison. We do not have to guard against UB when constants* are op1*
2348	// because safe constants guarantee that we do not overflow sdiv/srem (and
2349	// there's no danger for other opcodes).
2350	// TODO: To allow this case, create a new shuffle mask with no undefs.
2351	if (MightCreatePoisonOrUB && !ConstantsAreOp1)
2352	return nullptr;
2353
2354	// Note: In general, we do not create new shuffles in InstCombine because we
2355	// do not know if a target can lower an arbitrary shuffle optimally. In this
2356	// case, the shuffle uses the existing mask, so there is no additional risk.
2357
2358	// Select the variable vectors first, then perform the binop:
2359	// shuffle (op X, C0), (op Y, C1), M --> op (shuffle X, Y, M), C'
2360	// shuffle (op C0, X), (op C1, Y), M --> op C', (shuffle X, Y, M)
2361	V = Builder.CreateShuffleVector(V1: X, V2: Y, Mask);
2362	}
2363
2364	Value *NewBO = ConstantsAreOp1 ? Builder.CreateBinOp(Opc: BOpc, LHS: V, RHS: NewC) :
2365	Builder.CreateBinOp(Opc: BOpc, LHS: NewC, RHS: V);
2366
2367	// Flags are intersected from the 2 source binops. But there are 2 exceptions:
2368	// 1. If we changed an opcode, poison conditions might have changed.
2369	// 2. If the shuffle had undef mask elements, the new binop might have undefs
2370	// where the original code did not. But if we already made a safe constant,
2371	// then there's no danger.
2372	if (auto *NewI = dyn_cast<Instruction>(Val: NewBO)) {
2373	NewI->copyIRFlags(V: B0);
2374	NewI->andIRFlags(V: B1);
2375	if (DropNSW)
2376	NewI->setHasNoSignedWrap(false);
2377	if (is_contained(Range&: Mask, Element: PoisonMaskElem) && !MightCreatePoisonOrUB)
2378	NewI->dropPoisonGeneratingFlags();
2379	}
2380	return replaceInstUsesWith(I&: Shuf, V: NewBO);
2381	}
2382
2383	/// Convert a narrowing shuffle of a bitcasted vector into a vector truncate.
2384	/// Example (little endian):
2385	/// shuf (bitcast <4 x i16> X to <8 x i8>), <0, 2, 4, 6> --> trunc X to <4 x i8>
2386	static Instruction *foldTruncShuffle(ShuffleVectorInst &Shuf,
2387	bool IsBigEndian) {
2388	// This must be a bitcasted shuffle of 1 vector integer operand.
2389	Type *DestType = Shuf.getType();
2390	Value *X;
2391	if (!match(V: Shuf.getOperand(i_nocapture: `0`), P: m_BitCast(Op: m_Value(V&: X))) \|\|
2392	!match(V: Shuf.getOperand(i_nocapture: `1`), P: m_Poison()) \|\| !DestType->isIntOrIntVectorTy())
2393	return nullptr;
2394
2395	// The source type must have the same number of elements as the shuffle,
2396	// and the source element type must be larger than the shuffle element type.
2397	Type *SrcType = X->getType();
2398	if (!SrcType->isVectorTy() \|\| !SrcType->isIntOrIntVectorTy() \|\|
2399	cast<FixedVectorType>(Val: SrcType)->getNumElements() !=
2400	cast<FixedVectorType>(Val: DestType)->getNumElements() \|\|
2401	SrcType->getScalarSizeInBits() % DestType->getScalarSizeInBits() != `0`)
2402	return nullptr;
2403
2404	assert(Shuf.changesLength() && !Shuf.increasesLength() &&
2405	"Expected a shuffle that decreases length");
2406
2407	// Last, check that the mask chooses the correct low bits for each narrow
2408	// element in the result.
2409	uint64_t TruncRatio =
2410	SrcType->getScalarSizeInBits() / DestType->getScalarSizeInBits();
2411	ArrayRef<int> Mask = Shuf.getShuffleMask();
2412	for (unsigned i = `0`, e = Mask.size(); i != e; ++i) {
2413	if (Mask [i] == PoisonMaskElem)
2414	continue;
2415	uint64_t LSBIndex = IsBigEndian ? (i + `1`) * TruncRatio - `1` : i * TruncRatio;
2416	assert(LSBIndex <= INT32_MAX && "Overflowed 32-bits");
2417	if (Mask [i] != (int)LSBIndex)
2418	return nullptr;
2419	}
2420
2421	return new TruncInst (X, DestType);
2422	}
2423
2424	/// Match a shuffle-select-shuffle pattern where the shuffles are widening and
2425	/// narrowing (concatenating with poison and extracting back to the original
2426	/// length). This allows replacing the wide select with a narrow select.
2427	static Instruction *narrowVectorSelect(ShuffleVectorInst &Shuf,
2428	InstCombiner::BuilderTy &Builder) {
2429	// This must be a narrowing identity shuffle. It extracts the 1st N elements
2430	// of the 1st vector operand of a shuffle.
2431	if (!match(V: Shuf.getOperand(i_nocapture: `1`), P: m_Poison()) \|\| !Shuf.isIdentityWithExtract())
2432	return nullptr;
2433
2434	// The vector being shuffled must be a vector select that we can eliminate.
2435	// TODO: The one-use requirement could be eased if X and/or Y are constants.
2436	Value Cond, X, *Y;
2437	if (!match(V: Shuf.getOperand(i_nocapture: `0`),
2438	P: m_OneUse(SubPattern: m_Select(C: m_Value(V&: Cond), L: m_Value(V&: X), R: m_Value(V&: Y)))))
2439	return nullptr;
2440
2441	// We need a narrow condition value. It must be extended with poison elements
2442	// and have the same number of elements as this shuffle.
2443	unsigned NarrowNumElts =
2444	cast<FixedVectorType>(Val: Shuf.getType())->getNumElements();
2445	Value *NarrowCond;
2446	if (!match(V: Cond, P: m_OneUse(SubPattern: m_Shuffle(v1: m_Value(V&: NarrowCond), v2: m_Poison()))) \|\|
2447	cast<FixedVectorType>(Val: NarrowCond->getType())->getNumElements() !=
2448	NarrowNumElts \|\|
2449	!cast<ShuffleVectorInst>(Val: Cond)->isIdentityWithPadding())
2450	return nullptr;
2451
2452	// shuf (sel (shuf NarrowCond, poison, WideMask), X, Y), poison, NarrowMask)
2453	// -->
2454	// sel NarrowCond, (shuf X, poison, NarrowMask), (shuf Y, poison, NarrowMask)
2455	Value *NarrowX = Builder.CreateShuffleVector(V: X, Mask: Shuf.getShuffleMask());
2456	Value *NarrowY = Builder.CreateShuffleVector(V: Y, Mask: Shuf.getShuffleMask());
2457	return SelectInst::Create(C: NarrowCond, S1: NarrowX, S2: NarrowY);
2458	}
2459
2460	/// Canonicalize FP negate/abs after shuffle.
2461	static Instruction *foldShuffleOfUnaryOps(ShuffleVectorInst &Shuf,
2462	InstCombiner::BuilderTy &Builder) {
2463	auto *S0 = dyn_cast<Instruction>(Val: Shuf.getOperand(i_nocapture: `0`));
2464	Value *X;
2465	if (!S0 \|\| !match(V: S0, P: m_CombineOr(L: m_FNeg(X: m_Value(V&: X)), R: m_FAbs(Op0: m_Value(V&: X)))))
2466	return nullptr;
2467
2468	bool IsFNeg = S0->getOpcode() == Instruction::FNeg;
2469
2470	// Match 1-input (unary) shuffle.
2471	// shuffle (fneg/fabs X), Mask --> fneg/fabs (shuffle X, Mask)
2472	if (S0->hasOneUse() && match(V: Shuf.getOperand(i_nocapture: `1`), P: m_Poison())) {
2473	Value *NewShuf = Builder.CreateShuffleVector(V: X, Mask: Shuf.getShuffleMask());
2474	if (IsFNeg)
2475	return UnaryOperator::CreateFNegFMF(Op: NewShuf, FMFSource: S0);
2476
2477	Function *FAbs = Intrinsic::getDeclaration(M: Shuf.getModule(),
2478	id: Intrinsic::fabs, Tys: Shuf.getType());
2479	CallInst *NewF = CallInst::Create(Func: FAbs, Args: {NewShuf});
2480	NewF->setFastMathFlags(S0->getFastMathFlags());
2481	return NewF;
2482	}
2483
2484	// Match 2-input (binary) shuffle.
2485	auto *S1 = dyn_cast<Instruction>(Val: Shuf.getOperand(i_nocapture: `1`));
2486	Value *Y;
2487	if (!S1 \|\| !match(V: S1, P: m_CombineOr(L: m_FNeg(X: m_Value(V&: Y)), R: m_FAbs(Op0: m_Value(V&: Y)))) \|\|
2488	S0->getOpcode() != S1->getOpcode() \|\|
2489	(!S0->hasOneUse() && !S1->hasOneUse()))
2490	return nullptr;
2491
2492	// shuf (fneg/fabs X), (fneg/fabs Y), Mask --> fneg/fabs (shuf X, Y, Mask)
2493	Value *NewShuf = Builder.CreateShuffleVector(V1: X, V2: Y, Mask: Shuf.getShuffleMask());
2494	Instruction *NewF;
2495	if (IsFNeg) {
2496	NewF = UnaryOperator::CreateFNeg(V: NewShuf);
2497	} else {
2498	Function *FAbs = Intrinsic::getDeclaration(M: Shuf.getModule(),
2499	id: Intrinsic::fabs, Tys: Shuf.getType());
2500	NewF = CallInst::Create(Func: FAbs, Args: {NewShuf});
2501	}
2502	NewF->copyIRFlags(V: S0);
2503	NewF->andIRFlags(V: S1);
2504	return NewF;
2505	}
2506
2507	/// Canonicalize casts after shuffle.
2508	static Instruction *foldCastShuffle(ShuffleVectorInst &Shuf,
2509	InstCombiner::BuilderTy &Builder) {
2510	// Do we have 2 matching cast operands?
2511	auto *Cast0 = dyn_cast<CastInst>(Val: Shuf.getOperand(i_nocapture: `0`));
2512	auto *Cast1 = dyn_cast<CastInst>(Val: Shuf.getOperand(i_nocapture: `1`));
2513	if (!Cast0 \|\| !Cast1 \|\| Cast0->getOpcode() != Cast1->getOpcode() \|\|
2514	Cast0->getSrcTy() != Cast1->getSrcTy())
2515	return nullptr;
2516
2517	// TODO: Allow other opcodes? That would require easing the type restrictions
2518	// below here.
2519	CastInst::CastOps CastOpcode = Cast0->getOpcode();
2520	switch (CastOpcode) {
2521	case Instruction::FPToSI:
2522	case Instruction::FPToUI:
2523	case Instruction::SIToFP:
2524	case Instruction::UIToFP:
2525	break;
2526	default:
2527	return nullptr;
2528	}
2529
2530	VectorType *ShufTy = Shuf.getType();
2531	VectorType *ShufOpTy = cast<VectorType>(Val: Shuf.getOperand(i_nocapture: `0`)->getType());
2532	VectorType *CastSrcTy = cast<VectorType>(Val: Cast0->getSrcTy());
2533
2534	// TODO: Allow length-increasing shuffles?
2535	if (ShufTy->getElementCount().getKnownMinValue() >
2536	ShufOpTy->getElementCount().getKnownMinValue())
2537	return nullptr;
2538
2539	// TODO: Allow element-size-decreasing casts (ex: fptosi float to i8)?
2540	assert(isa<FixedVectorType>(CastSrcTy) && isa<FixedVectorType>(ShufOpTy) &&
2541	"Expected fixed vector operands for casts and binary shuffle");
2542	if (CastSrcTy->getPrimitiveSizeInBits() > ShufOpTy->getPrimitiveSizeInBits())
2543	return nullptr;
2544
2545	// At least one of the operands must have only one use (the shuffle).
2546	if (!Cast0->hasOneUse() && !Cast1->hasOneUse())
2547	return nullptr;
2548
2549	// shuffle (cast X), (cast Y), Mask --> cast (shuffle X, Y, Mask)
2550	Value *X = Cast0->getOperand(i_nocapture: `0`);
2551	Value *Y = Cast1->getOperand(i_nocapture: `0`);
2552	Value *NewShuf = Builder.CreateShuffleVector(V1: X, V2: Y, Mask: Shuf.getShuffleMask());
2553	return CastInst::Create(CastOpcode, S: NewShuf, Ty: ShufTy);
2554	}
2555
2556	/// Try to fold an extract subvector operation.
2557	static Instruction *foldIdentityExtractShuffle(ShuffleVectorInst &Shuf) {
2558	Value Op0 = Shuf.getOperand(i_nocapture: `0`), Op1 = Shuf.getOperand(i_nocapture: `1`);
2559	if (!Shuf.isIdentityWithExtract() \|\| !match(V: Op1, P: m_Poison()))
2560	return nullptr;
2561
2562	// Check if we are extracting all bits of an inserted scalar:
2563	// extract-subvec (bitcast (inselt ?, X, 0) --> bitcast X to subvec type
2564	Value *X;
2565	if (match(V: Op0, P: m_BitCast(Op: m_InsertElt(Val: m_Value(), Elt: m_Value(V&: X), Idx: m_Zero()))) &&
2566	X->getType()->getPrimitiveSizeInBits() ==
2567	Shuf.getType()->getPrimitiveSizeInBits())
2568	return new BitCastInst (X, Shuf.getType());
2569
2570	// Try to combine 2 shuffles into 1 shuffle by concatenating a shuffle mask.
2571	Value *Y;
2572	ArrayRef<int> Mask;
2573	if (!match(V: Op0, P: m_Shuffle(v1: m_Value(V&: X), v2: m_Value(V&: Y), mask: m_Mask (Mask))))
2574	return nullptr;
2575
2576	// Be conservative with shuffle transforms. If we can't kill the 1st shuffle,
2577	// then combining may result in worse codegen.
2578	if (!Op0->hasOneUse())
2579	return nullptr;
2580
2581	// We are extracting a subvector from a shuffle. Remove excess elements from
2582	// the 1st shuffle mask to eliminate the extract.
2583	//
2584	// This transform is conservatively limited to identity extracts because we do
2585	// not allow arbitrary shuffle mask creation as a target-independent transform
2586	// (because we can't guarantee that will lower efficiently).
2587	//
2588	// If the extracting shuffle has an poison mask element, it transfers to the
2589	// new shuffle mask. Otherwise, copy the original mask element. Example:
2590	// shuf (shuf X, Y, <C0, C1, C2, poison, C4>), poison, <0, poison, 2, 3> -->
2591	// shuf X, Y, <C0, poison, C2, poison>
2592	unsigned NumElts = cast<FixedVectorType>(Val: Shuf.getType())->getNumElements();
2593	SmallVector<int, `16`> NewMask(NumElts);
2594	assert(NumElts < Mask.size() &&
2595	"Identity with extract must have less elements than its inputs");
2596
2597	for (unsigned i = `0`; i != NumElts; ++i) {
2598	int ExtractMaskElt = Shuf.getMaskValue(Elt: i);
2599	int MaskElt = Mask [i];
2600	NewMask [i] = ExtractMaskElt == PoisonMaskElem ? ExtractMaskElt : MaskElt;
2601	}
2602	return new ShuffleVectorInst (X, Y, NewMask);
2603	}
2604
2605	/// Try to replace a shuffle with an insertelement or try to replace a shuffle
2606	/// operand with the operand of an insertelement.
2607	static Instruction *foldShuffleWithInsert(ShuffleVectorInst &Shuf,
2608	InstCombinerImpl &IC) {
2609	Value V0 = Shuf.getOperand(i_nocapture: `0`), V1 = Shuf.getOperand(i_nocapture: `1`);
2610	SmallVector<int, `16`> Mask;
2611	Shuf.getShuffleMask(Result&: Mask);
2612
2613	int NumElts = Mask.size();
2614	int InpNumElts = cast<FixedVectorType>(Val: V0->getType())->getNumElements();
2615
2616	// This is a specialization of a fold in SimplifyDemandedVectorElts. We may
2617	// not be able to handle it there if the insertelement has >1 use.
2618	// If the shuffle has an insertelement operand but does not choose the
2619	// inserted scalar element from that value, then we can replace that shuffle
2620	// operand with the source vector of the insertelement.
2621	Value *X;
2622	uint64_t IdxC;
2623	if (match(V: V0, P: m_InsertElt(Val: m_Value(V&: X), Elt: m_Value(), Idx: m_ConstantInt(V&: IdxC)))) {
2624	// shuf (inselt X, ?, IdxC), ?, Mask --> shuf X, ?, Mask
2625	if (!is_contained(Range&: Mask, Element: (int)IdxC))
2626	return IC.replaceOperand(I&: Shuf, OpNum: `0`, V: X);
2627	}
2628	if (match(V: V1, P: m_InsertElt(Val: m_Value(V&: X), Elt: m_Value(), Idx: m_ConstantInt(V&: IdxC)))) {
2629	// Offset the index constant by the vector width because we are checking for
2630	// accesses to the 2nd vector input of the shuffle.
2631	IdxC += InpNumElts;
2632	// shuf ?, (inselt X, ?, IdxC), Mask --> shuf ?, X, Mask
2633	if (!is_contained(Range&: Mask, Element: (int)IdxC))
2634	return IC.replaceOperand(I&: Shuf, OpNum: `1`, V: X);
2635	}
2636	// For the rest of the transform, the shuffle must not change vector sizes.
2637	// TODO: This restriction could be removed if the insert has only one use
2638	// (because the transform would require a new length-changing shuffle).
2639	if (NumElts != InpNumElts)
2640	return nullptr;
2641
2642	// shuffle (insert ?, Scalar, IndexC), V1, Mask --> insert V1, Scalar, IndexC'
2643	auto isShufflingScalarIntoOp1 = [&](Value &Scalar, ConstantInt &IndexC) {
2644	// We need an insertelement with a constant index.
2645	if (!match(V: V0, P: m_InsertElt(Val: m_Value(), Elt: m_Value(V&: Scalar),
2646	Idx: m_ConstantInt(CI&: IndexC))))
2647	return false;
2648
2649	// Test the shuffle mask to see if it splices the inserted scalar into the
2650	// operand 1 vector of the shuffle.
2651	int NewInsIndex = -`1`;
2652	for (int i = `0`; i != NumElts; ++i) {
2653	// Ignore undef mask elements.
2654	if (Mask [i] == -`1`)
2655	continue;
2656
2657	// The shuffle takes elements of operand 1 without lane changes.
2658	if (Mask [i] == NumElts + i)
2659	continue;
2660
2661	// The shuffle must choose the inserted scalar exactly once.
2662	if (NewInsIndex != -`1` \|\| Mask [i] != IndexC->getSExtValue())
2663	return false;
2664
2665	// The shuffle is placing the inserted scalar into element i.
2666	NewInsIndex = i;
2667	}
2668
2669	assert(NewInsIndex != -`1` && "Did not fold shuffle with unused operand?");
2670
2671	// Index is updated to the potentially translated insertion lane.
2672	IndexC = ConstantInt::get(Ty: IndexC->getIntegerType(), V: NewInsIndex);
2673	return true;
2674	};
2675
2676	// If the shuffle is unnecessary, insert the scalar operand directly into
2677	// operand 1 of the shuffle. Example:
2678	// shuffle (insert ?, S, 1), V1, <1, 5, 6, 7> --> insert V1, S, 0
2679	Value *Scalar;
2680	ConstantInt *IndexC;
2681	if (isShufflingScalarIntoOp1 (Scalar, IndexC))
2682	return InsertElementInst::Create(Vec: V1, NewElt: Scalar, Idx: IndexC);
2683
2684	// Try again after commuting shuffle. Example:
2685	// shuffle V0, (insert ?, S, 0), <0, 1, 2, 4> -->
2686	// shuffle (insert ?, S, 0), V0, <4, 5, 6, 0> --> insert V0, S, 3
2687	std::swap(a&: V0, b&: V1);
2688	ShuffleVectorInst::commuteShuffleMask(Mask, InVecNumElts: NumElts);
2689	if (isShufflingScalarIntoOp1 (Scalar, IndexC))
2690	return InsertElementInst::Create(Vec: V1, NewElt: Scalar, Idx: IndexC);
2691
2692	return nullptr;
2693	}
2694
2695	static Instruction *foldIdentityPaddedShuffles(ShuffleVectorInst &Shuf) {
2696	// Match the operands as identity with padding (also known as concatenation
2697	// with undef) shuffles of the same source type. The backend is expected to
2698	// recreate these concatenations from a shuffle of narrow operands.
2699	auto *Shuffle0 = dyn_cast<ShuffleVectorInst>(Val: Shuf.getOperand(i_nocapture: `0`));
2700	auto *Shuffle1 = dyn_cast<ShuffleVectorInst>(Val: Shuf.getOperand(i_nocapture: `1`));
2701	if (!Shuffle0 \|\| !Shuffle0->isIdentityWithPadding() \|\|
2702	!Shuffle1 \|\| !Shuffle1->isIdentityWithPadding())
2703	return nullptr;
2704
2705	// We limit this transform to power-of-2 types because we expect that the
2706	// backend can convert the simplified IR patterns to identical nodes as the
2707	// original IR.
2708	// TODO: If we can verify the same behavior for arbitrary types, the
2709	// power-of-2 checks can be removed.
2710	Value *X = Shuffle0->getOperand(i_nocapture: `0`);
2711	Value *Y = Shuffle1->getOperand(i_nocapture: `0`);
2712	if (X->getType() != Y->getType() \|\|
2713	!isPowerOf2_32(Value: cast<FixedVectorType>(Val: Shuf.getType())->getNumElements()) \|\|
2714	!isPowerOf2_32(
2715	Value: cast<FixedVectorType>(Val: Shuffle0->getType())->getNumElements()) \|\|
2716	!isPowerOf2_32(Value: cast<FixedVectorType>(Val: X->getType())->getNumElements()) \|\|
2717	match(V: X, P: m_Undef()) \|\| match(V: Y, P: m_Undef()))
2718	return nullptr;
2719	assert(match(Shuffle0->getOperand(`1`), m_Undef()) &&
2720	match(Shuffle1->getOperand(`1`), m_Undef()) &&
2721	"Unexpected operand for identity shuffle");
2722
2723	// This is a shuffle of 2 widening shuffles. We can shuffle the narrow source
2724	// operands directly by adjusting the shuffle mask to account for the narrower
2725	// types:
2726	// shuf (widen X), (widen Y), Mask --> shuf X, Y, Mask'
2727	int NarrowElts = cast<FixedVectorType>(Val: X->getType())->getNumElements();
2728	int WideElts = cast<FixedVectorType>(Val: Shuffle0->getType())->getNumElements();
2729	assert(WideElts > NarrowElts && "Unexpected types for identity with padding");
2730
2731	ArrayRef<int> Mask = Shuf.getShuffleMask();
2732	SmallVector<int, `16`> NewMask(Mask.size(), -`1`);
2733	for (int i = `0`, e = Mask.size(); i != e; ++i) {
2734	if (Mask [i] == -`1`)
2735	continue;
2736
2737	// If this shuffle is choosing an undef element from 1 of the sources, that
2738	// element is undef.
2739	if (Mask [i] < WideElts) {
2740	if (Shuffle0->getMaskValue(Elt: Mask [i]) == -`1`)
2741	continue;
2742	} else {
2743	if (Shuffle1->getMaskValue(Elt: Mask [i] - WideElts) == -`1`)
2744	continue;
2745	}
2746
2747	// If this shuffle is choosing from the 1st narrow op, the mask element is
2748	// the same. If this shuffle is choosing from the 2nd narrow op, the mask
2749	// element is offset down to adjust for the narrow vector widths.
2750	if (Mask [i] < WideElts) {
2751	assert(Mask[i] < NarrowElts && "Unexpected shuffle mask");
2752	NewMask [i] = Mask [i];
2753	} else {
2754	assert(Mask[i] < (WideElts + NarrowElts) && "Unexpected shuffle mask");
2755	NewMask [i] = Mask [i] - (WideElts - NarrowElts);
2756	}
2757	}
2758	return new ShuffleVectorInst (X, Y, NewMask);
2759	}
2760
2761	// Splatting the first element of the result of a BinOp, where any of the
2762	// BinOp's operands are the result of a first element splat can be simplified to
2763	// splatting the first element of the result of the BinOp
2764	Instruction *InstCombinerImpl::simplifyBinOpSplats(ShuffleVectorInst &SVI) {
2765	if (!match(V: SVI.getOperand(i_nocapture: `1`), P: m_Poison()) \|\|
2766	!match(Mask: SVI.getShuffleMask(), P: m_ZeroMask ()) \|\|
2767	!SVI.getOperand(i_nocapture: `0`)->hasOneUse())
2768	return nullptr;
2769
2770	Value *Op0 = SVI.getOperand(i_nocapture: `0`);
2771	Value X, Y;
2772	if (!match(V: Op0, P: m_BinOp(L: m_Shuffle(v1: m_Value(V&: X), v2: m_Poison(), mask: m_ZeroMask ()),
2773	R: m_Value(V&: Y))) &&
2774	!match(V: Op0, P: m_BinOp(L: m_Value(V&: X),
2775	R: m_Shuffle(v1: m_Value(V&: Y), v2: m_Poison(), mask: m_ZeroMask ()))))
2776	return nullptr;
2777	if (X->getType() != Y->getType())
2778	return nullptr;
2779
2780	auto *BinOp = cast<BinaryOperator>(Val: Op0);
2781	if (!isSafeToSpeculativelyExecute(I: BinOp))
2782	return nullptr;
2783
2784	Value *NewBO = Builder.CreateBinOp(Opc: BinOp->getOpcode(), LHS: X, RHS: Y);
2785	if (auto NewBOI = dyn_cast<Instruction>(Val: NewBO))
2786	NewBOI->copyIRFlags(V: BinOp);
2787
2788	return new ShuffleVectorInst (NewBO, SVI.getShuffleMask());
2789	}
2790
2791	Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
2792	Value *LHS = SVI.getOperand(i_nocapture: `0`);
2793	Value *RHS = SVI.getOperand(i_nocapture: `1`);
2794	SimplifyQuery ShufQuery = SQ.getWithInstruction(I: &SVI);
2795	if (auto *V = simplifyShuffleVectorInst(Op0: LHS, Op1: RHS, Mask: SVI.getShuffleMask(),
2796	RetTy: SVI.getType(), Q: ShufQuery))
2797	return replaceInstUsesWith(I&: SVI, V);
2798
2799	if (Instruction *I = simplifyBinOpSplats(SVI))
2800	return I;
2801
2802	// Canonicalize splat shuffle to use poison RHS. Handle this explicitly in
2803	// order to support scalable vectors.
2804	if (match(Mask: SVI.getShuffleMask(), P: m_ZeroMask ()) && !isa<PoisonValue>(Val: RHS))
2805	return replaceOperand(I&: SVI, OpNum: `1`, V: PoisonValue::get(T: RHS->getType()));
2806
2807	if (isa<ScalableVectorType>(Val: LHS->getType()))
2808	return nullptr;
2809
2810	unsigned VWidth = cast<FixedVectorType>(Val: SVI.getType())->getNumElements();
2811	unsigned LHSWidth = cast<FixedVectorType>(Val: LHS->getType())->getNumElements();
2812
2813	// shuffle (bitcast X), (bitcast Y), Mask --> bitcast (shuffle X, Y, Mask)
2814	//
2815	// if X and Y are of the same (vector) type, and the element size is not
2816	// changed by the bitcasts, we can distribute the bitcasts through the
2817	// shuffle, hopefully reducing the number of instructions. We make sure that
2818	// at least one bitcast only has one use, so we don't increase* the number of*
2819	// instructions here.
2820	Value X, Y;
2821	if (match(V: LHS, P: m_BitCast(Op: m_Value(V&: X))) && match(V: RHS, P: m_BitCast(Op: m_Value(V&: Y))) &&
2822	X->getType()->isVectorTy() && X->getType() == Y->getType() &&
2823	X->getType()->getScalarSizeInBits() ==
2824	SVI.getType()->getScalarSizeInBits() &&
2825	(LHS->hasOneUse() \|\| RHS->hasOneUse())) {
2826	Value *V = Builder.CreateShuffleVector(V1: X, V2: Y, Mask: SVI.getShuffleMask(),
2827	Name: SVI.getName() + ".uncasted");
2828	return new BitCastInst (V, SVI.getType());
2829	}
2830
2831	ArrayRef<int> Mask = SVI.getShuffleMask();
2832
2833	// Peek through a bitcasted shuffle operand by scaling the mask. If the
2834	// simulated shuffle can simplify, then this shuffle is unnecessary:
2835	// shuf (bitcast X), undef, Mask --> bitcast X'
2836	// TODO: This could be extended to allow length-changing shuffles.
2837	// The transform might also be obsoleted if we allowed canonicalization
2838	// of bitcasted shuffles.
2839	if (match(V: LHS, P: m_BitCast(Op: m_Value(V&: X))) && match(V: RHS, P: m_Undef()) &&
2840	X->getType()->isVectorTy() && VWidth == LHSWidth) {
2841	// Try to create a scaled mask constant.
2842	auto *XType = cast<FixedVectorType>(Val: X->getType());
2843	unsigned XNumElts = XType->getNumElements();
2844	SmallVector<int, `16`> ScaledMask;
2845	if (scaleShuffleMaskElts(NumDstElts: XNumElts, Mask, ScaledMask)) {
2846	// If the shuffled source vector simplifies, cast that value to this
2847	// shuffle's type.
2848	if (auto *V = simplifyShuffleVectorInst(Op0: X, Op1: UndefValue::get(T: XType),
2849	Mask: ScaledMask, RetTy: XType, Q: ShufQuery))
2850	return BitCastInst::Create(Instruction::BitCast, S: V, Ty: SVI.getType());
2851	}
2852	}
2853
2854	// shuffle x, x, mask --> shuffle x, undef, mask'
2855	if (LHS == RHS) {
2856	assert(!match(RHS, m_Undef()) &&
2857	"Shuffle with 2 undef ops not simplified?");
2858	return new ShuffleVectorInst (LHS, createUnaryMask(Mask, NumElts: LHSWidth));
2859	}
2860
2861	// shuffle undef, x, mask --> shuffle x, undef, mask'
2862	if (match(V: LHS, P: m_Undef())) {
2863	SVI.commute();
2864	return &SVI;
2865	}
2866
2867	if (Instruction *I = canonicalizeInsertSplat(Shuf&: SVI, Builder))
2868	return I;
2869
2870	if (Instruction *I = foldSelectShuffle(Shuf&: SVI))
2871	return I;
2872
2873	if (Instruction *I = foldTruncShuffle(Shuf&: SVI, IsBigEndian: DL.isBigEndian()))
2874	return I;
2875
2876	if (Instruction *I = narrowVectorSelect(Shuf&: SVI, Builder))
2877	return I;
2878
2879	if (Instruction *I = foldShuffleOfUnaryOps(Shuf&: SVI, Builder))
2880	return I;
2881
2882	if (Instruction *I = foldCastShuffle(Shuf&: SVI, Builder))
2883	return I;
2884
2885	APInt PoisonElts(VWidth, `0`);
2886	APInt AllOnesEltMask(APInt::getAllOnes(numBits: VWidth));
2887	if (Value *V = SimplifyDemandedVectorElts(V: &SVI, DemandedElts: AllOnesEltMask, PoisonElts)) {
2888	if (V != &SVI)
2889	return replaceInstUsesWith(I&: SVI, V);
2890	return &SVI;
2891	}
2892
2893	if (Instruction *I = foldIdentityExtractShuffle(Shuf&: SVI))
2894	return I;
2895
2896	// These transforms have the potential to lose undef knowledge, so they are
2897	// intentionally placed after SimplifyDemandedVectorElts().
2898	if (Instruction I = foldShuffleWithInsert(Shuf&: SVI, IC&: this))
2899	return I;
2900	if (Instruction *I = foldIdentityPaddedShuffles(Shuf&: SVI))
2901	return I;
2902
2903	if (match(V: RHS, P: m_Poison()) && canEvaluateShuffled(V: LHS, Mask)) {
2904	Value *V = evaluateInDifferentElementOrder(V: LHS, Mask, Builder);
2905	return replaceInstUsesWith(I&: SVI, V);
2906	}
2907
2908	// SROA generates shuffle+bitcast when the extracted sub-vector is bitcast to
2909	// a non-vector type. We can instead bitcast the original vector followed by
2910	// an extract of the desired element:
2911	//
2912	// %sroa = shufflevector <16 x i8> %in, <16 x i8> undef,
2913	// <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2914	// %1 = bitcast <4 x i8> %sroa to i32
2915	// Becomes:
2916	// %bc = bitcast <16 x i8> %in to <4 x i32>
2917	// %ext = extractelement <4 x i32> %bc, i32 0
2918	//
2919	// If the shuffle is extracting a contiguous range of values from the input
2920	// vector then each use which is a bitcast of the extracted size can be
2921	// replaced. This will work if the vector types are compatible, and the begin
2922	// index is aligned to a value in the casted vector type. If the begin index
2923	// isn't aligned then we can shuffle the original vector (keeping the same
2924	// vector type) before extracting.
2925	//
2926	// This code will bail out if the target type is fundamentally incompatible
2927	// with vectors of the source type.
2928	//
2929	// Example of <16 x i8>, target type i32:
2930	// Index range [4,8): v-----------v Will work.
2931	// +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
2932	// <16 x i8>: \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \|
2933	// <4 x i32>: \| \| \| \| \|
2934	// +-----------+-----------+-----------+-----------+
2935	// Index range [6,10): ^-----------^ Needs an extra shuffle.
2936	// Target type i40: ^--------------^ Won't work, bail.
2937	bool MadeChange = false;
2938	if (isShuffleExtractingFromLHS(SVI, Mask)) {
2939	Value *V = LHS;
2940	unsigned MaskElems = Mask.size();
2941	auto *SrcTy = cast<FixedVectorType>(Val: V->getType());
2942	unsigned VecBitWidth = SrcTy->getPrimitiveSizeInBits().getFixedValue();
2943	unsigned SrcElemBitWidth = DL.getTypeSizeInBits(Ty: SrcTy->getElementType());
2944	assert(SrcElemBitWidth && "vector elements must have a bitwidth");
2945	unsigned SrcNumElems = SrcTy->getNumElements();
2946	SmallVector<BitCastInst *, `8`> BCs;
2947	DenseMap<Type , Value > NewBCs;
2948	for (User *U : SVI.users())
2949	if (BitCastInst *BC = dyn_cast<BitCastInst>(Val: U))
2950	if (!BC->use_empty())
2951	// Only visit bitcasts that weren't previously handled.
2952	BCs.push_back(Elt: BC);
2953	for (BitCastInst *BC : BCs) {
2954	unsigned BegIdx = Mask.front();
2955	Type *TgtTy = BC->getDestTy();
2956	unsigned TgtElemBitWidth = DL.getTypeSizeInBits(Ty: TgtTy);
2957	if (!TgtElemBitWidth)
2958	continue;
2959	unsigned TgtNumElems = VecBitWidth / TgtElemBitWidth;
2960	bool VecBitWidthsEqual = VecBitWidth == TgtNumElems * TgtElemBitWidth;
2961	bool BegIsAligned = `0` == ((SrcElemBitWidth * BegIdx) % TgtElemBitWidth);
2962	if (!VecBitWidthsEqual)
2963	continue;
2964	if (!VectorType::isValidElementType(ElemTy: TgtTy))
2965	continue;
2966	auto *CastSrcTy = FixedVectorType::get(ElementType: TgtTy, NumElts: TgtNumElems);
2967	if (!BegIsAligned) {
2968	// Shuffle the input so [0,NumElements) contains the output, and
2969	// [NumElems,SrcNumElems) is undef.
2970	SmallVector<int, `16`> ShuffleMask(SrcNumElems, -`1`);
2971	for (unsigned I = `0`, E = MaskElems, Idx = BegIdx; I != E; ++Idx, ++I)
2972	ShuffleMask [I] = Idx;
2973	V = Builder.CreateShuffleVector(V, Mask: ShuffleMask,
2974	Name: SVI.getName() + ".extract");
2975	BegIdx = `0`;
2976	}
2977	unsigned SrcElemsPerTgtElem = TgtElemBitWidth / SrcElemBitWidth;
2978	assert(SrcElemsPerTgtElem);
2979	BegIdx /= SrcElemsPerTgtElem;
2980	bool BCAlreadyExists = NewBCs.contains(Val: CastSrcTy);
2981	auto *NewBC =
2982	BCAlreadyExists
2983	? NewBCs [CastSrcTy]
2984	: Builder.CreateBitCast(V, DestTy: CastSrcTy, Name: SVI.getName() + ".bc");
2985	if (!BCAlreadyExists)
2986	NewBCs [CastSrcTy] = NewBC;
2987	auto *Ext = Builder.CreateExtractElement(Vec: NewBC, Idx: BegIdx,
2988	Name: SVI.getName() + ".extract");
2989	// The shufflevector isn't being replaced: the bitcast that used it
2990	// is. InstCombine will visit the newly-created instructions.
2991	replaceInstUsesWith(I&: *BC, V: Ext);
2992	MadeChange = true;
2993	}
2994	}
2995
2996	// If the LHS is a shufflevector itself, see if we can combine it with this
2997	// one without producing an unusual shuffle.
2998	// Cases that might be simplified:
2999	// 1.
3000	// x1=shuffle(v1,v2,mask1)
3001	// x=shuffle(x1,undef,mask)
3002	// ==>
3003	// x=shuffle(v1,undef,newMask)
3004	// newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : -1
3005	// 2.
3006	// x1=shuffle(v1,undef,mask1)
3007	// x=shuffle(x1,x2,mask)
3008	// where v1.size() == mask1.size()
3009	// ==>
3010	// x=shuffle(v1,x2,newMask)
3011	// newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : mask[i]
3012	// 3.
3013	// x2=shuffle(v2,undef,mask2)
3014	// x=shuffle(x1,x2,mask)
3015	// where v2.size() == mask2.size()
3016	// ==>
3017	// x=shuffle(x1,v2,newMask)
3018	// newMask[i] = (mask[i] < x1.size())
3019	// ? mask[i] : mask2[mask[i]-x1.size()]+x1.size()
3020	// 4.
3021	// x1=shuffle(v1,undef,mask1)
3022	// x2=shuffle(v2,undef,mask2)
3023	// x=shuffle(x1,x2,mask)
3024	// where v1.size() == v2.size()
3025	// ==>
3026	// x=shuffle(v1,v2,newMask)
3027	// newMask[i] = (mask[i] < x1.size())
3028	// ? mask1[mask[i]] : mask2[mask[i]-x1.size()]+v1.size()
3029	//
3030	// Here we are really conservative:
3031	// we are absolutely afraid of producing a shuffle mask not in the input
3032	// program, because the code gen may not be smart enough to turn a merged
3033	// shuffle into two specific shuffles: it may produce worse code. As such,
3034	// we only merge two shuffles if the result is either a splat or one of the
3035	// input shuffle masks. In this case, merging the shuffles just removes
3036	// one instruction, which we know is safe. This is good for things like
3037	// turning: (splat(splat)) -> splat, or
3038	// merge(V[0..n], V[n+1..2n]) -> V[0..2n]
3039	ShuffleVectorInst* LHSShuffle = dyn_cast<ShuffleVectorInst>(Val: LHS);
3040	ShuffleVectorInst* RHSShuffle = dyn_cast<ShuffleVectorInst>(Val: RHS);
3041	if (LHSShuffle)
3042	if (!match(V: LHSShuffle->getOperand(i_nocapture: `1`), P: m_Poison()) &&
3043	!match(V: RHS, P: m_Poison()))
3044	LHSShuffle = nullptr;
3045	if (RHSShuffle)
3046	if (!match(V: RHSShuffle->getOperand(i_nocapture: `1`), P: m_Poison()))
3047	RHSShuffle = nullptr;
3048	if (!LHSShuffle && !RHSShuffle)
3049	return MadeChange ? &SVI : nullptr;
3050
3051	Value* LHSOp0 = nullptr;
3052	Value* LHSOp1 = nullptr;
3053	Value* RHSOp0 = nullptr;
3054	unsigned LHSOp0Width = `0`;
3055	unsigned RHSOp0Width = `0`;
3056	if (LHSShuffle) {
3057	LHSOp0 = LHSShuffle->getOperand(i_nocapture: `0`);
3058	LHSOp1 = LHSShuffle->getOperand(i_nocapture: `1`);
3059	LHSOp0Width = cast<FixedVectorType>(Val: LHSOp0->getType())->getNumElements();
3060	}
3061	if (RHSShuffle) {
3062	RHSOp0 = RHSShuffle->getOperand(i_nocapture: `0`);
3063	RHSOp0Width = cast<FixedVectorType>(Val: RHSOp0->getType())->getNumElements();
3064	}
3065	Value* newLHS = LHS;
3066	Value* newRHS = RHS;
3067	if (LHSShuffle) {
3068	// case 1
3069	if (match(V: RHS, P: m_Poison())) {
3070	newLHS = LHSOp0;
3071	newRHS = LHSOp1;
3072	}
3073	// case 2 or 4
3074	else if (LHSOp0Width == LHSWidth) {
3075	newLHS = LHSOp0;
3076	}
3077	}
3078	// case 3 or 4
3079	if (RHSShuffle && RHSOp0Width == LHSWidth) {
3080	newRHS = RHSOp0;
3081	}
3082	// case 4
3083	if (LHSOp0 == RHSOp0) {
3084	newLHS = LHSOp0;
3085	newRHS = nullptr;
3086	}
3087
3088	if (newLHS == LHS && newRHS == RHS)
3089	return MadeChange ? &SVI : nullptr;
3090
3091	ArrayRef<int> LHSMask;
3092	ArrayRef<int> RHSMask;
3093	if (newLHS != LHS)
3094	LHSMask = LHSShuffle->getShuffleMask();
3095	if (RHSShuffle && newRHS != RHS)
3096	RHSMask = RHSShuffle->getShuffleMask();
3097
3098	unsigned newLHSWidth = (newLHS != LHS) ? LHSOp0Width : LHSWidth;
3099	SmallVector<int, `16`> newMask;
3100	bool isSplat = true;
3101	int SplatElt = -`1`;
3102	// Create a new mask for the new ShuffleVectorInst so that the new
3103	// ShuffleVectorInst is equivalent to the original one.
3104	for (unsigned i = `0`; i < VWidth; ++i) {
3105	int eltMask;
3106	if (Mask [i] < `0`) {
3107	// This element is a poison value.
3108	eltMask = -`1`;
3109	} else if (Mask [i] < (int)LHSWidth) {
3110	// This element is from left hand side vector operand.
3111	//
3112	// If LHS is going to be replaced (case 1, 2, or 4), calculate the
3113	// new mask value for the element.
3114	if (newLHS != LHS) {
3115	eltMask = LHSMask [Mask [i]];
3116	// If the value selected is an poison value, explicitly specify it
3117	// with a -1 mask value.
3118	if (eltMask >= (int)LHSOp0Width && isa<PoisonValue>(Val: LHSOp1))
3119	eltMask = -`1`;
3120	} else
3121	eltMask = Mask [i];
3122	} else {
3123	// This element is from right hand side vector operand
3124	//
3125	// If the value selected is a poison value, explicitly specify it
3126	// with a -1 mask value. (case 1)
3127	if (match(V: RHS, P: m_Poison()))
3128	eltMask = -`1`;
3129	// If RHS is going to be replaced (case 3 or 4), calculate the
3130	// new mask value for the element.
3131	else if (newRHS != RHS) {
3132	eltMask = RHSMask [Mask [i]-LHSWidth];
3133	// If the value selected is an poison value, explicitly specify it
3134	// with a -1 mask value.
3135	if (eltMask >= (int)RHSOp0Width) {
3136	assert(match(RHSShuffle->getOperand(`1`), m_Poison()) &&
3137	"should have been check above");
3138	eltMask = -`1`;
3139	}
3140	} else
3141	eltMask = Mask [i]-LHSWidth;
3142
3143	// If LHS's width is changed, shift the mask value accordingly.
3144	// If newRHS == nullptr, i.e. LHSOp0 == RHSOp0, we want to remap any
3145	// references from RHSOp0 to LHSOp0, so we don't need to shift the mask.
3146	// If newRHS == newLHS, we want to remap any references from newRHS to
3147	// newLHS so that we can properly identify splats that may occur due to
3148	// obfuscation across the two vectors.
3149	if (eltMask >= `0` && newRHS != nullptr && newLHS != newRHS)
3150	eltMask += newLHSWidth;
3151	}
3152
3153	// Check if this could still be a splat.
3154	if (eltMask >= `0`) {
3155	if (SplatElt >= `0` && SplatElt != eltMask)
3156	isSplat = false;
3157	SplatElt = eltMask;
3158	}
3159
3160	newMask.push_back(Elt: eltMask);
3161	}
3162
3163	// If the result mask is equal to one of the original shuffle masks,
3164	// or is a splat, do the replacement.
3165	if (isSplat \|\| newMask == LHSMask \|\| newMask == RHSMask \|\| newMask == Mask) {
3166	if (!newRHS)
3167	newRHS = PoisonValue::get(T: newLHS->getType());
3168	return new ShuffleVectorInst (newLHS, newRHS, newMask);
3169	}
3170
3171	return MadeChange ? &SVI : nullptr;
3172	}
3173

Browse the source code of llvm_projects/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp