| 1 | //===- InstCombineVectorOps.cpp -------------------------------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // This file implements instcombine for ExtractElement, InsertElement and |
| 10 | // ShuffleVector. |
| 11 | // |
| 12 | //===----------------------------------------------------------------------===// |
| 13 | |
| 14 | #include "InstCombineInternal.h" |
| 15 | #include "llvm/ADT/APInt.h" |
| 16 | #include "llvm/ADT/ArrayRef.h" |
| 17 | #include "llvm/ADT/DenseMap.h" |
| 18 | #include "llvm/ADT/STLExtras.h" |
| 19 | #include "llvm/ADT/SmallBitVector.h" |
| 20 | #include "llvm/ADT/SmallVector.h" |
| 21 | #include "llvm/ADT/Statistic.h" |
| 22 | #include "llvm/Analysis/InstructionSimplify.h" |
| 23 | #include "llvm/Analysis/VectorUtils.h" |
| 24 | #include "llvm/IR/BasicBlock.h" |
| 25 | #include "llvm/IR/Constant.h" |
| 26 | #include "llvm/IR/Constants.h" |
| 27 | #include "llvm/IR/DerivedTypes.h" |
| 28 | #include "llvm/IR/InstrTypes.h" |
| 29 | #include "llvm/IR/Instruction.h" |
| 30 | #include "llvm/IR/Instructions.h" |
| 31 | #include "llvm/IR/Operator.h" |
| 32 | #include "llvm/IR/PatternMatch.h" |
| 33 | #include "llvm/IR/Type.h" |
| 34 | #include "llvm/IR/User.h" |
| 35 | #include "llvm/IR/Value.h" |
| 36 | #include "llvm/Support/Casting.h" |
| 37 | #include "llvm/Support/ErrorHandling.h" |
| 38 | #include "llvm/Transforms/InstCombine/InstCombiner.h" |
| 39 | #include <cassert> |
| 40 | #include <cstdint> |
| 41 | #include <iterator> |
| 42 | #include <utility> |
| 43 | |
| 44 | #define DEBUG_TYPE "instcombine" |
| 45 | |
| 46 | using namespace llvm; |
| 47 | using namespace PatternMatch; |
| 48 | |
| 49 | STATISTIC(NumAggregateReconstructionsSimplified, |
| 50 | "Number of aggregate reconstructions turned into reuse of the " |
| 51 | "original aggregate" ); |
| 52 | |
| 53 | /// Return true if the value is cheaper to scalarize than it is to leave as a |
| 54 | /// vector operation. If the extract index \p EI is a constant integer then |
| 55 | /// some operations may be cheap to scalarize. |
| 56 | /// |
| 57 | /// FIXME: It's possible to create more instructions than previously existed. |
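///
/// For example (illustrative IR only), with a constant extract index \p EI,
/// a single-use vector load and a single-use binop with a constant operand
/// are both considered cheap to scalarize:
///   %v = load <4 x i32>, ptr %p
///   %w = add <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>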
| 58 | static bool cheapToScalarize(Value *V, Value *EI) { |
| 59 | ConstantInt *CEI = dyn_cast<ConstantInt>(Val: EI); |
| 60 | |
| 61 | // If we can pick a scalar constant value out of a vector, that is free. |
| 62 | if (auto *C = dyn_cast<Constant>(Val: V)) |
| 63 | return CEI || C->getSplatValue(); |
| 64 | |
| 65 | if (CEI && match(V, P: m_Intrinsic<Intrinsic::stepvector>())) { |
| 66 | ElementCount EC = cast<VectorType>(Val: V->getType())->getElementCount(); |
| 67 | // Index needs to be lower than the minimum size of the vector, because |
| 68 | // for a scalable vector, the actual vector size is only known at run time. |
| 69 | return CEI->getValue().ult(RHS: EC.getKnownMinValue()); |
| 70 | } |
| 71 | |
| 72 | // An insertelement to the same constant index as our extract will simplify |
| 73 | // to the scalar inserted element. An insertelement to a different constant |
| 74 | // index is irrelevant to our extract. |
| 75 | if (match(V, P: m_InsertElt(Val: m_Value(), Elt: m_Value(), Idx: m_ConstantInt()))) |
| 76 | return CEI; |
| 77 | |
| 78 | if (match(V, P: m_OneUse(SubPattern: m_Load(Op: m_Value())))) |
| 79 | return true; |
| 80 | |
| 81 | if (match(V, P: m_OneUse(SubPattern: m_UnOp()))) |
| 82 | return true; |
| 83 | |
| 84 | Value *V0, *V1; |
| 85 | if (match(V, P: m_OneUse(SubPattern: m_BinOp(L: m_Value(V&: V0), R: m_Value(V&: V1))))) |
| 86 | if (cheapToScalarize(V: V0, EI) || cheapToScalarize(V: V1, EI)) |
| 87 | return true; |
| 88 | |
| 89 | CmpPredicate UnusedPred; |
| 90 | if (match(V, P: m_OneUse(SubPattern: m_Cmp(Pred&: UnusedPred, L: m_Value(V&: V0), R: m_Value(V&: V1))))) |
| 91 | if (cheapToScalarize(V: V0, EI) || cheapToScalarize(V: V1, EI)) |
| 92 | return true; |
| 93 | |
| 94 | return false; |
| 95 | } |
| 96 | |
| 97 | // If we have a PHI node with a vector type that is only used to feed |
| 98 | // itself and be an operand of extractelement at a constant location, |
| 99 | // try to replace the PHI of the vector type with a PHI of a scalar type. |
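//
// For example (illustrative IR only), a loop-carried vector PHI whose only
// users are one feeding binop and extracts at a single constant lane:
//   %vec = phi <4 x i32> [ %init, %entry ], [ %vec.next, %loop ]
//   %vec.next = add <4 x i32> %vec, <i32 1, i32 1, i32 1, i32 1>
//   %r = extractelement <4 x i32> %vec, i64 0
// can be rewritten to carry only lane 0 through a scalar i32 PHI.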
| 100 | Instruction *InstCombinerImpl::scalarizePHI(ExtractElementInst &EI, |
| 101 | PHINode *PN) { |
| 102 | SmallVector<Instruction *, 2> Extracts;
| 103 | // The users we want the PHI to have are: |
| 104 | // 1) The EI ExtractElement (we already know this) |
| 105 | // 2) Possibly more ExtractElements with the same index. |
| 106 | // 3) Another operand, which will feed back into the PHI. |
| 107 | Instruction *PHIUser = nullptr; |
| 108 | for (auto *U : PN->users()) { |
| 109 | if (ExtractElementInst *EU = dyn_cast<ExtractElementInst>(Val: U)) { |
| 110 | if (EI.getIndexOperand() == EU->getIndexOperand()) |
| 111 | Extracts.push_back(Elt: EU); |
| 112 | else |
| 113 | return nullptr; |
| 114 | } else if (!PHIUser) { |
| 115 | PHIUser = cast<Instruction>(Val: U); |
| 116 | } else { |
| 117 | return nullptr; |
| 118 | } |
| 119 | } |
| 120 | |
| 121 | if (!PHIUser) |
| 122 | return nullptr; |
| 123 | |
| 124 | // Verify that this PHI user has one use, which is the PHI itself, |
| 125 | // and that it is a binary operation which is cheap to scalarize. |
| 126 | // Otherwise return nullptr. |
| 127 | if (!PHIUser->hasOneUse() || !(PHIUser->user_back() == PN) || |
| 128 | !(isa<BinaryOperator>(Val: PHIUser)) || |
| 129 | !cheapToScalarize(V: PHIUser, EI: EI.getIndexOperand())) |
| 130 | return nullptr; |
| 131 | |
| 132 | // Create a scalar PHI node that will replace the vector PHI node |
| 133 | // just before the current PHI node. |
| 134 | PHINode *scalarPHI = cast<PHINode>(Val: InsertNewInstWith( |
| 135 | New: PHINode::Create(Ty: EI.getType(), NumReservedValues: PN->getNumIncomingValues(), NameStr: "" ), Old: PN->getIterator())); |
| 136 | // Scalarize each PHI operand. |
| 137 | for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) { |
| 138 | Value *PHIInVal = PN->getIncomingValue(i); |
| 139 | BasicBlock *inBB = PN->getIncomingBlock(i); |
| 140 | Value *Elt = EI.getIndexOperand(); |
| 141 | // If the operand is the PHI induction variable: |
| 142 | if (PHIInVal == PHIUser) { |
| 143 | // Scalarize the binary operation. Its first operand is the |
| 144 | // scalar PHI, and the second operand is extracted from the other |
| 145 | // vector operand. |
| 146 | BinaryOperator *B0 = cast<BinaryOperator>(Val: PHIUser); |
| 147 | unsigned opId = (B0->getOperand(i_nocapture: 0) == PN) ? 1 : 0; |
| 148 | Value *Op = InsertNewInstWith( |
| 149 | New: ExtractElementInst::Create(Vec: B0->getOperand(i_nocapture: opId), Idx: Elt, |
| 150 | NameStr: B0->getOperand(i_nocapture: opId)->getName() + ".Elt" ), |
| 151 | Old: B0->getIterator()); |
| 152 | Value *newPHIUser = InsertNewInstWith( |
| 153 | New: BinaryOperator::CreateWithCopiedFlags(Opc: B0->getOpcode(), |
| 154 | V1: scalarPHI, V2: Op, CopyO: B0), Old: B0->getIterator()); |
| 155 | scalarPHI->addIncoming(V: newPHIUser, BB: inBB); |
| 156 | } else { |
| 157 | // Scalarize PHI input: |
| 158 | Instruction *newEI = ExtractElementInst::Create(Vec: PHIInVal, Idx: Elt, NameStr: "" ); |
| 159 | // Insert the new instruction into the predecessor basic block. |
| 160 | Instruction *pos = dyn_cast<Instruction>(Val: PHIInVal); |
| 161 | BasicBlock::iterator InsertPos; |
| 162 | if (pos && !isa<PHINode>(Val: pos)) { |
| 163 | InsertPos = ++pos->getIterator(); |
| 164 | } else { |
| 165 | InsertPos = inBB->getFirstInsertionPt(); |
| 166 | } |
| 167 | |
| 168 | InsertNewInstWith(New: newEI, Old: InsertPos); |
| 169 | |
| 170 | scalarPHI->addIncoming(V: newEI, BB: inBB); |
| 171 | } |
| 172 | } |
| 173 | |
| 174 | for (auto *E : Extracts) { |
| 175 | replaceInstUsesWith(I&: *E, V: scalarPHI); |
| 176 | // Add old extract to worklist for DCE. |
| 177 | addToWorklist(I: E); |
| 178 | } |
| 179 | |
| 180 | return &EI; |
| 181 | } |
| 182 | |
| 183 | Instruction *InstCombinerImpl::foldBitcastExtElt(ExtractElementInst &Ext) { |
| 184 | Value *X; |
| 185 | uint64_t ExtIndexC; |
| 186 | if (!match(V: Ext.getVectorOperand(), P: m_BitCast(Op: m_Value(V&: X))) || |
| 187 | !match(V: Ext.getIndexOperand(), P: m_ConstantInt(V&: ExtIndexC))) |
| 188 | return nullptr; |
| 189 | |
| 190 | ElementCount NumElts = |
| 191 | cast<VectorType>(Val: Ext.getVectorOperandType())->getElementCount(); |
| 192 | Type *DestTy = Ext.getType(); |
| 193 | unsigned DestWidth = DestTy->getPrimitiveSizeInBits(); |
| 194 | bool IsBigEndian = DL.isBigEndian(); |
| 195 | |
| 196 | // If we are casting an integer to vector and extracting a portion, that is |
| 197 | // a shift-right and truncate. |
| 198 | if (X->getType()->isIntegerTy()) { |
| 199 | assert(isa<FixedVectorType>(Ext.getVectorOperand()->getType()) && |
| 200 | "Expected fixed vector type for bitcast from scalar integer" ); |
| 201 | |
| 202 | // Big endian requires adjusting the extract index since MSB is at index 0. |
| 203 | // LittleEndian: extelt (bitcast i32 X to v4i8), 0 -> trunc i32 X to i8 |
| 204 | // BigEndian: extelt (bitcast i32 X to v4i8), 0 -> trunc i32 (X >> 24) to i8 |
| 205 | if (IsBigEndian) |
| 206 | ExtIndexC = NumElts.getKnownMinValue() - 1 - ExtIndexC; |
| 207 | unsigned ShiftAmountC = ExtIndexC * DestWidth; |
| 208 | if ((!ShiftAmountC || |
| 209 | isDesirableIntType(BitWidth: X->getType()->getPrimitiveSizeInBits())) && |
| 210 | Ext.getVectorOperand()->hasOneUse()) { |
| 211 | if (ShiftAmountC) |
| 212 | X = Builder.CreateLShr(LHS: X, RHS: ShiftAmountC, Name: "extelt.offset" ); |
| 213 | if (DestTy->isFloatingPointTy()) { |
| 214 | Type *DstIntTy = IntegerType::getIntNTy(C&: X->getContext(), N: DestWidth); |
| 215 | Value *Trunc = Builder.CreateTrunc(V: X, DestTy: DstIntTy); |
| 216 | return new BitCastInst(Trunc, DestTy); |
| 217 | } |
| 218 | return new TruncInst(X, DestTy); |
| 219 | } |
| 220 | } |
| 221 | |
| 222 | if (!X->getType()->isVectorTy()) |
| 223 | return nullptr; |
| 224 | |
| 225 | // If this extractelement is using a bitcast from a vector of the same number |
| 226 | // of elements, see if we can find the source element from the source vector: |
| 227 | // extelt (bitcast VecX), IndexC --> bitcast X[IndexC] |
| 228 | auto *SrcTy = cast<VectorType>(Val: X->getType()); |
| 229 | ElementCount NumSrcElts = SrcTy->getElementCount(); |
| 230 | if (NumSrcElts == NumElts) |
| 231 | if (Value *Elt = findScalarElement(V: X, EltNo: ExtIndexC)) |
| 232 | return new BitCastInst(Elt, DestTy); |
| 233 | |
| 234 | assert(NumSrcElts.isScalable() == NumElts.isScalable() && |
| 235 | "Src and Dst must be the same sort of vector type" ); |
| 236 | |
| 237 | // If the source elements are wider than the destination, try to shift and |
| 238 | // truncate a subset of scalar bits of an insert op. |
| 239 | if (NumSrcElts.getKnownMinValue() < NumElts.getKnownMinValue()) { |
| 240 | Value *Scalar; |
| 241 | Value *Vec; |
| 242 | uint64_t InsIndexC; |
| 243 | if (!match(V: X, P: m_InsertElt(Val: m_Value(V&: Vec), Elt: m_Value(V&: Scalar), |
| 244 | Idx: m_ConstantInt(V&: InsIndexC)))) |
| 245 | return nullptr; |
| 246 | |
| 247 | // The extract must be from the subset of vector elements that we inserted |
| 248 | // into. Example: if we inserted element 1 of a <2 x i64> and we are |
| 249 | // extracting an i16 (narrowing ratio = 4), then this extract must be from 1 |
| 250 | // of elements 4-7 of the bitcasted vector. |
| 251 | unsigned NarrowingRatio = |
| 252 | NumElts.getKnownMinValue() / NumSrcElts.getKnownMinValue(); |
| 253 | |
| 254 | if (ExtIndexC / NarrowingRatio != InsIndexC) { |
| 255 | // Remove insertelement, if we don't use the inserted element. |
| 256 | // extractelement (bitcast (insertelement (Vec, b)), a) -> |
| 257 | // extractelement (bitcast (Vec), a) |
| 258 | // FIXME: this should be moved to SimplifyDemandedVectorElts, |
| 259 | // once scalable vectors are supported. |
| 260 | if (X->hasOneUse() && Ext.getVectorOperand()->hasOneUse()) { |
| 261 | Value *NewBC = Builder.CreateBitCast(V: Vec, DestTy: Ext.getVectorOperandType()); |
| 262 | return ExtractElementInst::Create(Vec: NewBC, Idx: Ext.getIndexOperand()); |
| 263 | } |
| 264 | return nullptr; |
| 265 | } |
| 266 | |
| 267 | // We are extracting part of the original scalar. How that scalar is |
| 268 | // inserted into the vector depends on the endian-ness. Example: |
| 269 | // Vector Byte Elt Index: 0 1 2 3 4 5 6 7 |
| 270 | // +--+--+--+--+--+--+--+--+ |
| 271 | // inselt <2 x i32> V, <i32> S, 1: |V0|V1|V2|V3|S0|S1|S2|S3| |
| 272 | // extelt <4 x i16> V', 3: | |S2|S3| |
| 273 | // +--+--+--+--+--+--+--+--+ |
| 274 | // If this is little-endian, S2|S3 are the MSB of the 32-bit 'S' value. |
| 275 | // If this is big-endian, S2|S3 are the LSB of the 32-bit 'S' value. |
| 276 | // In this example, we must right-shift little-endian. Big-endian is just a |
| 277 | // truncate. |
| 278 | unsigned Chunk = ExtIndexC % NarrowingRatio; |
| 279 | if (IsBigEndian) |
| 280 | Chunk = NarrowingRatio - 1 - Chunk; |
| 281 | |
| 282 | // Bail out if this is an FP vector to FP vector sequence. That would take |
| 283 | // more instructions than we started with unless there is no shift, and it |
| 284 | // may not be handled as well in the backend. |
| 285 | bool NeedSrcBitcast = SrcTy->getScalarType()->isFloatingPointTy(); |
| 286 | bool NeedDestBitcast = DestTy->isFloatingPointTy(); |
| 287 | if (NeedSrcBitcast && NeedDestBitcast) |
| 288 | return nullptr; |
| 289 | |
| 290 | unsigned SrcWidth = SrcTy->getScalarSizeInBits(); |
| 291 | unsigned ShAmt = Chunk * DestWidth; |
| 292 | |
| 293 | // TODO: This limitation is more strict than necessary. We could sum the |
| 294 | // number of new instructions and subtract the number eliminated to know if |
| 295 | // we can proceed. |
| 296 | if (!X->hasOneUse() || !Ext.getVectorOperand()->hasOneUse()) |
| 297 | if (NeedSrcBitcast || NeedDestBitcast) |
| 298 | return nullptr; |
| 299 | |
| 300 | if (NeedSrcBitcast) { |
| 301 | Type *SrcIntTy = IntegerType::getIntNTy(C&: Scalar->getContext(), N: SrcWidth); |
| 302 | Scalar = Builder.CreateBitCast(V: Scalar, DestTy: SrcIntTy); |
| 303 | } |
| 304 | |
| 305 | if (ShAmt) { |
| 306 | // Bail out if we could end with more instructions than we started with. |
| 307 | if (!Ext.getVectorOperand()->hasOneUse()) |
| 308 | return nullptr; |
| 309 | Scalar = Builder.CreateLShr(LHS: Scalar, RHS: ShAmt); |
| 310 | } |
| 311 | |
| 312 | if (NeedDestBitcast) { |
| 313 | Type *DestIntTy = IntegerType::getIntNTy(C&: Scalar->getContext(), N: DestWidth); |
| 314 | return new BitCastInst(Builder.CreateTrunc(V: Scalar, DestTy: DestIntTy), DestTy); |
| 315 | } |
| 316 | return new TruncInst(Scalar, DestTy); |
| 317 | } |
| 318 | |
| 319 | return nullptr; |
| 320 | } |
| 321 | |
| 322 | /// Find elements of V demanded by UserInstr. |
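/// For example (illustrative IR only), `extractelement <4 x i32> %v, i64 2`
/// demands only element 2 of %v, and a shufflevector user demands exactly the
/// lanes of %v that its mask selects.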
| 323 | static APInt findDemandedEltsBySingleUser(Value *V, Instruction *UserInstr) { |
| 324 | unsigned VWidth = cast<FixedVectorType>(Val: V->getType())->getNumElements(); |
| 325 | |
| 326 | // Conservatively assume that all elements are needed. |
| 327 | APInt UsedElts(APInt::getAllOnes(numBits: VWidth)); |
| 328 | |
| 329 | switch (UserInstr->getOpcode()) { |
| 330 | case Instruction::ExtractElement: { |
| 331 | ExtractElementInst *EEI = cast<ExtractElementInst>(Val: UserInstr); |
| 332 | assert(EEI->getVectorOperand() == V); |
| 333 | ConstantInt *EEIIndexC = dyn_cast<ConstantInt>(Val: EEI->getIndexOperand()); |
| 334 | if (EEIIndexC && EEIIndexC->getValue().ult(RHS: VWidth)) { |
| 335 | UsedElts = APInt::getOneBitSet(numBits: VWidth, BitNo: EEIIndexC->getZExtValue()); |
| 336 | } |
| 337 | break; |
| 338 | } |
| 339 | case Instruction::ShuffleVector: { |
| 340 | ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(Val: UserInstr); |
| 341 | unsigned MaskNumElts = |
| 342 | cast<FixedVectorType>(Val: UserInstr->getType())->getNumElements(); |
| 343 | |
| 344 | UsedElts = APInt(VWidth, 0); |
| 345 | for (unsigned i = 0; i < MaskNumElts; i++) { |
| 346 | unsigned MaskVal = Shuffle->getMaskValue(Elt: i); |
| 347 | if (MaskVal == -1u || MaskVal >= 2 * VWidth) |
| 348 | continue; |
| 349 | if (Shuffle->getOperand(i_nocapture: 0) == V && (MaskVal < VWidth)) |
| 350 | UsedElts.setBit(MaskVal); |
| 351 | if (Shuffle->getOperand(i_nocapture: 1) == V && |
| 352 | ((MaskVal >= VWidth) && (MaskVal < 2 * VWidth))) |
| 353 | UsedElts.setBit(MaskVal - VWidth); |
| 354 | } |
| 355 | break; |
| 356 | } |
| 357 | default: |
| 358 | break; |
| 359 | } |
| 360 | return UsedElts; |
| 361 | } |
| 362 | |
| 363 | /// Find union of elements of V demanded by all its users. |
| 364 | /// If it is known by querying findDemandedEltsBySingleUser that |
| 365 | /// no user demands an element of V, then the corresponding bit |
| 366 | /// remains unset in the returned value. |
| 367 | static APInt findDemandedEltsByAllUsers(Value *V) { |
| 368 | unsigned VWidth = cast<FixedVectorType>(Val: V->getType())->getNumElements(); |
| 369 | |
| 370 | APInt UnionUsedElts(VWidth, 0); |
| 371 | for (const Use &U : V->uses()) { |
| 372 | if (Instruction *I = dyn_cast<Instruction>(Val: U.getUser())) { |
| 373 | UnionUsedElts |= findDemandedEltsBySingleUser(V, UserInstr: I); |
| 374 | } else { |
| 375 | UnionUsedElts = APInt::getAllOnes(numBits: VWidth); |
| 376 | break; |
| 377 | } |
| 378 | |
| 379 | if (UnionUsedElts.isAllOnes()) |
| 380 | break; |
| 381 | } |
| 382 | |
| 383 | return UnionUsedElts; |
| 384 | } |
| 385 | |
| 386 | /// Given a constant index for an extractelement or insertelement instruction, |
| 387 | /// return it with the canonical type if it isn't already canonical. We |
| 388 | /// arbitrarily pick 64 bit as our canonical type. The actual bitwidth doesn't |
| 389 | /// matter, we just want a consistent type to simplify CSE. |
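/// For example (illustrative IR only), the i32 index in
///   %e = extractelement <4 x i32> %v, i32 1
/// is canonicalized to i64 1; an index that is already i64 (or that needs
/// more than 64 bits) is left unchanged.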
| 390 | static ConstantInt *getPreferredVectorIndex(ConstantInt *IndexC) { |
| 391 | const unsigned IndexBW = IndexC->getBitWidth(); |
| 392 | if (IndexBW == 64 || IndexC->getValue().getActiveBits() > 64) |
| 393 | return nullptr; |
| 394 | return ConstantInt::get(Context&: IndexC->getContext(), |
| 395 | V: IndexC->getValue().zextOrTrunc(width: 64)); |
| 396 | } |
| 397 | |
| 398 | Instruction *InstCombinerImpl::visitExtractElementInst(ExtractElementInst &EI) { |
| 399 | Value *SrcVec = EI.getVectorOperand(); |
| 400 | Value *Index = EI.getIndexOperand(); |
| 401 | if (Value *V = simplifyExtractElementInst(Vec: SrcVec, Idx: Index, |
| 402 | Q: SQ.getWithInstruction(I: &EI))) |
| 403 | return replaceInstUsesWith(I&: EI, V); |
| 404 | |
| 405 | // extractelt (select %x, %vec1, %vec2), %const -> |
| 406 | // select %x, %vec1[%const], %vec2[%const] |
| 407 | // TODO: Support constant folding of multiple select operands: |
| 408 | // extractelt (select %x, %vec1, %vec2), (select %x, %c1, %c2) |
| 409 | // If the extractelement will for instance try to do out of bounds accesses |
| 410 | // because of the values of %c1 and/or %c2, the sequence could be optimized |
| 411 | // early. This is currently not possible because constant folding will reach |
| 412 | // an unreachable assertion if it doesn't find a constant operand. |
| 413 | if (SelectInst *SI = dyn_cast<SelectInst>(Val: EI.getVectorOperand())) |
| 414 | if (SI->getCondition()->getType()->isIntegerTy() && |
| 415 | isa<Constant>(Val: EI.getIndexOperand())) |
| 416 | if (Instruction *R = FoldOpIntoSelect(Op&: EI, SI)) |
| 417 | return R; |
| 418 | |
| 419 | // If extracting a specified index from the vector, see if we can recursively |
| 420 | // find a previously computed scalar that was inserted into the vector. |
| 421 | auto *IndexC = dyn_cast<ConstantInt>(Val: Index); |
| 422 | bool HasKnownValidIndex = false; |
| 423 | if (IndexC) { |
| 424 | // Canonicalize type of constant indices to i64 to simplify CSE |
| 425 | if (auto *NewIdx = getPreferredVectorIndex(IndexC)) |
| 426 | return replaceOperand(I&: EI, OpNum: 1, V: NewIdx); |
| 427 | |
| 428 | ElementCount EC = EI.getVectorOperandType()->getElementCount(); |
| 429 | unsigned NumElts = EC.getKnownMinValue(); |
| 430 | HasKnownValidIndex = IndexC->getValue().ult(RHS: NumElts); |
| 431 | |
| 432 | if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: SrcVec)) { |
| 433 | Intrinsic::ID IID = II->getIntrinsicID(); |
| 434 | // Index needs to be lower than the minimum size of the vector, because |
| 435 | // for a scalable vector, the actual vector size is only known at run time. |
| 436 | if (IID == Intrinsic::stepvector && IndexC->getValue().ult(RHS: NumElts)) { |
| 437 | Type *Ty = EI.getType(); |
| 438 | unsigned BitWidth = Ty->getIntegerBitWidth(); |
| 439 | Value *Idx; |
| 440 | // Return the index when its value does not exceed the allowed limit |
| 441 | // for the element type of the vector; otherwise return poison. |
| 442 | if (IndexC->getValue().getActiveBits() <= BitWidth) |
| 443 | Idx = ConstantInt::get(Ty, V: IndexC->getValue().zextOrTrunc(width: BitWidth)); |
| 444 | else |
| 445 | Idx = PoisonValue::get(T: Ty); |
| 446 | return replaceInstUsesWith(I&: EI, V: Idx); |
| 447 | } |
| 448 | } |
| 449 | |
| 450 | // InstSimplify should handle cases where the index is invalid. |
| 451 | // For fixed-length vectors, it's invalid to extract an out-of-range element. |
| 452 | if (!EC.isScalable() && IndexC->getValue().uge(RHS: NumElts)) |
| 453 | return nullptr; |
| 454 | |
| 455 | if (Instruction *I = foldBitcastExtElt(Ext&: EI)) |
| 456 | return I; |
| 457 | |
| 458 | // If there's a vector PHI feeding a scalar use through this extractelement |
| 459 | // instruction, try to scalarize the PHI. |
| 460 | if (auto *Phi = dyn_cast<PHINode>(Val: SrcVec)) |
| 461 | if (Instruction *ScalarPHI = scalarizePHI(EI, PN: Phi)) |
| 462 | return ScalarPHI; |
| 463 | } |
| 464 | |
| 465 | // TODO: come up with an n-ary matcher that subsumes both unary and |
| 466 | // binary matchers. |
| 467 | UnaryOperator *UO; |
| 468 | if (match(V: SrcVec, P: m_UnOp(I&: UO)) && cheapToScalarize(V: SrcVec, EI: Index)) { |
| 469 | // extelt (unop X), Index --> unop (extelt X, Index) |
| 470 | Value *X = UO->getOperand(i_nocapture: 0); |
| 471 | Value *E = Builder.CreateExtractElement(Vec: X, Idx: Index); |
| 472 | return UnaryOperator::CreateWithCopiedFlags(Opc: UO->getOpcode(), V: E, CopyO: UO); |
| 473 | } |
| 474 | |
| 475 | // If the binop is not speculatable, we cannot hoist the extractelement if |
| 476 | // it may make the operand poison. |
| 477 | BinaryOperator *BO; |
| 478 | if (match(V: SrcVec, P: m_BinOp(I&: BO)) && cheapToScalarize(V: SrcVec, EI: Index) && |
| 479 | (HasKnownValidIndex || |
| 480 | isSafeToSpeculativelyExecuteWithVariableReplaced(I: BO))) { |
| 481 | // extelt (binop X, Y), Index --> binop (extelt X, Index), (extelt Y, Index) |
| 482 | Value *X = BO->getOperand(i_nocapture: 0), *Y = BO->getOperand(i_nocapture: 1); |
| 483 | Value *E0 = Builder.CreateExtractElement(Vec: X, Idx: Index); |
| 484 | Value *E1 = Builder.CreateExtractElement(Vec: Y, Idx: Index); |
| 485 | return BinaryOperator::CreateWithCopiedFlags(Opc: BO->getOpcode(), V1: E0, V2: E1, CopyO: BO); |
| 486 | } |
| 487 | |
| 488 | Value *X, *Y; |
| 489 | CmpPredicate Pred; |
| 490 | if (match(V: SrcVec, P: m_Cmp(Pred, L: m_Value(V&: X), R: m_Value(V&: Y))) && |
| 491 | cheapToScalarize(V: SrcVec, EI: Index)) { |
| 492 | // extelt (cmp X, Y), Index --> cmp (extelt X, Index), (extelt Y, Index) |
| 493 | Value *E0 = Builder.CreateExtractElement(Vec: X, Idx: Index); |
| 494 | Value *E1 = Builder.CreateExtractElement(Vec: Y, Idx: Index); |
| 495 | CmpInst *SrcCmpInst = cast<CmpInst>(Val: SrcVec); |
| 496 | return CmpInst::CreateWithCopiedFlags(Op: SrcCmpInst->getOpcode(), Pred, S1: E0, S2: E1, |
| 497 | FlagsSource: SrcCmpInst); |
| 498 | } |
| 499 | |
| 500 | if (auto *I = dyn_cast<Instruction>(Val: SrcVec)) { |
| 501 | if (auto *IE = dyn_cast<InsertElementInst>(Val: I)) { |
| 502 | // InstSimplify already handled the case where the indices are constants |
| 503 | // and equal by value. So if both are constants here, they must not be the |
| 504 | // same value; extract from the pre-inserted value instead. |
| 505 | if (isa<Constant>(Val: IE->getOperand(i_nocapture: 2)) && IndexC) |
| 506 | return replaceOperand(I&: EI, OpNum: 0, V: IE->getOperand(i_nocapture: 0)); |
| 507 | } else if (auto *GEP = dyn_cast<GetElementPtrInst>(Val: I)) { |
| 508 | auto *VecType = cast<VectorType>(Val: GEP->getType()); |
| 509 | ElementCount EC = VecType->getElementCount(); |
| 510 | uint64_t IdxVal = IndexC ? IndexC->getZExtValue() : 0; |
| 511 | if (IndexC && IdxVal < EC.getKnownMinValue() && GEP->hasOneUse()) { |
| 512 | // Find out why we have a vector result - these are a few examples: |
| 513 | // 1. We have a scalar pointer and a vector of indices, or |
| 514 | // 2. We have a vector of pointers and a scalar index, or |
| 515 | // 3. We have a vector of pointers and a vector of indices, etc. |
| 516 | // Here we only consider combining when there is exactly one vector |
| 517 | // operand, since the optimization is less obviously a win due to |
| 518 | // needing more than one extractelement. |
| 519 | |
| 520 | unsigned VectorOps = |
| 521 | llvm::count_if(Range: GEP->operands(), P: [](const Value *V) { |
| 522 | return isa<VectorType>(Val: V->getType()); |
| 523 | }); |
| 524 | if (VectorOps == 1) { |
| 525 | Value *NewPtr = GEP->getPointerOperand(); |
| 526 | if (isa<VectorType>(Val: NewPtr->getType())) |
| 527 | NewPtr = Builder.CreateExtractElement(Vec: NewPtr, Idx: IndexC); |
| 528 | |
| 529 | SmallVector<Value *> NewOps; |
| 530 | for (unsigned I = 1; I != GEP->getNumOperands(); ++I) { |
| 531 | Value *Op = GEP->getOperand(i_nocapture: I); |
| 532 | if (isa<VectorType>(Val: Op->getType())) |
| 533 | NewOps.push_back(Elt: Builder.CreateExtractElement(Vec: Op, Idx: IndexC)); |
| 534 | else |
| 535 | NewOps.push_back(Elt: Op); |
| 536 | } |
| 537 | |
| 538 | GetElementPtrInst *NewGEP = GetElementPtrInst::Create( |
| 539 | PointeeType: GEP->getSourceElementType(), Ptr: NewPtr, IdxList: NewOps); |
| 540 | NewGEP->setNoWrapFlags(GEP->getNoWrapFlags()); |
| 541 | return NewGEP; |
| 542 | } |
| 543 | } |
| 544 | } else if (auto *SVI = dyn_cast<ShuffleVectorInst>(Val: I)) { |
| 545 | // If this is extracting an element from a shufflevector, figure out where |
| 546 | // it came from and extract from the appropriate input element instead. |
| 547 | // Restrict the following transformation to fixed-length vectors. |
| 548 | if (isa<FixedVectorType>(Val: SVI->getType()) && isa<ConstantInt>(Val: Index)) { |
| 549 | int SrcIdx = |
| 550 | SVI->getMaskValue(Elt: cast<ConstantInt>(Val: Index)->getZExtValue()); |
| 551 | Value *Src; |
| 552 | unsigned LHSWidth = cast<FixedVectorType>(Val: SVI->getOperand(i_nocapture: 0)->getType()) |
| 553 | ->getNumElements(); |
| 554 | |
| 555 | if (SrcIdx < 0) |
| 556 | return replaceInstUsesWith(I&: EI, V: PoisonValue::get(T: EI.getType())); |
| 557 | if (SrcIdx < (int)LHSWidth) |
| 558 | Src = SVI->getOperand(i_nocapture: 0); |
| 559 | else { |
| 560 | SrcIdx -= LHSWidth; |
| 561 | Src = SVI->getOperand(i_nocapture: 1); |
| 562 | } |
| 563 | Type *Int64Ty = Type::getInt64Ty(C&: EI.getContext()); |
| 564 | return ExtractElementInst::Create( |
| 565 | Vec: Src, Idx: ConstantInt::get(Ty: Int64Ty, V: SrcIdx, IsSigned: false)); |
| 566 | } |
| 567 | } else if (auto *CI = dyn_cast<CastInst>(Val: I)) { |
| 568 | // Canonicalize extractelement(cast) -> cast(extractelement). |
| 569 | // Bitcasts can change the number of vector elements, and they cost |
| 570 | // nothing. |
| 571 | if (CI->hasOneUse() && (CI->getOpcode() != Instruction::BitCast)) { |
| 572 | Value *EE = Builder.CreateExtractElement(Vec: CI->getOperand(i_nocapture: 0), Idx: Index); |
| 573 | return CastInst::Create(CI->getOpcode(), S: EE, Ty: EI.getType()); |
| 574 | } |
| 575 | } |
| 576 | } |
| 577 | |
| 578 | // Run demanded elements after other transforms as this can drop flags on |
| 579 | // binops. If there are two paths to the same final result, we prefer the |
| 580 | // one which doesn't force us to drop flags. |
| 581 | if (IndexC) { |
| 582 | ElementCount EC = EI.getVectorOperandType()->getElementCount(); |
| 583 | unsigned NumElts = EC.getKnownMinValue(); |
| 584 | // This instruction only demands the single element from the input vector. |
| 585 | // Skip for scalable types, as the number of elements is unknown at |
| 586 | // compile-time. |
| 587 | if (!EC.isScalable() && NumElts != 1) { |
| 588 | // If the input vector has a single use, simplify it based on this use |
| 589 | // property. |
| 590 | if (SrcVec->hasOneUse()) { |
| 591 | APInt PoisonElts(NumElts, 0); |
| 592 | APInt DemandedElts(NumElts, 0); |
| 593 | DemandedElts.setBit(IndexC->getZExtValue()); |
| 594 | if (Value *V = |
| 595 | SimplifyDemandedVectorElts(V: SrcVec, DemandedElts, PoisonElts)) |
| 596 | return replaceOperand(I&: EI, OpNum: 0, V); |
| 597 | } else { |
| 598 | // If the input vector has multiple uses, simplify it based on a union |
| 599 | // of all elements used. |
| 600 | APInt DemandedElts = findDemandedEltsByAllUsers(V: SrcVec); |
| 601 | if (!DemandedElts.isAllOnes()) { |
| 602 | APInt PoisonElts(NumElts, 0); |
| 603 | if (Value *V = SimplifyDemandedVectorElts( |
| 604 | V: SrcVec, DemandedElts, PoisonElts, Depth: 0 /* Depth */, |
| 605 | AllowMultipleUsers: true /* AllowMultipleUsers */)) { |
| 606 | if (V != SrcVec) { |
| 607 | Worklist.addValue(V: SrcVec); |
| 608 | SrcVec->replaceAllUsesWith(V); |
| 609 | return &EI; |
| 610 | } |
| 611 | } |
| 612 | } |
| 613 | } |
| 614 | } |
| 615 | } |
| 616 | return nullptr; |
| 617 | } |
| 618 | |
| 619 | /// If V is a shuffle of values that ONLY returns elements from either LHS or |
| 620 | /// RHS, return the shuffle mask and true. Otherwise, return false. |
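///
/// For example (illustrative IR only), with LHS = %a and RHS = %b, both
/// <4 x i32>:
///   %x = extractelement <4 x i32> %b, i64 0
///   %v = insertelement <4 x i32> %a, i32 %x, i64 3
/// is a single shuffle of %a and %b with mask <0, 1, 2, 4>.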
| 621 | static bool collectSingleShuffleElements(Value *V, Value *LHS, Value *RHS, |
| 622 | SmallVectorImpl<int> &Mask) { |
| 623 | assert(LHS->getType() == RHS->getType() && |
| 624 | "Invalid CollectSingleShuffleElements" ); |
| 625 | unsigned NumElts = cast<FixedVectorType>(Val: V->getType())->getNumElements(); |
| 626 | |
| 627 | if (match(V, P: m_Poison())) { |
| 628 | Mask.assign(NumElts, Elt: -1); |
| 629 | return true; |
| 630 | } |
| 631 | |
| 632 | if (V == LHS) { |
| 633 | for (unsigned i = 0; i != NumElts; ++i) |
| 634 | Mask.push_back(Elt: i); |
| 635 | return true; |
| 636 | } |
| 637 | |
| 638 | if (V == RHS) { |
| 639 | for (unsigned i = 0; i != NumElts; ++i) |
| 640 | Mask.push_back(Elt: i + NumElts); |
| 641 | return true; |
| 642 | } |
| 643 | |
| 644 | if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(Val: V)) { |
| 645 | // If this is an insert of an extract from some other vector, include it. |
| 646 | Value *VecOp = IEI->getOperand(i_nocapture: 0); |
| 647 | Value *ScalarOp = IEI->getOperand(i_nocapture: 1); |
| 648 | Value *IdxOp = IEI->getOperand(i_nocapture: 2); |
| 649 | |
| 650 | if (!isa<ConstantInt>(Val: IdxOp)) |
| 651 | return false; |
| 652 | unsigned InsertedIdx = cast<ConstantInt>(Val: IdxOp)->getZExtValue(); |
| 653 | |
| 654 | if (isa<PoisonValue>(Val: ScalarOp)) { // inserting poison into vector. |
| 655 | // We can handle this if the vector we are inserting into is |
| 656 | // transitively ok. |
| 657 | if (collectSingleShuffleElements(V: VecOp, LHS, RHS, Mask)) { |
| 658 | // If so, update the mask to reflect the inserted poison. |
| 659 | Mask[InsertedIdx] = -1; |
| 660 | return true; |
| 661 | } |
| 662 | } else if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(Val: ScalarOp)){ |
| 663 | if (isa<ConstantInt>(Val: EI->getOperand(i_nocapture: 1))) { |
| 664 | unsigned ExtractedIdx = |
| 665 | cast<ConstantInt>(Val: EI->getOperand(i_nocapture: 1))->getZExtValue(); |
| 666 | unsigned NumLHSElts = |
| 667 | cast<FixedVectorType>(Val: LHS->getType())->getNumElements(); |
| 668 | |
| 669 | // This must be extracting from either LHS or RHS. |
| 670 | if (EI->getOperand(i_nocapture: 0) == LHS || EI->getOperand(i_nocapture: 0) == RHS) { |
| 671 | // We can handle this if the vector we are inserting into is |
| 672 | // transitively ok. |
| 673 | if (collectSingleShuffleElements(V: VecOp, LHS, RHS, Mask)) { |
| 674 | // If so, update the mask to reflect the inserted value. |
| 675 | if (EI->getOperand(i_nocapture: 0) == LHS) { |
| 676 | Mask[InsertedIdx % NumElts] = ExtractedIdx; |
| 677 | } else { |
| 678 | assert(EI->getOperand(0) == RHS); |
| 679 | Mask[InsertedIdx % NumElts] = ExtractedIdx + NumLHSElts; |
| 680 | } |
| 681 | return true; |
| 682 | } |
| 683 | } |
| 684 | } |
| 685 | } |
| 686 | } |
| 687 | |
| 688 | return false; |
| 689 | } |
| 690 | |
| 691 | /// If we have insertion into a vector that is wider than the vector that we |
| 692 | /// are extracting from, try to widen the source vector to allow a single |
| 693 | /// shufflevector to replace one or more insert/extract pairs. |
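///
/// For example (illustrative IR only), when a <2 x i8> extract feeds a
/// <4 x i8> insert:
///   %e = extractelement <2 x i8> %src, i64 0
///   %i = insertelement <4 x i8> %dst, i8 %e, i64 1
/// the narrow %src is first widened with
///   %wide = shufflevector <2 x i8> %src, <2 x i8> poison,
///                         <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
/// so that the extract/insert pair can later be folded into a single shuffle.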
| 694 | static bool replaceExtractElements(InsertElementInst *InsElt, |
| 695 | ExtractElementInst *ExtElt, |
| 696 | InstCombinerImpl &IC) { |
| 697 | auto *InsVecType = cast<FixedVectorType>(Val: InsElt->getType()); |
| 698 | auto *ExtVecType = cast<FixedVectorType>(Val: ExtElt->getVectorOperandType()); |
| 699 | unsigned NumInsElts = InsVecType->getNumElements(); |
| 700 | unsigned NumExtElts = ExtVecType->getNumElements(); |
| 701 | |
| 702 | // The inserted-to vector must be wider than the extracted-from vector. |
| 703 | if (InsVecType->getElementType() != ExtVecType->getElementType() || |
| 704 | NumExtElts >= NumInsElts) |
| 705 | return false; |
| 706 | |
| 707 | // Create a shuffle mask to widen the extracted-from vector using poison |
| 708 | // values. The mask selects all of the values of the original vector followed |
| 709 | // by as many poison values as needed to create a vector of the same length |
| 710 | // as the inserted-to vector. |
| 711 | SmallVector<int, 16> ExtendMask; |
| 712 | for (unsigned i = 0; i < NumExtElts; ++i) |
| 713 | ExtendMask.push_back(Elt: i); |
| 714 | for (unsigned i = NumExtElts; i < NumInsElts; ++i) |
| 715 | ExtendMask.push_back(Elt: -1); |
| 716 | |
| 717 | Value *ExtVecOp = ExtElt->getVectorOperand(); |
| 718 | auto *ExtVecOpInst = dyn_cast<Instruction>(Val: ExtVecOp); |
| 719 | BasicBlock *InsertionBlock = (ExtVecOpInst && !isa<PHINode>(Val: ExtVecOpInst)) |
| 720 | ? ExtVecOpInst->getParent() |
| 721 | : ExtElt->getParent(); |
| 722 | |
| 723 | // TODO: This restriction matches the basic block check below when creating |
| 724 | // new extractelement instructions. If that limitation is removed, this one |
| 725 | // could also be removed. But for now, we just bail out to ensure that we |
| 726 | // will replace the extractelement instruction that is feeding our |
| 727 | // insertelement instruction. This allows the insertelement to then be |
| 728 | // replaced by a shufflevector. If the insertelement is not replaced, we can |
| 729 | // induce infinite looping because there's an optimization for extractelement |
| 730 | // that will delete our widening shuffle. This would trigger another attempt |
| 731 | // here to create that shuffle, and we spin forever. |
| 732 | if (InsertionBlock != InsElt->getParent()) |
| 733 | return false; |
| 734 | |
| 735 | // TODO: This restriction matches the check in visitInsertElementInst() and |
| 736 | // prevents an infinite loop caused by not turning the extract/insert pair |
| 737 | // into a shuffle. We really should not need either check, but we're lacking |
| 738 | // folds for shufflevectors because we're afraid to generate shuffle masks |
| 739 | // that the backend can't handle. |
| 740 | if (InsElt->hasOneUse() && isa<InsertElementInst>(Val: InsElt->user_back())) |
| 741 | return false; |
| 742 | |
| 743 | auto *WideVec = new ShuffleVectorInst(ExtVecOp, ExtendMask); |
| 744 | |
| 745 | // Insert the new shuffle after the vector operand of the extract is defined |
| 746 | // (as long as it's not a PHI) or at the start of the basic block of the |
| 747 | // extract, so any subsequent extracts in the same basic block can use it. |
| 748 | // TODO: Insert before the earliest ExtractElementInst that is replaced. |
| 749 | if (ExtVecOpInst && !isa<PHINode>(Val: ExtVecOpInst)) |
| 750 | WideVec->insertAfter(InsertPos: ExtVecOpInst->getIterator()); |
| 751 | else |
| 752 | IC.InsertNewInstWith(New: WideVec, Old: ExtElt->getParent()->getFirstInsertionPt()); |
| 753 | |
| 754 | // Replace extracts from the original narrow vector with extracts from the new |
| 755 | // wide vector. |
| 756 | for (User *U : ExtVecOp->users()) { |
| 757 | ExtractElementInst *OldExt = dyn_cast<ExtractElementInst>(Val: U); |
| 758 | if (!OldExt || OldExt->getParent() != WideVec->getParent()) |
| 759 | continue; |
| 760 | auto *NewExt = ExtractElementInst::Create(Vec: WideVec, Idx: OldExt->getOperand(i_nocapture: 1)); |
| 761 | IC.InsertNewInstWith(New: NewExt, Old: OldExt->getIterator()); |
| 762 | IC.replaceInstUsesWith(I&: *OldExt, V: NewExt); |
| 763 | // Add the old extracts to the worklist for DCE. We can't remove the |
| 764 | // extracts directly, because they may still be used by the calling code. |
| 765 | IC.addToWorklist(I: OldExt); |
| 766 | } |
| 767 | |
| 768 | return true; |
| 769 | } |
| 770 | |
| 771 | /// We are building a shuffle to create V, which is a sequence of insertelement, |
| 772 | /// extractelement pairs. If PermittedRHS is set, then we must either use it or |
| 773 | /// not rely on the second vector source. Return a std::pair containing the |
| 774 | /// left and right vectors of the proposed shuffle (or 0), and set the Mask |
| 775 | /// parameter as required. |
| 776 | /// |
| 777 | /// Note: we intentionally don't try to fold earlier shuffles since they have |
| 778 | /// often been chosen carefully to be efficiently implementable on the target. |
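///
/// For example (illustrative IR only), building V from two source vectors with
/// PermittedRHS == %b:
///   %e = extractelement <4 x i32> %b, i64 2
///   %v = insertelement <4 x i32> %a, i32 %e, i64 0
/// yields the pair (%a, %b) and the mask <6, 1, 2, 3>.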
| 779 | using ShuffleOps = std::pair<Value *, Value *>; |
| 780 | |
| 781 | static ShuffleOps collectShuffleElements(Value *V, SmallVectorImpl<int> &Mask, |
| 782 | Value *PermittedRHS, |
| 783 | InstCombinerImpl &IC, bool &Rerun) { |
| 784 | assert(V->getType()->isVectorTy() && "Invalid shuffle!" ); |
| 785 | unsigned NumElts = cast<FixedVectorType>(Val: V->getType())->getNumElements(); |
| 786 | |
| 787 | if (match(V, P: m_Poison())) { |
| 788 | Mask.assign(NumElts, Elt: -1); |
| 789 | return std::make_pair( |
| 790 | x: PermittedRHS ? PoisonValue::get(T: PermittedRHS->getType()) : V, y: nullptr); |
| 791 | } |
| 792 | |
| 793 | if (isa<ConstantAggregateZero>(Val: V)) { |
| 794 | Mask.assign(NumElts, Elt: 0); |
| 795 | return std::make_pair(x&: V, y: nullptr); |
| 796 | } |
| 797 | |
| 798 | if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(Val: V)) { |
| 799 | // If this is an insert of an extract from some other vector, include it. |
| 800 | Value *VecOp = IEI->getOperand(i_nocapture: 0); |
| 801 | Value *ScalarOp = IEI->getOperand(i_nocapture: 1); |
| 802 | Value *IdxOp = IEI->getOperand(i_nocapture: 2); |
| 803 | |
| 804 | if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(Val: ScalarOp)) { |
| 805 | if (isa<ConstantInt>(Val: EI->getOperand(i_nocapture: 1)) && isa<ConstantInt>(Val: IdxOp)) { |
| 806 | unsigned ExtractedIdx = |
| 807 | cast<ConstantInt>(Val: EI->getOperand(i_nocapture: 1))->getZExtValue(); |
| 808 | unsigned InsertedIdx = cast<ConstantInt>(Val: IdxOp)->getZExtValue(); |
| 809 | |
| 810 | // Either the extracted from or inserted into vector must be RHSVec, |
| 811 | // otherwise we'd end up with a shuffle of three inputs. |
| 812 | if (EI->getOperand(i_nocapture: 0) == PermittedRHS || PermittedRHS == nullptr) { |
| 813 | Value *RHS = EI->getOperand(i_nocapture: 0); |
| 814 | ShuffleOps LR = collectShuffleElements(V: VecOp, Mask, PermittedRHS: RHS, IC, Rerun); |
| 815 | assert(LR.second == nullptr || LR.second == RHS); |
| 816 | |
| 817 | if (LR.first->getType() != RHS->getType()) { |
| 818 | // Although we are giving up for now, see if we can create extracts |
| 819 | // that match the inserts for another round of combining. |
| 820 | if (replaceExtractElements(InsElt: IEI, ExtElt: EI, IC)) |
| 821 | Rerun = true; |
| 822 | |
| 823 | // We tried our best, but we can't find anything compatible with RHS |
| 824 | // further up the chain. Return a trivial shuffle. |
| 825 | for (unsigned i = 0; i < NumElts; ++i) |
| 826 | Mask[i] = i; |
| 827 | return std::make_pair(x&: V, y: nullptr); |
| 828 | } |
| 829 | |
| 830 | unsigned NumLHSElts = |
| 831 | cast<FixedVectorType>(Val: RHS->getType())->getNumElements(); |
| 832 | Mask[InsertedIdx % NumElts] = NumLHSElts + ExtractedIdx; |
| 833 | return std::make_pair(x&: LR.first, y&: RHS); |
| 834 | } |
| 835 | |
| 836 | if (VecOp == PermittedRHS) { |
| 837 | // We've gone as far as we can: anything on the other side of the |
| 838 | // extractelement will already have been converted into a shuffle. |
| 839 | unsigned NumLHSElts = |
| 840 | cast<FixedVectorType>(Val: EI->getOperand(i_nocapture: 0)->getType()) |
| 841 | ->getNumElements(); |
| 842 | for (unsigned i = 0; i != NumElts; ++i) |
| 843 | Mask.push_back(Elt: i == InsertedIdx ? ExtractedIdx : NumLHSElts + i); |
| 844 | return std::make_pair(x: EI->getOperand(i_nocapture: 0), y&: PermittedRHS); |
| 845 | } |
| 846 | |
| 847 | // If this insertelement is a chain that comes from exactly these two |
| 848 | // vectors, return the vector and the effective shuffle. |
| 849 | if (EI->getOperand(i_nocapture: 0)->getType() == PermittedRHS->getType() && |
| 850 | collectSingleShuffleElements(V: IEI, LHS: EI->getOperand(i_nocapture: 0), RHS: PermittedRHS, |
| 851 | Mask)) |
| 852 | return std::make_pair(x: EI->getOperand(i_nocapture: 0), y&: PermittedRHS); |
| 853 | } |
| 854 | } |
| 855 | } |
| 856 | |
| 857 | // Otherwise, we can't do anything fancy. Return an identity vector. |
| 858 | for (unsigned i = 0; i != NumElts; ++i) |
| 859 | Mask.push_back(Elt: i); |
| 860 | return std::make_pair(x&: V, y: nullptr); |
| 861 | } |
| 862 | |
| 863 | /// Look for a chain of insertvalue's that fully define an aggregate, and trace |
| 864 | /// back the values inserted to see if they were all extractvalue'd from |
| 865 | /// the same source aggregate at the exact same element indices. |
| 866 | /// If they were, just reuse the source aggregate. |
| 867 | /// This potentially deals with PHI indirections. |
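///
/// For example (illustrative IR only), rebuilding a two-element struct from
/// its own pieces:
///   %e0 = extractvalue { i8, i32 } %agg, 0
///   %e1 = extractvalue { i8, i32 } %agg, 1
///   %i0 = insertvalue { i8, i32 } poison, i8 %e0, 0
///   %i1 = insertvalue { i8, i32 } %i0, i32 %e1, 1
/// simplifies to just reusing %agg.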
| 868 | Instruction *InstCombinerImpl::foldAggregateConstructionIntoAggregateReuse( |
| 869 | InsertValueInst &OrigIVI) { |
| 870 | Type *AggTy = OrigIVI.getType(); |
| 871 | unsigned NumAggElts; |
| 872 | switch (AggTy->getTypeID()) { |
| 873 | case Type::StructTyID: |
| 874 | NumAggElts = AggTy->getStructNumElements(); |
| 875 | break; |
| 876 | case Type::ArrayTyID: |
| 877 | NumAggElts = AggTy->getArrayNumElements(); |
| 878 | break; |
| 879 | default: |
| 880 | llvm_unreachable("Unhandled aggregate type?" ); |
| 881 | } |
| 882 | |
| 883 | // Arbitrary aggregate size cut-off. Motivation for limit of 2 is to be able |
| 884 | // to handle the clang C++ exception struct (which is hardcoded as {i8*, i32}). |
| 885 | // FIXME: any interesting patterns to be caught with a larger limit? |
| 886 | assert(NumAggElts > 0 && "Aggregate should have elements." ); |
| 887 | if (NumAggElts > 2) |
| 888 | return nullptr; |
| 889 | |
| 890 | static constexpr auto NotFound = std::nullopt; |
| 891 | static constexpr auto FoundMismatch = nullptr; |
| 892 | |
| 893 | // Try to find a value of each element of an aggregate. |
| 894 | // FIXME: deal with more complex, not one-dimensional, aggregate types |
| 895 | SmallVector<std::optional<Instruction *>, 2> AggElts(NumAggElts, NotFound); |
| 896 | |
| 897 | // Do we know values for each element of the aggregate? |
| 898 | auto KnowAllElts = [&AggElts]() { |
| 899 | return !llvm::is_contained(Range&: AggElts, Element: NotFound); |
| 900 | }; |
| 901 | |
| 902 | int Depth = 0; |
| 903 | |
| 904 | // Arbitrary `insertvalue` visitation depth limit. Let's be okay with |
| 905 | // every element being overwritten twice, which should never happen. |
| 906 | static const int DepthLimit = 2 * NumAggElts; |
| 907 | |
| 908 | // Recurse up the chain of `insertvalue` aggregate operands until either we've |
| 909 | // reconstructed the full initializer or can't visit any more `insertvalue`'s. |
| 910 | for (InsertValueInst *CurrIVI = &OrigIVI; |
| 911 | Depth < DepthLimit && CurrIVI && !KnowAllElts(); |
| 912 | CurrIVI = dyn_cast<InsertValueInst>(Val: CurrIVI->getAggregateOperand()), |
| 913 | ++Depth) { |
| 914 | auto *InsertedValue = |
| 915 | dyn_cast<Instruction>(Val: CurrIVI->getInsertedValueOperand()); |
| 916 | if (!InsertedValue) |
| 917 | return nullptr; // Inserted value must be produced by an instruction. |
| 918 | |
| 919 | ArrayRef<unsigned int> Indices = CurrIVI->getIndices(); |
| 920 | |
| 921 | // Don't bother with more than single-level aggregates. |
| 922 | if (Indices.size() != 1) |
| 923 | return nullptr; // FIXME: deal with more complex aggregates? |
| 924 | |
| 925 | // Now, we may have already previously recorded the value for this element |
| 926 | // of an aggregate. If we did, that means the CurrIVI will later be |
| 927 | // overwritten with the already-recorded value. But if not, let's record it! |
| 928 | std::optional<Instruction *> &Elt = AggElts[Indices.front()]; |
| 929 | Elt = Elt.value_or(u&: InsertedValue); |
| 930 | |
| 931 | // FIXME: should we handle chain-terminating undef base operand? |
| 932 | } |
| 933 | |
| 934 | // Was that sufficient to deduce the full initializer for the aggregate? |
| 935 | if (!KnowAllElts()) |
| 936 | return nullptr; // Give up then. |
| 937 | |
| 938 | // We now want to find the source[s] of the aggregate elements we've found. |
| 939 | // And with "source" we mean the original aggregate[s] from which |
| 940 | // the inserted elements were extracted. This may require PHI translation. |
| 941 | |
| 942 | enum class AggregateDescription { |
| 943 | /// When analyzing the value that was inserted into an aggregate, we did |
| 944 | /// not manage to find defining `extractvalue` instruction to analyze. |
| 945 | NotFound, |
| 946 | /// When analyzing the value that was inserted into an aggregate, we did |
| 947 | /// manage to find defining `extractvalue` instruction[s], and everything |
| 948 | /// matched perfectly - aggregate type, element insertion/extraction index. |
| 949 | Found, |
| 950 | /// When analyzing the value that was inserted into an aggregate, we did |
| 951 | /// manage to find defining `extractvalue` instruction, but there was |
| 952 | /// a mismatch: either the source type from which the extraction was done |
| 953 | /// didn't match the aggregate type into which the insertion was done, |
| 954 | /// or the extraction/insertion channels mismatched, |
| 955 | /// or different elements had different source aggregates. |
| 956 | FoundMismatch |
| 957 | }; |
| 958 | auto Describe = [](std::optional<Value *> SourceAggregate) { |
| 959 | if (SourceAggregate == NotFound) |
| 960 | return AggregateDescription::NotFound; |
| 961 | if (*SourceAggregate == FoundMismatch) |
| 962 | return AggregateDescription::FoundMismatch; |
| 963 | return AggregateDescription::Found; |
| 964 | }; |
| 965 | |
| 966 | // If an aggregate element is defined in UseBB, we can't use it in PredBB. |
| 967 | bool EltDefinedInUseBB = false; |
| 968 | |
| 969 | // Given the value \p Elt that was being inserted into element \p EltIdx of an |
| 970 | // aggregate AggTy, see if \p Elt was originally defined by an |
| 971 | // appropriate extractvalue (same element index, same aggregate type). |
| 972 | // If found, return the source aggregate from which the extraction was done. |
| 973 | // If \p PredBB is provided, does PHI translation of \p Elt first. |
| 974 | auto FindSourceAggregate = |
| 975 | [&](Instruction *Elt, unsigned EltIdx, std::optional<BasicBlock *> UseBB, |
| 976 | std::optional<BasicBlock *> PredBB) -> std::optional<Value *> { |
| 977 | // For now(?), only deal with, at most, a single level of PHI indirection. |
| 978 | if (UseBB && PredBB) { |
| 979 | Elt = dyn_cast<Instruction>(Val: Elt->DoPHITranslation(CurBB: *UseBB, PredBB: *PredBB)); |
| 980 | if (Elt && Elt->getParent() == *UseBB) |
| 981 | EltDefinedInUseBB = true; |
| 982 | } |
| 983 | // FIXME: deal with multiple levels of PHI indirection? |
| 984 | |
| 985 | // Did we find an extraction? |
| 986 | auto *EVI = dyn_cast_or_null<ExtractValueInst>(Val: Elt); |
| 987 | if (!EVI) |
| 988 | return NotFound; |
| 989 | |
| 990 | Value *SourceAggregate = EVI->getAggregateOperand(); |
| 991 | |
| 992 | // Is the extraction from the same type into which the insertion was? |
| 993 | if (SourceAggregate->getType() != AggTy) |
| 994 | return FoundMismatch; |
| 995 | // And the element index doesn't change between extraction and insertion? |
| 996 | if (EVI->getNumIndices() != 1 || EltIdx != EVI->getIndices().front()) |
| 997 | return FoundMismatch; |
| 998 | |
| 999 | return SourceAggregate; // AggregateDescription::Found |
| 1000 | }; |
| 1001 | |
| 1002 | // Given elements AggElts that were constructing an aggregate OrigIVI, |
| 1003 | // see if we can find appropriate source aggregate for each of the elements, |
| 1004 | // and see whether it's the same aggregate for each element. If so, return it. |
| 1005 | auto FindCommonSourceAggregate = |
| 1006 | [&](std::optional<BasicBlock *> UseBB, |
| 1007 | std::optional<BasicBlock *> PredBB) -> std::optional<Value *> { |
| 1008 | std::optional<Value *> SourceAggregate; |
| 1009 | |
| 1010 | for (auto I : enumerate(First&: AggElts)) { |
| 1011 | assert(Describe(SourceAggregate) != AggregateDescription::FoundMismatch && |
| 1012 | "We don't store nullptr in SourceAggregate!" ); |
| 1013 | assert((Describe(SourceAggregate) == AggregateDescription::Found) == |
| 1014 | (I.index() != 0) && |
| 1015 | "SourceAggregate should be valid after the first element," ); |
| 1016 | |
| 1017 | // For this element, is there a plausible source aggregate? |
| 1018 | // FIXME: we could special-case undef element, IFF we know that in the |
| 1019 | // source aggregate said element isn't poison. |
| 1020 | std::optional<Value *> SourceAggregateForElement = |
| 1021 | FindSourceAggregate(*I.value(), I.index(), UseBB, PredBB); |
| 1022 | |
| 1023 | // Okay, what have we found? Does that correlate with previous findings? |
| 1024 | |
| 1025 | // Regardless of whether or not we have previously found source |
| 1026 | // aggregate for previous elements (if any), if we didn't find one for |
| 1027 | // this element, passthrough whatever we have just found. |
| 1028 | if (Describe(SourceAggregateForElement) != AggregateDescription::Found) |
| 1029 | return SourceAggregateForElement; |
| 1030 | |
| 1031 | // Okay, we have found source aggregate for this element. |
| 1032 | // Let's see what we already know from previous elements, if any. |
| 1033 | switch (Describe(SourceAggregate)) { |
| 1034 | case AggregateDescription::NotFound: |
| 1035 | // This is apparently the first element that we have examined. |
| 1036 | SourceAggregate = SourceAggregateForElement; // Record the aggregate! |
| 1037 | continue; // Great, now look at next element. |
| 1038 | case AggregateDescription::Found: |
| 1039 | // We have previously already successfully examined other elements. |
| 1040 | // Is this the same source aggregate we've found for other elements? |
| 1041 | if (*SourceAggregateForElement != *SourceAggregate) |
| 1042 | return FoundMismatch; |
| 1043 | continue; // Still the same aggregate, look at next element. |
| 1044 | case AggregateDescription::FoundMismatch: |
| 1045 | llvm_unreachable("Can't happen. We would have early-exited then." ); |
| 1046 | }; |
| 1047 | } |
| 1048 | |
| 1049 | assert(Describe(SourceAggregate) == AggregateDescription::Found && |
| 1050 | "Must be a valid Value" ); |
| 1051 | return *SourceAggregate; |
| 1052 | }; |
| 1053 | |
| 1054 | std::optional<Value *> SourceAggregate; |
| 1055 | |
| 1056 | // Can we find the source aggregate without looking at predecessors? |
| 1057 | SourceAggregate = FindCommonSourceAggregate(/*UseBB=*/std::nullopt, |
| 1058 | /*PredBB=*/std::nullopt); |
| 1059 | if (Describe(SourceAggregate) != AggregateDescription::NotFound) { |
| 1060 | if (Describe(SourceAggregate) == AggregateDescription::FoundMismatch) |
| 1061 | return nullptr; // Conflicting source aggregates! |
| 1062 | ++NumAggregateReconstructionsSimplified; |
| 1063 | return replaceInstUsesWith(I&: OrigIVI, V: *SourceAggregate); |
| 1064 | } |
| 1065 | |
| 1066 | // Okay, apparently we need to look at predecessors. |
| 1067 | |
| 1068 | // We should be smart about picking the "use" basic block, which will be the |
| 1069 | // merge point for aggregate, where we'll insert the final PHI that will be |
| 1070 | // used instead of OrigIVI. Basic block of OrigIVI is *not* the right choice. |
| 1071 | // We should look at the blocks in which each of the AggElts is being defined; |
| 1072 | // they all should be defined in the same basic block. |
| 1073 | BasicBlock *UseBB = nullptr; |
| 1074 | |
| 1075 | for (const std::optional<Instruction *> &I : AggElts) { |
| 1076 | BasicBlock *BB = (*I)->getParent(); |
| 1077 | // If it's the first instruction we've encountered, record the basic block. |
| 1078 | if (!UseBB) { |
| 1079 | UseBB = BB; |
| 1080 | continue; |
| 1081 | } |
| 1082 | // Otherwise, this must be the same basic block we've seen previously. |
| 1083 | if (UseBB != BB) |
| 1084 | return nullptr; |
| 1085 | } |
| 1086 | |
| 1087 | // If *all* of the elements are basic-block-independent, meaning they are |
| 1088 | // either function arguments, or constant expressions, then if we didn't |
| 1089 | // handle them without predecessor-aware handling, we won't handle them now. |
| 1090 | if (!UseBB) |
| 1091 | return nullptr; |
| 1092 | |
| 1093 | // If we didn't manage to find source aggregate without looking at |
| 1094 | // predecessors, and there are no predecessors to look at, then we're done. |
| 1095 | if (pred_empty(BB: UseBB)) |
| 1096 | return nullptr; |
| 1097 | |
| 1098 | // Arbitrary predecessor count limit. |
| 1099 | static const int PredCountLimit = 64; |
| 1100 | |
| 1101 | // Cache the (non-uniqified!) list of predecessors in a vector, |
| 1102 | // checking the limit at the same time for efficiency. |
| 1103 | SmallVector<BasicBlock *, 4> Preds; // May have duplicates! |
| 1104 | for (BasicBlock *Pred : predecessors(BB: UseBB)) { |
| 1105 | // Don't bother if there are too many predecessors. |
| 1106 | if (Preds.size() >= PredCountLimit) // FIXME: only count duplicates once? |
| 1107 | return nullptr; |
| 1108 | Preds.emplace_back(Args&: Pred); |
| 1109 | } |
| 1110 | |
| 1111 | // For each predecessor, what is the source aggregate |
| 1112 | // from which all the elements were originally extracted? |
| 1113 | // Note that we want for the map to have stable iteration order! |
| 1114 | SmallMapVector<BasicBlock *, Value *, 4> SourceAggregates; |
| 1115 | bool FoundSrcAgg = false; |
| 1116 | for (BasicBlock *Pred : Preds) { |
| 1117 | std::pair<decltype(SourceAggregates)::iterator, bool> IV = |
| 1118 | SourceAggregates.try_emplace(Key: Pred); |
| 1119 | // Did we already evaluate this predecessor? |
| 1120 | if (!IV.second) |
| 1121 | continue; |
| 1122 | |
| 1123 | // Let's hope that when coming from predecessor Pred, all elements of the |
| 1124 | // aggregate produced by OrigIVI must have been originally extracted from |
| 1125 | // the same aggregate. Is that so? Can we find said original aggregate? |
| 1126 | SourceAggregate = FindCommonSourceAggregate(UseBB, Pred); |
| 1127 | if (Describe(SourceAggregate) == AggregateDescription::Found) { |
| 1128 | FoundSrcAgg = true; |
| 1129 | IV.first->second = *SourceAggregate; |
| 1130 | } else { |
| 1131 | // If UseBB is the single successor of Pred, we can add InsertValue to |
| 1132 | // Pred. |
| 1133 | auto *BI = dyn_cast<BranchInst>(Val: Pred->getTerminator()); |
| 1134 | if (!BI || !BI->isUnconditional()) |
| 1135 | return nullptr; |
| 1136 | } |
| 1137 | } |
| 1138 | |
| 1139 | if (!FoundSrcAgg) |
| 1140 | return nullptr; |
| 1141 | |
| 1142 | // Do some sanity checks if we need to add insertvalue into predecessors. |
| 1143 | auto OrigBB = OrigIVI.getParent(); |
| 1144 | for (auto &It : SourceAggregates) { |
| 1145 | if (Describe(It.second) == AggregateDescription::Found) |
| 1146 | continue; |
| 1147 | |
| 1148 | // Element is defined in UseBB, so it can't be used in predecessors. |
| 1149 | if (EltDefinedInUseBB) |
| 1150 | return nullptr; |
| 1151 | |
| 1152 | // Doing this transformation across a loop boundary may create a dead loop. So |
| 1153 | // we should avoid this situation. But LoopInfo is not generally available, so |
| 1154 | // we must be conservative here. |
| 1155 | // If OrigIVI is in UseBB and UseBB is the only successor of PredBB, then |
| 1156 | // PredBB can't be in an inner loop. |
| 1157 | if (UseBB != OrigBB) |
| 1158 | return nullptr; |
| 1159 | |
| 1160 | // Avoid constructing a constant aggregate because the constant value may
| 1161 | // expose more optimizations.
| 1162 | bool ConstAgg = true; |
| 1163 | for (auto Val : AggElts) { |
| 1164 | Value *Elt = (*Val)->DoPHITranslation(CurBB: UseBB, PredBB: It.first); |
| 1165 | if (!isa<Constant>(Val: Elt)) { |
| 1166 | ConstAgg = false; |
| 1167 | break; |
| 1168 | } |
| 1169 | } |
| 1170 | if (ConstAgg) |
| 1171 | return nullptr; |
| 1172 | } |
| 1173 | |
| 1174 | // For predecessors without appropriate source aggregate, create one in the |
| 1175 | // predecessor. |
| 1176 | for (auto &It : SourceAggregates) { |
| 1177 | if (Describe(It.second) == AggregateDescription::Found) |
| 1178 | continue; |
| 1179 | |
| 1180 | BasicBlock *Pred = It.first; |
| 1181 | Builder.SetInsertPoint(Pred->getTerminator()); |
| 1182 | Value *V = PoisonValue::get(T: AggTy); |
| 1183 | for (auto [Idx, Val] : enumerate(First&: AggElts)) { |
| 1184 | Value *Elt = (*Val)->DoPHITranslation(CurBB: UseBB, PredBB: Pred); |
| 1185 | V = Builder.CreateInsertValue(Agg: V, Val: Elt, Idxs: Idx); |
| 1186 | } |
| 1187 | |
| 1188 | It.second = V; |
| 1189 | } |
| 1190 | |
| 1191 | // All good! Now we just need to thread the source aggregates here. |
| 1192 | // Note that we have to insert the new PHI here, ourselves, because we can't |
| 1193 | // rely on InstCombinerImpl::run() inserting it into the right basic block. |
| 1194 | // Note that the same block can be a predecessor more than once, |
| 1195 | // and we need to preserve that invariant for the PHI node. |
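|      | // (For example, a switch in a predecessor with two cases branching to UseBB
|      | // contributes two edges, and the PHI must have a matching incoming entry for
|      | // each of those edges.)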
| 1196 | BuilderTy::InsertPointGuard Guard(Builder); |
| 1197 | Builder.SetInsertPoint(TheBB: UseBB, IP: UseBB->getFirstNonPHIIt()); |
| 1198 | auto *PHI = |
| 1199 | Builder.CreatePHI(Ty: AggTy, NumReservedValues: Preds.size(), Name: OrigIVI.getName() + ".merged" ); |
| 1200 | for (BasicBlock *Pred : Preds) |
| 1201 | PHI->addIncoming(V: SourceAggregates[Pred], BB: Pred); |
| 1202 | |
| 1203 | ++NumAggregateReconstructionsSimplified; |
| 1204 | return replaceInstUsesWith(I&: OrigIVI, V: PHI); |
| 1205 | } |
| 1206 | |
| 1207 | /// Try to find redundant insertvalue instructions, like the following ones: |
| 1208 | /// %0 = insertvalue { i8, i32 } undef, i8 %x, 0 |
| 1209 | /// %1 = insertvalue { i8, i32 } %0, i8 %y, 0 |
| 1210 | /// Here the second instruction inserts values at the same indices as the
| 1211 | /// first one, making the first one redundant.
| 1212 | /// It should be transformed to: |
| 1213 | /// %0 = insertvalue { i8, i32 } undef, i8 %y, 0 |
| 1214 | Instruction *InstCombinerImpl::visitInsertValueInst(InsertValueInst &I) { |
| 1215 | if (Value *V = simplifyInsertValueInst( |
| 1216 | Agg: I.getAggregateOperand(), Val: I.getInsertedValueOperand(), Idxs: I.getIndices(), |
| 1217 | Q: SQ.getWithInstruction(I: &I))) |
| 1218 | return replaceInstUsesWith(I, V); |
| 1219 | |
| 1220 | bool IsRedundant = false; |
| 1221 | ArrayRef<unsigned int> FirstIndices = I.getIndices(); |
| 1222 | |
| 1223 | // If there is a chain of insertvalue instructions (each of them except the |
| 1224 | // last one has only one use and it's another insertvalue insn from this |
| 1225 | // chain), check if any of the 'children' uses the same indices as the first |
| 1226 | // instruction. In this case, the first one is redundant. |
| 1227 | Value *V = &I; |
| 1228 | unsigned Depth = 0; |
| 1229 | while (V->hasOneUse() && Depth < 10) { |
| 1230 | User *U = V->user_back(); |
| 1231 | auto UserInsInst = dyn_cast<InsertValueInst>(Val: U); |
| 1232 | if (!UserInsInst || U->getOperand(i: 0) != V) |
| 1233 | break; |
| 1234 | if (UserInsInst->getIndices() == FirstIndices) { |
| 1235 | IsRedundant = true; |
| 1236 | break; |
| 1237 | } |
| 1238 | V = UserInsInst; |
| 1239 | Depth++; |
| 1240 | } |
| 1241 | |
| 1242 | if (IsRedundant) |
| 1243 | return replaceInstUsesWith(I, V: I.getOperand(i_nocapture: 0)); |
| 1244 | |
| 1245 | if (Instruction *NewI = foldAggregateConstructionIntoAggregateReuse(OrigIVI&: I)) |
| 1246 | return NewI; |
| 1247 | |
| 1248 | return nullptr; |
| 1249 | } |
| 1250 | |
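|      | /// Return true if the shuffle mask picks each lane from the same lane of one
|      | /// of the two source operands, i.e. the shuffle acts as a vector select. For
|      | /// example, with two 4-element operands, mask <0,5,2,7> is select-equivalent,
|      | /// while <1,5,2,7> is not (lane 0 would cross lanes).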
| 1251 | static bool isShuffleEquivalentToSelect(ShuffleVectorInst &Shuf) { |
| 1252 | // Cannot analyze a scalable type; the number of elements is not a
| 1253 | // compile-time constant.
| 1254 | if (isa<ScalableVectorType>(Val: Shuf.getOperand(i_nocapture: 0)->getType())) |
| 1255 | return false; |
| 1256 | |
| 1257 | int MaskSize = Shuf.getShuffleMask().size(); |
| 1258 | int VecSize = |
| 1259 | cast<FixedVectorType>(Val: Shuf.getOperand(i_nocapture: 0)->getType())->getNumElements(); |
| 1260 | |
| 1261 | // A vector select does not change the size of the operands. |
| 1262 | if (MaskSize != VecSize) |
| 1263 | return false; |
| 1264 | |
| 1265 | // Each mask element must be undefined or choose a vector element from one of |
| 1266 | // the source operands without crossing vector lanes. |
| 1267 | for (int i = 0; i != MaskSize; ++i) { |
| 1268 | int Elt = Shuf.getMaskValue(Elt: i); |
| 1269 | if (Elt != -1 && Elt != i && Elt != i + VecSize) |
| 1270 | return false; |
| 1271 | } |
| 1272 | |
| 1273 | return true; |
| 1274 | } |
| 1275 | |
| 1276 | /// Turn a chain of inserts that splats a value into an insert + shuffle: |
| 1277 | /// insertelt(insertelt(insertelt(insertelt X, %k, 0), %k, 1), %k, 2) ... -> |
| 1278 | /// shufflevector(insertelt(X, %k, 0), poison, zero) |
| 1279 | static Instruction *foldInsSequenceIntoSplat(InsertElementInst &InsElt) { |
| 1280 | // We are interested in the last insert in a chain. So if this insert has a |
| 1281 | // single user and that user is an insert, bail. |
| 1282 | if (InsElt.hasOneUse() && isa<InsertElementInst>(Val: InsElt.user_back())) |
| 1283 | return nullptr; |
| 1284 | |
| 1285 | VectorType *VecTy = InsElt.getType(); |
| 1286 | // Cannot handle a scalable type; the number of elements is not a
| 1287 | // compile-time constant.
| 1288 | if (isa<ScalableVectorType>(Val: VecTy)) |
| 1289 | return nullptr; |
| 1290 | unsigned NumElements = cast<FixedVectorType>(Val: VecTy)->getNumElements(); |
| 1291 | |
| 1292 | // Do not try to do this for a one-element vector, since that's a nop, |
| 1293 | // and will cause an inf-loop. |
| 1294 | if (NumElements == 1) |
| 1295 | return nullptr; |
| 1296 | |
| 1297 | Value *SplatVal = InsElt.getOperand(i_nocapture: 1); |
| 1298 | InsertElementInst *CurrIE = &InsElt; |
| 1299 | SmallBitVector ElementPresent(NumElements, false); |
| 1300 | InsertElementInst *FirstIE = nullptr; |
| 1301 | |
| 1302 | // Walk the chain backwards, keeping track of which indices we inserted into, |
| 1303 | // until we hit something that isn't an insert of the splatted value. |
| 1304 | while (CurrIE) { |
| 1305 | auto *Idx = dyn_cast<ConstantInt>(Val: CurrIE->getOperand(i_nocapture: 2)); |
| 1306 | if (!Idx || CurrIE->getOperand(i_nocapture: 1) != SplatVal) |
| 1307 | return nullptr; |
| 1308 | |
| 1309 | auto *NextIE = dyn_cast<InsertElementInst>(Val: CurrIE->getOperand(i_nocapture: 0)); |
| 1310 | // Check that none of the intermediate steps have any additional uses, except
| 1311 | // for the root insertelement instruction, which can be re-used if it
| 1312 | // inserts at position 0.
| 1313 | if (CurrIE != &InsElt && |
| 1314 | (!CurrIE->hasOneUse() && (NextIE != nullptr || !Idx->isZero()))) |
| 1315 | return nullptr; |
| 1316 | |
| 1317 | ElementPresent[Idx->getZExtValue()] = true; |
| 1318 | FirstIE = CurrIE; |
| 1319 | CurrIE = NextIE; |
| 1320 | } |
| 1321 | |
| 1322 | // If this is just a single insertelement (not a sequence), we are done. |
| 1323 | if (FirstIE == &InsElt) |
| 1324 | return nullptr; |
| 1325 | |
| 1326 | // If we are not inserting into a poison vector, make sure we've seen an |
| 1327 | // insert into every element. |
| 1328 | // TODO: If the base vector is not undef, it might be better to create a splat |
| 1329 | // and then a select-shuffle (blend) with the base vector. |
| 1330 | if (!match(V: FirstIE->getOperand(i_nocapture: 0), P: m_Poison())) |
| 1331 | if (!ElementPresent.all()) |
| 1332 | return nullptr; |
| 1333 | |
| 1334 | // Create the insert + shuffle. |
| 1335 | Type *Int64Ty = Type::getInt64Ty(C&: InsElt.getContext()); |
| 1336 | PoisonValue *PoisonVec = PoisonValue::get(T: VecTy); |
| 1337 | Constant *Zero = ConstantInt::get(Ty: Int64Ty, V: 0); |
| 1338 | if (!cast<ConstantInt>(Val: FirstIE->getOperand(i_nocapture: 2))->isZero()) |
| 1339 | FirstIE = InsertElementInst::Create(Vec: PoisonVec, NewElt: SplatVal, Idx: Zero, NameStr: "" , |
| 1340 | InsertBefore: InsElt.getIterator()); |
| 1341 | |
| 1342 | // Splat from element 0, but replace absent elements with poison in the mask. |
| 1343 | SmallVector<int, 16> Mask(NumElements, 0); |
| 1344 | for (unsigned i = 0; i != NumElements; ++i) |
| 1345 | if (!ElementPresent[i]) |
| 1346 | Mask[i] = -1; |
| 1347 | |
| 1348 | return new ShuffleVectorInst(FirstIE, Mask); |
| 1349 | } |
| 1350 | |
| 1351 | /// Try to fold an insert element into an existing splat shuffle by changing |
| 1352 | /// the shuffle's mask to include the index of this insert element. |
| 1353 | static Instruction *foldInsEltIntoSplat(InsertElementInst &InsElt) { |
| 1354 | // Check if the vector operand of this insert is a canonical splat shuffle. |
| 1355 | auto *Shuf = dyn_cast<ShuffleVectorInst>(Val: InsElt.getOperand(i_nocapture: 0)); |
| 1356 | if (!Shuf || !Shuf->isZeroEltSplat()) |
| 1357 | return nullptr; |
| 1358 | |
| 1359 | // Bail out early if the shuffle has a scalable type; the number of elements
| 1360 | // in the shuffle mask is unknown at compile time.
| 1361 | if (isa<ScalableVectorType>(Val: Shuf->getType())) |
| 1362 | return nullptr; |
| 1363 | |
| 1364 | // Check for a constant insertion index. |
| 1365 | uint64_t IdxC; |
| 1366 | if (!match(V: InsElt.getOperand(i_nocapture: 2), P: m_ConstantInt(V&: IdxC))) |
| 1367 | return nullptr; |
| 1368 | |
| 1369 | // Check if the splat shuffle's input is the same as this insert's scalar op. |
| 1370 | Value *X = InsElt.getOperand(i_nocapture: 1); |
| 1371 | Value *Op0 = Shuf->getOperand(i_nocapture: 0); |
| 1372 | if (!match(V: Op0, P: m_InsertElt(Val: m_Undef(), Elt: m_Specific(V: X), Idx: m_ZeroInt()))) |
| 1373 | return nullptr; |
| 1374 | |
| 1375 | // Replace the shuffle mask element at the index of this insert with a zero. |
| 1376 | // For example: |
| 1377 | // inselt (shuf (inselt undef, X, 0), _, <0,undef,0,undef>), X, 1 |
| 1378 | // --> shuf (inselt undef, X, 0), poison, <0,0,0,undef> |
| 1379 | unsigned NumMaskElts = |
| 1380 | cast<FixedVectorType>(Val: Shuf->getType())->getNumElements(); |
| 1381 | SmallVector<int, 16> NewMask(NumMaskElts); |
| 1382 | for (unsigned i = 0; i != NumMaskElts; ++i) |
| 1383 | NewMask[i] = i == IdxC ? 0 : Shuf->getMaskValue(Elt: i); |
| 1384 | |
| 1385 | return new ShuffleVectorInst(Op0, NewMask); |
| 1386 | } |
| 1387 | |
| 1388 | /// Try to fold an extract+insert element into an existing identity shuffle by |
| 1389 | /// changing the shuffle's mask to include the index of this insert element. |
| 1390 | static Instruction *foldInsEltIntoIdentityShuffle(InsertElementInst &InsElt) { |
| 1391 | // Check if the vector operand of this insert is an identity shuffle. |
| 1392 | auto *Shuf = dyn_cast<ShuffleVectorInst>(Val: InsElt.getOperand(i_nocapture: 0)); |
| 1393 | if (!Shuf || !match(V: Shuf->getOperand(i_nocapture: 1), P: m_Poison()) || |
| 1394 | !(Shuf->isIdentityWithExtract() || Shuf->isIdentityWithPadding())) |
| 1395 | return nullptr; |
| 1396 | |
| 1397 | // Bail out early if the shuffle has a scalable type; the number of elements
| 1398 | // in the shuffle mask is unknown at compile time.
| 1399 | if (isa<ScalableVectorType>(Val: Shuf->getType())) |
| 1400 | return nullptr; |
| 1401 | |
| 1402 | // Check for a constant insertion index. |
| 1403 | uint64_t IdxC; |
| 1404 | if (!match(V: InsElt.getOperand(i_nocapture: 2), P: m_ConstantInt(V&: IdxC))) |
| 1405 | return nullptr; |
| 1406 | |
| 1407 | // Check if this insert's scalar op is extracted from the identity shuffle's |
| 1408 | // input vector. |
| 1409 | Value *Scalar = InsElt.getOperand(i_nocapture: 1); |
| 1410 | Value *X = Shuf->getOperand(i_nocapture: 0); |
| 1411 | if (!match(V: Scalar, P: m_ExtractElt(Val: m_Specific(V: X), Idx: m_SpecificInt(V: IdxC)))) |
| 1412 | return nullptr; |
| 1413 | |
| 1414 | // Replace the shuffle mask element at the index of this extract+insert with |
| 1415 | // that same index value. |
| 1416 | // For example: |
| 1417 | // inselt (shuf X, IdMask), (extelt X, IdxC), IdxC --> shuf X, IdMask' |
| 1418 | unsigned NumMaskElts = |
| 1419 | cast<FixedVectorType>(Val: Shuf->getType())->getNumElements(); |
| 1420 | SmallVector<int, 16> NewMask(NumMaskElts); |
| 1421 | ArrayRef<int> OldMask = Shuf->getShuffleMask(); |
| 1422 | for (unsigned i = 0; i != NumMaskElts; ++i) { |
| 1423 | if (i != IdxC) { |
| 1424 | // All mask elements besides the inserted element remain the same. |
| 1425 | NewMask[i] = OldMask[i]; |
| 1426 | } else if (OldMask[i] == (int)IdxC) { |
| 1427 | // If the mask element was already set, there's nothing to do |
| 1428 | // (demanded elements analysis may unset it later). |
| 1429 | return nullptr; |
| 1430 | } else { |
| 1431 | assert(OldMask[i] == PoisonMaskElem && |
| 1432 | "Unexpected shuffle mask element for identity shuffle" ); |
| 1433 | NewMask[i] = IdxC; |
| 1434 | } |
| 1435 | } |
| 1436 | |
| 1437 | return new ShuffleVectorInst(X, Shuf->getOperand(i_nocapture: 1), NewMask); |
| 1438 | } |
| 1439 | |
| 1440 | /// If we have an insertelement instruction feeding into another insertelement |
| 1441 | /// and the 2nd is inserting a constant into the vector, canonicalize that |
| 1442 | /// constant insertion before the insertion of a variable: |
| 1443 | /// |
| 1444 | /// insertelement (insertelement X, Y, IdxC1), ScalarC, IdxC2 --> |
| 1445 | /// insertelement (insertelement X, ScalarC, IdxC2), Y, IdxC1 |
| 1446 | /// |
| 1447 | /// This has the potential of eliminating the 2nd insertelement instruction |
| 1448 | /// via constant folding of the scalar constant into a vector constant. |
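|      | ///
|      | /// For example, if X is itself a constant vector, the hoisted insert folds:
|      | ///   inselt (inselt {0,0,0,0}, %y, 2), 7, 0 --> inselt {7,0,0,0}, %y, 2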
| 1449 | static Instruction *hoistInsEltConst(InsertElementInst &InsElt2, |
| 1450 | InstCombiner::BuilderTy &Builder) { |
| 1451 | auto *InsElt1 = dyn_cast<InsertElementInst>(Val: InsElt2.getOperand(i_nocapture: 0)); |
| 1452 | if (!InsElt1 || !InsElt1->hasOneUse()) |
| 1453 | return nullptr; |
| 1454 | |
| 1455 | Value *X, *Y; |
| 1456 | Constant *ScalarC; |
| 1457 | ConstantInt *IdxC1, *IdxC2; |
| 1458 | if (match(V: InsElt1->getOperand(i_nocapture: 0), P: m_Value(V&: X)) && |
| 1459 | match(V: InsElt1->getOperand(i_nocapture: 1), P: m_Value(V&: Y)) && !isa<Constant>(Val: Y) && |
| 1460 | match(V: InsElt1->getOperand(i_nocapture: 2), P: m_ConstantInt(CI&: IdxC1)) && |
| 1461 | match(V: InsElt2.getOperand(i_nocapture: 1), P: m_Constant(C&: ScalarC)) && |
| 1462 | match(V: InsElt2.getOperand(i_nocapture: 2), P: m_ConstantInt(CI&: IdxC2)) && IdxC1 != IdxC2) { |
| 1463 | Value *NewInsElt1 = Builder.CreateInsertElement(Vec: X, NewElt: ScalarC, Idx: IdxC2); |
| 1464 | return InsertElementInst::Create(Vec: NewInsElt1, NewElt: Y, Idx: IdxC1); |
| 1465 | } |
| 1466 | |
| 1467 | return nullptr; |
| 1468 | } |
| 1469 | |
| 1470 | /// insertelt (shufflevector X, CVec, Mask|insertelt X, C1, CIndex1), C, CIndex |
| 1471 | /// --> shufflevector X, CVec', Mask' |
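|      | ///
|      | /// For example, with 2-element vectors (first form):
|      | ///   inselt (shuf X, {7,8}, {0,3}), 5, 1 --> shuf X, {7,5}, {0,3}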
| 1472 | static Instruction *foldConstantInsEltIntoShuffle(InsertElementInst &InsElt) { |
| 1473 | auto *Inst = dyn_cast<Instruction>(Val: InsElt.getOperand(i_nocapture: 0)); |
| 1474 | // Bail out if the parent has more than one use. In that case, we'd be |
| 1475 | // replacing the insertelt with a shuffle, and that's not a clear win. |
| 1476 | if (!Inst || !Inst->hasOneUse()) |
| 1477 | return nullptr; |
| 1478 | if (auto *Shuf = dyn_cast<ShuffleVectorInst>(Val: InsElt.getOperand(i_nocapture: 0))) { |
| 1479 | // The shuffle must have a constant vector operand. The insertelt must have |
| 1480 | // a constant scalar being inserted at a constant position in the vector. |
| 1481 | Constant *ShufConstVec, *InsEltScalar; |
| 1482 | uint64_t InsEltIndex; |
| 1483 | if (!match(V: Shuf->getOperand(i_nocapture: 1), P: m_Constant(C&: ShufConstVec)) || |
| 1484 | !match(V: InsElt.getOperand(i_nocapture: 1), P: m_Constant(C&: InsEltScalar)) || |
| 1485 | !match(V: InsElt.getOperand(i_nocapture: 2), P: m_ConstantInt(V&: InsEltIndex))) |
| 1486 | return nullptr; |
| 1487 | |
| 1488 | // Adding an element to an arbitrary shuffle could be expensive, but a |
| 1489 | // shuffle that selects elements from vectors without crossing lanes is |
| 1490 | // assumed cheap. |
| 1491 | // If we're just adding a constant into that shuffle, it will still be |
| 1492 | // cheap. |
| 1493 | if (!isShuffleEquivalentToSelect(Shuf&: *Shuf)) |
| 1494 | return nullptr; |
| 1495 | |
| 1496 | // From the above 'select' check, we know that the mask has the same number |
| 1497 | // of elements as the vector input operands. We also know that each constant |
| 1498 | // input element is used in its lane and can not be used more than once by |
| 1499 | // the shuffle. Therefore, replace the constant in the shuffle's constant |
| 1500 | // vector with the insertelt constant. Replace the constant in the shuffle's |
| 1501 | // mask vector with the insertelt index plus the length of the vector |
| 1502 | // (because the constant vector operand of a shuffle is always the 2nd |
| 1503 | // operand). |
| 1504 | ArrayRef<int> Mask = Shuf->getShuffleMask(); |
| 1505 | unsigned NumElts = Mask.size(); |
| 1506 | SmallVector<Constant *, 16> NewShufElts(NumElts); |
| 1507 | SmallVector<int, 16> NewMaskElts(NumElts); |
| 1508 | for (unsigned I = 0; I != NumElts; ++I) { |
| 1509 | if (I == InsEltIndex) { |
| 1510 | NewShufElts[I] = InsEltScalar; |
| 1511 | NewMaskElts[I] = InsEltIndex + NumElts; |
| 1512 | } else { |
| 1513 | // Copy over the existing values. |
| 1514 | NewShufElts[I] = ShufConstVec->getAggregateElement(Elt: I); |
| 1515 | NewMaskElts[I] = Mask[I]; |
| 1516 | } |
| 1517 | |
| 1518 | // Bail if we failed to find an element. |
| 1519 | if (!NewShufElts[I]) |
| 1520 | return nullptr; |
| 1521 | } |
| 1522 | |
| 1523 | // Create new operands for a shuffle that includes the constant of the |
| 1524 | // original insertelt. The old shuffle will be dead now. |
| 1525 | return new ShuffleVectorInst(Shuf->getOperand(i_nocapture: 0), |
| 1526 | ConstantVector::get(V: NewShufElts), NewMaskElts); |
| 1527 | } else if (auto *IEI = dyn_cast<InsertElementInst>(Val: Inst)) { |
| 1528 | // Transform sequences of insertelement ops with constant data/indexes into
| 1529 | // a single shuffle op.
| 1530 | // Cannot handle a scalable type; the number of elements needed to create the
| 1531 | // shuffle mask is not a compile-time constant.
| 1532 | if (isa<ScalableVectorType>(Val: InsElt.getType())) |
| 1533 | return nullptr; |
| 1534 | unsigned NumElts = |
| 1535 | cast<FixedVectorType>(Val: InsElt.getType())->getNumElements(); |
| 1536 | |
| 1537 | uint64_t InsertIdx[2]; |
| 1538 | Constant *Val[2]; |
| 1539 | if (!match(V: InsElt.getOperand(i_nocapture: 2), P: m_ConstantInt(V&: InsertIdx[0])) || |
| 1540 | !match(V: InsElt.getOperand(i_nocapture: 1), P: m_Constant(C&: Val[0])) || |
| 1541 | !match(V: IEI->getOperand(i_nocapture: 2), P: m_ConstantInt(V&: InsertIdx[1])) || |
| 1542 | !match(V: IEI->getOperand(i_nocapture: 1), P: m_Constant(C&: Val[1]))) |
| 1543 | return nullptr; |
| 1544 | SmallVector<Constant *, 16> Values(NumElts); |
| 1545 | SmallVector<int, 16> Mask(NumElts); |
| 1546 | auto ValI = std::begin(arr&: Val); |
| 1547 | // Generate new constant vector and mask. |
| 1548 | // We have 2 values/masks from the insertelements instructions. Insert them |
| 1549 | // into new value/mask vectors. |
| 1550 | for (uint64_t I : InsertIdx) { |
| 1551 | if (!Values[I]) { |
| 1552 | Values[I] = *ValI; |
| 1553 | Mask[I] = NumElts + I; |
| 1554 | } |
| 1555 | ++ValI; |
| 1556 | } |
| 1557 | // Remaining values are filled with 'poison' values. |
| 1558 | for (unsigned I = 0; I < NumElts; ++I) { |
| 1559 | if (!Values[I]) { |
| 1560 | Values[I] = PoisonValue::get(T: InsElt.getType()->getElementType()); |
| 1561 | Mask[I] = I; |
| 1562 | } |
| 1563 | } |
| 1564 | // Create new operands for a shuffle that includes the constant of the |
| 1565 | // original insertelt. |
| 1566 | return new ShuffleVectorInst(IEI->getOperand(i_nocapture: 0), |
| 1567 | ConstantVector::get(V: Values), Mask); |
| 1568 | } |
| 1569 | return nullptr; |
| 1570 | } |
| 1571 | |
| 1572 | /// If both the base vector and the inserted element are extended from the same |
| 1573 | /// type, do the insert element in the narrow source type followed by extend. |
| 1574 | /// TODO: This can be extended to include other cast opcodes, but particularly |
| 1575 | /// if we create a wider insertelement, make sure codegen is not harmed. |
| 1576 | static Instruction *narrowInsElt(InsertElementInst &InsElt, |
| 1577 | InstCombiner::BuilderTy &Builder) { |
| 1578 | // We are creating a vector extend. If the original vector extend has another |
| 1579 | // use, that would mean we end up with 2 vector extends, so avoid that. |
| 1580 | // TODO: We could ease the use-clause to "if at least one op has one use" |
| 1581 | // (assuming that the source types match - see next TODO comment). |
| 1582 | Value *Vec = InsElt.getOperand(i_nocapture: 0); |
| 1583 | if (!Vec->hasOneUse()) |
| 1584 | return nullptr; |
| 1585 | |
| 1586 | Value *Scalar = InsElt.getOperand(i_nocapture: 1); |
| 1587 | Value *X, *Y; |
| 1588 | CastInst::CastOps CastOpcode; |
| 1589 | if (match(V: Vec, P: m_FPExt(Op: m_Value(V&: X))) && match(V: Scalar, P: m_FPExt(Op: m_Value(V&: Y)))) |
| 1590 | CastOpcode = Instruction::FPExt; |
| 1591 | else if (match(V: Vec, P: m_SExt(Op: m_Value(V&: X))) && match(V: Scalar, P: m_SExt(Op: m_Value(V&: Y)))) |
| 1592 | CastOpcode = Instruction::SExt; |
| 1593 | else if (match(V: Vec, P: m_ZExt(Op: m_Value(V&: X))) && match(V: Scalar, P: m_ZExt(Op: m_Value(V&: Y)))) |
| 1594 | CastOpcode = Instruction::ZExt; |
| 1595 | else |
| 1596 | return nullptr; |
| 1597 | |
| 1598 | // TODO: We can allow mismatched types by creating an intermediate cast. |
| 1599 | if (X->getType()->getScalarType() != Y->getType()) |
| 1600 | return nullptr; |
| 1601 | |
| 1602 | // inselt (ext X), (ext Y), Index --> ext (inselt X, Y, Index) |
| 1603 | Value *NewInsElt = Builder.CreateInsertElement(Vec: X, NewElt: Y, Idx: InsElt.getOperand(i_nocapture: 2)); |
| 1604 | return CastInst::Create(CastOpcode, S: NewInsElt, Ty: InsElt.getType()); |
| 1605 | } |
| 1606 | |
| 1607 | /// If we are inserting 2 halves of a value into adjacent elements of a vector, |
| 1608 | /// try to convert to a single insert with appropriate bitcasts. |
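|      | /// For example (little endian), inserting the low and high i16 halves of an
|      | /// i32 %x into adjacent elements 0 and 1 of an undef <4 x i16> becomes a
|      | /// single insert of %x into element 0 of the vector bitcast to <2 x i32>,
|      | /// followed by a bitcast back to <4 x i16>.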
| 1609 | static Instruction *foldTruncInsEltPair(InsertElementInst &InsElt, |
| 1610 | bool IsBigEndian, |
| 1611 | InstCombiner::BuilderTy &Builder) { |
| 1612 | Value *VecOp = InsElt.getOperand(i_nocapture: 0); |
| 1613 | Value *ScalarOp = InsElt.getOperand(i_nocapture: 1); |
| 1614 | Value *IndexOp = InsElt.getOperand(i_nocapture: 2); |
| 1615 | |
| 1616 | // Pattern depends on endianness because we expect the lower index to be inserted first.
| 1617 | // Big endian:
| 1618 | // inselt (inselt BaseVec, (trunc (lshr X, BW/2)), Index0), (trunc X), Index1
| 1619 | // Little endian: |
| 1620 | // inselt (inselt BaseVec, (trunc X), Index0), (trunc (lshr X, BW/2)), Index1 |
| 1621 | // Note: It is not safe to do this transform with an arbitrary base vector |
| 1622 | // because the bitcast of that vector to fewer/larger elements could |
| 1623 | // allow poison to spill into an element that was not poison before. |
| 1624 | // TODO: Detect smaller fractions of the scalar. |
| 1625 | // TODO: One-use checks are conservative. |
| 1626 | auto *VTy = dyn_cast<FixedVectorType>(Val: InsElt.getType()); |
| 1627 | Value *Scalar0, *BaseVec; |
| 1628 | uint64_t Index0, Index1; |
| 1629 | if (!VTy || (VTy->getNumElements() & 1) || |
| 1630 | !match(V: IndexOp, P: m_ConstantInt(V&: Index1)) || |
| 1631 | !match(V: VecOp, P: m_InsertElt(Val: m_Value(V&: BaseVec), Elt: m_Value(V&: Scalar0), |
| 1632 | Idx: m_ConstantInt(V&: Index0))) || |
| 1633 | !match(V: BaseVec, P: m_Undef())) |
| 1634 | return nullptr; |
| 1635 | |
| 1636 | // The first insert must be to the index one less than this one, and |
| 1637 | // the first insert must be to an even index. |
| 1638 | if (Index0 + 1 != Index1 || Index0 & 1) |
| 1639 | return nullptr; |
| 1640 | |
| 1641 | // For big endian, the high half of the value should be inserted first. |
| 1642 | // For little endian, the low half of the value should be inserted first. |
| 1643 | Value *X; |
| 1644 | uint64_t ShAmt; |
| 1645 | if (IsBigEndian) { |
| 1646 | if (!match(V: ScalarOp, P: m_Trunc(Op: m_Value(V&: X))) || |
| 1647 | !match(V: Scalar0, P: m_Trunc(Op: m_LShr(L: m_Specific(V: X), R: m_ConstantInt(V&: ShAmt))))) |
| 1648 | return nullptr; |
| 1649 | } else { |
| 1650 | if (!match(V: Scalar0, P: m_Trunc(Op: m_Value(V&: X))) || |
| 1651 | !match(V: ScalarOp, P: m_Trunc(Op: m_LShr(L: m_Specific(V: X), R: m_ConstantInt(V&: ShAmt))))) |
| 1652 | return nullptr; |
| 1653 | } |
| 1654 | |
| 1655 | Type *SrcTy = X->getType(); |
| 1656 | unsigned ScalarWidth = SrcTy->getScalarSizeInBits(); |
| 1657 | unsigned VecEltWidth = VTy->getScalarSizeInBits(); |
| 1658 | if (ScalarWidth != VecEltWidth * 2 || ShAmt != VecEltWidth) |
| 1659 | return nullptr; |
| 1660 | |
| 1661 | // Bitcast the base vector to a vector type with the source element type. |
| 1662 | Type *CastTy = FixedVectorType::get(ElementType: SrcTy, NumElts: VTy->getNumElements() / 2); |
| 1663 | Value *CastBaseVec = Builder.CreateBitCast(V: BaseVec, DestTy: CastTy); |
| 1664 | |
| 1665 | // Scale the insert index for a vector with half as many elements. |
| 1666 | // bitcast (inselt (bitcast BaseVec), X, NewIndex) |
| 1667 | uint64_t NewIndex = IsBigEndian ? Index1 / 2 : Index0 / 2; |
| 1668 | Value *NewInsert = Builder.CreateInsertElement(Vec: CastBaseVec, NewElt: X, Idx: NewIndex); |
| 1669 | return new BitCastInst(NewInsert, VTy); |
| 1670 | } |
| 1671 | |
| 1672 | Instruction *InstCombinerImpl::visitInsertElementInst(InsertElementInst &IE) { |
| 1673 | Value *VecOp = IE.getOperand(i_nocapture: 0); |
| 1674 | Value *ScalarOp = IE.getOperand(i_nocapture: 1); |
| 1675 | Value *IdxOp = IE.getOperand(i_nocapture: 2); |
| 1676 | |
| 1677 | if (auto *V = simplifyInsertElementInst( |
| 1678 | Vec: VecOp, Elt: ScalarOp, Idx: IdxOp, Q: SQ.getWithInstruction(I: &IE))) |
| 1679 | return replaceInstUsesWith(I&: IE, V); |
| 1680 | |
| 1681 | // Canonicalize type of constant indices to i64 to simplify CSE |
| 1682 | if (auto *IndexC = dyn_cast<ConstantInt>(Val: IdxOp)) { |
| 1683 | if (auto *NewIdx = getPreferredVectorIndex(IndexC)) |
| 1684 | return replaceOperand(I&: IE, OpNum: 2, V: NewIdx); |
| 1685 | |
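|      | // If this insert and a one-use insert feeding it both use constant indices,
|      | // the inner scalar is not a constant, and the outer insert targets the lower
|      | // index, swap them so the inserts occur in ascending index order, e.g.:
|      | //   inselt (inselt X, %a, 3), %b, 1 --> inselt (inselt X, %b, 1), %a, 3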
| 1686 | Value *BaseVec, *OtherScalar; |
| 1687 | uint64_t OtherIndexVal; |
| 1688 | if (match(V: VecOp, P: m_OneUse(SubPattern: m_InsertElt(Val: m_Value(V&: BaseVec), |
| 1689 | Elt: m_Value(V&: OtherScalar), |
| 1690 | Idx: m_ConstantInt(V&: OtherIndexVal)))) && |
| 1691 | !isa<Constant>(Val: OtherScalar) && OtherIndexVal > IndexC->getZExtValue()) { |
| 1692 | Value *NewIns = Builder.CreateInsertElement(Vec: BaseVec, NewElt: ScalarOp, Idx: IdxOp); |
| 1693 | return InsertElementInst::Create(Vec: NewIns, NewElt: OtherScalar, |
| 1694 | Idx: Builder.getInt64(C: OtherIndexVal)); |
| 1695 | } |
| 1696 | } |
| 1697 | |
| 1698 | // If the scalar is bitcast and inserted into undef, do the insert in the |
| 1699 | // source type followed by bitcast. |
| 1700 | // TODO: Generalize for insert into any constant, not just undef? |
| 1701 | Value *ScalarSrc; |
| 1702 | if (match(V: VecOp, P: m_Undef()) && |
| 1703 | match(V: ScalarOp, P: m_OneUse(SubPattern: m_BitCast(Op: m_Value(V&: ScalarSrc)))) && |
| 1704 | (ScalarSrc->getType()->isIntegerTy() || |
| 1705 | ScalarSrc->getType()->isFloatingPointTy())) { |
| 1706 | // inselt undef, (bitcast ScalarSrc), IdxOp --> |
| 1707 | // bitcast (inselt undef, ScalarSrc, IdxOp) |
| 1708 | Type *ScalarTy = ScalarSrc->getType(); |
| 1709 | Type *VecTy = VectorType::get(ElementType: ScalarTy, EC: IE.getType()->getElementCount()); |
| 1710 | Constant *NewUndef = isa<PoisonValue>(Val: VecOp) ? PoisonValue::get(T: VecTy) |
| 1711 | : UndefValue::get(T: VecTy); |
| 1712 | Value *NewInsElt = Builder.CreateInsertElement(Vec: NewUndef, NewElt: ScalarSrc, Idx: IdxOp); |
| 1713 | return new BitCastInst(NewInsElt, IE.getType()); |
| 1714 | } |
| 1715 | |
| 1716 | // If the vector and scalar are both bitcast from the same element type, do |
| 1717 | // the insert in that source type followed by bitcast. |
| 1718 | Value *VecSrc; |
| 1719 | if (match(V: VecOp, P: m_BitCast(Op: m_Value(V&: VecSrc))) && |
| 1720 | match(V: ScalarOp, P: m_BitCast(Op: m_Value(V&: ScalarSrc))) && |
| 1721 | (VecOp->hasOneUse() || ScalarOp->hasOneUse()) && |
| 1722 | VecSrc->getType()->isVectorTy() && !ScalarSrc->getType()->isVectorTy() && |
| 1723 | cast<VectorType>(Val: VecSrc->getType())->getElementType() == |
| 1724 | ScalarSrc->getType()) { |
| 1725 | // inselt (bitcast VecSrc), (bitcast ScalarSrc), IdxOp --> |
| 1726 | // bitcast (inselt VecSrc, ScalarSrc, IdxOp) |
| 1727 | Value *NewInsElt = Builder.CreateInsertElement(Vec: VecSrc, NewElt: ScalarSrc, Idx: IdxOp); |
| 1728 | return new BitCastInst(NewInsElt, IE.getType()); |
| 1729 | } |
| 1730 | |
| 1731 | // If the inserted element was extracted from some other fixed-length vector |
| 1732 | // and both indexes are valid constants, try to turn this into a shuffle. |
| 1733 | // Cannot handle a scalable vector type; the number of elements needed to
| 1734 | // create the shuffle mask is not a compile-time constant.
| 1735 | uint64_t InsertedIdx, ExtractedIdx;
| 1736 | Value *ExtVecOp; |
| 1737 | if (isa<FixedVectorType>(Val: IE.getType()) && |
| 1738 | match(V: IdxOp, P: m_ConstantInt(V&: InsertedIdx)) && |
| 1739 | match(V: ScalarOp, |
| 1740 | P: m_ExtractElt(Val: m_Value(V&: ExtVecOp), Idx: m_ConstantInt(V&: ExtractedIdx))) && |
| 1741 | isa<FixedVectorType>(Val: ExtVecOp->getType()) && |
| 1742 | ExtractedIdx < |
| 1743 | cast<FixedVectorType>(Val: ExtVecOp->getType())->getNumElements()) { |
| 1744 | // TODO: Looking at the user(s) to determine if this insert is a |
| 1745 | // fold-to-shuffle opportunity does not match the usual instcombine |
| 1746 | // constraints. We should decide if the transform is worthy based only |
| 1747 | // on this instruction and its operands, but that may not work currently. |
| 1748 | // |
| 1749 | // Here, we are trying to avoid creating shuffles before reaching |
| 1750 | // the end of a chain of extract-insert pairs. This is complicated because |
| 1751 | // we do not generally form arbitrary shuffle masks in instcombine |
| 1752 | // (because those may codegen poorly), but collectShuffleElements() does |
| 1753 | // exactly that. |
| 1754 | // |
| 1755 | // The rules for determining what is an acceptable target-independent |
| 1756 | // shuffle mask are fuzzy because they evolve based on the backend's |
| 1757 | // capabilities and real-world impact. |
| 1758 | auto isShuffleRootCandidate = [](InsertElementInst &Insert) { |
| 1759 | if (!Insert.hasOneUse()) |
| 1760 | return true; |
| 1761 | auto *InsertUser = dyn_cast<InsertElementInst>(Val: Insert.user_back()); |
| 1762 | if (!InsertUser) |
| 1763 | return true; |
| 1764 | return false; |
| 1765 | }; |
| 1766 | |
| 1767 | // Try to form a shuffle from a chain of extract-insert ops. |
| 1768 | if (isShuffleRootCandidate(IE)) { |
| 1769 | bool Rerun = true; |
| 1770 | while (Rerun) { |
| 1771 | Rerun = false; |
| 1772 | |
| 1773 | SmallVector<int, 16> Mask; |
| 1774 | ShuffleOps LR = |
| 1775 | collectShuffleElements(V: &IE, Mask, PermittedRHS: nullptr, IC&: *this, Rerun); |
| 1776 | |
| 1777 | // The proposed shuffle may be trivial, in which case we shouldn't |
| 1778 | // perform the combine. |
| 1779 | if (LR.first != &IE && LR.second != &IE) { |
| 1780 | // We now have a shuffle of LHS, RHS, Mask. |
| 1781 | if (LR.second == nullptr) |
| 1782 | LR.second = PoisonValue::get(T: LR.first->getType()); |
| 1783 | return new ShuffleVectorInst(LR.first, LR.second, Mask); |
| 1784 | } |
| 1785 | } |
| 1786 | } |
| 1787 | } |
| 1788 | |
| 1789 | if (auto VecTy = dyn_cast<FixedVectorType>(Val: VecOp->getType())) { |
| 1790 | unsigned VWidth = VecTy->getNumElements(); |
| 1791 | APInt PoisonElts(VWidth, 0); |
| 1792 | APInt AllOnesEltMask(APInt::getAllOnes(numBits: VWidth)); |
| 1793 | if (Value *V = SimplifyDemandedVectorElts(V: &IE, DemandedElts: AllOnesEltMask, |
| 1794 | PoisonElts)) { |
| 1795 | if (V != &IE) |
| 1796 | return replaceInstUsesWith(I&: IE, V); |
| 1797 | return &IE; |
| 1798 | } |
| 1799 | } |
| 1800 | |
| 1801 | if (Instruction *Shuf = foldConstantInsEltIntoShuffle(InsElt&: IE)) |
| 1802 | return Shuf; |
| 1803 | |
| 1804 | if (Instruction *NewInsElt = hoistInsEltConst(InsElt2&: IE, Builder)) |
| 1805 | return NewInsElt; |
| 1806 | |
| 1807 | if (Instruction *Broadcast = foldInsSequenceIntoSplat(InsElt&: IE)) |
| 1808 | return Broadcast; |
| 1809 | |
| 1810 | if (Instruction *Splat = foldInsEltIntoSplat(InsElt&: IE)) |
| 1811 | return Splat; |
| 1812 | |
| 1813 | if (Instruction *IdentityShuf = foldInsEltIntoIdentityShuffle(InsElt&: IE)) |
| 1814 | return IdentityShuf; |
| 1815 | |
| 1816 | if (Instruction *Ext = narrowInsElt(InsElt&: IE, Builder)) |
| 1817 | return Ext; |
| 1818 | |
| 1819 | if (Instruction *Ext = foldTruncInsEltPair(InsElt&: IE, IsBigEndian: DL.isBigEndian(), Builder)) |
| 1820 | return Ext; |
| 1821 | |
| 1822 | return nullptr; |
| 1823 | } |
| 1824 | |
| 1825 | /// Return true if we can evaluate the specified expression tree if the vector |
| 1826 | /// elements were shuffled in a different order. |
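|      | /// For example, (add (inselt poison, %s, 0), <i32 1, i32 2>) can be evaluated
|      | /// with its lanes swapped by remapping the insert index and shuffling the
|      | /// constant operand, but an opaque vector function argument cannot.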
| 1827 | static bool canEvaluateShuffled(Value *V, ArrayRef<int> Mask, |
| 1828 | unsigned Depth = 5) { |
| 1829 | // We can always reorder the elements of a constant. |
| 1830 | if (isa<Constant>(Val: V)) |
| 1831 | return true; |
| 1832 | |
| 1833 | // We won't reorder vector arguments. No IPO here. |
| 1834 | Instruction *I = dyn_cast<Instruction>(Val: V); |
| 1835 | if (!I) return false; |
| 1836 | |
| 1837 | // Two users may expect different orders of the elements. Don't try it. |
| 1838 | if (!I->hasOneUse()) |
| 1839 | return false; |
| 1840 | |
| 1841 | if (Depth == 0) return false; |
| 1842 | |
| 1843 | switch (I->getOpcode()) { |
| 1844 | case Instruction::UDiv: |
| 1845 | case Instruction::SDiv: |
| 1846 | case Instruction::URem: |
| 1847 | case Instruction::SRem: |
| 1848 | // Propagating an undefined shuffle mask element to integer div/rem is not |
| 1849 | // allowed because those opcodes can create immediate undefined behavior |
| 1850 | // from an undefined element in an operand. |
| 1851 | if (llvm::is_contained(Range&: Mask, Element: -1)) |
| 1852 | return false; |
| 1853 | [[fallthrough]]; |
| 1854 | case Instruction::Add: |
| 1855 | case Instruction::FAdd: |
| 1856 | case Instruction::Sub: |
| 1857 | case Instruction::FSub: |
| 1858 | case Instruction::Mul: |
| 1859 | case Instruction::FMul: |
| 1860 | case Instruction::FDiv: |
| 1861 | case Instruction::FRem: |
| 1862 | case Instruction::Shl: |
| 1863 | case Instruction::LShr: |
| 1864 | case Instruction::AShr: |
| 1865 | case Instruction::And: |
| 1866 | case Instruction::Or: |
| 1867 | case Instruction::Xor: |
| 1868 | case Instruction::ICmp: |
| 1869 | case Instruction::FCmp: |
| 1870 | case Instruction::Trunc: |
| 1871 | case Instruction::ZExt: |
| 1872 | case Instruction::SExt: |
| 1873 | case Instruction::FPToUI: |
| 1874 | case Instruction::FPToSI: |
| 1875 | case Instruction::UIToFP: |
| 1876 | case Instruction::SIToFP: |
| 1877 | case Instruction::FPTrunc: |
| 1878 | case Instruction::FPExt: |
| 1879 | case Instruction::GetElementPtr: { |
| 1880 | // Bail out if we would create longer vector ops. We could allow creating |
| 1881 | // longer vector ops, but that may result in more expensive codegen. |
| 1882 | Type *ITy = I->getType(); |
| 1883 | if (ITy->isVectorTy() && |
| 1884 | Mask.size() > cast<FixedVectorType>(Val: ITy)->getNumElements()) |
| 1885 | return false; |
| 1886 | for (Value *Operand : I->operands()) { |
| 1887 | if (!canEvaluateShuffled(V: Operand, Mask, Depth: Depth - 1)) |
| 1888 | return false; |
| 1889 | } |
| 1890 | return true; |
| 1891 | } |
| 1892 | case Instruction::InsertElement: { |
| 1893 | ConstantInt *CI = dyn_cast<ConstantInt>(Val: I->getOperand(i: 2)); |
| 1894 | if (!CI) return false; |
| 1895 | int ElementNumber = CI->getLimitedValue(); |
| 1896 | |
| 1897 | // Verify that 'CI' does not occur twice in Mask. A single 'insertelement' |
| 1898 | // can't put an element into multiple indices. |
| 1899 | bool SeenOnce = false; |
| 1900 | for (int I : Mask) { |
| 1901 | if (I == ElementNumber) { |
| 1902 | if (SeenOnce) |
| 1903 | return false; |
| 1904 | SeenOnce = true; |
| 1905 | } |
| 1906 | } |
| 1907 | return canEvaluateShuffled(V: I->getOperand(i: 0), Mask, Depth: Depth - 1); |
| 1908 | } |
| 1909 | } |
| 1910 | return false; |
| 1911 | } |
| 1912 | |
| 1913 | /// Rebuild a new instruction just like 'I' but with the new operands given.
| 1914 | /// In the event of a type mismatch, the types of the new operands take precedence.
| 1915 | static Value *buildNew(Instruction *I, ArrayRef<Value*> NewOps, |
| 1916 | IRBuilderBase &Builder) { |
| 1917 | Builder.SetInsertPoint(I); |
| 1918 | switch (I->getOpcode()) { |
| 1919 | case Instruction::Add: |
| 1920 | case Instruction::FAdd: |
| 1921 | case Instruction::Sub: |
| 1922 | case Instruction::FSub: |
| 1923 | case Instruction::Mul: |
| 1924 | case Instruction::FMul: |
| 1925 | case Instruction::UDiv: |
| 1926 | case Instruction::SDiv: |
| 1927 | case Instruction::FDiv: |
| 1928 | case Instruction::URem: |
| 1929 | case Instruction::SRem: |
| 1930 | case Instruction::FRem: |
| 1931 | case Instruction::Shl: |
| 1932 | case Instruction::LShr: |
| 1933 | case Instruction::AShr: |
| 1934 | case Instruction::And: |
| 1935 | case Instruction::Or: |
| 1936 | case Instruction::Xor: { |
| 1937 | BinaryOperator *BO = cast<BinaryOperator>(Val: I); |
| 1938 | assert(NewOps.size() == 2 && "binary operator with #ops != 2" ); |
| 1939 | Value *New = Builder.CreateBinOp(Opc: cast<BinaryOperator>(Val: I)->getOpcode(), |
| 1940 | LHS: NewOps[0], RHS: NewOps[1]); |
| 1941 | if (auto *NewI = dyn_cast<Instruction>(Val: New)) { |
| 1942 | if (isa<OverflowingBinaryOperator>(Val: BO)) { |
| 1943 | NewI->setHasNoUnsignedWrap(BO->hasNoUnsignedWrap()); |
| 1944 | NewI->setHasNoSignedWrap(BO->hasNoSignedWrap()); |
| 1945 | } |
| 1946 | if (isa<PossiblyExactOperator>(Val: BO)) { |
| 1947 | NewI->setIsExact(BO->isExact()); |
| 1948 | } |
| 1949 | if (isa<FPMathOperator>(Val: BO)) |
| 1950 | NewI->copyFastMathFlags(I); |
| 1951 | } |
| 1952 | return New; |
| 1953 | } |
| 1954 | case Instruction::ICmp: |
| 1955 | assert(NewOps.size() == 2 && "icmp with #ops != 2" ); |
| 1956 | return Builder.CreateICmp(P: cast<ICmpInst>(Val: I)->getPredicate(), LHS: NewOps[0], |
| 1957 | RHS: NewOps[1]); |
| 1958 | case Instruction::FCmp: |
| 1959 | assert(NewOps.size() == 2 && "fcmp with #ops != 2" ); |
| 1960 | return Builder.CreateFCmp(P: cast<FCmpInst>(Val: I)->getPredicate(), LHS: NewOps[0], |
| 1961 | RHS: NewOps[1]); |
| 1962 | case Instruction::Trunc: |
| 1963 | case Instruction::ZExt: |
| 1964 | case Instruction::SExt: |
| 1965 | case Instruction::FPToUI: |
| 1966 | case Instruction::FPToSI: |
| 1967 | case Instruction::UIToFP: |
| 1968 | case Instruction::SIToFP: |
| 1969 | case Instruction::FPTrunc: |
| 1970 | case Instruction::FPExt: { |
| 1971 | // It's possible that the mask has a different number of elements from |
| 1972 | // the original cast. We recompute the destination type to match the mask. |
| 1973 | Type *DestTy = VectorType::get( |
| 1974 | ElementType: I->getType()->getScalarType(), |
| 1975 | EC: cast<VectorType>(Val: NewOps[0]->getType())->getElementCount()); |
| 1976 | assert(NewOps.size() == 1 && "cast with #ops != 1" ); |
| 1977 | return Builder.CreateCast(Op: cast<CastInst>(Val: I)->getOpcode(), V: NewOps[0], |
| 1978 | DestTy); |
| 1979 | } |
| 1980 | case Instruction::GetElementPtr: { |
| 1981 | Value *Ptr = NewOps[0]; |
| 1982 | ArrayRef<Value*> Idx = NewOps.slice(N: 1); |
| 1983 | return Builder.CreateGEP(Ty: cast<GEPOperator>(Val: I)->getSourceElementType(), |
| 1984 | Ptr, IdxList: Idx, Name: "" , |
| 1985 | NW: cast<GEPOperator>(Val: I)->getNoWrapFlags()); |
| 1986 | } |
| 1987 | } |
| 1988 | llvm_unreachable("failed to rebuild vector instructions" ); |
| 1989 | } |
| 1990 | |
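|      | /// Return a value that computes the elements of V rearranged according to
|      | /// Mask, rebuilding the (single-use) instruction tree as needed; callers are
|      | /// expected to have checked canEvaluateShuffled first. Constants are shuffled
|      | /// directly, e.g. reordering <i32 1, i32 2> with mask <1,0> yields
|      | /// <i32 2, i32 1>.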
| 1991 | static Value *evaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask, |
| 1992 | IRBuilderBase &Builder) { |
| 1993 | // Mask.size() does not need to be equal to the number of vector elements. |
| 1994 | |
| 1995 | assert(V->getType()->isVectorTy() && "can't reorder non-vector elements" ); |
| 1996 | Type *EltTy = V->getType()->getScalarType(); |
| 1997 | |
| 1998 | if (isa<PoisonValue>(Val: V)) |
| 1999 | return PoisonValue::get(T: FixedVectorType::get(ElementType: EltTy, NumElts: Mask.size())); |
| 2000 | |
| 2001 | if (match(V, P: m_Undef())) |
| 2002 | return UndefValue::get(T: FixedVectorType::get(ElementType: EltTy, NumElts: Mask.size())); |
| 2003 | |
| 2004 | if (isa<ConstantAggregateZero>(Val: V)) |
| 2005 | return ConstantAggregateZero::get(Ty: FixedVectorType::get(ElementType: EltTy, NumElts: Mask.size())); |
| 2006 | |
| 2007 | if (Constant *C = dyn_cast<Constant>(Val: V)) |
| 2008 | return ConstantExpr::getShuffleVector(V1: C, V2: PoisonValue::get(T: C->getType()), |
| 2009 | Mask); |
| 2010 | |
| 2011 | Instruction *I = cast<Instruction>(Val: V); |
| 2012 | switch (I->getOpcode()) { |
| 2013 | case Instruction::Add: |
| 2014 | case Instruction::FAdd: |
| 2015 | case Instruction::Sub: |
| 2016 | case Instruction::FSub: |
| 2017 | case Instruction::Mul: |
| 2018 | case Instruction::FMul: |
| 2019 | case Instruction::UDiv: |
| 2020 | case Instruction::SDiv: |
| 2021 | case Instruction::FDiv: |
| 2022 | case Instruction::URem: |
| 2023 | case Instruction::SRem: |
| 2024 | case Instruction::FRem: |
| 2025 | case Instruction::Shl: |
| 2026 | case Instruction::LShr: |
| 2027 | case Instruction::AShr: |
| 2028 | case Instruction::And: |
| 2029 | case Instruction::Or: |
| 2030 | case Instruction::Xor: |
| 2031 | case Instruction::ICmp: |
| 2032 | case Instruction::FCmp: |
| 2033 | case Instruction::Trunc: |
| 2034 | case Instruction::ZExt: |
| 2035 | case Instruction::SExt: |
| 2036 | case Instruction::FPToUI: |
| 2037 | case Instruction::FPToSI: |
| 2038 | case Instruction::UIToFP: |
| 2039 | case Instruction::SIToFP: |
| 2040 | case Instruction::FPTrunc: |
| 2041 | case Instruction::FPExt: |
| 2042 | case Instruction::Select: |
| 2043 | case Instruction::GetElementPtr: { |
| 2044 | SmallVector<Value*, 8> NewOps; |
| 2045 | bool NeedsRebuild = |
| 2046 | (Mask.size() != |
| 2047 | cast<FixedVectorType>(Val: I->getType())->getNumElements()); |
| 2048 | for (int i = 0, e = I->getNumOperands(); i != e; ++i) { |
| 2049 | Value *V; |
| 2050 | // Recursively call evaluateInDifferentElementOrder on vector arguments |
| 2051 | // as well. E.g. GetElementPtr may have scalar operands even if the |
| 2052 | // return value is a vector, so we need to examine the operand type. |
| 2053 | if (I->getOperand(i)->getType()->isVectorTy()) |
| 2054 | V = evaluateInDifferentElementOrder(V: I->getOperand(i), Mask, Builder); |
| 2055 | else |
| 2056 | V = I->getOperand(i); |
| 2057 | NewOps.push_back(Elt: V); |
| 2058 | NeedsRebuild |= (V != I->getOperand(i)); |
| 2059 | } |
| 2060 | if (NeedsRebuild) |
| 2061 | return buildNew(I, NewOps, Builder); |
| 2062 | return I; |
| 2063 | } |
| 2064 | case Instruction::InsertElement: { |
| 2065 | int Element = cast<ConstantInt>(Val: I->getOperand(i: 2))->getLimitedValue(); |
| 2066 | |
| 2067 | // The insertelement was inserting at Element. Figure out which element |
| 2068 | // that becomes after shuffling. The answer is guaranteed to be unique |
| 2069 | // by canEvaluateShuffled.
| 2070 | bool Found = false; |
| 2071 | int Index = 0; |
| 2072 | for (int e = Mask.size(); Index != e; ++Index) { |
| 2073 | if (Mask[Index] == Element) { |
| 2074 | Found = true; |
| 2075 | break; |
| 2076 | } |
| 2077 | } |
| 2078 | |
| 2079 | // If the element is not in Mask, there is no need to handle operand 1 (the
| 2080 | // element to be inserted). Just evaluate values in operand 0 according to Mask.
| 2081 | if (!Found) |
| 2082 | return evaluateInDifferentElementOrder(V: I->getOperand(i: 0), Mask, Builder); |
| 2083 | |
| 2084 | Value *V = evaluateInDifferentElementOrder(V: I->getOperand(i: 0), Mask, |
| 2085 | Builder); |
| 2086 | Builder.SetInsertPoint(I); |
| 2087 | return Builder.CreateInsertElement(Vec: V, NewElt: I->getOperand(i: 1), Idx: Index); |
| 2088 | } |
| 2089 | } |
| 2090 | llvm_unreachable("failed to reorder elements of vector instruction!" ); |
| 2091 | } |
| 2092 | |
| 2093 | // Returns true if the shuffle is extracting a contiguous range of values from |
| 2094 | // LHS, for example: |
| 2095 | // +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ |
| 2096 | // Input: |AA|BB|CC|DD|EE|FF|GG|HH|II|JJ|KK|LL|MM|NN|OO|PP| |
| 2097 | // Shuffles to: |EE|FF|GG|HH| |
| 2098 | // +--+--+--+--+ |
| 2099 | static bool isShuffleExtractingFromLHS(ShuffleVectorInst &SVI,
| 2100 | ArrayRef<int> Mask) { |
| 2101 | unsigned LHSElems = |
| 2102 | cast<FixedVectorType>(Val: SVI.getOperand(i_nocapture: 0)->getType())->getNumElements(); |
| 2103 | unsigned MaskElems = Mask.size(); |
| 2104 | unsigned BegIdx = Mask.front(); |
| 2105 | unsigned EndIdx = Mask.back(); |
| 2106 | if (BegIdx > EndIdx || EndIdx >= LHSElems || EndIdx - BegIdx != MaskElems - 1) |
| 2107 | return false; |
| 2108 | for (unsigned I = 0; I != MaskElems; ++I) |
| 2109 | if (static_cast<unsigned>(Mask[I]) != BegIdx + I) |
| 2110 | return false; |
| 2111 | return true; |
| 2112 | } |
| 2113 | |
| 2114 | /// These are the ingredients in an alternate form binary operator as described |
| 2115 | /// below. |
| 2116 | struct BinopElts { |
| 2117 | BinaryOperator::BinaryOps Opcode; |
| 2118 | Value *Op0; |
| 2119 | Value *Op1; |
| 2120 | BinopElts(BinaryOperator::BinaryOps Opc = (BinaryOperator::BinaryOps)0, |
| 2121 | Value *V0 = nullptr, Value *V1 = nullptr) : |
| 2122 | Opcode(Opc), Op0(V0), Op1(V1) {} |
| 2123 | operator bool() const { return Opcode != 0; } |
| 2124 | }; |
| 2125 | |
| 2126 | /// Binops may be transformed into binops with different opcodes and operands. |
| 2127 | /// Reverse the usual canonicalization to enable folds with the non-canonical |
| 2128 | /// form of the binop. If a transform is possible, return the elements of the |
| 2129 | /// new binop. If not, return invalid elements. |
| 2130 | static BinopElts getAlternateBinop(BinaryOperator *BO, const DataLayout &DL) { |
| 2131 | Value *BO0 = BO->getOperand(i_nocapture: 0), *BO1 = BO->getOperand(i_nocapture: 1); |
| 2132 | Type *Ty = BO->getType(); |
| 2133 | switch (BO->getOpcode()) { |
| 2134 | case Instruction::Shl: { |
| 2135 | // shl X, C --> mul X, (1 << C) |
| 2136 | Constant *C; |
| 2137 | if (match(V: BO1, P: m_ImmConstant(C))) { |
| 2138 | Constant *ShlOne = ConstantFoldBinaryOpOperands( |
| 2139 | Opcode: Instruction::Shl, LHS: ConstantInt::get(Ty, V: 1), RHS: C, DL); |
| 2140 | assert(ShlOne && "Constant folding of immediate constants failed" ); |
| 2141 | return {Instruction::Mul, BO0, ShlOne}; |
| 2142 | } |
| 2143 | break; |
| 2144 | } |
| 2145 | case Instruction::Or: { |
| 2146 | // or disjoint X, C --> add X, C
| 2147 | if (cast<PossiblyDisjointInst>(Val: BO)->isDisjoint()) |
| 2148 | return {Instruction::Add, BO0, BO1}; |
| 2149 | break; |
| 2150 | } |
| 2151 | case Instruction::Sub: |
| 2152 | // sub 0, X --> mul X, -1 |
| 2153 | if (match(V: BO0, P: m_ZeroInt())) |
| 2154 | return {Instruction::Mul, BO1, ConstantInt::getAllOnesValue(Ty)}; |
| 2155 | break; |
| 2156 | default: |
| 2157 | break; |
| 2158 | } |
| 2159 | return {}; |
| 2160 | } |
| 2161 | |
| 2162 | /// A select shuffle of a select shuffle with a shared operand can be reduced |
| 2163 | /// to a single select shuffle. This is an obvious improvement in IR, and the |
| 2164 | /// backend is expected to lower select shuffles efficiently. |
| 2165 | static Instruction *foldSelectShuffleOfSelectShuffle(ShuffleVectorInst &Shuf) { |
| 2166 | assert(Shuf.isSelect() && "Must have select-equivalent shuffle" ); |
| 2167 | |
| 2168 | Value *Op0 = Shuf.getOperand(i_nocapture: 0), *Op1 = Shuf.getOperand(i_nocapture: 1); |
| 2169 | SmallVector<int, 16> Mask; |
| 2170 | Shuf.getShuffleMask(Result&: Mask); |
| 2171 | unsigned NumElts = Mask.size(); |
| 2172 | |
| 2173 | // Canonicalize a select shuffle with common operand as Op1. |
| 2174 | auto *ShufOp = dyn_cast<ShuffleVectorInst>(Val: Op0); |
| 2175 | if (ShufOp && ShufOp->isSelect() && |
| 2176 | (ShufOp->getOperand(i_nocapture: 0) == Op1 || ShufOp->getOperand(i_nocapture: 1) == Op1)) { |
| 2177 | std::swap(a&: Op0, b&: Op1); |
| 2178 | ShuffleVectorInst::commuteShuffleMask(Mask, InVecNumElts: NumElts); |
| 2179 | } |
| 2180 | |
| 2181 | ShufOp = dyn_cast<ShuffleVectorInst>(Val: Op1); |
| 2182 | if (!ShufOp || !ShufOp->isSelect() || |
| 2183 | (ShufOp->getOperand(i_nocapture: 0) != Op0 && ShufOp->getOperand(i_nocapture: 1) != Op0)) |
| 2184 | return nullptr; |
| 2185 | |
| 2186 | Value *X = ShufOp->getOperand(i_nocapture: 0), *Y = ShufOp->getOperand(i_nocapture: 1); |
| 2187 | SmallVector<int, 16> Mask1; |
| 2188 | ShufOp->getShuffleMask(Result&: Mask1); |
| 2189 | assert(Mask1.size() == NumElts && "Vector size changed with select shuffle" ); |
| 2190 | |
| 2191 | // Canonicalize common operand (Op0) as X (first operand of first shuffle). |
| 2192 | if (Y == Op0) { |
| 2193 | std::swap(a&: X, b&: Y); |
| 2194 | ShuffleVectorInst::commuteShuffleMask(Mask: Mask1, InVecNumElts: NumElts); |
| 2195 | } |
| 2196 | |
| 2197 | // If the mask chooses from X (operand 0), it stays the same. |
| 2198 | // If the mask chooses from the earlier shuffle, the other mask value is |
| 2199 | // transferred to the combined select shuffle: |
| 2200 | // shuf X, (shuf X, Y, M1), M --> shuf X, Y, M' |
| 2201 | SmallVector<int, 16> NewMask(NumElts); |
| 2202 | for (unsigned i = 0; i != NumElts; ++i) |
| 2203 | NewMask[i] = Mask[i] < (signed)NumElts ? Mask[i] : Mask1[i]; |
| 2204 | |
| 2205 | // A select mask with undef elements might look like an identity mask. |
| 2206 | assert((ShuffleVectorInst::isSelectMask(NewMask, NumElts) || |
| 2207 | ShuffleVectorInst::isIdentityMask(NewMask, NumElts)) && |
| 2208 | "Unexpected shuffle mask" ); |
| 2209 | return new ShuffleVectorInst(X, Y, NewMask); |
| 2210 | } |
| 2211 | |
| 2212 | static Instruction *foldSelectShuffleWith1Binop(ShuffleVectorInst &Shuf, |
| 2213 | const SimplifyQuery &SQ) { |
| 2214 | assert(Shuf.isSelect() && "Must have select-equivalent shuffle" ); |
| 2215 | |
| 2216 | // Are we shuffling together some value and that same value after it has been |
| 2217 | // modified by a binop with a constant? |
| 2218 | Value *Op0 = Shuf.getOperand(i_nocapture: 0), *Op1 = Shuf.getOperand(i_nocapture: 1); |
| 2219 | Constant *C; |
| 2220 | bool Op0IsBinop; |
| 2221 | if (match(V: Op0, P: m_BinOp(L: m_Specific(V: Op1), R: m_Constant(C)))) |
| 2222 | Op0IsBinop = true; |
| 2223 | else if (match(V: Op1, P: m_BinOp(L: m_Specific(V: Op0), R: m_Constant(C)))) |
| 2224 | Op0IsBinop = false; |
| 2225 | else |
| 2226 | return nullptr; |
| 2227 | |
| 2228 | // The identity constant for a binop leaves a variable operand unchanged. For |
| 2229 | // a vector, this is a splat of something like 0, -1, or 1. |
| 2230 | // If there's no identity constant for this binop, we're done. |
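|      | // (For example: 0 for add/or/xor, 1 for mul, -1 (all ones) for and.)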
| 2231 | auto *BO = cast<BinaryOperator>(Val: Op0IsBinop ? Op0 : Op1); |
| 2232 | BinaryOperator::BinaryOps BOpcode = BO->getOpcode(); |
| 2233 | Constant *IdC = ConstantExpr::getBinOpIdentity(Opcode: BOpcode, Ty: Shuf.getType(), AllowRHSConstant: true); |
| 2234 | if (!IdC) |
| 2235 | return nullptr; |
| 2236 | |
| 2237 | Value *X = Op0IsBinop ? Op1 : Op0; |
| 2238 | |
| 2239 | // Prevent folding when the non-binop operand might have NaN values.
| 2240 | // If X can have NaN elements then the floating point math operation in the
| 2241 | // transformed code may not preserve the exact NaN bit-pattern -- e.g.
| 2242 | // `fadd sNaN, 0.0 -> qNaN`.
| 2243 | // This makes the transformation incorrect since the original program would |
| 2244 | // have preserved the exact NaN bit-pattern. |
| 2245 | // Avoid the folding if X can have NaN elements. |
| 2246 | if (Shuf.getType()->getElementType()->isFloatingPointTy() && |
| 2247 | !isKnownNeverNaN(V: X, SQ)) |
| 2248 | return nullptr; |
| 2249 | |
| 2250 | // Shuffle identity constants into the lanes that return the original value. |
| 2251 | // Example: shuf (mul X, {-1,-2,-3,-4}), X, {0,5,6,3} --> mul X, {-1,1,1,-4} |
| 2252 | // Example: shuf X, (add X, {-1,-2,-3,-4}), {0,1,6,7} --> add X, {0,0,-3,-4} |
| 2253 | // The existing binop constant vector remains in the same operand position. |
| 2254 | ArrayRef<int> Mask = Shuf.getShuffleMask(); |
| 2255 | Constant *NewC = Op0IsBinop ? ConstantExpr::getShuffleVector(V1: C, V2: IdC, Mask) : |
| 2256 | ConstantExpr::getShuffleVector(V1: IdC, V2: C, Mask); |
| 2257 | |
| 2258 | bool MightCreatePoisonOrUB = |
| 2259 | is_contained(Range&: Mask, Element: PoisonMaskElem) && |
| 2260 | (Instruction::isIntDivRem(Opcode: BOpcode) || Instruction::isShift(Opcode: BOpcode)); |
| 2261 | if (MightCreatePoisonOrUB) |
| 2262 | NewC = InstCombiner::getSafeVectorConstantForBinop(Opcode: BOpcode, In: NewC, IsRHSConstant: true); |
| 2263 | |
| 2264 | // shuf (bop X, C), X, M --> bop X, C' |
| 2265 | // shuf X, (bop X, C), M --> bop X, C' |
| 2266 | Instruction *NewBO = BinaryOperator::Create(Op: BOpcode, S1: X, S2: NewC); |
| 2267 | NewBO->copyIRFlags(V: BO); |
| 2268 | |
| 2269 | // An undef shuffle mask element may propagate as an undef constant element in |
| 2270 | // the new binop. That would produce poison where the original code might not. |
| 2271 | // If we already made a safe constant, then there's no danger. |
| 2272 | if (is_contained(Range&: Mask, Element: PoisonMaskElem) && !MightCreatePoisonOrUB) |
| 2273 | NewBO->dropPoisonGeneratingFlags(); |
| 2274 | return NewBO; |
| 2275 | } |
| 2276 | |
| 2277 | /// If we have an insert of a scalar to a non-zero element of an undefined |
| 2278 | /// vector and then shuffle that value, that's the same as inserting to the zero |
| 2279 | /// element and shuffling. Splatting from the zero element is recognized as the |
| 2280 | /// canonical form of splat. |
| 2281 | static Instruction *canonicalizeInsertSplat(ShuffleVectorInst &Shuf, |
| 2282 | InstCombiner::BuilderTy &Builder) { |
| 2283 | Value *Op0 = Shuf.getOperand(i_nocapture: 0), *Op1 = Shuf.getOperand(i_nocapture: 1); |
| 2284 | ArrayRef<int> Mask = Shuf.getShuffleMask(); |
| 2285 | Value *X; |
| 2286 | uint64_t IndexC; |
| 2287 | |
| 2288 | // Match a shuffle that is a splat to a non-zero element. |
| 2289 | if (!match(V: Op0, P: m_OneUse(SubPattern: m_InsertElt(Val: m_Poison(), Elt: m_Value(V&: X), |
| 2290 | Idx: m_ConstantInt(V&: IndexC)))) || |
| 2291 | !match(V: Op1, P: m_Poison()) || match(Mask, P: m_ZeroMask()) || IndexC == 0) |
| 2292 | return nullptr; |
| 2293 | |
| 2294 | // Insert into element 0 of a poison vector. |
| 2295 | PoisonValue *PoisonVec = PoisonValue::get(T: Shuf.getType()); |
| 2296 | Value *NewIns = Builder.CreateInsertElement(Vec: PoisonVec, NewElt: X, Idx: (uint64_t)0); |
| 2297 | |
| 2298 | // Splat from element 0. Any mask element that is poison remains poison. |
| 2299 | // For example: |
| 2300 | // shuf (inselt poison, X, 2), _, <2,2,undef> |
| 2301 | // --> shuf (inselt poison, X, 0), poison, <0,0,undef> |
| 2302 | unsigned NumMaskElts = |
| 2303 | cast<FixedVectorType>(Val: Shuf.getType())->getNumElements(); |
| 2304 | SmallVector<int, 16> NewMask(NumMaskElts, 0); |
| 2305 | for (unsigned i = 0; i != NumMaskElts; ++i) |
| 2306 | if (Mask[i] == PoisonMaskElem) |
| 2307 | NewMask[i] = Mask[i]; |
| 2308 | |
| 2309 | return new ShuffleVectorInst(NewIns, NewMask); |
| 2310 | } |
| 2311 | |
| 2312 | /// Try to fold shuffles that are the equivalent of a vector select. |
| 2313 | Instruction *InstCombinerImpl::foldSelectShuffle(ShuffleVectorInst &Shuf) { |
| 2314 | if (!Shuf.isSelect()) |
| 2315 | return nullptr; |
| 2316 | |
| 2317 | // Canonicalize to choose from operand 0 first unless operand 1 is undefined. |
| 2318 | // Commuting undef to operand 0 conflicts with another canonicalization. |
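|      | // For example, shuf A, B, <4,1,6,3> commutes to shuf B, A, <0,5,2,7>.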
| 2319 | unsigned NumElts = cast<FixedVectorType>(Val: Shuf.getType())->getNumElements(); |
| 2320 | if (!match(V: Shuf.getOperand(i_nocapture: 1), P: m_Undef()) && |
| 2321 | Shuf.getMaskValue(Elt: 0) >= (int)NumElts) { |
| 2322 | // TODO: Can we assert that both operands of a shuffle-select are not undef |
| 2323 | // (otherwise, it would have been folded by instsimplify)?
| 2324 | Shuf.commute(); |
| 2325 | return &Shuf; |
| 2326 | } |
| 2327 | |
| 2328 | if (Instruction *I = foldSelectShuffleOfSelectShuffle(Shuf)) |
| 2329 | return I; |
| 2330 | |
| 2331 | if (Instruction *I = foldSelectShuffleWith1Binop( |
| 2332 | Shuf, SQ: getSimplifyQuery().getWithInstruction(I: &Shuf))) |
| 2333 | return I; |
| 2334 | |
| 2335 | BinaryOperator *B0, *B1; |
| 2336 | if (!match(V: Shuf.getOperand(i_nocapture: 0), P: m_BinOp(I&: B0)) || |
| 2337 | !match(V: Shuf.getOperand(i_nocapture: 1), P: m_BinOp(I&: B1))) |
| 2338 | return nullptr; |
| 2339 | |
| 2340 | // If one operand is "0 - X", allow that to be viewed as "X * -1" |
| 2341 | // (ConstantsAreOp1) by getAlternateBinop below. If the neg is not paired |
| 2342 | // with a multiply, we will exit because C0/C1 will not be set. |
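|      | // For example (illustrative), shuf (sub 0, X), (mul X, {2,2,2,2}), M is
|      | // handled as shuf (mul X, {-1,-1,-1,-1}), (mul X, {2,2,2,2}), M so both
|      | // lanes share the mul opcode.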
| 2343 | Value *X, *Y; |
| 2344 | Constant *C0 = nullptr, *C1 = nullptr; |
| 2345 | bool ConstantsAreOp1; |
| 2346 | if (match(V: B0, P: m_BinOp(L: m_Constant(C&: C0), R: m_Value(V&: X))) && |
| 2347 | match(V: B1, P: m_BinOp(L: m_Constant(C&: C1), R: m_Value(V&: Y)))) |
| 2348 | ConstantsAreOp1 = false; |
| 2349 | else if (match(V: B0, P: m_CombineOr(L: m_BinOp(L: m_Value(V&: X), R: m_Constant(C&: C0)), |
| 2350 | R: m_Neg(V: m_Value(V&: X)))) && |
| 2351 | match(V: B1, P: m_CombineOr(L: m_BinOp(L: m_Value(V&: Y), R: m_Constant(C&: C1)), |
| 2352 | R: m_Neg(V: m_Value(V&: Y))))) |
| 2353 | ConstantsAreOp1 = true; |
| 2354 | else |
| 2355 | return nullptr; |
| 2356 | |
| 2357 | // We need matching binops to fold the lanes together. |
| 2358 | BinaryOperator::BinaryOps Opc0 = B0->getOpcode(); |
| 2359 | BinaryOperator::BinaryOps Opc1 = B1->getOpcode(); |
| 2360 | bool DropNSW = false; |
| 2361 | if (ConstantsAreOp1 && Opc0 != Opc1) { |
| 2362 | // TODO: We drop "nsw" if shift is converted into multiply because it may |
| 2363 | // not be correct when the shift amount is BitWidth - 1. We could examine |
| 2364 | // each vector element to determine if it is safe to keep that flag. |
| 2365 | if (Opc0 == Instruction::Shl || Opc1 == Instruction::Shl) |
| 2366 | DropNSW = true; |
| 2367 | if (BinopElts AltB0 = getAlternateBinop(BO: B0, DL)) { |
| 2368 | assert(isa<Constant>(AltB0.Op1) && "Expecting constant with alt binop" ); |
| 2369 | Opc0 = AltB0.Opcode; |
| 2370 | C0 = cast<Constant>(Val: AltB0.Op1); |
| 2371 | } else if (BinopElts AltB1 = getAlternateBinop(BO: B1, DL)) { |
| 2372 | assert(isa<Constant>(AltB1.Op1) && "Expecting constant with alt binop" ); |
| 2373 | Opc1 = AltB1.Opcode; |
| 2374 | C1 = cast<Constant>(Val: AltB1.Op1); |
| 2375 | } |
| 2376 | } |
| 2377 | |
| 2378 | if (Opc0 != Opc1 || !C0 || !C1) |
| 2379 | return nullptr; |
| 2380 | |
| 2381 | // The opcodes must be the same. Use a new name to make that clear. |
| 2382 | BinaryOperator::BinaryOps BOpc = Opc0; |
| 2383 | |
| 2384 | // Select the constant elements needed for the single binop. |
| 2385 | ArrayRef<int> Mask = Shuf.getShuffleMask(); |
| 2386 | Constant *NewC = ConstantExpr::getShuffleVector(V1: C0, V2: C1, Mask); |
| 2387 | |
| 2388 | // We are moving a binop after a shuffle. When a shuffle has an undefined |
| 2389 | // mask element, the result is undefined, but it is not poison or undefined |
| 2390 | // behavior. That is not necessarily true for div/rem/shift. |
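|      | // For example (illustrative), shuf (sdiv X, {2,4}), (sdiv X, {8,16}), <0,undef>
|      | // would shuffle the constants to {2,undef}; the undef divisor lane is then
|      | // replaced with a safe value (1) so the new sdiv cannot introduce UB.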
| 2391 | bool MightCreatePoisonOrUB = |
| 2392 | is_contained(Range&: Mask, Element: PoisonMaskElem) && |
| 2393 | (Instruction::isIntDivRem(Opcode: BOpc) || Instruction::isShift(Opcode: BOpc)); |
| 2394 | if (MightCreatePoisonOrUB) |
| 2395 | NewC = InstCombiner::getSafeVectorConstantForBinop(Opcode: BOpc, In: NewC, |
| 2396 | IsRHSConstant: ConstantsAreOp1); |
| 2397 | |
| 2398 | Value *V; |
| 2399 | if (X == Y) { |
| 2400 | // Remove a binop and the shuffle by rearranging the constant: |
| 2401 | // shuffle (op V, C0), (op V, C1), M --> op V, C' |
| 2402 | // shuffle (op C0, V), (op C1, V), M --> op C', V |
| 2403 | V = X; |
| 2404 | } else { |
| 2405 | // If there are 2 different variable operands, we must create a new shuffle |
| 2406 | // (select) first, so check uses to ensure that we don't end up with more |
| 2407 | // instructions than we started with. |
| 2408 | if (!B0->hasOneUse() && !B1->hasOneUse()) |
| 2409 | return nullptr; |
| 2410 | |
| 2411 | // If we use the original shuffle mask and op1 is *variable*, we would be |
| 2412 | // putting an undef into operand 1 of div/rem/shift. This is either UB or |
| 2413 | // poison. We do not have to guard against UB when *constants* are op1 |
| 2414 | // because safe constants guarantee that we do not overflow sdiv/srem (and |
| 2415 | // there's no danger for other opcodes). |
| 2416 | // TODO: To allow this case, create a new shuffle mask with no undefs. |
| 2417 | if (MightCreatePoisonOrUB && !ConstantsAreOp1) |
| 2418 | return nullptr; |
| 2419 | |
| 2420 | // Note: In general, we do not create new shuffles in InstCombine because we |
| 2421 | // do not know if a target can lower an arbitrary shuffle optimally. In this |
| 2422 | // case, the shuffle uses the existing mask, so there is no additional risk. |
| 2423 | |
| 2424 | // Select the variable vectors first, then perform the binop: |
| 2425 | // shuffle (op X, C0), (op Y, C1), M --> op (shuffle X, Y, M), C' |
| 2426 | // shuffle (op C0, X), (op C1, Y), M --> op C', (shuffle X, Y, M) |
| 2427 | V = Builder.CreateShuffleVector(V1: X, V2: Y, Mask); |
| 2428 | } |
| 2429 | |
| 2430 | Value *NewBO = ConstantsAreOp1 ? Builder.CreateBinOp(Opc: BOpc, LHS: V, RHS: NewC) : |
| 2431 | Builder.CreateBinOp(Opc: BOpc, LHS: NewC, RHS: V); |
| 2432 | |
| 2433 | // Flags are intersected from the 2 source binops. But there are 2 exceptions: |
| 2434 | // 1. If we changed an opcode, poison conditions might have changed. |
| 2435 | // 2. If the shuffle had undef mask elements, the new binop might have undefs |
| 2436 | // where the original code did not. But if we already made a safe constant, |
| 2437 | // then there's no danger. |
| 2438 | if (auto *NewI = dyn_cast<Instruction>(Val: NewBO)) { |
| 2439 | NewI->copyIRFlags(V: B0); |
| 2440 | NewI->andIRFlags(V: B1); |
| 2441 | if (DropNSW) |
| 2442 | NewI->setHasNoSignedWrap(false); |
| 2443 | if (is_contained(Range&: Mask, Element: PoisonMaskElem) && !MightCreatePoisonOrUB) |
| 2444 | NewI->dropPoisonGeneratingFlags(); |
| 2445 | } |
| 2446 | return replaceInstUsesWith(I&: Shuf, V: NewBO); |
| 2447 | } |
| 2448 | |
| 2449 | /// Convert a narrowing shuffle of a bitcasted vector into a vector truncate. |
| 2450 | /// Example (little endian): |
| 2451 | /// shuf (bitcast <4 x i16> X to <8 x i8>), <0, 2, 4, 6> --> trunc X to <4 x i8> |
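|      | /// Example (big endian):
|      | /// shuf (bitcast <4 x i16> X to <8 x i8>), <1, 3, 5, 7> --> trunc X to <4 x i8>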
| 2452 | static Instruction *foldTruncShuffle(ShuffleVectorInst &Shuf, |
| 2453 | bool IsBigEndian) { |
| 2454 | // This must be a bitcasted shuffle of 1 vector integer operand. |
| 2455 | Type *DestType = Shuf.getType(); |
| 2456 | Value *X; |
| 2457 | if (!match(V: Shuf.getOperand(i_nocapture: 0), P: m_BitCast(Op: m_Value(V&: X))) || |
| 2458 | !match(V: Shuf.getOperand(i_nocapture: 1), P: m_Poison()) || !DestType->isIntOrIntVectorTy()) |
| 2459 | return nullptr; |
| 2460 | |
| 2461 | // The source type must have the same number of elements as the shuffle, |
| 2462 | // and the source element type must be larger than the shuffle element type. |
| 2463 | Type *SrcType = X->getType(); |
| 2464 | if (!SrcType->isVectorTy() || !SrcType->isIntOrIntVectorTy() || |
| 2465 | cast<FixedVectorType>(Val: SrcType)->getNumElements() != |
| 2466 | cast<FixedVectorType>(Val: DestType)->getNumElements() || |
| 2467 | SrcType->getScalarSizeInBits() % DestType->getScalarSizeInBits() != 0) |
| 2468 | return nullptr; |
| 2469 | |
| 2470 | assert(Shuf.changesLength() && !Shuf.increasesLength() && |
| 2471 | "Expected a shuffle that decreases length" ); |
| 2472 | |
| 2473 | // Last, check that the mask chooses the correct low bits for each narrow |
| 2474 | // element in the result. |
| 2475 | uint64_t TruncRatio = |
| 2476 | SrcType->getScalarSizeInBits() / DestType->getScalarSizeInBits(); |
| 2477 | ArrayRef<int> Mask = Shuf.getShuffleMask(); |
| 2478 | for (unsigned i = 0, e = Mask.size(); i != e; ++i) { |
| 2479 | if (Mask[i] == PoisonMaskElem) |
| 2480 | continue; |
| 2481 | uint64_t LSBIndex = IsBigEndian ? (i + 1) * TruncRatio - 1 : i * TruncRatio; |
| 2482 | assert(LSBIndex <= INT32_MAX && "Overflowed 32-bits" ); |
| 2483 | if (Mask[i] != (int)LSBIndex) |
| 2484 | return nullptr; |
| 2485 | } |
| 2486 | |
| 2487 | return new TruncInst(X, DestType); |
| 2488 | } |
| 2489 | |
| 2490 | /// Match a shuffle-select-shuffle pattern where the shuffles are widening and |
| 2491 | /// narrowing (concatenating with poison and extracting back to the original |
| 2492 | /// length). This allows replacing the wide select with a narrow select. |
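|      | /// For example (illustrative masks):
|      | ///   shuf (sel (shuf Cond, poison, <0,1,undef,undef>), X, Y), poison, <0,1>
|      | ///   --> sel Cond, (shuf X, poison, <0,1>), (shuf Y, poison, <0,1>)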
| 2493 | static Instruction *narrowVectorSelect(ShuffleVectorInst &Shuf, |
| 2494 | InstCombiner::BuilderTy &Builder) { |
| 2495 | // This must be a narrowing identity shuffle. It extracts the 1st N elements |
| 2496 | // of the 1st vector operand of a shuffle. |
| 2497 | if (!match(V: Shuf.getOperand(i_nocapture: 1), P: m_Poison()) || !Shuf.isIdentityWithExtract()) |
| 2498 | return nullptr; |
| 2499 | |
| 2500 | // The vector being shuffled must be a vector select that we can eliminate. |
| 2501 | // TODO: The one-use requirement could be eased if X and/or Y are constants. |
| 2502 | Value *Cond, *X, *Y; |
| 2503 | if (!match(V: Shuf.getOperand(i_nocapture: 0), |
| 2504 | P: m_OneUse(SubPattern: m_Select(C: m_Value(V&: Cond), L: m_Value(V&: X), R: m_Value(V&: Y))))) |
| 2505 | return nullptr; |
| 2506 | |
| 2507 | // We need a narrow condition value. It must be extended with poison elements |
| 2508 | // and have the same number of elements as this shuffle. |
| 2509 | unsigned NarrowNumElts = |
| 2510 | cast<FixedVectorType>(Val: Shuf.getType())->getNumElements(); |
| 2511 | Value *NarrowCond; |
| 2512 | if (!match(V: Cond, P: m_OneUse(SubPattern: m_Shuffle(v1: m_Value(V&: NarrowCond), v2: m_Poison()))) || |
| 2513 | cast<FixedVectorType>(Val: NarrowCond->getType())->getNumElements() != |
| 2514 | NarrowNumElts || |
| 2515 | !cast<ShuffleVectorInst>(Val: Cond)->isIdentityWithPadding()) |
| 2516 | return nullptr; |
| 2517 | |
| 2518 | // shuf (sel (shuf NarrowCond, poison, WideMask), X, Y), poison, NarrowMask
| 2519 | // --> |
| 2520 | // sel NarrowCond, (shuf X, poison, NarrowMask), (shuf Y, poison, NarrowMask) |
| 2521 | Value *NarrowX = Builder.CreateShuffleVector(V: X, Mask: Shuf.getShuffleMask()); |
| 2522 | Value *NarrowY = Builder.CreateShuffleVector(V: Y, Mask: Shuf.getShuffleMask()); |
| 2523 | return SelectInst::Create(C: NarrowCond, S1: NarrowX, S2: NarrowY); |
| 2524 | } |
| 2525 | |
| 2526 | /// Canonicalize FP negate/abs after shuffle. |
| 2527 | static Instruction *foldShuffleOfUnaryOps(ShuffleVectorInst &Shuf, |
| 2528 | InstCombiner::BuilderTy &Builder) { |
| 2529 | auto *S0 = dyn_cast<Instruction>(Val: Shuf.getOperand(i_nocapture: 0)); |
| 2530 | Value *X; |
| 2531 | if (!S0 || !match(V: S0, P: m_CombineOr(L: m_FNeg(X: m_Value(V&: X)), R: m_FAbs(Op0: m_Value(V&: X))))) |
| 2532 | return nullptr; |
| 2533 | |
| 2534 | bool IsFNeg = S0->getOpcode() == Instruction::FNeg; |
| 2535 | |
| 2536 | // Match 2-input (binary) shuffle. |
| 2537 | auto *S1 = dyn_cast<Instruction>(Val: Shuf.getOperand(i_nocapture: 1)); |
| 2538 | Value *Y; |
| 2539 | if (!S1 || !match(V: S1, P: m_CombineOr(L: m_FNeg(X: m_Value(V&: Y)), R: m_FAbs(Op0: m_Value(V&: Y)))) || |
| 2540 | S0->getOpcode() != S1->getOpcode() || |
| 2541 | (!S0->hasOneUse() && !S1->hasOneUse())) |
| 2542 | return nullptr; |
| 2543 | |
| 2544 | // shuf (fneg/fabs X), (fneg/fabs Y), Mask --> fneg/fabs (shuf X, Y, Mask) |
| 2545 | Value *NewShuf = Builder.CreateShuffleVector(V1: X, V2: Y, Mask: Shuf.getShuffleMask()); |
| 2546 | Instruction *NewF; |
| 2547 | if (IsFNeg) { |
| 2548 | NewF = UnaryOperator::CreateFNeg(V: NewShuf); |
| 2549 | } else { |
| 2550 | Function *FAbs = Intrinsic::getOrInsertDeclaration( |
| 2551 | M: Shuf.getModule(), id: Intrinsic::fabs, Tys: Shuf.getType()); |
| 2552 | NewF = CallInst::Create(Func: FAbs, Args: {NewShuf}); |
| 2553 | } |
| 2554 | NewF->copyIRFlags(V: S0); |
| 2555 | NewF->andIRFlags(V: S1); |
| 2556 | return NewF; |
| 2557 | } |
| 2558 | |
| 2559 | /// Canonicalize casts after shuffle. |
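|      | /// Example: shuf (fptosi X), (fptosi Y), Mask --> fptosi (shuf X, Y, Mask)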
| 2560 | static Instruction *foldCastShuffle(ShuffleVectorInst &Shuf, |
| 2561 | InstCombiner::BuilderTy &Builder) { |
| 2562 | // Do we have 2 matching cast operands? |
| 2563 | auto *Cast0 = dyn_cast<CastInst>(Val: Shuf.getOperand(i_nocapture: 0)); |
| 2564 | auto *Cast1 = dyn_cast<CastInst>(Val: Shuf.getOperand(i_nocapture: 1)); |
| 2565 | if (!Cast0 || !Cast1 || Cast0->getOpcode() != Cast1->getOpcode() || |
| 2566 | Cast0->getSrcTy() != Cast1->getSrcTy()) |
| 2567 | return nullptr; |
| 2568 | |
| 2569 | // TODO: Allow other opcodes? That would require easing the type restrictions |
| 2570 | // below here. |
| 2571 | CastInst::CastOps CastOpcode = Cast0->getOpcode(); |
| 2572 | switch (CastOpcode) { |
| 2573 | case Instruction::FPToSI: |
| 2574 | case Instruction::FPToUI: |
| 2575 | case Instruction::SIToFP: |
| 2576 | case Instruction::UIToFP: |
| 2577 | break; |
| 2578 | default: |
| 2579 | return nullptr; |
| 2580 | } |
| 2581 | |
| 2582 | VectorType *ShufTy = Shuf.getType(); |
| 2583 | VectorType *ShufOpTy = cast<VectorType>(Val: Shuf.getOperand(i_nocapture: 0)->getType()); |
| 2584 | VectorType *CastSrcTy = cast<VectorType>(Val: Cast0->getSrcTy()); |
| 2585 | |
| 2586 | // TODO: Allow length-increasing shuffles? |
| 2587 | if (ShufTy->getElementCount().getKnownMinValue() > |
| 2588 | ShufOpTy->getElementCount().getKnownMinValue()) |
| 2589 | return nullptr; |
| 2590 | |
| 2591 | // TODO: Allow element-size-decreasing casts (ex: fptosi float to i8)? |
| 2592 | assert(isa<FixedVectorType>(CastSrcTy) && isa<FixedVectorType>(ShufOpTy) && |
| 2593 | "Expected fixed vector operands for casts and binary shuffle" ); |
| 2594 | if (CastSrcTy->getPrimitiveSizeInBits() > ShufOpTy->getPrimitiveSizeInBits()) |
| 2595 | return nullptr; |
| 2596 | |
| 2597 | // At least one of the operands must have only one use (the shuffle). |
| 2598 | if (!Cast0->hasOneUse() && !Cast1->hasOneUse()) |
| 2599 | return nullptr; |
| 2600 | |
| 2601 | // shuffle (cast X), (cast Y), Mask --> cast (shuffle X, Y, Mask) |
| 2602 | Value *X = Cast0->getOperand(i_nocapture: 0); |
| 2603 | Value *Y = Cast1->getOperand(i_nocapture: 0); |
| 2604 | Value *NewShuf = Builder.CreateShuffleVector(V1: X, V2: Y, Mask: Shuf.getShuffleMask()); |
| 2605 | return CastInst::Create(CastOpcode, S: NewShuf, Ty: ShufTy); |
| 2606 | } |
| 2607 | |
| 2608 | /// Try to fold an extract subvector operation. |
| 2609 | static Instruction *foldIdentityExtractShuffle(ShuffleVectorInst &Shuf) {
| 2610 | Value *Op0 = Shuf.getOperand(i_nocapture: 0), *Op1 = Shuf.getOperand(i_nocapture: 1); |
| 2611 | if (!Shuf.isIdentityWithExtract() || !match(V: Op1, P: m_Poison())) |
| 2612 | return nullptr; |
| 2613 | |
| 2614 | // Check if we are extracting all bits of an inserted scalar: |
| 2615 | // extract-subvec (bitcast (inselt ?, X, 0)) --> bitcast X to subvec type
| 2616 | Value *X; |
| 2617 | if (match(V: Op0, P: m_BitCast(Op: m_InsertElt(Val: m_Value(), Elt: m_Value(V&: X), Idx: m_Zero()))) && |
| 2618 | X->getType()->getPrimitiveSizeInBits() == |
| 2619 | Shuf.getType()->getPrimitiveSizeInBits()) |
| 2620 | return new BitCastInst(X, Shuf.getType()); |
| 2621 | |
| 2622 | // Try to combine 2 shuffles into 1 shuffle by concatenating a shuffle mask. |
| 2623 | Value *Y; |
| 2624 | ArrayRef<int> Mask; |
| 2625 | if (!match(V: Op0, P: m_Shuffle(v1: m_Value(V&: X), v2: m_Value(V&: Y), mask: m_Mask(Mask)))) |
| 2626 | return nullptr; |
| 2627 | |
| 2628 | // Be conservative with shuffle transforms. If we can't kill the 1st shuffle, |
| 2629 | // then combining may result in worse codegen. |
| 2630 | if (!Op0->hasOneUse()) |
| 2631 | return nullptr; |
| 2632 | |
| 2633 | // We are extracting a subvector from a shuffle. Remove excess elements from |
| 2634 | // the 1st shuffle mask to eliminate the extract. |
| 2635 | // |
| 2636 | // This transform is conservatively limited to identity extracts because we do |
| 2637 | // not allow arbitrary shuffle mask creation as a target-independent transform |
| 2638 | // (because we can't guarantee that will lower efficiently). |
| 2639 | // |
| 2640 | // If the extracting shuffle has a poison mask element, it transfers to the
| 2641 | // new shuffle mask. Otherwise, copy the original mask element. Example: |
| 2642 | // shuf (shuf X, Y, <C0, C1, C2, poison, C4>), poison, <0, poison, 2, 3> --> |
| 2643 | // shuf X, Y, <C0, poison, C2, poison> |
| 2644 | unsigned NumElts = cast<FixedVectorType>(Val: Shuf.getType())->getNumElements(); |
| 2645 | SmallVector<int, 16> NewMask(NumElts); |
| 2646 | assert(NumElts < Mask.size() && |
| 2647 | "Identity with extract must have less elements than its inputs" ); |
| 2648 | |
| 2649 | for (unsigned i = 0; i != NumElts; ++i) { |
| 2650 | int ExtractMaskElt = Shuf.getMaskValue(Elt: i);
| 2651 | int MaskElt = Mask[i]; |
| 2652 | NewMask[i] = ExtractMaskElt == PoisonMaskElem ? ExtractMaskElt : MaskElt; |
| 2653 | } |
| 2654 | return new ShuffleVectorInst(X, Y, NewMask); |
| 2655 | } |
| 2656 | |
| 2657 | /// Try to replace a shuffle with an insertelement or try to replace a shuffle |
| 2658 | /// operand with the operand of an insertelement. |
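|      | /// For example (illustrative):
|      | ///   shuf (inselt X, S, 2), Y, <0,1,4,5> --> shuf X, Y, <0,1,4,5>
|      | /// because the mask never selects the inserted lane 2.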
| 2659 | static Instruction *foldShuffleWithInsert(ShuffleVectorInst &Shuf, |
| 2660 | InstCombinerImpl &IC) { |
| 2661 | Value *V0 = Shuf.getOperand(i_nocapture: 0), *V1 = Shuf.getOperand(i_nocapture: 1); |
| 2662 | SmallVector<int, 16> Mask; |
| 2663 | Shuf.getShuffleMask(Result&: Mask); |
| 2664 | |
| 2665 | int NumElts = Mask.size(); |
| 2666 | int InpNumElts = cast<FixedVectorType>(Val: V0->getType())->getNumElements(); |
| 2667 | |
| 2668 | // This is a specialization of a fold in SimplifyDemandedVectorElts. We may |
| 2669 | // not be able to handle it there if the insertelement has >1 use. |
| 2670 | // If the shuffle has an insertelement operand but does not choose the |
| 2671 | // inserted scalar element from that value, then we can replace that shuffle |
| 2672 | // operand with the source vector of the insertelement. |
| 2673 | Value *X; |
| 2674 | uint64_t IdxC; |
| 2675 | if (match(V: V0, P: m_InsertElt(Val: m_Value(V&: X), Elt: m_Value(), Idx: m_ConstantInt(V&: IdxC)))) { |
| 2676 | // shuf (inselt X, ?, IdxC), ?, Mask --> shuf X, ?, Mask |
| 2677 | if (!is_contained(Range&: Mask, Element: (int)IdxC)) |
| 2678 | return IC.replaceOperand(I&: Shuf, OpNum: 0, V: X); |
| 2679 | } |
| 2680 | if (match(V: V1, P: m_InsertElt(Val: m_Value(V&: X), Elt: m_Value(), Idx: m_ConstantInt(V&: IdxC)))) { |
| 2681 | // Offset the index constant by the vector width because we are checking for |
| 2682 | // accesses to the 2nd vector input of the shuffle. |
| 2683 | IdxC += InpNumElts; |
| 2684 | // shuf ?, (inselt X, ?, IdxC), Mask --> shuf ?, X, Mask |
| 2685 | if (!is_contained(Range&: Mask, Element: (int)IdxC)) |
| 2686 | return IC.replaceOperand(I&: Shuf, OpNum: 1, V: X); |
| 2687 | } |
| 2688 | // For the rest of the transform, the shuffle must not change vector sizes. |
| 2689 | // TODO: This restriction could be removed if the insert has only one use |
| 2690 | // (because the transform would require a new length-changing shuffle). |
| 2691 | if (NumElts != InpNumElts) |
| 2692 | return nullptr; |
| 2693 | |
| 2694 | // shuffle (insert ?, Scalar, IndexC), V1, Mask --> insert V1, Scalar, IndexC' |
| 2695 | auto isShufflingScalarIntoOp1 = [&](Value *&Scalar, ConstantInt *&IndexC) { |
| 2696 | // We need an insertelement with a constant index. |
| 2697 | if (!match(V: V0, P: m_InsertElt(Val: m_Value(), Elt: m_Value(V&: Scalar), |
| 2698 | Idx: m_ConstantInt(CI&: IndexC)))) |
| 2699 | return false; |
| 2700 | |
| 2701 | // Test the shuffle mask to see if it splices the inserted scalar into the |
| 2702 | // operand 1 vector of the shuffle. |
| 2703 | int NewInsIndex = -1; |
| 2704 | for (int i = 0; i != NumElts; ++i) { |
| 2705 | // Ignore undef mask elements. |
| 2706 | if (Mask[i] == -1) |
| 2707 | continue; |
| 2708 | |
| 2709 | // The shuffle takes elements of operand 1 without lane changes. |
| 2710 | if (Mask[i] == NumElts + i) |
| 2711 | continue; |
| 2712 | |
| 2713 | // The shuffle must choose the inserted scalar exactly once. |
| 2714 | if (NewInsIndex != -1 || Mask[i] != IndexC->getSExtValue()) |
| 2715 | return false; |
| 2716 | |
| 2717 | // The shuffle is placing the inserted scalar into element i. |
| 2718 | NewInsIndex = i; |
| 2719 | } |
| 2720 | |
| 2721 | assert(NewInsIndex != -1 && "Did not fold shuffle with unused operand?" ); |
| 2722 | |
| 2723 | // Index is updated to the potentially translated insertion lane. |
| 2724 | IndexC = ConstantInt::get(Ty: IndexC->getIntegerType(), V: NewInsIndex); |
| 2725 | return true; |
| 2726 | }; |
| 2727 | |
| 2728 | // If the shuffle is unnecessary, insert the scalar operand directly into |
| 2729 | // operand 1 of the shuffle. Example: |
| 2730 | // shuffle (insert ?, S, 1), V1, <1, 5, 6, 7> --> insert V1, S, 0 |
| 2731 | Value *Scalar; |
| 2732 | ConstantInt *IndexC; |
| 2733 | if (isShufflingScalarIntoOp1(Scalar, IndexC)) |
| 2734 | return InsertElementInst::Create(Vec: V1, NewElt: Scalar, Idx: IndexC); |
| 2735 | |
| 2736 | // Try again after commuting shuffle. Example: |
| 2737 | // shuffle V0, (insert ?, S, 0), <0, 1, 2, 4> --> |
| 2738 | // shuffle (insert ?, S, 0), V0, <4, 5, 6, 0> --> insert V0, S, 3 |
| 2739 | std::swap(a&: V0, b&: V1); |
| 2740 | ShuffleVectorInst::commuteShuffleMask(Mask, InVecNumElts: NumElts); |
| 2741 | if (isShufflingScalarIntoOp1(Scalar, IndexC)) |
| 2742 | return InsertElementInst::Create(Vec: V1, NewElt: Scalar, Idx: IndexC); |
| 2743 | |
| 2744 | return nullptr; |
| 2745 | } |
| 2746 | |
| 2747 | static Instruction *foldIdentityPaddedShuffles(ShuffleVectorInst &Shuf) { |
| 2748 | // Match the operands as identity with padding (also known as concatenation |
| 2749 | // with undef) shuffles of the same source type. The backend is expected to |
| 2750 | // recreate these concatenations from a shuffle of narrow operands. |
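|      | // Example (illustrative):
|      | //   shuf (shuf X, poison, <0,1,undef,undef>), (shuf Y, poison, <0,1,undef,undef>), <0,4>
|      | //   --> shuf X, Y, <0,2>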
| 2751 | auto *Shuffle0 = dyn_cast<ShuffleVectorInst>(Val: Shuf.getOperand(i_nocapture: 0)); |
| 2752 | auto *Shuffle1 = dyn_cast<ShuffleVectorInst>(Val: Shuf.getOperand(i_nocapture: 1)); |
| 2753 | if (!Shuffle0 || !Shuffle0->isIdentityWithPadding() || |
| 2754 | !Shuffle1 || !Shuffle1->isIdentityWithPadding()) |
| 2755 | return nullptr; |
| 2756 | |
| 2757 | // We limit this transform to power-of-2 types because we expect that the |
| 2758 | // backend can convert the simplified IR patterns to identical nodes as the |
| 2759 | // original IR. |
| 2760 | // TODO: If we can verify the same behavior for arbitrary types, the |
| 2761 | // power-of-2 checks can be removed. |
| 2762 | Value *X = Shuffle0->getOperand(i_nocapture: 0); |
| 2763 | Value *Y = Shuffle1->getOperand(i_nocapture: 0); |
| 2764 | if (X->getType() != Y->getType() || |
| 2765 | !isPowerOf2_32(Value: cast<FixedVectorType>(Val: Shuf.getType())->getNumElements()) || |
| 2766 | !isPowerOf2_32( |
| 2767 | Value: cast<FixedVectorType>(Val: Shuffle0->getType())->getNumElements()) || |
| 2768 | !isPowerOf2_32(Value: cast<FixedVectorType>(Val: X->getType())->getNumElements()) || |
| 2769 | match(V: X, P: m_Undef()) || match(V: Y, P: m_Undef())) |
| 2770 | return nullptr; |
| 2771 | assert(match(Shuffle0->getOperand(1), m_Undef()) && |
| 2772 | match(Shuffle1->getOperand(1), m_Undef()) && |
| 2773 | "Unexpected operand for identity shuffle" ); |
| 2774 | |
| 2775 | // This is a shuffle of 2 widening shuffles. We can shuffle the narrow source |
| 2776 | // operands directly by adjusting the shuffle mask to account for the narrower |
| 2777 | // types: |
| 2778 | // shuf (widen X), (widen Y), Mask --> shuf X, Y, Mask' |
| 2779 | int NarrowElts = cast<FixedVectorType>(Val: X->getType())->getNumElements(); |
| 2780 | int WideElts = cast<FixedVectorType>(Val: Shuffle0->getType())->getNumElements(); |
| 2781 | assert(WideElts > NarrowElts && "Unexpected types for identity with padding" ); |
| 2782 | |
| 2783 | ArrayRef<int> Mask = Shuf.getShuffleMask(); |
| 2784 | SmallVector<int, 16> NewMask(Mask.size(), -1); |
| 2785 | for (int i = 0, e = Mask.size(); i != e; ++i) { |
| 2786 | if (Mask[i] == -1) |
| 2787 | continue; |
| 2788 | |
| 2789 | // If this shuffle is choosing an undef element from 1 of the sources, that |
| 2790 | // element is undef. |
| 2791 | if (Mask[i] < WideElts) { |
| 2792 | if (Shuffle0->getMaskValue(Elt: Mask[i]) == -1) |
| 2793 | continue; |
| 2794 | } else { |
| 2795 | if (Shuffle1->getMaskValue(Elt: Mask[i] - WideElts) == -1) |
| 2796 | continue; |
| 2797 | } |
| 2798 | |
| 2799 | // If this shuffle is choosing from the 1st narrow op, the mask element is |
| 2800 | // the same. If this shuffle is choosing from the 2nd narrow op, the mask |
| 2801 | // element is offset down to adjust for the narrow vector widths. |
| 2802 | if (Mask[i] < WideElts) { |
| 2803 | assert(Mask[i] < NarrowElts && "Unexpected shuffle mask" ); |
| 2804 | NewMask[i] = Mask[i]; |
| 2805 | } else { |
| 2806 | assert(Mask[i] < (WideElts + NarrowElts) && "Unexpected shuffle mask" ); |
| 2807 | NewMask[i] = Mask[i] - (WideElts - NarrowElts); |
| 2808 | } |
| 2809 | } |
| 2810 | return new ShuffleVectorInst(X, Y, NewMask); |
| 2811 | } |
| 2812 | |
| 2813 | // Splatting the first element of the result of a BinOp, where any of the
| 2814 | // BinOp's operands are the result of a first-element splat, can be simplified
| 2815 | // to splatting the first element of the result of the BinOp.
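|      | // Example: shuf (add (shuf X, poison, <0,0>), Y), poison, <0,0>
|      | //          --> shuf (add X, Y), poison, <0,0>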
| 2816 | Instruction *InstCombinerImpl::simplifyBinOpSplats(ShuffleVectorInst &SVI) { |
| 2817 | if (!match(V: SVI.getOperand(i_nocapture: 1), P: m_Poison()) || |
| 2818 | !match(Mask: SVI.getShuffleMask(), P: m_ZeroMask()) || |
| 2819 | !SVI.getOperand(i_nocapture: 0)->hasOneUse()) |
| 2820 | return nullptr; |
| 2821 | |
| 2822 | Value *Op0 = SVI.getOperand(i_nocapture: 0); |
| 2823 | Value *X, *Y; |
| 2824 | if (!match(V: Op0, P: m_BinOp(L: m_Shuffle(v1: m_Value(V&: X), v2: m_Poison(), mask: m_ZeroMask()), |
| 2825 | R: m_Value(V&: Y))) && |
| 2826 | !match(V: Op0, P: m_BinOp(L: m_Value(V&: X), |
| 2827 | R: m_Shuffle(v1: m_Value(V&: Y), v2: m_Poison(), mask: m_ZeroMask())))) |
| 2828 | return nullptr; |
| 2829 | if (X->getType() != Y->getType()) |
| 2830 | return nullptr; |
| 2831 | |
| 2832 | auto *BinOp = cast<BinaryOperator>(Val: Op0); |
| 2833 | if (!isSafeToSpeculativelyExecuteWithVariableReplaced(I: BinOp)) |
| 2834 | return nullptr; |
| 2835 | |
| 2836 | Value *NewBO = Builder.CreateBinOp(Opc: BinOp->getOpcode(), LHS: X, RHS: Y); |
| 2837 | if (auto NewBOI = dyn_cast<Instruction>(Val: NewBO)) |
| 2838 | NewBOI->copyIRFlags(V: BinOp); |
| 2839 | |
| 2840 | return new ShuffleVectorInst(NewBO, SVI.getShuffleMask()); |
| 2841 | } |
| 2842 | |
| 2843 | Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) { |
| 2844 | Value *LHS = SVI.getOperand(i_nocapture: 0); |
| 2845 | Value *RHS = SVI.getOperand(i_nocapture: 1); |
| 2846 | SimplifyQuery ShufQuery = SQ.getWithInstruction(I: &SVI); |
| 2847 | if (auto *V = simplifyShuffleVectorInst(Op0: LHS, Op1: RHS, Mask: SVI.getShuffleMask(), |
| 2848 | RetTy: SVI.getType(), Q: ShufQuery)) |
| 2849 | return replaceInstUsesWith(I&: SVI, V); |
| 2850 | |
| 2851 | if (Instruction *I = simplifyBinOpSplats(SVI)) |
| 2852 | return I; |
| 2853 | |
| 2854 | // Canonicalize splat shuffle to use poison RHS. Handle this explicitly in |
| 2855 | // order to support scalable vectors. |
| 2856 | if (match(Mask: SVI.getShuffleMask(), P: m_ZeroMask()) && !isa<PoisonValue>(Val: RHS)) |
| 2857 | return replaceOperand(I&: SVI, OpNum: 1, V: PoisonValue::get(T: RHS->getType())); |
| 2858 | |
| 2859 | if (isa<ScalableVectorType>(Val: LHS->getType())) |
| 2860 | return nullptr; |
| 2861 | |
| 2862 | unsigned VWidth = cast<FixedVectorType>(Val: SVI.getType())->getNumElements(); |
| 2863 | unsigned LHSWidth = cast<FixedVectorType>(Val: LHS->getType())->getNumElements(); |
| 2864 | |
| 2865 | // shuffle (bitcast X), (bitcast Y), Mask --> bitcast (shuffle X, Y, Mask) |
| 2866 | // |
| 2867 | // if X and Y are of the same (vector) type, and the element size is not |
| 2868 | // changed by the bitcasts, we can distribute the bitcasts through the |
| 2869 | // shuffle, hopefully reducing the number of instructions. We make sure that |
| 2870 | // at least one bitcast only has one use, so we don't *increase* the number of |
| 2871 | // instructions here. |
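|      | // For example (illustrative types):
|      | //   shuf (bitcast <4 x i32> X to <4 x float>), (bitcast <4 x i32> Y to <4 x float>), M
|      | //   --> bitcast (shuf X, Y, M) to <4 x float>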
| 2872 | Value *X, *Y; |
| 2873 | if (match(V: LHS, P: m_BitCast(Op: m_Value(V&: X))) && match(V: RHS, P: m_BitCast(Op: m_Value(V&: Y))) && |
| 2874 | X->getType()->isVectorTy() && X->getType() == Y->getType() && |
| 2875 | X->getType()->getScalarSizeInBits() == |
| 2876 | SVI.getType()->getScalarSizeInBits() && |
| 2877 | (LHS->hasOneUse() || RHS->hasOneUse())) { |
| 2878 | Value *V = Builder.CreateShuffleVector(V1: X, V2: Y, Mask: SVI.getShuffleMask(), |
| 2879 | Name: SVI.getName() + ".uncasted" ); |
| 2880 | return new BitCastInst(V, SVI.getType()); |
| 2881 | } |
| 2882 | |
| 2883 | ArrayRef<int> Mask = SVI.getShuffleMask(); |
| 2884 | |
| 2885 | // Peek through a bitcasted shuffle operand by scaling the mask. If the |
| 2886 | // simulated shuffle can simplify, then this shuffle is unnecessary: |
| 2887 | // shuf (bitcast X), undef, Mask --> bitcast X' |
| 2888 | // TODO: This could be extended to allow length-changing shuffles. |
| 2889 | // The transform might also be obsoleted if we allowed canonicalization |
| 2890 | // of bitcasted shuffles. |
| 2891 | if (match(V: LHS, P: m_BitCast(Op: m_Value(V&: X))) && match(V: RHS, P: m_Undef()) && |
| 2892 | X->getType()->isVectorTy() && VWidth == LHSWidth) { |
| 2893 | // Try to create a scaled mask constant. |
| 2894 | auto *XType = cast<FixedVectorType>(Val: X->getType()); |
| 2895 | unsigned XNumElts = XType->getNumElements(); |
| 2896 | SmallVector<int, 16> ScaledMask; |
| 2897 | if (scaleShuffleMaskElts(NumDstElts: XNumElts, Mask, ScaledMask)) { |
| 2898 | // If the shuffled source vector simplifies, cast that value to this |
| 2899 | // shuffle's type. |
| 2900 | if (auto *V = simplifyShuffleVectorInst(Op0: X, Op1: UndefValue::get(T: XType), |
| 2901 | Mask: ScaledMask, RetTy: XType, Q: ShufQuery)) |
| 2902 | return BitCastInst::Create(Instruction::BitCast, S: V, Ty: SVI.getType()); |
| 2903 | } |
| 2904 | } |
| 2905 | |
| 2906 | // shuffle x, x, mask --> shuffle x, undef, mask' |
| 2907 | if (LHS == RHS) { |
| 2908 | assert(!match(RHS, m_Undef()) && |
| 2909 | "Shuffle with 2 undef ops not simplified?" ); |
| 2910 | return new ShuffleVectorInst(LHS, createUnaryMask(Mask, NumElts: LHSWidth)); |
| 2911 | } |
| 2912 | |
| 2913 | // shuffle undef, x, mask --> shuffle x, undef, mask' |
| 2914 | if (match(V: LHS, P: m_Undef())) { |
| 2915 | SVI.commute(); |
| 2916 | return &SVI; |
| 2917 | } |
| 2918 | |
| 2919 | if (Instruction *I = canonicalizeInsertSplat(Shuf&: SVI, Builder)) |
| 2920 | return I; |
| 2921 | |
| 2922 | if (Instruction *I = foldSelectShuffle(Shuf&: SVI)) |
| 2923 | return I; |
| 2924 | |
| 2925 | if (Instruction *I = foldTruncShuffle(Shuf&: SVI, IsBigEndian: DL.isBigEndian())) |
| 2926 | return I; |
| 2927 | |
| 2928 | if (Instruction *I = narrowVectorSelect(Shuf&: SVI, Builder)) |
| 2929 | return I; |
| 2930 | |
| 2931 | if (Instruction *I = foldShuffleOfUnaryOps(Shuf&: SVI, Builder)) |
| 2932 | return I; |
| 2933 | |
| 2934 | if (Instruction *I = foldCastShuffle(Shuf&: SVI, Builder)) |
| 2935 | return I; |
| 2936 | |
| 2937 | APInt PoisonElts(VWidth, 0); |
| 2938 | APInt AllOnesEltMask(APInt::getAllOnes(numBits: VWidth)); |
| 2939 | if (Value *V = SimplifyDemandedVectorElts(V: &SVI, DemandedElts: AllOnesEltMask, PoisonElts)) { |
| 2940 | if (V != &SVI) |
| 2941 | return replaceInstUsesWith(I&: SVI, V); |
| 2942 | return &SVI; |
| 2943 | } |
| 2944 | |
| 2945 | if (Instruction *I = foldIdentityExtractShuffle(Shuf&: SVI)) |
| 2946 | return I; |
| 2947 | |
| 2948 | // These transforms have the potential to lose undef knowledge, so they are |
| 2949 | // intentionally placed after SimplifyDemandedVectorElts(). |
| 2950 | if (Instruction *I = foldShuffleWithInsert(Shuf&: SVI, IC&: *this)) |
| 2951 | return I; |
| 2952 | if (Instruction *I = foldIdentityPaddedShuffles(Shuf&: SVI)) |
| 2953 | return I; |
| 2954 | |
| 2955 | if (match(V: RHS, P: m_Constant())) { |
| 2956 | if (auto *SI = dyn_cast<SelectInst>(Val: LHS)) { |
| 2957 | // We cannot do this fold for elementwise select since ShuffleVector is |
| 2958 | // not elementwise. |
| 2959 | if (SI->getCondition()->getType()->isIntegerTy() && |
| 2960 | (isa<PoisonValue>(Val: RHS) || |
| 2961 | isGuaranteedNotToBePoison(V: SI->getCondition()))) { |
| 2962 | if (Instruction *I = FoldOpIntoSelect(Op&: SVI, SI)) |
| 2963 | return I; |
| 2964 | } |
| 2965 | } |
| 2966 | if (auto *PN = dyn_cast<PHINode>(Val: LHS)) { |
| 2967 | if (Instruction *I = foldOpIntoPhi(I&: SVI, PN, /*AllowMultipleUses=*/true)) |
| 2968 | return I; |
| 2969 | } |
| 2970 | } |
| 2971 | |
| 2972 | if (match(V: RHS, P: m_Poison()) && canEvaluateShuffled(V: LHS, Mask)) { |
| 2973 | Value *V = evaluateInDifferentElementOrder(V: LHS, Mask, Builder); |
| 2974 | return replaceInstUsesWith(I&: SVI, V); |
| 2975 | } |
| 2976 | |
| 2977 | // SROA generates shuffle+bitcast when the extracted sub-vector is bitcast to |
| 2978 | // a non-vector type. We can instead bitcast the original vector followed by |
| 2979 | // an extract of the desired element: |
| 2980 | // |
| 2981 | // %sroa = shufflevector <16 x i8> %in, <16 x i8> undef, |
| 2982 | // <4 x i32> <i32 0, i32 1, i32 2, i32 3> |
| 2983 | // %1 = bitcast <4 x i8> %sroa to i32 |
| 2984 | // Becomes: |
| 2985 | // %bc = bitcast <16 x i8> %in to <4 x i32> |
| 2986 | // %ext = extractelement <4 x i32> %bc, i32 0 |
| 2987 | // |
| 2988 | // If the shuffle is extracting a contiguous range of values from the input |
| 2989 | // vector then each use which is a bitcast of the extracted size can be |
| 2990 | // replaced. This will work if the vector types are compatible, and the begin |
| 2991 | // index is aligned to a value in the casted vector type. If the begin index |
| 2992 | // isn't aligned then we can shuffle the original vector (keeping the same |
| 2993 | // vector type) before extracting. |
| 2994 | // |
| 2995 | // This code will bail out if the target type is fundamentally incompatible |
| 2996 | // with vectors of the source type. |
| 2997 | // |
| 2998 | // Example of <16 x i8>, target type i32: |
| 2999 | // Index range [4,8): v-----------v Will work. |
| 3000 | // +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ |
| 3001 | // <16 x i8>: | | | | | | | | | | | | | | | | | |
| 3002 | // <4 x i32>: | | | | | |
| 3003 | // +-----------+-----------+-----------+-----------+ |
| 3004 | // Index range [6,10): ^-----------^ Needs an extra shuffle. |
| 3005 | // Target type i40: ^--------------^ Won't work, bail. |
| 3006 | bool MadeChange = false; |
| 3007 | if (isShuffleExtractingFromLHS(SVI, Mask)) { |
| 3008 | Value *V = LHS; |
| 3009 | unsigned MaskElems = Mask.size(); |
| 3010 | auto *SrcTy = cast<FixedVectorType>(Val: V->getType()); |
| 3011 | unsigned VecBitWidth = SrcTy->getPrimitiveSizeInBits().getFixedValue(); |
| 3012 | unsigned SrcElemBitWidth = DL.getTypeSizeInBits(Ty: SrcTy->getElementType()); |
| 3013 | assert(SrcElemBitWidth && "vector elements must have a bitwidth" ); |
| 3014 | unsigned SrcNumElems = SrcTy->getNumElements(); |
| 3015 | SmallVector<BitCastInst *, 8> BCs; |
| 3016 | DenseMap<Type *, Value *> NewBCs; |
| 3017 | for (User *U : SVI.users()) |
| 3018 | if (BitCastInst *BC = dyn_cast<BitCastInst>(Val: U)) { |
| 3019 | // Only visit bitcasts that weren't previously handled. |
| 3020 | if (BC->use_empty()) |
| 3021 | continue; |
| 3022 | // Prefer to combine bitcasts of bitcasts before attempting this fold. |
| 3023 | if (BC->hasOneUse()) { |
| 3024 | auto *BC2 = dyn_cast<BitCastInst>(Val: BC->user_back()); |
| 3025 | if (BC2 && isEliminableCastPair(CI1: BC, CI2: BC2)) |
| 3026 | continue; |
| 3027 | } |
| 3028 | BCs.push_back(Elt: BC); |
| 3029 | } |
| 3030 | for (BitCastInst *BC : BCs) { |
| 3031 | unsigned BegIdx = Mask.front(); |
| 3032 | Type *TgtTy = BC->getDestTy(); |
| 3033 | unsigned TgtElemBitWidth = DL.getTypeSizeInBits(Ty: TgtTy); |
| 3034 | if (!TgtElemBitWidth) |
| 3035 | continue; |
| 3036 | unsigned TgtNumElems = VecBitWidth / TgtElemBitWidth; |
| 3037 | bool VecBitWidthsEqual = VecBitWidth == TgtNumElems * TgtElemBitWidth; |
| 3038 | bool BegIsAligned = 0 == ((SrcElemBitWidth * BegIdx) % TgtElemBitWidth); |
| 3039 | if (!VecBitWidthsEqual) |
| 3040 | continue; |
| 3041 | if (!VectorType::isValidElementType(ElemTy: TgtTy)) |
| 3042 | continue; |
| 3043 | auto *CastSrcTy = FixedVectorType::get(ElementType: TgtTy, NumElts: TgtNumElems); |
| 3044 | if (!BegIsAligned) { |
| 3045 | // Shuffle the input so [0,MaskElems) contains the output, and
| 3046 | // [MaskElems,SrcNumElems) is undef.
| 3047 | SmallVector<int, 16> ShuffleMask(SrcNumElems, -1); |
| 3048 | for (unsigned I = 0, E = MaskElems, Idx = BegIdx; I != E; ++Idx, ++I) |
| 3049 | ShuffleMask[I] = Idx; |
| 3050 | V = Builder.CreateShuffleVector(V, Mask: ShuffleMask, |
| 3051 | Name: SVI.getName() + ".extract" ); |
| 3052 | BegIdx = 0; |
| 3053 | } |
| 3054 | unsigned SrcElemsPerTgtElem = TgtElemBitWidth / SrcElemBitWidth; |
| 3055 | assert(SrcElemsPerTgtElem); |
| 3056 | BegIdx /= SrcElemsPerTgtElem; |
| 3057 | auto [It, Inserted] = NewBCs.try_emplace(Key: CastSrcTy); |
| 3058 | if (Inserted) |
| 3059 | It->second = Builder.CreateBitCast(V, DestTy: CastSrcTy, Name: SVI.getName() + ".bc" ); |
| 3060 | auto *Ext = Builder.CreateExtractElement(Vec: It->second, Idx: BegIdx, |
| 3061 | Name: SVI.getName() + ".extract" ); |
| 3062 | // The shufflevector isn't being replaced: the bitcast that used it |
| 3063 | // is. InstCombine will visit the newly-created instructions. |
| 3064 | replaceInstUsesWith(I&: *BC, V: Ext); |
| 3065 | MadeChange = true; |
| 3066 | } |
| 3067 | } |
| 3068 | |
| 3069 | // If the LHS is a shufflevector itself, see if we can combine it with this |
| 3070 | // one without producing an unusual shuffle. |
| 3071 | // Cases that might be simplified: |
| 3072 | // 1. |
| 3073 | // x1=shuffle(v1,v2,mask1) |
| 3074 | // x=shuffle(x1,undef,mask) |
| 3075 | // ==> |
| 3076 | // x=shuffle(v1,undef,newMask) |
| 3077 | // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : -1 |
| 3078 | // 2. |
| 3079 | // x1=shuffle(v1,undef,mask1) |
| 3080 | // x=shuffle(x1,x2,mask) |
| 3081 | // where v1.size() == mask1.size() |
| 3082 | // ==> |
| 3083 | // x=shuffle(v1,x2,newMask) |
| 3084 | // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : mask[i] |
| 3085 | // 3. |
| 3086 | // x2=shuffle(v2,undef,mask2) |
| 3087 | // x=shuffle(x1,x2,mask) |
| 3088 | // where v2.size() == mask2.size() |
| 3089 | // ==> |
| 3090 | // x=shuffle(x1,v2,newMask) |
| 3091 | // newMask[i] = (mask[i] < x1.size()) |
| 3092 | // ? mask[i] : mask2[mask[i]-x1.size()]+x1.size() |
| 3093 | // 4. |
| 3094 | // x1=shuffle(v1,undef,mask1) |
| 3095 | // x2=shuffle(v2,undef,mask2) |
| 3096 | // x=shuffle(x1,x2,mask) |
| 3097 | // where v1.size() == v2.size() |
| 3098 | // ==> |
| 3099 | // x=shuffle(v1,v2,newMask) |
| 3100 | // newMask[i] = (mask[i] < x1.size()) |
| 3101 | // ? mask1[mask[i]] : mask2[mask[i]-x1.size()]+v1.size() |
| 3102 | // |
| 3103 | // Here we are really conservative: |
| 3104 | // we are absolutely afraid of producing a shuffle mask not in the input |
| 3105 | // program, because the code gen may not be smart enough to turn a merged |
| 3106 | // shuffle into two specific shuffles: it may produce worse code. As such, |
| 3107 | // we only merge two shuffles if the result is either a splat or one of the |
| 3108 | // input shuffle masks. In this case, merging the shuffles just removes |
| 3109 | // one instruction, which we know is safe. This is good for things like |
| 3110 | // turning: (splat(splat)) -> splat, or |
| 3111 | // merge(V[0..n], V[n+1..2n]) -> V[0..2n] |
| 3112 | ShuffleVectorInst* LHSShuffle = dyn_cast<ShuffleVectorInst>(Val: LHS); |
| 3113 | ShuffleVectorInst* RHSShuffle = dyn_cast<ShuffleVectorInst>(Val: RHS); |
| 3114 | if (LHSShuffle) |
| 3115 | if (!match(V: LHSShuffle->getOperand(i_nocapture: 1), P: m_Poison()) && |
| 3116 | !match(V: RHS, P: m_Poison())) |
| 3117 | LHSShuffle = nullptr; |
| 3118 | if (RHSShuffle) |
| 3119 | if (!match(V: RHSShuffle->getOperand(i_nocapture: 1), P: m_Poison())) |
| 3120 | RHSShuffle = nullptr; |
| 3121 | if (!LHSShuffle && !RHSShuffle) |
| 3122 | return MadeChange ? &SVI : nullptr; |
| 3123 | |
| 3124 | Value* LHSOp0 = nullptr; |
| 3125 | Value* LHSOp1 = nullptr; |
| 3126 | Value* RHSOp0 = nullptr; |
| 3127 | unsigned LHSOp0Width = 0; |
| 3128 | unsigned RHSOp0Width = 0; |
| 3129 | if (LHSShuffle) { |
| 3130 | LHSOp0 = LHSShuffle->getOperand(i_nocapture: 0); |
| 3131 | LHSOp1 = LHSShuffle->getOperand(i_nocapture: 1); |
| 3132 | LHSOp0Width = cast<FixedVectorType>(Val: LHSOp0->getType())->getNumElements(); |
| 3133 | } |
| 3134 | if (RHSShuffle) { |
| 3135 | RHSOp0 = RHSShuffle->getOperand(i_nocapture: 0); |
| 3136 | RHSOp0Width = cast<FixedVectorType>(Val: RHSOp0->getType())->getNumElements(); |
| 3137 | } |
| 3138 | Value* newLHS = LHS; |
| 3139 | Value* newRHS = RHS; |
| 3140 | if (LHSShuffle) { |
| 3141 | // case 1 |
| 3142 | if (match(V: RHS, P: m_Poison())) { |
| 3143 | newLHS = LHSOp0; |
| 3144 | newRHS = LHSOp1; |
| 3145 | } |
| 3146 | // case 2 or 4 |
| 3147 | else if (LHSOp0Width == LHSWidth) { |
| 3148 | newLHS = LHSOp0; |
| 3149 | } |
| 3150 | } |
| 3151 | // case 3 or 4 |
| 3152 | if (RHSShuffle && RHSOp0Width == LHSWidth) { |
| 3153 | newRHS = RHSOp0; |
| 3154 | } |
| 3155 | // case 4 |
| 3156 | if (LHSOp0 == RHSOp0) { |
| 3157 | newLHS = LHSOp0; |
| 3158 | newRHS = nullptr; |
| 3159 | } |
| 3160 | |
| 3161 | if (newLHS == LHS && newRHS == RHS) |
| 3162 | return MadeChange ? &SVI : nullptr; |
| 3163 | |
| 3164 | ArrayRef<int> LHSMask; |
| 3165 | ArrayRef<int> RHSMask; |
| 3166 | if (newLHS != LHS) |
| 3167 | LHSMask = LHSShuffle->getShuffleMask(); |
| 3168 | if (RHSShuffle && newRHS != RHS) |
| 3169 | RHSMask = RHSShuffle->getShuffleMask(); |
| 3170 | |
| 3171 | unsigned newLHSWidth = (newLHS != LHS) ? LHSOp0Width : LHSWidth; |
| 3172 | SmallVector<int, 16> newMask; |
| 3173 | bool isSplat = true; |
| 3174 | int SplatElt = -1; |
| 3175 | // Create a new mask for the new ShuffleVectorInst so that the new |
| 3176 | // ShuffleVectorInst is equivalent to the original one. |
| 3177 | for (unsigned i = 0; i < VWidth; ++i) { |
| 3178 | int eltMask; |
| 3179 | if (Mask[i] < 0) { |
| 3180 | // This element is a poison value. |
| 3181 | eltMask = -1; |
| 3182 | } else if (Mask[i] < (int)LHSWidth) { |
| 3183 | // This element is from left hand side vector operand. |
| 3184 | // |
| 3185 | // If LHS is going to be replaced (case 1, 2, or 4), calculate the |
| 3186 | // new mask value for the element. |
| 3187 | if (newLHS != LHS) { |
| 3188 | eltMask = LHSMask[Mask[i]]; |
| 3189 | // If the value selected is a poison value, explicitly specify it
| 3190 | // with a -1 mask value. |
| 3191 | if (eltMask >= (int)LHSOp0Width && isa<PoisonValue>(Val: LHSOp1)) |
| 3192 | eltMask = -1; |
| 3193 | } else |
| 3194 | eltMask = Mask[i]; |
| 3195 | } else { |
| 3196 | // This element is from right hand side vector operand |
| 3197 | // |
| 3198 | // If the value selected is a poison value, explicitly specify it |
| 3199 | // with a -1 mask value. (case 1) |
| 3200 | if (match(V: RHS, P: m_Poison())) |
| 3201 | eltMask = -1; |
| 3202 | // If RHS is going to be replaced (case 3 or 4), calculate the |
| 3203 | // new mask value for the element. |
| 3204 | else if (newRHS != RHS) { |
| 3205 | eltMask = RHSMask[Mask[i]-LHSWidth]; |
| 3206 | // If the value selected is a poison value, explicitly specify it
| 3207 | // with a -1 mask value. |
| 3208 | if (eltMask >= (int)RHSOp0Width) { |
| 3209 | assert(match(RHSShuffle->getOperand(1), m_Poison()) && |
| 3210 | "should have been check above" ); |
| 3211 | eltMask = -1; |
| 3212 | } |
| 3213 | } else |
| 3214 | eltMask = Mask[i]-LHSWidth; |
| 3215 | |
| 3216 | // If LHS's width is changed, shift the mask value accordingly. |
| 3217 | // If newRHS == nullptr, i.e. LHSOp0 == RHSOp0, we want to remap any |
| 3218 | // references from RHSOp0 to LHSOp0, so we don't need to shift the mask. |
| 3219 | // If newRHS == newLHS, we want to remap any references from newRHS to |
| 3220 | // newLHS so that we can properly identify splats that may occur due to |
| 3221 | // obfuscation across the two vectors. |
| 3222 | if (eltMask >= 0 && newRHS != nullptr && newLHS != newRHS) |
| 3223 | eltMask += newLHSWidth; |
| 3224 | } |
| 3225 | |
| 3226 | // Check if this could still be a splat. |
| 3227 | if (eltMask >= 0) { |
| 3228 | if (SplatElt >= 0 && SplatElt != eltMask) |
| 3229 | isSplat = false; |
| 3230 | SplatElt = eltMask; |
| 3231 | } |
| 3232 | |
| 3233 | newMask.push_back(Elt: eltMask); |
| 3234 | } |
| 3235 | |
| 3236 | // If the result mask is equal to one of the original shuffle masks, |
| 3237 | // or is a splat, do the replacement. |
| 3238 | if (isSplat || newMask == LHSMask || newMask == RHSMask || newMask == Mask) { |
| 3239 | if (!newRHS) |
| 3240 | newRHS = PoisonValue::get(T: newLHS->getType()); |
| 3241 | return new ShuffleVectorInst(newLHS, newRHS, newMask); |
| 3242 | } |
| 3243 | |
| 3244 | return MadeChange ? &SVI : nullptr; |
| 3245 | } |
| 3246 | |