1//===- InstCombineVectorOps.cpp -------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements instcombine for ExtractElement, InsertElement and
10// ShuffleVector.
11//
12//===----------------------------------------------------------------------===//
13
14#include "InstCombineInternal.h"
15#include "llvm/ADT/APInt.h"
16#include "llvm/ADT/ArrayRef.h"
17#include "llvm/ADT/DenseMap.h"
18#include "llvm/ADT/STLExtras.h"
19#include "llvm/ADT/SmallBitVector.h"
20#include "llvm/ADT/SmallVector.h"
21#include "llvm/ADT/Statistic.h"
22#include "llvm/Analysis/InstructionSimplify.h"
23#include "llvm/Analysis/VectorUtils.h"
24#include "llvm/IR/BasicBlock.h"
25#include "llvm/IR/Constant.h"
26#include "llvm/IR/Constants.h"
27#include "llvm/IR/DerivedTypes.h"
28#include "llvm/IR/InstrTypes.h"
29#include "llvm/IR/Instruction.h"
30#include "llvm/IR/Instructions.h"
31#include "llvm/IR/Operator.h"
32#include "llvm/IR/PatternMatch.h"
33#include "llvm/IR/Type.h"
34#include "llvm/IR/User.h"
35#include "llvm/IR/Value.h"
36#include "llvm/Support/Casting.h"
37#include "llvm/Support/ErrorHandling.h"
38#include "llvm/Transforms/InstCombine/InstCombiner.h"
39#include <cassert>
40#include <cstdint>
41#include <iterator>
42#include <utility>
43
44#define DEBUG_TYPE "instcombine"
45
46using namespace llvm;
47using namespace PatternMatch;
48
49STATISTIC(NumAggregateReconstructionsSimplified,
50 "Number of aggregate reconstructions turned into reuse of the "
51 "original aggregate");
52
53/// Return true if the value is cheaper to scalarize than it is to leave as a
54/// vector operation. If the extract index \p EI is a constant integer then
55/// some operations may be cheap to scalarize.
56///
57/// FIXME: It's possible to create more instructions than previously existed.
58static bool cheapToScalarize(Value *V, Value *EI) {
59 ConstantInt *CEI = dyn_cast<ConstantInt>(Val: EI);
60
61 // If we can pick a scalar constant value out of a vector, that is free.
62 if (auto *C = dyn_cast<Constant>(Val: V))
63 return CEI || C->getSplatValue();
64
65 if (CEI && match(V, P: m_Intrinsic<Intrinsic::stepvector>())) {
66 ElementCount EC = cast<VectorType>(Val: V->getType())->getElementCount();
67 // Index needs to be lower than the minimum size of the vector, because
68 // for scalable vector, the vector size is known at run time.
69 return CEI->getValue().ult(RHS: EC.getKnownMinValue());
70 }
71
72 // An insertelement to the same constant index as our extract will simplify
73 // to the scalar inserted element. An insertelement to a different constant
74 // index is irrelevant to our extract.
75 if (match(V, P: m_InsertElt(Val: m_Value(), Elt: m_Value(), Idx: m_ConstantInt())))
76 return CEI;
77
78 if (match(V, P: m_OneUse(SubPattern: m_Load(Op: m_Value()))))
79 return true;
80
81 if (match(V, P: m_OneUse(SubPattern: m_UnOp())))
82 return true;
83
84 Value *V0, *V1;
85 if (match(V, P: m_OneUse(SubPattern: m_BinOp(L: m_Value(V&: V0), R: m_Value(V&: V1)))))
86 if (cheapToScalarize(V: V0, EI) || cheapToScalarize(V: V1, EI))
87 return true;
88
89 CmpPredicate UnusedPred;
90 if (match(V, P: m_OneUse(SubPattern: m_Cmp(Pred&: UnusedPred, L: m_Value(V&: V0), R: m_Value(V&: V1)))))
91 if (cheapToScalarize(V: V0, EI) || cheapToScalarize(V: V1, EI))
92 return true;
93
94 return false;
95}
96
97// If we have a PHI node with a vector type that is only used to feed
98// itself and be an operand of extractelement at a constant location,
99// try to replace the PHI of the vector type with a PHI of a scalar type.
100Instruction *InstCombinerImpl::scalarizePHI(ExtractElementInst &EI,
101 PHINode *PN) {
102 SmallVector<Instruction *, 2> Extracts;
103 // The users we want the PHI to have are:
104 // 1) The EI ExtractElement (we already know this)
105 // 2) Possibly more ExtractElements with the same index.
106 // 3) Another operand, which will feed back into the PHI.
107 Instruction *PHIUser = nullptr;
108 for (auto *U : PN->users()) {
109 if (ExtractElementInst *EU = dyn_cast<ExtractElementInst>(Val: U)) {
110 if (EI.getIndexOperand() == EU->getIndexOperand())
111 Extracts.push_back(Elt: EU);
112 else
113 return nullptr;
114 } else if (!PHIUser) {
115 PHIUser = cast<Instruction>(Val: U);
116 } else {
117 return nullptr;
118 }
119 }
120
121 if (!PHIUser)
122 return nullptr;
123
124 // Verify that this PHI user has one use, which is the PHI itself,
125 // and that it is a binary operation which is cheap to scalarize.
126 // otherwise return nullptr.
127 if (!PHIUser->hasOneUse() || !(PHIUser->user_back() == PN) ||
128 !(isa<BinaryOperator>(Val: PHIUser)) ||
129 !cheapToScalarize(V: PHIUser, EI: EI.getIndexOperand()))
130 return nullptr;
131
132 // Create a scalar PHI node that will replace the vector PHI node
133 // just before the current PHI node.
134 PHINode *scalarPHI = cast<PHINode>(Val: InsertNewInstWith(
135 New: PHINode::Create(Ty: EI.getType(), NumReservedValues: PN->getNumIncomingValues(), NameStr: ""), Old: PN->getIterator()));
136 // Scalarize each PHI operand. A switch may produce multiple edges from the
137 // same predecessor; reuse the scalar instruction for duplicate edges.
138 SmallDenseMap<BasicBlock *, Value *, 4> ScalarizedValues;
139 for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) {
140 Value *PHIInVal = PN->getIncomingValue(i);
141 BasicBlock *inBB = PN->getIncomingBlock(i);
142 Value *Elt = EI.getIndexOperand();
143
144 // Reuse scalar value for duplicate edges from the same predecessor.
145 if (Value *Existing = ScalarizedValues.lookup(Val: inBB)) {
146 scalarPHI->addIncoming(V: Existing, BB: inBB);
147 continue;
148 }
149
150 Value *ScalarVal;
151 // If the operand is the PHI induction variable:
152 if (PHIInVal == PHIUser) {
153 // Scalarize the binary operation. One operand is the
154 // scalar PHI, and the other is extracted from the other
155 // vector operand.
156 BinaryOperator *B0 = cast<BinaryOperator>(Val: PHIUser);
157 unsigned opId = (B0->getOperand(i_nocapture: 0) == PN) ? 1 : 0;
158 Value *Op = InsertNewInstWith(
159 New: ExtractElementInst::Create(Vec: B0->getOperand(i_nocapture: opId), Idx: Elt,
160 NameStr: B0->getOperand(i_nocapture: opId)->getName() + ".Elt"),
161 Old: B0->getIterator());
162 // Preserve operand order for binary operation to preserve semantics of
163 // non-commutative operations.
164 Value *FirstOp = (B0->getOperand(i_nocapture: 0) == PN) ? scalarPHI : Op;
165 Value *SecondOp = (B0->getOperand(i_nocapture: 0) == PN) ? Op : scalarPHI;
166 ScalarVal = InsertNewInstWith(New: BinaryOperator::CreateWithCopiedFlags(
167 Opc: B0->getOpcode(), V1: FirstOp, V2: SecondOp, CopyO: B0),
168 Old: B0->getIterator());
169 } else {
170 // Scalarize PHI input:
171 Instruction *newEI = ExtractElementInst::Create(Vec: PHIInVal, Idx: Elt, NameStr: "");
172 // Insert the new instruction into the predecessor basic block.
173 Instruction *pos = dyn_cast<Instruction>(Val: PHIInVal);
174 BasicBlock::iterator InsertPos;
175 if (pos && !isa<PHINode>(Val: pos)) {
176 InsertPos = ++pos->getIterator();
177 } else {
178 InsertPos = inBB->getFirstInsertionPt();
179 }
180
181 ScalarVal = InsertNewInstWith(New: newEI, Old: InsertPos);
182 }
183
184 ScalarizedValues[inBB] = ScalarVal;
185 scalarPHI->addIncoming(V: ScalarVal, BB: inBB);
186 }
187
188 for (auto *E : Extracts) {
189 replaceInstUsesWith(I&: *E, V: scalarPHI);
190 // Add old extract to worklist for DCE.
191 addToWorklist(I: E);
192 }
193
194 return &EI;
195}
196
197Instruction *InstCombinerImpl::foldBitcastExtElt(ExtractElementInst &Ext) {
198 Value *X;
199 uint64_t ExtIndexC;
200 if (!match(V: Ext.getVectorOperand(), P: m_BitCast(Op: m_Value(V&: X))) ||
201 !match(V: Ext.getIndexOperand(), P: m_ConstantInt(V&: ExtIndexC)))
202 return nullptr;
203
204 ElementCount NumElts =
205 cast<VectorType>(Val: Ext.getVectorOperandType())->getElementCount();
206 Type *DestTy = Ext.getType();
207 unsigned DestWidth = DestTy->getPrimitiveSizeInBits();
208 bool IsBigEndian = DL.isBigEndian();
209
210 // If we are casting an integer to vector and extracting a portion, that is
211 // a shift-right and truncate.
212 if (X->getType()->isIntegerTy()) {
213 assert(isa<FixedVectorType>(Ext.getVectorOperand()->getType()) &&
214 "Expected fixed vector type for bitcast from scalar integer");
215
216 // Big endian requires adjusting the extract index since MSB is at index 0.
217 // LittleEndian: extelt (bitcast i32 X to v4i8), 0 -> trunc i32 X to i8
218 // BigEndian: extelt (bitcast i32 X to v4i8), 0 -> trunc i32 (X >> 24) to i8
219 if (IsBigEndian)
220 ExtIndexC = NumElts.getKnownMinValue() - 1 - ExtIndexC;
221 unsigned ShiftAmountC = ExtIndexC * DestWidth;
222 if ((!ShiftAmountC ||
223 isDesirableIntType(BitWidth: X->getType()->getPrimitiveSizeInBits())) &&
224 Ext.getVectorOperand()->hasOneUse()) {
225 if (ShiftAmountC)
226 X = Builder.CreateLShr(LHS: X, RHS: ShiftAmountC, Name: "extelt.offset");
227 if (DestTy->isFloatingPointTy()) {
228 Type *DstIntTy = IntegerType::getIntNTy(C&: X->getContext(), N: DestWidth);
229 Value *Trunc = Builder.CreateTrunc(V: X, DestTy: DstIntTy);
230 return new BitCastInst(Trunc, DestTy);
231 }
232 return new TruncInst(X, DestTy);
233 }
234 }
235
236 if (!X->getType()->isVectorTy())
237 return nullptr;
238
239 // If this extractelement is using a bitcast from a vector of the same number
240 // of elements, see if we can find the source element from the source vector:
241 // extelt (bitcast VecX), IndexC --> bitcast X[IndexC]
242 auto *SrcTy = cast<VectorType>(Val: X->getType());
243 ElementCount NumSrcElts = SrcTy->getElementCount();
244 if (NumSrcElts == NumElts)
245 if (Value *Elt = findScalarElement(V: X, EltNo: ExtIndexC))
246 return new BitCastInst(Elt, DestTy);
247
248 assert(NumSrcElts.isScalable() == NumElts.isScalable() &&
249 "Src and Dst must be the same sort of vector type");
250
251 // If the source elements are wider than the destination, try to shift and
252 // truncate a subset of scalar bits of an insert op.
253 if (NumSrcElts.getKnownMinValue() < NumElts.getKnownMinValue()) {
254 Value *Scalar;
255 Value *Vec;
256 uint64_t InsIndexC;
257 if (!match(V: X, P: m_InsertElt(Val: m_Value(V&: Vec), Elt: m_Value(V&: Scalar),
258 Idx: m_ConstantInt(V&: InsIndexC))))
259 return nullptr;
260
261 // The extract must be from the subset of vector elements that we inserted
262 // into. Example: if we inserted element 1 of a <2 x i64> and we are
263 // extracting an i16 (narrowing ratio = 4), then this extract must be from 1
264 // of elements 4-7 of the bitcasted vector.
265 unsigned NarrowingRatio =
266 NumElts.getKnownMinValue() / NumSrcElts.getKnownMinValue();
267
268 if (ExtIndexC / NarrowingRatio != InsIndexC) {
269 // Remove insertelement, if we don't use the inserted element.
270 // extractelement (bitcast (insertelement (Vec, b)), a) ->
271 // extractelement (bitcast (Vec), a)
272 // FIXME: this should be removed to SimplifyDemandedVectorElts,
273 // once scale vectors are supported.
274 if (X->hasOneUse() && Ext.getVectorOperand()->hasOneUse()) {
275 Value *NewBC = Builder.CreateBitCast(V: Vec, DestTy: Ext.getVectorOperandType());
276 return ExtractElementInst::Create(Vec: NewBC, Idx: Ext.getIndexOperand());
277 }
278 return nullptr;
279 }
280
281 // We are extracting part of the original scalar. How that scalar is
282 // inserted into the vector depends on the endian-ness. Example:
283 // Vector Byte Elt Index: 0 1 2 3 4 5 6 7
284 // +--+--+--+--+--+--+--+--+
285 // inselt <2 x i32> V, <i32> S, 1: |V0|V1|V2|V3|S0|S1|S2|S3|
286 // extelt <4 x i16> V', 3: | |S2|S3|
287 // +--+--+--+--+--+--+--+--+
288 // If this is little-endian, S2|S3 are the MSB of the 32-bit 'S' value.
289 // If this is big-endian, S2|S3 are the LSB of the 32-bit 'S' value.
290 // In this example, we must right-shift little-endian. Big-endian is just a
291 // truncate.
292 unsigned Chunk = ExtIndexC % NarrowingRatio;
293 if (IsBigEndian)
294 Chunk = NarrowingRatio - 1 - Chunk;
295
296 // Bail out if this is an FP vector to FP vector sequence. That would take
297 // more instructions than we started with unless there is no shift, and it
298 // may not be handled as well in the backend.
299 bool NeedSrcBitcast = SrcTy->getScalarType()->isFloatingPointTy();
300 bool NeedDestBitcast = DestTy->isFloatingPointTy();
301 if (NeedSrcBitcast && NeedDestBitcast)
302 return nullptr;
303
304 unsigned SrcWidth = SrcTy->getScalarSizeInBits();
305 unsigned ShAmt = Chunk * DestWidth;
306
307 // TODO: This limitation is more strict than necessary. We could sum the
308 // number of new instructions and subtract the number eliminated to know if
309 // we can proceed.
310 if (!X->hasOneUse() || !Ext.getVectorOperand()->hasOneUse())
311 if (NeedSrcBitcast || NeedDestBitcast)
312 return nullptr;
313
314 if (NeedSrcBitcast) {
315 Type *SrcIntTy = IntegerType::getIntNTy(C&: Scalar->getContext(), N: SrcWidth);
316 Scalar = Builder.CreateBitCast(V: Scalar, DestTy: SrcIntTy);
317 }
318
319 if (ShAmt) {
320 // Bail out if we could end with more instructions than we started with.
321 if (!Ext.getVectorOperand()->hasOneUse())
322 return nullptr;
323 Scalar = Builder.CreateLShr(LHS: Scalar, RHS: ShAmt);
324 }
325
326 if (NeedDestBitcast) {
327 Type *DestIntTy = IntegerType::getIntNTy(C&: Scalar->getContext(), N: DestWidth);
328 return new BitCastInst(Builder.CreateTrunc(V: Scalar, DestTy: DestIntTy), DestTy);
329 }
330 return new TruncInst(Scalar, DestTy);
331 }
332
333 return nullptr;
334}
335
336/// Find elements of V demanded by UserInstr. If returns false, we were not able
337/// to determine all elements.
338static bool findDemandedEltsBySingleUser(Value *V, Instruction *UserInstr,
339 APInt &UnionUsedElts) {
340 unsigned VWidth = cast<FixedVectorType>(Val: V->getType())->getNumElements();
341
342 switch (UserInstr->getOpcode()) {
343 case Instruction::ExtractElement: {
344 ExtractElementInst *EEI = cast<ExtractElementInst>(Val: UserInstr);
345 assert(EEI->getVectorOperand() == V);
346 ConstantInt *EEIIndexC = dyn_cast<ConstantInt>(Val: EEI->getIndexOperand());
347 if (EEIIndexC && EEIIndexC->getValue().ult(RHS: VWidth)) {
348 UnionUsedElts.setBit(EEIIndexC->getZExtValue());
349 return true;
350 }
351 break;
352 }
353 case Instruction::ShuffleVector: {
354 ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(Val: UserInstr);
355 unsigned MaskNumElts =
356 cast<FixedVectorType>(Val: UserInstr->getType())->getNumElements();
357
358 for (auto I : llvm::seq(Size: MaskNumElts)) {
359 unsigned MaskVal = Shuffle->getMaskValue(Elt: I);
360 if (MaskVal == -1u || MaskVal >= 2 * VWidth)
361 continue;
362 if (Shuffle->getOperand(i_nocapture: 0) == V && (MaskVal < VWidth))
363 UnionUsedElts.setBit(MaskVal);
364 if (Shuffle->getOperand(i_nocapture: 1) == V &&
365 ((MaskVal >= VWidth) && (MaskVal < 2 * VWidth)))
366 UnionUsedElts.setBit(MaskVal - VWidth);
367 }
368 return true;
369 }
370 default:
371 break;
372 }
373
374 return false;
375}
376
377/// Find union of elements of V demanded by all its users.
378/// If it is known by querying findDemandedEltsBySingleUser that
379/// no user demands an element of V, then the corresponding bit
380/// remains unset in the returned value.
381static APInt findDemandedEltsByAllUsers(Value *V) {
382 unsigned VWidth = cast<FixedVectorType>(Val: V->getType())->getNumElements();
383
384 APInt UnionUsedElts(VWidth, 0);
385 for (const Use &U : V->uses()) {
386 if (Instruction *I = dyn_cast<Instruction>(Val: U.getUser())) {
387 if (!findDemandedEltsBySingleUser(V, UserInstr: I, UnionUsedElts))
388 return APInt::getAllOnes(numBits: VWidth);
389 } else {
390 UnionUsedElts = APInt::getAllOnes(numBits: VWidth);
391 break;
392 }
393
394 if (UnionUsedElts.isAllOnes())
395 break;
396 }
397
398 return UnionUsedElts;
399}
400
401/// Given a constant index for a extractelement or insertelement instruction,
402/// return it with the canonical type if it isn't already canonical. We
403/// arbitrarily pick 64 bit as our canonical type. The actual bitwidth doesn't
404/// matter, we just want a consistent type to simplify CSE.
405static ConstantInt *getPreferredVectorIndex(ConstantInt *IndexC) {
406 const unsigned IndexBW = IndexC->getBitWidth();
407 if (IndexBW == 64 || IndexC->getValue().getActiveBits() > 64)
408 return nullptr;
409 return ConstantInt::get(Context&: IndexC->getContext(),
410 V: IndexC->getValue().zextOrTrunc(width: 64));
411}
412
413Instruction *InstCombinerImpl::visitExtractElementInst(ExtractElementInst &EI) {
414 Value *SrcVec = EI.getVectorOperand();
415 Value *Index = EI.getIndexOperand();
416 if (Value *V = simplifyExtractElementInst(Vec: SrcVec, Idx: Index,
417 Q: SQ.getWithInstruction(I: &EI)))
418 return replaceInstUsesWith(I&: EI, V);
419
420 // extractelt (select %x, %vec1, %vec2), %const ->
421 // select %x, %vec1[%const], %vec2[%const]
422 // TODO: Support constant folding of multiple select operands:
423 // extractelt (select %x, %vec1, %vec2), (select %x, %c1, %c2)
424 // If the extractelement will for instance try to do out of bounds accesses
425 // because of the values of %c1 and/or %c2, the sequence could be optimized
426 // early. This is currently not possible because constant folding will reach
427 // an unreachable assertion if it doesn't find a constant operand.
428 if (SelectInst *SI = dyn_cast<SelectInst>(Val: EI.getVectorOperand()))
429 if (SI->getCondition()->getType()->isIntegerTy() &&
430 isa<Constant>(Val: EI.getIndexOperand()))
431 if (Instruction *R = FoldOpIntoSelect(Op&: EI, SI))
432 return R;
433
434 // If extracting a specified index from the vector, see if we can recursively
435 // find a previously computed scalar that was inserted into the vector.
436 auto *IndexC = dyn_cast<ConstantInt>(Val: Index);
437 bool HasKnownValidIndex = false;
438 if (IndexC) {
439 // Canonicalize type of constant indices to i64 to simplify CSE
440 if (auto *NewIdx = getPreferredVectorIndex(IndexC))
441 return replaceOperand(I&: EI, OpNum: 1, V: NewIdx);
442
443 ElementCount EC = EI.getVectorOperandType()->getElementCount();
444 unsigned NumElts = EC.getKnownMinValue();
445 HasKnownValidIndex = IndexC->getValue().ult(RHS: NumElts);
446
447 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: SrcVec)) {
448 Intrinsic::ID IID = II->getIntrinsicID();
449 // Index needs to be lower than the minimum size of the vector, because
450 // for scalable vector, the vector size is known at run time.
451 if (IID == Intrinsic::stepvector && IndexC->getValue().ult(RHS: NumElts)) {
452 Type *Ty = EI.getType();
453 unsigned BitWidth = Ty->getIntegerBitWidth();
454 Value *Idx;
455 // Return index when its value does not exceed the allowed limit
456 // for the element type of the vector.
457 // TODO: Truncate out-of-range values.
458 if (IndexC->getValue().getActiveBits() <= BitWidth)
459 Idx = ConstantInt::get(Ty, V: IndexC->getValue().zextOrTrunc(width: BitWidth));
460 else
461 return nullptr;
462 return replaceInstUsesWith(I&: EI, V: Idx);
463 }
464 }
465
466 // InstSimplify should handle cases where the index is invalid.
467 // For fixed-length vector, it's invalid to extract out-of-range element.
468 if (!EC.isScalable() && IndexC->getValue().uge(RHS: NumElts))
469 return nullptr;
470
471 if (Instruction *I = foldBitcastExtElt(Ext&: EI))
472 return I;
473
474 // If there's a vector PHI feeding a scalar use through this extractelement
475 // instruction, try to scalarize the PHI.
476 if (auto *Phi = dyn_cast<PHINode>(Val: SrcVec))
477 if (Instruction *ScalarPHI = scalarizePHI(EI, PN: Phi))
478 return ScalarPHI;
479 }
480
481 // If SrcVec is a subvector starting at index 0, extract from the
482 // wider source vector
483 Value *V;
484 if (match(V: SrcVec,
485 P: m_Intrinsic<Intrinsic::vector_extract>(Op0: m_Value(V), Op1: m_Zero())))
486 return ExtractElementInst::Create(Vec: V, Idx: Index);
487
488 // TODO come up with a n-ary matcher that subsumes both unary and
489 // binary matchers.
490 UnaryOperator *UO;
491 if (match(V: SrcVec, P: m_UnOp(I&: UO)) && cheapToScalarize(V: SrcVec, EI: Index)) {
492 // extelt (unop X), Index --> unop (extelt X, Index)
493 Value *X = UO->getOperand(i_nocapture: 0);
494 Value *E = Builder.CreateExtractElement(Vec: X, Idx: Index);
495 return UnaryOperator::CreateWithCopiedFlags(Opc: UO->getOpcode(), V: E, CopyO: UO);
496 }
497
498 // If the binop is not speculatable, we cannot hoist the extractelement if
499 // it may make the operand poison.
500 BinaryOperator *BO;
501 if (match(V: SrcVec, P: m_BinOp(I&: BO)) && cheapToScalarize(V: SrcVec, EI: Index) &&
502 (HasKnownValidIndex ||
503 isSafeToSpeculativelyExecuteWithVariableReplaced(I: BO))) {
504 // extelt (binop X, Y), Index --> binop (extelt X, Index), (extelt Y, Index)
505 Value *X = BO->getOperand(i_nocapture: 0), *Y = BO->getOperand(i_nocapture: 1);
506 Value *E0 = Builder.CreateExtractElement(Vec: X, Idx: Index);
507 Value *E1 = Builder.CreateExtractElement(Vec: Y, Idx: Index);
508 return BinaryOperator::CreateWithCopiedFlags(Opc: BO->getOpcode(), V1: E0, V2: E1, CopyO: BO);
509 }
510
511 Value *X, *Y;
512 CmpPredicate Pred;
513 if (match(V: SrcVec, P: m_Cmp(Pred, L: m_Value(V&: X), R: m_Value(V&: Y))) &&
514 cheapToScalarize(V: SrcVec, EI: Index)) {
515 // extelt (cmp X, Y), Index --> cmp (extelt X, Index), (extelt Y, Index)
516 Value *E0 = Builder.CreateExtractElement(Vec: X, Idx: Index);
517 Value *E1 = Builder.CreateExtractElement(Vec: Y, Idx: Index);
518 CmpInst *SrcCmpInst = cast<CmpInst>(Val: SrcVec);
519 return CmpInst::CreateWithCopiedFlags(Op: SrcCmpInst->getOpcode(), Pred, S1: E0, S2: E1,
520 FlagsSource: SrcCmpInst);
521 }
522
523 if (auto *I = dyn_cast<Instruction>(Val: SrcVec)) {
524 if (auto *IE = dyn_cast<InsertElementInst>(Val: I)) {
525 // instsimplify already handled the case where the indices are constants
526 // and equal by value, if both are constants, they must not be the same
527 // value, extract from the pre-inserted value instead.
528 if (isa<Constant>(Val: IE->getOperand(i_nocapture: 2)) && IndexC)
529 return replaceOperand(I&: EI, OpNum: 0, V: IE->getOperand(i_nocapture: 0));
530 } else if (auto *GEP = dyn_cast<GetElementPtrInst>(Val: I)) {
531 auto *VecType = cast<VectorType>(Val: GEP->getType());
532 ElementCount EC = VecType->getElementCount();
533 uint64_t IdxVal = IndexC ? IndexC->getZExtValue() : 0;
534 if (IndexC && IdxVal < EC.getKnownMinValue() && GEP->hasOneUse()) {
535 // Find out why we have a vector result - these are a few examples:
536 // 1. We have a scalar pointer and a vector of indices, or
537 // 2. We have a vector of pointers and a scalar index, or
538 // 3. We have a vector of pointers and a vector of indices, etc.
539 // Here we only consider combining when there is exactly one vector
540 // operand, since the optimization is less obviously a win due to
541 // needing more than one extractelements.
542
543 unsigned VectorOps =
544 llvm::count_if(Range: GEP->operands(), P: [](const Value *V) {
545 return isa<VectorType>(Val: V->getType());
546 });
547 if (VectorOps == 1) {
548 Value *NewPtr = GEP->getPointerOperand();
549 if (isa<VectorType>(Val: NewPtr->getType()))
550 NewPtr = Builder.CreateExtractElement(Vec: NewPtr, Idx: IndexC);
551
552 SmallVector<Value *> NewOps;
553 for (unsigned I = 1; I != GEP->getNumOperands(); ++I) {
554 Value *Op = GEP->getOperand(i_nocapture: I);
555 if (isa<VectorType>(Val: Op->getType()))
556 NewOps.push_back(Elt: Builder.CreateExtractElement(Vec: Op, Idx: IndexC));
557 else
558 NewOps.push_back(Elt: Op);
559 }
560
561 GetElementPtrInst *NewGEP = GetElementPtrInst::Create(
562 PointeeType: GEP->getSourceElementType(), Ptr: NewPtr, IdxList: NewOps);
563 NewGEP->setNoWrapFlags(GEP->getNoWrapFlags());
564 return NewGEP;
565 }
566 }
567 } else if (auto *SVI = dyn_cast<ShuffleVectorInst>(Val: I)) {
568 int SplatIndex = getSplatIndex(Mask: SVI->getShuffleMask());
569 // We know the all-0 splat must be reading from the first operand, even
570 // in the case of scalable vectors (vscale is always > 0).
571 if (SplatIndex == 0)
572 return ExtractElementInst::Create(Vec: SVI->getOperand(i_nocapture: 0),
573 Idx: Builder.getInt64(C: 0));
574
575 if (isa<FixedVectorType>(Val: SVI->getType())) {
576 std::optional<int> SrcIdx;
577 // getSplatIndex returns -1 to mean not-found.
578 if (SplatIndex != -1)
579 SrcIdx = SplatIndex;
580 else if (ConstantInt *CI = dyn_cast<ConstantInt>(Val: Index))
581 SrcIdx = SVI->getMaskValue(Elt: CI->getZExtValue());
582
583 if (SrcIdx) {
584 Value *Src;
585 unsigned LHSWidth =
586 cast<FixedVectorType>(Val: SVI->getOperand(i_nocapture: 0)->getType())
587 ->getNumElements();
588
589 if (*SrcIdx < 0)
590 return replaceInstUsesWith(I&: EI, V: PoisonValue::get(T: EI.getType()));
591 if (*SrcIdx < (int)LHSWidth)
592 Src = SVI->getOperand(i_nocapture: 0);
593 else {
594 *SrcIdx -= LHSWidth;
595 Src = SVI->getOperand(i_nocapture: 1);
596 }
597 Type *Int64Ty = Type::getInt64Ty(C&: EI.getContext());
598 return ExtractElementInst::Create(
599 Vec: Src, Idx: ConstantInt::get(Ty: Int64Ty, V: *SrcIdx, IsSigned: false));
600 }
601 }
602 } else if (auto *CI = dyn_cast<CastInst>(Val: I)) {
603 // Canonicalize extractelement(cast) -> cast(extractelement).
604 // Bitcasts can change the number of vector elements, and they cost
605 // nothing.
606 // If the CI has only one use, but that use is inside a loop, this
607 // canonicalization is not profitable because it would turn a vector
608 // operation into scalar operations inside the loop. Apply the transform
609 // when:
610 // - the index is constant and CI has one use, or
611 // - the CI and EI are in the same basic block, so the cast won't be sunk
612 // into a loop.
613 if (CI->hasOneUse() && (CI->getOpcode() != Instruction::BitCast) &&
614 (EI.getParent() == CI->getParent() || isa<ConstantInt>(Val: Index))) {
615 Value *EE = Builder.CreateExtractElement(Vec: CI->getOperand(i_nocapture: 0), Idx: Index);
616 return CastInst::Create(CI->getOpcode(), S: EE, Ty: EI.getType());
617 }
618 }
619 }
620
621 // Run demanded elements after other transforms as this can drop flags on
622 // binops. If there's two paths to the same final result, we prefer the
623 // one which doesn't force us to drop flags.
624 if (IndexC) {
625 ElementCount EC = EI.getVectorOperandType()->getElementCount();
626 unsigned NumElts = EC.getKnownMinValue();
627 // This instruction only demands the single element from the input vector.
628 // Skip for scalable type, the number of elements is unknown at
629 // compile-time.
630 if (!EC.isScalable() && NumElts != 1) {
631 // If the input vector has a single use, simplify it based on this use
632 // property.
633 if (SrcVec->hasOneUse()) {
634 APInt PoisonElts(NumElts, 0);
635 APInt DemandedElts(NumElts, 0);
636 DemandedElts.setBit(IndexC->getZExtValue());
637 if (Value *V =
638 SimplifyDemandedVectorElts(V: SrcVec, DemandedElts, PoisonElts))
639 return replaceOperand(I&: EI, OpNum: 0, V);
640 } else {
641 // If the input vector has multiple uses, simplify it based on a union
642 // of all elements used.
643 APInt DemandedElts = findDemandedEltsByAllUsers(V: SrcVec);
644 if (!DemandedElts.isAllOnes()) {
645 APInt PoisonElts(NumElts, 0);
646 if (Value *V = SimplifyDemandedVectorElts(
647 V: SrcVec, DemandedElts, PoisonElts, Depth: 0 /* Depth */,
648 AllowMultipleUsers: true /* AllowMultipleUsers */)) {
649 if (V != SrcVec) {
650 Worklist.addValue(V: SrcVec);
651 SrcVec->replaceAllUsesWith(V);
652 return &EI;
653 }
654 }
655 }
656 }
657 }
658 }
659 return nullptr;
660}
661
662/// If V is a shuffle of values that ONLY returns elements from either LHS or
663/// RHS, return the shuffle mask and true. Otherwise, return false.
664static bool collectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
665 SmallVectorImpl<int> &Mask) {
666 assert(LHS->getType() == RHS->getType() &&
667 "Invalid CollectSingleShuffleElements");
668 unsigned NumElts = cast<FixedVectorType>(Val: V->getType())->getNumElements();
669
670 if (match(V, P: m_Poison())) {
671 Mask.assign(NumElts, Elt: -1);
672 return true;
673 }
674
675 if (V == LHS) {
676 for (unsigned i = 0; i != NumElts; ++i)
677 Mask.push_back(Elt: i);
678 return true;
679 }
680
681 if (V == RHS) {
682 for (unsigned i = 0; i != NumElts; ++i)
683 Mask.push_back(Elt: i + NumElts);
684 return true;
685 }
686
687 if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(Val: V)) {
688 // If this is an insert of an extract from some other vector, include it.
689 Value *VecOp = IEI->getOperand(i_nocapture: 0);
690 Value *ScalarOp = IEI->getOperand(i_nocapture: 1);
691 Value *IdxOp = IEI->getOperand(i_nocapture: 2);
692
693 if (!isa<ConstantInt>(Val: IdxOp))
694 return false;
695 unsigned InsertedIdx = cast<ConstantInt>(Val: IdxOp)->getZExtValue();
696
697 if (isa<PoisonValue>(Val: ScalarOp)) { // inserting poison into vector.
698 // We can handle this if the vector we are inserting into is
699 // transitively ok.
700 if (collectSingleShuffleElements(V: VecOp, LHS, RHS, Mask)) {
701 // If so, update the mask to reflect the inserted poison.
702 Mask[InsertedIdx] = -1;
703 return true;
704 }
705 } else if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(Val: ScalarOp)){
706 if (isa<ConstantInt>(Val: EI->getOperand(i_nocapture: 1))) {
707 unsigned ExtractedIdx =
708 cast<ConstantInt>(Val: EI->getOperand(i_nocapture: 1))->getZExtValue();
709 unsigned NumLHSElts =
710 cast<FixedVectorType>(Val: LHS->getType())->getNumElements();
711
712 // This must be extracting from either LHS or RHS.
713 if (EI->getOperand(i_nocapture: 0) == LHS || EI->getOperand(i_nocapture: 0) == RHS) {
714 // We can handle this if the vector we are inserting into is
715 // transitively ok.
716 if (collectSingleShuffleElements(V: VecOp, LHS, RHS, Mask)) {
717 // If so, update the mask to reflect the inserted value.
718 if (EI->getOperand(i_nocapture: 0) == LHS) {
719 Mask[InsertedIdx % NumElts] = ExtractedIdx;
720 } else {
721 assert(EI->getOperand(0) == RHS);
722 Mask[InsertedIdx % NumElts] = ExtractedIdx + NumLHSElts;
723 }
724 return true;
725 }
726 }
727 }
728 }
729 }
730
731 return false;
732}
733
734/// If we have insertion into a vector that is wider than the vector that we
735/// are extracting from, try to widen the source vector to allow a single
736/// shufflevector to replace one or more insert/extract pairs.
737static bool replaceExtractElements(InsertElementInst *InsElt,
738 ExtractElementInst *ExtElt,
739 InstCombinerImpl &IC) {
740 auto *InsVecType = cast<FixedVectorType>(Val: InsElt->getType());
741 auto *ExtVecType = cast<FixedVectorType>(Val: ExtElt->getVectorOperandType());
742 unsigned NumInsElts = InsVecType->getNumElements();
743 unsigned NumExtElts = ExtVecType->getNumElements();
744
745 // The inserted-to vector must be wider than the extracted-from vector.
746 if (InsVecType->getElementType() != ExtVecType->getElementType() ||
747 NumExtElts >= NumInsElts)
748 return false;
749
750 Value *ExtVecOp = ExtElt->getVectorOperand();
751 // Bail out on constant vectors.
752 if (isa<ConstantData>(Val: ExtVecOp))
753 return false;
754
755 // Create a shuffle mask to widen the extended-from vector using poison
756 // values. The mask selects all of the values of the original vector followed
757 // by as many poison values as needed to create a vector of the same length
758 // as the inserted-to vector.
759 SmallVector<int, 16> ExtendMask;
760 for (unsigned i = 0; i < NumExtElts; ++i)
761 ExtendMask.push_back(Elt: i);
762 for (unsigned i = NumExtElts; i < NumInsElts; ++i)
763 ExtendMask.push_back(Elt: -1);
764
765 auto *ExtVecOpInst = dyn_cast<Instruction>(Val: ExtVecOp);
766 BasicBlock *InsertionBlock = (ExtVecOpInst && !isa<PHINode>(Val: ExtVecOpInst))
767 ? ExtVecOpInst->getParent()
768 : ExtElt->getParent();
769
770 // TODO: This restriction matches the basic block check below when creating
771 // new extractelement instructions. If that limitation is removed, this one
772 // could also be removed. But for now, we just bail out to ensure that we
773 // will replace the extractelement instruction that is feeding our
774 // insertelement instruction. This allows the insertelement to then be
775 // replaced by a shufflevector. If the insertelement is not replaced, we can
776 // induce infinite looping because there's an optimization for extractelement
777 // that will delete our widening shuffle. This would trigger another attempt
778 // here to create that shuffle, and we spin forever.
779 if (InsertionBlock != InsElt->getParent())
780 return false;
781
782 // TODO: This restriction matches the check in visitInsertElementInst() and
783 // prevents an infinite loop caused by not turning the extract/insert pair
784 // into a shuffle. We really should not need either check, but we're lacking
785 // folds for shufflevectors because we're afraid to generate shuffle masks
786 // that the backend can't handle.
787 if (InsElt->hasOneUse() && isa<InsertElementInst>(Val: InsElt->user_back()))
788 return false;
789
790 auto *WideVec = new ShuffleVectorInst(ExtVecOp, ExtendMask);
791
792 // Insert the new shuffle after the vector operand of the extract is defined
793 // (as long as it's not a PHI) or at the start of the basic block of the
794 // extract, so any subsequent extracts in the same basic block can use it.
795 // TODO: Insert before the earliest ExtractElementInst that is replaced.
796 if (ExtVecOpInst && !isa<PHINode>(Val: ExtVecOpInst))
797 WideVec->insertAfter(InsertPos: ExtVecOpInst->getIterator());
798 else
799 IC.InsertNewInstWith(New: WideVec, Old: ExtElt->getParent()->getFirstInsertionPt());
800
801 // Replace extracts from the original narrow vector with extracts from the new
802 // wide vector.
803 for (User *U : ExtVecOp->users()) {
804 ExtractElementInst *OldExt = dyn_cast<ExtractElementInst>(Val: U);
805 if (!OldExt || OldExt->getParent() != WideVec->getParent())
806 continue;
807 auto *NewExt = ExtractElementInst::Create(Vec: WideVec, Idx: OldExt->getOperand(i_nocapture: 1));
808 IC.InsertNewInstWith(New: NewExt, Old: OldExt->getIterator());
809 IC.replaceInstUsesWith(I&: *OldExt, V: NewExt);
810 // Add the old extracts to the worklist for DCE. We can't remove the
811 // extracts directly, because they may still be used by the calling code.
812 IC.addToWorklist(I: OldExt);
813 }
814
815 return true;
816}
817
818/// We are building a shuffle to create V, which is a sequence of insertelement,
819/// extractelement pairs. If PermittedRHS is set, then we must either use it or
820/// not rely on the second vector source. Return a std::pair containing the
821/// left and right vectors of the proposed shuffle (or 0), and set the Mask
822/// parameter as required.
823///
824/// Note: we intentionally don't try to fold earlier shuffles since they have
825/// often been chosen carefully to be efficiently implementable on the target.
826using ShuffleOps = std::pair<Value *, Value *>;
827
828static ShuffleOps collectShuffleElements(Value *V, SmallVectorImpl<int> &Mask,
829 Value *PermittedRHS,
830 InstCombinerImpl &IC, bool &Rerun) {
831 assert(V->getType()->isVectorTy() && "Invalid shuffle!");
832 unsigned NumElts = cast<FixedVectorType>(Val: V->getType())->getNumElements();
833
834 if (match(V, P: m_Poison())) {
835 Mask.assign(NumElts, Elt: -1);
836 return std::make_pair(
837 x: PermittedRHS ? PoisonValue::get(T: PermittedRHS->getType()) : V, y: nullptr);
838 }
839
840 if (isa<ConstantAggregateZero>(Val: V)) {
841 Mask.assign(NumElts, Elt: 0);
842 return std::make_pair(x&: V, y: nullptr);
843 }
844
845 if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(Val: V)) {
846 // If this is an insert of an extract from some other vector, include it.
847 Value *VecOp = IEI->getOperand(i_nocapture: 0);
848 Value *ScalarOp = IEI->getOperand(i_nocapture: 1);
849 Value *IdxOp = IEI->getOperand(i_nocapture: 2);
850
851 if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(Val: ScalarOp)) {
852 if (isa<ConstantInt>(Val: EI->getOperand(i_nocapture: 1)) && isa<ConstantInt>(Val: IdxOp)) {
853 unsigned ExtractedIdx =
854 cast<ConstantInt>(Val: EI->getOperand(i_nocapture: 1))->getZExtValue();
855 unsigned InsertedIdx = cast<ConstantInt>(Val: IdxOp)->getZExtValue();
856
857 // Either the extracted from or inserted into vector must be RHSVec,
858 // otherwise we'd end up with a shuffle of three inputs.
859 if (EI->getOperand(i_nocapture: 0) == PermittedRHS || PermittedRHS == nullptr) {
860 Value *RHS = EI->getOperand(i_nocapture: 0);
861 ShuffleOps LR = collectShuffleElements(V: VecOp, Mask, PermittedRHS: RHS, IC, Rerun);
862 assert(LR.second == nullptr || LR.second == RHS);
863
864 if (LR.first->getType() != RHS->getType()) {
865 // Although we are giving up for now, see if we can create extracts
866 // that match the inserts for another round of combining.
867 if (replaceExtractElements(InsElt: IEI, ExtElt: EI, IC))
868 Rerun = true;
869
870 // We tried our best, but we can't find anything compatible with RHS
871 // further up the chain. Return a trivial shuffle.
872 for (unsigned i = 0; i < NumElts; ++i)
873 Mask[i] = i;
874 return std::make_pair(x&: V, y: nullptr);
875 }
876
877 unsigned NumLHSElts =
878 cast<FixedVectorType>(Val: RHS->getType())->getNumElements();
879 Mask[InsertedIdx % NumElts] = NumLHSElts + ExtractedIdx;
880 return std::make_pair(x&: LR.first, y&: RHS);
881 }
882
883 if (VecOp == PermittedRHS) {
884 // We've gone as far as we can: anything on the other side of the
885 // extractelement will already have been converted into a shuffle.
886 unsigned NumLHSElts =
887 cast<FixedVectorType>(Val: EI->getOperand(i_nocapture: 0)->getType())
888 ->getNumElements();
889 for (unsigned i = 0; i != NumElts; ++i)
890 Mask.push_back(Elt: i == InsertedIdx ? ExtractedIdx : NumLHSElts + i);
891 return std::make_pair(x: EI->getOperand(i_nocapture: 0), y&: PermittedRHS);
892 }
893
894 // If this insertelement is a chain that comes from exactly these two
895 // vectors, return the vector and the effective shuffle.
896 if (EI->getOperand(i_nocapture: 0)->getType() == PermittedRHS->getType() &&
897 collectSingleShuffleElements(V: IEI, LHS: EI->getOperand(i_nocapture: 0), RHS: PermittedRHS,
898 Mask))
899 return std::make_pair(x: EI->getOperand(i_nocapture: 0), y&: PermittedRHS);
900 }
901 }
902 }
903
904 // Otherwise, we can't do anything fancy. Return an identity vector.
905 for (unsigned i = 0; i != NumElts; ++i)
906 Mask.push_back(Elt: i);
907 return std::make_pair(x&: V, y: nullptr);
908}
909
910/// Look for chain of insertvalue's that fully define an aggregate, and trace
911/// back the values inserted, see if they are all were extractvalue'd from
912/// the same source aggregate from the exact same element indexes.
913/// If they were, just reuse the source aggregate.
914/// This potentially deals with PHI indirections.
915Instruction *InstCombinerImpl::foldAggregateConstructionIntoAggregateReuse(
916 InsertValueInst &OrigIVI) {
917 Type *AggTy = OrigIVI.getType();
918 unsigned NumAggElts;
919 switch (AggTy->getTypeID()) {
920 case Type::StructTyID:
921 NumAggElts = AggTy->getStructNumElements();
922 break;
923 case Type::ArrayTyID:
924 NumAggElts = AggTy->getArrayNumElements();
925 break;
926 default:
927 llvm_unreachable("Unhandled aggregate type?");
928 }
929
930 // Arbitrary aggregate size cut-off. Motivation for limit of 2 is to be able
931 // to handle clang C++ exception struct (which is hardcoded as {i8*, i32}),
932 // FIXME: any interesting patterns to be caught with larger limit?
933 assert(NumAggElts > 0 && "Aggregate should have elements.");
934 if (NumAggElts > 2)
935 return nullptr;
936
937 static constexpr auto NotFound = std::nullopt;
938 static constexpr auto FoundMismatch = nullptr;
939
940 // Try to find a value of each element of an aggregate.
941 // FIXME: deal with more complex, not one-dimensional, aggregate types
942 SmallVector<std::optional<Instruction *>, 2> AggElts(NumAggElts, NotFound);
943
944 // Do we know values for each element of the aggregate?
945 auto KnowAllElts = [&AggElts]() {
946 return !llvm::is_contained(Range&: AggElts, Element: NotFound);
947 };
948
949 int Depth = 0;
950
951 // Arbitrary `insertvalue` visitation depth limit. Let's be okay with
952 // every element being overwritten twice, which should never happen.
953 static const int DepthLimit = 2 * NumAggElts;
954
955 // Recurse up the chain of `insertvalue` aggregate operands until either we've
956 // reconstructed full initializer or can't visit any more `insertvalue`'s.
957 for (InsertValueInst *CurrIVI = &OrigIVI;
958 Depth < DepthLimit && CurrIVI && !KnowAllElts();
959 CurrIVI = dyn_cast<InsertValueInst>(Val: CurrIVI->getAggregateOperand()),
960 ++Depth) {
961 auto *InsertedValue =
962 dyn_cast<Instruction>(Val: CurrIVI->getInsertedValueOperand());
963 if (!InsertedValue)
964 return nullptr; // Inserted value must be produced by an instruction.
965
966 ArrayRef<unsigned int> Indices = CurrIVI->getIndices();
967
968 // Don't bother with more than single-level aggregates.
969 if (Indices.size() != 1)
970 return nullptr; // FIXME: deal with more complex aggregates?
971
972 // Now, we may have already previously recorded the value for this element
973 // of an aggregate. If we did, that means the CurrIVI will later be
974 // overwritten with the already-recorded value. But if not, let's record it!
975 std::optional<Instruction *> &Elt = AggElts[Indices.front()];
976 Elt = Elt.value_or(u&: InsertedValue);
977
978 // FIXME: should we handle chain-terminating undef base operand?
979 }
980
981 // Was that sufficient to deduce the full initializer for the aggregate?
982 if (!KnowAllElts())
983 return nullptr; // Give up then.
984
985 // We now want to find the source[s] of the aggregate elements we've found.
986 // And with "source" we mean the original aggregate[s] from which
987 // the inserted elements were extracted. This may require PHI translation.
988
989 enum class AggregateDescription {
990 /// When analyzing the value that was inserted into an aggregate, we did
991 /// not manage to find defining `extractvalue` instruction to analyze.
992 NotFound,
993 /// When analyzing the value that was inserted into an aggregate, we did
994 /// manage to find defining `extractvalue` instruction[s], and everything
995 /// matched perfectly - aggregate type, element insertion/extraction index.
996 Found,
997 /// When analyzing the value that was inserted into an aggregate, we did
998 /// manage to find defining `extractvalue` instruction, but there was
999 /// a mismatch: either the source type from which the extraction was didn't
1000 /// match the aggregate type into which the insertion was,
1001 /// or the extraction/insertion channels mismatched,
1002 /// or different elements had different source aggregates.
1003 FoundMismatch
1004 };
1005 auto Describe = [](std::optional<Value *> SourceAggregate) {
1006 if (SourceAggregate == NotFound)
1007 return AggregateDescription::NotFound;
1008 if (*SourceAggregate == FoundMismatch)
1009 return AggregateDescription::FoundMismatch;
1010 return AggregateDescription::Found;
1011 };
1012
1013 // If an aggregate element is defined in UseBB, we can't use it in PredBB.
1014 bool EltDefinedInUseBB = false;
1015
1016 // Given the value \p Elt that was being inserted into element \p EltIdx of an
1017 // aggregate AggTy, see if \p Elt was originally defined by an
1018 // appropriate extractvalue (same element index, same aggregate type).
1019 // If found, return the source aggregate from which the extraction was.
1020 // If \p PredBB is provided, does PHI translation of an \p Elt first.
1021 auto FindSourceAggregate =
1022 [&](Instruction *Elt, unsigned EltIdx, std::optional<BasicBlock *> UseBB,
1023 std::optional<BasicBlock *> PredBB) -> std::optional<Value *> {
1024 // For now(?), only deal with, at most, a single level of PHI indirection.
1025 if (UseBB && PredBB) {
1026 Elt = dyn_cast<Instruction>(Val: Elt->DoPHITranslation(CurBB: *UseBB, PredBB: *PredBB));
1027 if (Elt && Elt->getParent() == *UseBB)
1028 EltDefinedInUseBB = true;
1029 }
1030 // FIXME: deal with multiple levels of PHI indirection?
1031
1032 // Did we find an extraction?
1033 auto *EVI = dyn_cast_or_null<ExtractValueInst>(Val: Elt);
1034 if (!EVI)
1035 return NotFound;
1036
1037 Value *SourceAggregate = EVI->getAggregateOperand();
1038
1039 // Is the extraction from the same type into which the insertion was?
1040 if (SourceAggregate->getType() != AggTy)
1041 return FoundMismatch;
1042 // And the element index doesn't change between extraction and insertion?
1043 if (EVI->getNumIndices() != 1 || EltIdx != EVI->getIndices().front())
1044 return FoundMismatch;
1045
1046 return SourceAggregate; // AggregateDescription::Found
1047 };
1048
1049 // Given elements AggElts that were constructing an aggregate OrigIVI,
1050 // see if we can find appropriate source aggregate for each of the elements,
1051 // and see it's the same aggregate for each element. If so, return it.
1052 auto FindCommonSourceAggregate =
1053 [&](std::optional<BasicBlock *> UseBB,
1054 std::optional<BasicBlock *> PredBB) -> std::optional<Value *> {
1055 std::optional<Value *> SourceAggregate;
1056
1057 for (auto I : enumerate(First&: AggElts)) {
1058 assert(Describe(SourceAggregate) != AggregateDescription::FoundMismatch &&
1059 "We don't store nullptr in SourceAggregate!");
1060 assert((Describe(SourceAggregate) == AggregateDescription::Found) ==
1061 (I.index() != 0) &&
1062 "SourceAggregate should be valid after the first element,");
1063
1064 // For this element, is there a plausible source aggregate?
1065 // FIXME: we could special-case undef element, IFF we know that in the
1066 // source aggregate said element isn't poison.
1067 std::optional<Value *> SourceAggregateForElement =
1068 FindSourceAggregate(*I.value(), I.index(), UseBB, PredBB);
1069
1070 // Okay, what have we found? Does that correlate with previous findings?
1071
1072 // Regardless of whether or not we have previously found source
1073 // aggregate for previous elements (if any), if we didn't find one for
1074 // this element, passthrough whatever we have just found.
1075 if (Describe(SourceAggregateForElement) != AggregateDescription::Found)
1076 return SourceAggregateForElement;
1077
1078 // Okay, we have found source aggregate for this element.
1079 // Let's see what we already know from previous elements, if any.
1080 switch (Describe(SourceAggregate)) {
1081 case AggregateDescription::NotFound:
1082 // This is apparently the first element that we have examined.
1083 SourceAggregate = SourceAggregateForElement; // Record the aggregate!
1084 continue; // Great, now look at next element.
1085 case AggregateDescription::Found:
1086 // We have previously already successfully examined other elements.
1087 // Is this the same source aggregate we've found for other elements?
1088 if (*SourceAggregateForElement != *SourceAggregate)
1089 return FoundMismatch;
1090 continue; // Still the same aggregate, look at next element.
1091 case AggregateDescription::FoundMismatch:
1092 llvm_unreachable("Can't happen. We would have early-exited then.");
1093 };
1094 }
1095
1096 assert(Describe(SourceAggregate) == AggregateDescription::Found &&
1097 "Must be a valid Value");
1098 return *SourceAggregate;
1099 };
1100
1101 std::optional<Value *> SourceAggregate;
1102
1103 // Can we find the source aggregate without looking at predecessors?
1104 SourceAggregate = FindCommonSourceAggregate(/*UseBB=*/std::nullopt,
1105 /*PredBB=*/std::nullopt);
1106 if (Describe(SourceAggregate) != AggregateDescription::NotFound) {
1107 if (Describe(SourceAggregate) == AggregateDescription::FoundMismatch)
1108 return nullptr; // Conflicting source aggregates!
1109 ++NumAggregateReconstructionsSimplified;
1110 return replaceInstUsesWith(I&: OrigIVI, V: *SourceAggregate);
1111 }
1112
1113 // Okay, apparently we need to look at predecessors.
1114
1115 // We should be smart about picking the "use" basic block, which will be the
1116 // merge point for aggregate, where we'll insert the final PHI that will be
1117 // used instead of OrigIVI. Basic block of OrigIVI is *not* the right choice.
1118 // We should look in which blocks each of the AggElts is being defined,
1119 // they all should be defined in the same basic block.
1120 BasicBlock *UseBB = nullptr;
1121
1122 for (const std::optional<Instruction *> &I : AggElts) {
1123 BasicBlock *BB = (*I)->getParent();
1124 // If it's the first instruction we've encountered, record the basic block.
1125 if (!UseBB) {
1126 UseBB = BB;
1127 continue;
1128 }
1129 // Otherwise, this must be the same basic block we've seen previously.
1130 if (UseBB != BB)
1131 return nullptr;
1132 }
1133
1134 // If *all* of the elements are basic-block-independent, meaning they are
1135 // either function arguments, or constant expressions, then if we didn't
1136 // handle them without predecessor-aware handling, we won't handle them now.
1137 if (!UseBB)
1138 return nullptr;
1139
1140 // If we didn't manage to find source aggregate without looking at
1141 // predecessors, and there are no predecessors to look at, then we're done.
1142 if (pred_empty(BB: UseBB))
1143 return nullptr;
1144
1145 // Arbitrary predecessor count limit.
1146 static const int PredCountLimit = 64;
1147
1148 // Cache the (non-uniqified!) list of predecessors in a vector,
1149 // checking the limit at the same time for efficiency.
1150 SmallVector<BasicBlock *, 4> Preds; // May have duplicates!
1151 for (BasicBlock *Pred : predecessors(BB: UseBB)) {
1152 // Don't bother if there are too many predecessors.
1153 if (Preds.size() >= PredCountLimit) // FIXME: only count duplicates once?
1154 return nullptr;
1155 Preds.emplace_back(Args&: Pred);
1156 }
1157
1158 // For each predecessor, what is the source aggregate,
1159 // from which all the elements were originally extracted from?
1160 // Note that we want for the map to have stable iteration order!
1161 SmallMapVector<BasicBlock *, Value *, 4> SourceAggregates;
1162 bool FoundSrcAgg = false;
1163 for (BasicBlock *Pred : Preds) {
1164 std::pair<decltype(SourceAggregates)::iterator, bool> IV =
1165 SourceAggregates.try_emplace(Key: Pred);
1166 // Did we already evaluate this predecessor?
1167 if (!IV.second)
1168 continue;
1169
1170 // Let's hope that when coming from predecessor Pred, all elements of the
1171 // aggregate produced by OrigIVI must have been originally extracted from
1172 // the same aggregate. Is that so? Can we find said original aggregate?
1173 SourceAggregate = FindCommonSourceAggregate(UseBB, Pred);
1174 if (Describe(SourceAggregate) == AggregateDescription::Found) {
1175 FoundSrcAgg = true;
1176 IV.first->second = *SourceAggregate;
1177 } else {
1178 // If UseBB is the single successor of Pred, we can add InsertValue to
1179 // Pred.
1180 auto *BI = dyn_cast<UncondBrInst>(Val: Pred->getTerminator());
1181 if (!BI)
1182 return nullptr;
1183 }
1184 }
1185
1186 if (!FoundSrcAgg)
1187 return nullptr;
1188
1189 // Do some sanity check if we need to add insertvalue into predecessors.
1190 auto OrigBB = OrigIVI.getParent();
1191 for (auto &It : SourceAggregates) {
1192 if (Describe(It.second) == AggregateDescription::Found)
1193 continue;
1194
1195 // Element is defined in UseBB, so it can't be used in predecessors.
1196 if (EltDefinedInUseBB)
1197 return nullptr;
1198
1199 // Do this transformation cross loop boundary may cause dead loop. So we
1200 // should avoid this situation. But LoopInfo is not generally available, we
1201 // must be conservative here.
1202 // If OrigIVI is in UseBB and it's the only successor of PredBB, PredBB
1203 // can't be in inner loop.
1204 if (UseBB != OrigBB)
1205 return nullptr;
1206
1207 // Avoid constructing constant aggregate because constant value may expose
1208 // more optimizations.
1209 bool ConstAgg = true;
1210 for (auto Val : AggElts) {
1211 Value *Elt = (*Val)->DoPHITranslation(CurBB: UseBB, PredBB: It.first);
1212 if (!isa<Constant>(Val: Elt)) {
1213 ConstAgg = false;
1214 break;
1215 }
1216 }
1217 if (ConstAgg)
1218 return nullptr;
1219 }
1220
1221 // For predecessors without appropriate source aggregate, create one in the
1222 // predecessor.
1223 for (auto &It : SourceAggregates) {
1224 if (Describe(It.second) == AggregateDescription::Found)
1225 continue;
1226
1227 BasicBlock *Pred = It.first;
1228 Builder.SetInsertPoint(Pred->getTerminator());
1229 Value *V = PoisonValue::get(T: AggTy);
1230 for (auto [Idx, Val] : enumerate(First&: AggElts)) {
1231 Value *Elt = (*Val)->DoPHITranslation(CurBB: UseBB, PredBB: Pred);
1232 V = Builder.CreateInsertValue(Agg: V, Val: Elt, Idxs: Idx);
1233 }
1234
1235 It.second = V;
1236 }
1237
1238 // All good! Now we just need to thread the source aggregates here.
1239 // Note that we have to insert the new PHI here, ourselves, because we can't
1240 // rely on InstCombinerImpl::run() inserting it into the right basic block.
1241 // Note that the same block can be a predecessor more than once,
1242 // and we need to preserve that invariant for the PHI node.
1243 BuilderTy::InsertPointGuard Guard(Builder);
1244 Builder.SetInsertPoint(TheBB: UseBB, IP: UseBB->getFirstNonPHIIt());
1245 auto *PHI =
1246 Builder.CreatePHI(Ty: AggTy, NumReservedValues: Preds.size(), Name: OrigIVI.getName() + ".merged");
1247 for (BasicBlock *Pred : Preds)
1248 PHI->addIncoming(V: SourceAggregates[Pred], BB: Pred);
1249
1250 ++NumAggregateReconstructionsSimplified;
1251 return replaceInstUsesWith(I&: OrigIVI, V: PHI);
1252}
1253
1254/// Try to find redundant insertvalue instructions, like the following ones:
1255/// %0 = insertvalue { i8, i32 } undef, i8 %x, 0
1256/// %1 = insertvalue { i8, i32 } %0, i8 %y, 0
1257/// Here the second instruction inserts values at the same indices, as the
1258/// first one, making the first one redundant.
1259/// It should be transformed to:
1260/// %0 = insertvalue { i8, i32 } undef, i8 %y, 0
1261Instruction *InstCombinerImpl::visitInsertValueInst(InsertValueInst &I) {
1262 if (Value *V = simplifyInsertValueInst(
1263 Agg: I.getAggregateOperand(), Val: I.getInsertedValueOperand(), Idxs: I.getIndices(),
1264 Q: SQ.getWithInstruction(I: &I)))
1265 return replaceInstUsesWith(I, V);
1266
1267 bool IsRedundant = false;
1268 ArrayRef<unsigned int> FirstIndices = I.getIndices();
1269
1270 // If there is a chain of insertvalue instructions (each of them except the
1271 // last one has only one use and it's another insertvalue insn from this
1272 // chain), check if any of the 'children' uses the same indices as the first
1273 // instruction. In this case, the first one is redundant.
1274 Value *V = &I;
1275 unsigned Depth = 0;
1276 while (V->hasOneUse() && Depth < 10) {
1277 User *U = V->user_back();
1278 auto UserInsInst = dyn_cast<InsertValueInst>(Val: U);
1279 if (!UserInsInst || U->getOperand(i: 0) != V)
1280 break;
1281 if (UserInsInst->getIndices() == FirstIndices) {
1282 IsRedundant = true;
1283 break;
1284 }
1285 V = UserInsInst;
1286 Depth++;
1287 }
1288
1289 if (IsRedundant)
1290 return replaceInstUsesWith(I, V: I.getOperand(i_nocapture: 0));
1291
1292 if (Instruction *NewI = foldAggregateConstructionIntoAggregateReuse(OrigIVI&: I))
1293 return NewI;
1294
1295 return nullptr;
1296}
1297
1298static bool isShuffleEquivalentToSelect(ShuffleVectorInst &Shuf) {
1299 // Can not analyze scalable type, the number of elements is not a compile-time
1300 // constant.
1301 if (isa<ScalableVectorType>(Val: Shuf.getOperand(i_nocapture: 0)->getType()))
1302 return false;
1303
1304 int MaskSize = Shuf.getShuffleMask().size();
1305 int VecSize =
1306 cast<FixedVectorType>(Val: Shuf.getOperand(i_nocapture: 0)->getType())->getNumElements();
1307
1308 // A vector select does not change the size of the operands.
1309 if (MaskSize != VecSize)
1310 return false;
1311
1312 // Each mask element must be undefined or choose a vector element from one of
1313 // the source operands without crossing vector lanes.
1314 for (int i = 0; i != MaskSize; ++i) {
1315 int Elt = Shuf.getMaskValue(Elt: i);
1316 if (Elt != -1 && Elt != i && Elt != i + VecSize)
1317 return false;
1318 }
1319
1320 return true;
1321}
1322
1323/// Turn a chain of inserts that splats a value into an insert + shuffle:
1324/// insertelt(insertelt(insertelt(insertelt X, %k, 0), %k, 1), %k, 2) ... ->
1325/// shufflevector(insertelt(X, %k, 0), poison, zero)
1326static Instruction *foldInsSequenceIntoSplat(InsertElementInst &InsElt) {
1327 // We are interested in the last insert in a chain. So if this insert has a
1328 // single user and that user is an insert, bail.
1329 if (InsElt.hasOneUse() && isa<InsertElementInst>(Val: InsElt.user_back()))
1330 return nullptr;
1331
1332 VectorType *VecTy = InsElt.getType();
1333 // Can not handle scalable type, the number of elements is not a compile-time
1334 // constant.
1335 if (isa<ScalableVectorType>(Val: VecTy))
1336 return nullptr;
1337 unsigned NumElements = cast<FixedVectorType>(Val: VecTy)->getNumElements();
1338
1339 // Do not try to do this for a one-element vector, since that's a nop,
1340 // and will cause an inf-loop.
1341 if (NumElements == 1)
1342 return nullptr;
1343
1344 Value *SplatVal = InsElt.getOperand(i_nocapture: 1);
1345 InsertElementInst *CurrIE = &InsElt;
1346 SmallBitVector ElementPresent(NumElements, false);
1347 InsertElementInst *FirstIE = nullptr;
1348
1349 // Walk the chain backwards, keeping track of which indices we inserted into,
1350 // until we hit something that isn't an insert of the splatted value.
1351 while (CurrIE) {
1352 auto *Idx = dyn_cast<ConstantInt>(Val: CurrIE->getOperand(i_nocapture: 2));
1353 if (!Idx || CurrIE->getOperand(i_nocapture: 1) != SplatVal)
1354 return nullptr;
1355
1356 auto *NextIE = dyn_cast<InsertElementInst>(Val: CurrIE->getOperand(i_nocapture: 0));
1357 // Check none of the intermediate steps have any additional uses, except
1358 // for the root insertelement instruction, which can be re-used, if it
1359 // inserts at position 0.
1360 if (CurrIE != &InsElt &&
1361 (!CurrIE->hasOneUse() && (NextIE != nullptr || !Idx->isZero())))
1362 return nullptr;
1363
1364 ElementPresent[Idx->getZExtValue()] = true;
1365 FirstIE = CurrIE;
1366 CurrIE = NextIE;
1367 }
1368
1369 // If this is just a single insertelement (not a sequence), we are done.
1370 if (FirstIE == &InsElt)
1371 return nullptr;
1372
1373 // If we are not inserting into a poison vector, make sure we've seen an
1374 // insert into every element.
1375 // TODO: If the base vector is not undef, it might be better to create a splat
1376 // and then a select-shuffle (blend) with the base vector.
1377 if (!match(V: FirstIE->getOperand(i_nocapture: 0), P: m_Poison()))
1378 if (!ElementPresent.all())
1379 return nullptr;
1380
1381 // Create the insert + shuffle.
1382 Type *Int64Ty = Type::getInt64Ty(C&: InsElt.getContext());
1383 PoisonValue *PoisonVec = PoisonValue::get(T: VecTy);
1384 Constant *Zero = ConstantInt::get(Ty: Int64Ty, V: 0);
1385 if (!cast<ConstantInt>(Val: FirstIE->getOperand(i_nocapture: 2))->isZero())
1386 FirstIE = InsertElementInst::Create(Vec: PoisonVec, NewElt: SplatVal, Idx: Zero, NameStr: "",
1387 InsertBefore: InsElt.getIterator());
1388
1389 // Splat from element 0, but replace absent elements with poison in the mask.
1390 SmallVector<int, 16> Mask(NumElements, 0);
1391 for (unsigned i = 0; i != NumElements; ++i)
1392 if (!ElementPresent[i])
1393 Mask[i] = -1;
1394
1395 return new ShuffleVectorInst(FirstIE, Mask);
1396}
1397
1398/// Try to fold an insert element into an existing splat shuffle by changing
1399/// the shuffle's mask to include the index of this insert element.
1400static Instruction *foldInsEltIntoSplat(InsertElementInst &InsElt) {
1401 // Check if the vector operand of this insert is a canonical splat shuffle.
1402 auto *Shuf = dyn_cast<ShuffleVectorInst>(Val: InsElt.getOperand(i_nocapture: 0));
1403 if (!Shuf || !Shuf->isZeroEltSplat())
1404 return nullptr;
1405
1406 // Bail out early if shuffle is scalable type. The number of elements in
1407 // shuffle mask is unknown at compile-time.
1408 if (isa<ScalableVectorType>(Val: Shuf->getType()))
1409 return nullptr;
1410
1411 // Check for a constant insertion index.
1412 uint64_t IdxC;
1413 if (!match(V: InsElt.getOperand(i_nocapture: 2), P: m_ConstantInt(V&: IdxC)))
1414 return nullptr;
1415
1416 // Check if the splat shuffle's input is the same as this insert's scalar op.
1417 Value *X = InsElt.getOperand(i_nocapture: 1);
1418 Value *Op0 = Shuf->getOperand(i_nocapture: 0);
1419 if (!match(V: Op0, P: m_InsertElt(Val: m_Undef(), Elt: m_Specific(V: X), Idx: m_ZeroInt())))
1420 return nullptr;
1421
1422 // Replace the shuffle mask element at the index of this insert with a zero.
1423 // For example:
1424 // inselt (shuf (inselt undef, X, 0), _, <0,undef,0,undef>), X, 1
1425 // --> shuf (inselt undef, X, 0), poison, <0,0,0,undef>
1426 unsigned NumMaskElts =
1427 cast<FixedVectorType>(Val: Shuf->getType())->getNumElements();
1428 SmallVector<int, 16> NewMask(NumMaskElts);
1429 for (unsigned i = 0; i != NumMaskElts; ++i)
1430 NewMask[i] = i == IdxC ? 0 : Shuf->getMaskValue(Elt: i);
1431
1432 return new ShuffleVectorInst(Op0, NewMask);
1433}
1434
1435/// Try to fold an extract+insert element into an existing identity shuffle by
1436/// changing the shuffle's mask to include the index of this insert element.
1437static Instruction *foldInsEltIntoIdentityShuffle(InsertElementInst &InsElt) {
1438 // Check if the vector operand of this insert is an identity shuffle.
1439 auto *Shuf = dyn_cast<ShuffleVectorInst>(Val: InsElt.getOperand(i_nocapture: 0));
1440 if (!Shuf || !match(V: Shuf->getOperand(i_nocapture: 1), P: m_Poison()) ||
1441 !(Shuf->isIdentityWithExtract() || Shuf->isIdentityWithPadding()))
1442 return nullptr;
1443
1444 // Bail out early if shuffle is scalable type. The number of elements in
1445 // shuffle mask is unknown at compile-time.
1446 if (isa<ScalableVectorType>(Val: Shuf->getType()))
1447 return nullptr;
1448
1449 // Check for a constant insertion index.
1450 uint64_t IdxC;
1451 if (!match(V: InsElt.getOperand(i_nocapture: 2), P: m_ConstantInt(V&: IdxC)))
1452 return nullptr;
1453
1454 // Check if this insert's scalar op is extracted from the identity shuffle's
1455 // input vector.
1456 Value *Scalar = InsElt.getOperand(i_nocapture: 1);
1457 Value *X = Shuf->getOperand(i_nocapture: 0);
1458 if (!match(V: Scalar, P: m_ExtractElt(Val: m_Specific(V: X), Idx: m_SpecificInt(V: IdxC))))
1459 return nullptr;
1460
1461 // Replace the shuffle mask element at the index of this extract+insert with
1462 // that same index value.
1463 // For example:
1464 // inselt (shuf X, IdMask), (extelt X, IdxC), IdxC --> shuf X, IdMask'
1465 unsigned NumMaskElts =
1466 cast<FixedVectorType>(Val: Shuf->getType())->getNumElements();
1467 SmallVector<int, 16> NewMask(NumMaskElts);
1468 ArrayRef<int> OldMask = Shuf->getShuffleMask();
1469 for (unsigned i = 0; i != NumMaskElts; ++i) {
1470 if (i != IdxC) {
1471 // All mask elements besides the inserted element remain the same.
1472 NewMask[i] = OldMask[i];
1473 } else if (OldMask[i] == (int)IdxC) {
1474 // If the mask element was already set, there's nothing to do
1475 // (demanded elements analysis may unset it later).
1476 return nullptr;
1477 } else {
1478 assert(OldMask[i] == PoisonMaskElem &&
1479 "Unexpected shuffle mask element for identity shuffle");
1480 NewMask[i] = IdxC;
1481 }
1482 }
1483
1484 return new ShuffleVectorInst(X, Shuf->getOperand(i_nocapture: 1), NewMask);
1485}
1486
1487/// If we have an insertelement instruction feeding into another insertelement
1488/// and the 2nd is inserting a constant into the vector, canonicalize that
1489/// constant insertion before the insertion of a variable:
1490///
1491/// insertelement (insertelement X, Y, IdxC1), ScalarC, IdxC2 -->
1492/// insertelement (insertelement X, ScalarC, IdxC2), Y, IdxC1
1493///
1494/// This has the potential of eliminating the 2nd insertelement instruction
1495/// via constant folding of the scalar constant into a vector constant.
1496static Instruction *hoistInsEltConst(InsertElementInst &InsElt2,
1497 InstCombiner::BuilderTy &Builder) {
1498 auto *InsElt1 = dyn_cast<InsertElementInst>(Val: InsElt2.getOperand(i_nocapture: 0));
1499 if (!InsElt1 || !InsElt1->hasOneUse())
1500 return nullptr;
1501
1502 Value *X, *Y;
1503 Constant *ScalarC;
1504 ConstantInt *IdxC1, *IdxC2;
1505 if (match(V: InsElt1->getOperand(i_nocapture: 0), P: m_Value(V&: X)) &&
1506 match(V: InsElt1->getOperand(i_nocapture: 1), P: m_Value(V&: Y)) && !isa<Constant>(Val: Y) &&
1507 match(V: InsElt1->getOperand(i_nocapture: 2), P: m_ConstantInt(CI&: IdxC1)) &&
1508 match(V: InsElt2.getOperand(i_nocapture: 1), P: m_Constant(C&: ScalarC)) &&
1509 match(V: InsElt2.getOperand(i_nocapture: 2), P: m_ConstantInt(CI&: IdxC2)) && IdxC1 != IdxC2) {
1510 Value *NewInsElt1 = Builder.CreateInsertElement(Vec: X, NewElt: ScalarC, Idx: IdxC2);
1511 return InsertElementInst::Create(Vec: NewInsElt1, NewElt: Y, Idx: IdxC1);
1512 }
1513
1514 return nullptr;
1515}
1516
1517/// insertelt (shufflevector X, CVec, Mask|insertelt X, C1, CIndex1), C, CIndex
1518/// --> shufflevector X, CVec', Mask'
1519static Instruction *foldConstantInsEltIntoShuffle(InsertElementInst &InsElt) {
1520 auto *Inst = dyn_cast<Instruction>(Val: InsElt.getOperand(i_nocapture: 0));
1521 // Bail out if the parent has more than one use. In that case, we'd be
1522 // replacing the insertelt with a shuffle, and that's not a clear win.
1523 if (!Inst || !Inst->hasOneUse())
1524 return nullptr;
1525 if (auto *Shuf = dyn_cast<ShuffleVectorInst>(Val: InsElt.getOperand(i_nocapture: 0))) {
1526 // The shuffle must have a constant vector operand. The insertelt must have
1527 // a constant scalar being inserted at a constant position in the vector.
1528 Constant *ShufConstVec, *InsEltScalar;
1529 uint64_t InsEltIndex;
1530 if (!match(V: Shuf->getOperand(i_nocapture: 1), P: m_Constant(C&: ShufConstVec)) ||
1531 !match(V: InsElt.getOperand(i_nocapture: 1), P: m_Constant(C&: InsEltScalar)) ||
1532 !match(V: InsElt.getOperand(i_nocapture: 2), P: m_ConstantInt(V&: InsEltIndex)))
1533 return nullptr;
1534
1535 // Adding an element to an arbitrary shuffle could be expensive, but a
1536 // shuffle that selects elements from vectors without crossing lanes is
1537 // assumed cheap.
1538 // If we're just adding a constant into that shuffle, it will still be
1539 // cheap.
1540 if (!isShuffleEquivalentToSelect(Shuf&: *Shuf))
1541 return nullptr;
1542
1543 // From the above 'select' check, we know that the mask has the same number
1544 // of elements as the vector input operands. We also know that each constant
1545 // input element is used in its lane and can not be used more than once by
1546 // the shuffle. Therefore, replace the constant in the shuffle's constant
1547 // vector with the insertelt constant. Replace the constant in the shuffle's
1548 // mask vector with the insertelt index plus the length of the vector
1549 // (because the constant vector operand of a shuffle is always the 2nd
1550 // operand).
1551 ArrayRef<int> Mask = Shuf->getShuffleMask();
1552 unsigned NumElts = Mask.size();
1553 SmallVector<Constant *, 16> NewShufElts(NumElts);
1554 SmallVector<int, 16> NewMaskElts(NumElts);
1555 for (unsigned I = 0; I != NumElts; ++I) {
1556 if (I == InsEltIndex) {
1557 NewShufElts[I] = InsEltScalar;
1558 NewMaskElts[I] = InsEltIndex + NumElts;
1559 } else {
1560 // Copy over the existing values.
1561 NewShufElts[I] = ShufConstVec->getAggregateElement(Elt: I);
1562 NewMaskElts[I] = Mask[I];
1563 }
1564
1565 // Bail if we failed to find an element.
1566 if (!NewShufElts[I])
1567 return nullptr;
1568 }
1569
1570 // Create new operands for a shuffle that includes the constant of the
1571 // original insertelt. The old shuffle will be dead now.
1572 return new ShuffleVectorInst(Shuf->getOperand(i_nocapture: 0),
1573 ConstantVector::get(V: NewShufElts), NewMaskElts);
1574 } else if (auto *IEI = dyn_cast<InsertElementInst>(Val: Inst)) {
1575 // Transform sequences of insertelements ops with constant data/indexes into
1576 // a single shuffle op.
1577 // Can not handle scalable type, the number of elements needed to create
1578 // shuffle mask is not a compile-time constant.
1579 if (isa<ScalableVectorType>(Val: InsElt.getType()))
1580 return nullptr;
1581 unsigned NumElts =
1582 cast<FixedVectorType>(Val: InsElt.getType())->getNumElements();
1583
1584 uint64_t InsertIdx[2];
1585 Constant *Val[2];
1586 if (!match(V: InsElt.getOperand(i_nocapture: 2), P: m_ConstantInt(V&: InsertIdx[0])) ||
1587 !match(V: InsElt.getOperand(i_nocapture: 1), P: m_Constant(C&: Val[0])) ||
1588 !match(V: IEI->getOperand(i_nocapture: 2), P: m_ConstantInt(V&: InsertIdx[1])) ||
1589 !match(V: IEI->getOperand(i_nocapture: 1), P: m_Constant(C&: Val[1])))
1590 return nullptr;
1591 SmallVector<Constant *, 16> Values(NumElts);
1592 SmallVector<int, 16> Mask(NumElts);
1593 auto ValI = std::begin(arr&: Val);
1594 // Generate new constant vector and mask.
1595 // We have 2 values/masks from the insertelements instructions. Insert them
1596 // into new value/mask vectors.
1597 for (uint64_t I : InsertIdx) {
1598 if (!Values[I]) {
1599 Values[I] = *ValI;
1600 Mask[I] = NumElts + I;
1601 }
1602 ++ValI;
1603 }
1604 // Remaining values are filled with 'poison' values.
1605 for (unsigned I = 0; I < NumElts; ++I) {
1606 if (!Values[I]) {
1607 Values[I] = PoisonValue::get(T: InsElt.getType()->getElementType());
1608 Mask[I] = I;
1609 }
1610 }
1611 // Create new operands for a shuffle that includes the constant of the
1612 // original insertelt.
1613 return new ShuffleVectorInst(IEI->getOperand(i_nocapture: 0),
1614 ConstantVector::get(V: Values), Mask);
1615 }
1616 return nullptr;
1617}
1618
1619/// If both the base vector and the inserted element are extended from the same
1620/// type, do the insert element in the narrow source type followed by extend.
1621/// TODO: This can be extended to include other cast opcodes, but particularly
1622/// if we create a wider insertelement, make sure codegen is not harmed.
1623static Instruction *narrowInsElt(InsertElementInst &InsElt,
1624 InstCombiner::BuilderTy &Builder) {
1625 // We are creating a vector extend. If the original vector extend has another
1626 // use, that would mean we end up with 2 vector extends, so avoid that.
1627 // TODO: We could ease the use-clause to "if at least one op has one use"
1628 // (assuming that the source types match - see next TODO comment).
1629 Value *Vec = InsElt.getOperand(i_nocapture: 0);
1630 if (!Vec->hasOneUse())
1631 return nullptr;
1632
1633 Value *Scalar = InsElt.getOperand(i_nocapture: 1);
1634 Value *X, *Y;
1635 CastInst::CastOps CastOpcode;
1636 if (match(V: Vec, P: m_FPExt(Op: m_Value(V&: X))) && match(V: Scalar, P: m_FPExt(Op: m_Value(V&: Y))))
1637 CastOpcode = Instruction::FPExt;
1638 else if (match(V: Vec, P: m_SExt(Op: m_Value(V&: X))) && match(V: Scalar, P: m_SExt(Op: m_Value(V&: Y))))
1639 CastOpcode = Instruction::SExt;
1640 else if (match(V: Vec, P: m_ZExt(Op: m_Value(V&: X))) && match(V: Scalar, P: m_ZExt(Op: m_Value(V&: Y))))
1641 CastOpcode = Instruction::ZExt;
1642 else
1643 return nullptr;
1644
1645 // TODO: We can allow mismatched types by creating an intermediate cast.
1646 if (X->getType()->getScalarType() != Y->getType())
1647 return nullptr;
1648
1649 // inselt (ext X), (ext Y), Index --> ext (inselt X, Y, Index)
1650 Value *NewInsElt = Builder.CreateInsertElement(Vec: X, NewElt: Y, Idx: InsElt.getOperand(i_nocapture: 2));
1651 return CastInst::Create(CastOpcode, S: NewInsElt, Ty: InsElt.getType());
1652}
1653
1654/// If we are inserting 2 halves of a value into adjacent elements of a vector,
1655/// try to convert to a single insert with appropriate bitcasts.
1656static Instruction *foldTruncInsEltPair(InsertElementInst &InsElt,
1657 bool IsBigEndian,
1658 InstCombiner::BuilderTy &Builder) {
1659 Value *VecOp = InsElt.getOperand(i_nocapture: 0);
1660 Value *ScalarOp = InsElt.getOperand(i_nocapture: 1);
1661 Value *IndexOp = InsElt.getOperand(i_nocapture: 2);
1662
1663 // Pattern depends on endian because we expect lower index is inserted first.
1664 // Big endian:
1665 // inselt (inselt BaseVec, (trunc (lshr X, BW/2), Index0), (trunc X), Index1
1666 // Little endian:
1667 // inselt (inselt BaseVec, (trunc X), Index0), (trunc (lshr X, BW/2)), Index1
1668 // Note: It is not safe to do this transform with an arbitrary base vector
1669 // because the bitcast of that vector to fewer/larger elements could
1670 // allow poison to spill into an element that was not poison before.
1671 // TODO: Detect smaller fractions of the scalar.
1672 // TODO: One-use checks are conservative.
1673 auto *VTy = dyn_cast<FixedVectorType>(Val: InsElt.getType());
1674 Value *Scalar0, *BaseVec;
1675 uint64_t Index0, Index1;
1676 if (!VTy || (VTy->getNumElements() & 1) ||
1677 !match(V: IndexOp, P: m_ConstantInt(V&: Index1)) ||
1678 !match(V: VecOp, P: m_InsertElt(Val: m_Value(V&: BaseVec), Elt: m_Value(V&: Scalar0),
1679 Idx: m_ConstantInt(V&: Index0))) ||
1680 !match(V: BaseVec, P: m_Undef()))
1681 return nullptr;
1682
1683 // The first insert must be to the index one less than this one, and
1684 // the first insert must be to an even index.
1685 if (Index0 + 1 != Index1 || Index0 & 1)
1686 return nullptr;
1687
1688 // For big endian, the high half of the value should be inserted first.
1689 // For little endian, the low half of the value should be inserted first.
1690 Value *X;
1691 uint64_t ShAmt;
1692 if (IsBigEndian) {
1693 if (!match(V: ScalarOp, P: m_Trunc(Op: m_Value(V&: X))) ||
1694 !match(V: Scalar0, P: m_Trunc(Op: m_LShr(L: m_Specific(V: X), R: m_ConstantInt(V&: ShAmt)))))
1695 return nullptr;
1696 } else {
1697 if (!match(V: Scalar0, P: m_Trunc(Op: m_Value(V&: X))) ||
1698 !match(V: ScalarOp, P: m_Trunc(Op: m_LShr(L: m_Specific(V: X), R: m_ConstantInt(V&: ShAmt)))))
1699 return nullptr;
1700 }
1701
1702 Type *SrcTy = X->getType();
1703 unsigned ScalarWidth = SrcTy->getScalarSizeInBits();
1704 unsigned VecEltWidth = VTy->getScalarSizeInBits();
1705 if (ScalarWidth != VecEltWidth * 2 || ShAmt != VecEltWidth)
1706 return nullptr;
1707
1708 // Bitcast the base vector to a vector type with the source element type.
1709 Type *CastTy = FixedVectorType::get(ElementType: SrcTy, NumElts: VTy->getNumElements() / 2);
1710 Value *CastBaseVec = Builder.CreateBitCast(V: BaseVec, DestTy: CastTy);
1711
1712 // Scale the insert index for a vector with half as many elements.
1713 // bitcast (inselt (bitcast BaseVec), X, NewIndex)
1714 uint64_t NewIndex = IsBigEndian ? Index1 / 2 : Index0 / 2;
1715 Value *NewInsert = Builder.CreateInsertElement(Vec: CastBaseVec, NewElt: X, Idx: NewIndex);
1716 return new BitCastInst(NewInsert, VTy);
1717}
1718
1719Instruction *InstCombinerImpl::visitInsertElementInst(InsertElementInst &IE) {
1720 Value *VecOp = IE.getOperand(i_nocapture: 0);
1721 Value *ScalarOp = IE.getOperand(i_nocapture: 1);
1722 Value *IdxOp = IE.getOperand(i_nocapture: 2);
1723
1724 if (auto *V = simplifyInsertElementInst(
1725 Vec: VecOp, Elt: ScalarOp, Idx: IdxOp, Q: SQ.getWithInstruction(I: &IE)))
1726 return replaceInstUsesWith(I&: IE, V);
1727
1728 // Canonicalize type of constant indices to i64 to simplify CSE
1729 if (auto *IndexC = dyn_cast<ConstantInt>(Val: IdxOp)) {
1730 if (auto *NewIdx = getPreferredVectorIndex(IndexC))
1731 return replaceOperand(I&: IE, OpNum: 2, V: NewIdx);
1732
1733 Value *BaseVec, *OtherScalar;
1734 uint64_t OtherIndexVal;
1735 if (match(V: VecOp, P: m_OneUse(SubPattern: m_InsertElt(Val: m_Value(V&: BaseVec),
1736 Elt: m_Value(V&: OtherScalar),
1737 Idx: m_ConstantInt(V&: OtherIndexVal)))) &&
1738 !isa<Constant>(Val: OtherScalar) && OtherIndexVal > IndexC->getZExtValue()) {
1739 Value *NewIns = Builder.CreateInsertElement(Vec: BaseVec, NewElt: ScalarOp, Idx: IdxOp);
1740 return InsertElementInst::Create(Vec: NewIns, NewElt: OtherScalar,
1741 Idx: Builder.getInt64(C: OtherIndexVal));
1742 }
1743 }
1744
1745 // If the scalar is bitcast and inserted into undef, do the insert in the
1746 // source type followed by bitcast.
1747 // TODO: Generalize for insert into any constant, not just undef?
1748 Value *ScalarSrc;
1749 if (match(V: VecOp, P: m_Undef()) &&
1750 match(V: ScalarOp, P: m_OneUse(SubPattern: m_BitCast(Op: m_Value(V&: ScalarSrc)))) &&
1751 (ScalarSrc->getType()->isIntegerTy() ||
1752 ScalarSrc->getType()->isFloatingPointTy())) {
1753 // inselt undef, (bitcast ScalarSrc), IdxOp -->
1754 // bitcast (inselt undef, ScalarSrc, IdxOp)
1755 Type *ScalarTy = ScalarSrc->getType();
1756 Type *VecTy = VectorType::get(ElementType: ScalarTy, EC: IE.getType()->getElementCount());
1757 Constant *NewUndef = isa<PoisonValue>(Val: VecOp) ? PoisonValue::get(T: VecTy)
1758 : UndefValue::get(T: VecTy);
1759 Value *NewInsElt = Builder.CreateInsertElement(Vec: NewUndef, NewElt: ScalarSrc, Idx: IdxOp);
1760 return new BitCastInst(NewInsElt, IE.getType());
1761 }
1762
1763 // If the vector and scalar are both bitcast from the same element type, do
1764 // the insert in that source type followed by bitcast.
1765 Value *VecSrc;
1766 if (match(V: VecOp, P: m_BitCast(Op: m_Value(V&: VecSrc))) &&
1767 match(V: ScalarOp, P: m_BitCast(Op: m_Value(V&: ScalarSrc))) &&
1768 (VecOp->hasOneUse() || ScalarOp->hasOneUse()) &&
1769 VecSrc->getType()->isVectorTy() && !ScalarSrc->getType()->isVectorTy() &&
1770 cast<VectorType>(Val: VecSrc->getType())->getElementType() ==
1771 ScalarSrc->getType()) {
1772 // inselt (bitcast VecSrc), (bitcast ScalarSrc), IdxOp -->
1773 // bitcast (inselt VecSrc, ScalarSrc, IdxOp)
1774 Value *NewInsElt = Builder.CreateInsertElement(Vec: VecSrc, NewElt: ScalarSrc, Idx: IdxOp);
1775 return new BitCastInst(NewInsElt, IE.getType());
1776 }
1777
1778 // If the inserted element was extracted from some other fixed-length vector
1779 // and both indexes are valid constants, try to turn this into a shuffle.
1780 // Can not handle scalable vector type, the number of elements needed to
1781 // create shuffle mask is not a compile-time constant.
1782 uint64_t InsertedIdx, ExtractedIdx;
1783 Value *ExtVecOp;
1784 if (isa<FixedVectorType>(Val: IE.getType()) &&
1785 match(V: IdxOp, P: m_ConstantInt(V&: InsertedIdx)) &&
1786 match(V: ScalarOp,
1787 P: m_ExtractElt(Val: m_Value(V&: ExtVecOp), Idx: m_ConstantInt(V&: ExtractedIdx))) &&
1788 isa<FixedVectorType>(Val: ExtVecOp->getType()) &&
1789 ExtractedIdx <
1790 cast<FixedVectorType>(Val: ExtVecOp->getType())->getNumElements()) {
1791 // TODO: Looking at the user(s) to determine if this insert is a
1792 // fold-to-shuffle opportunity does not match the usual instcombine
1793 // constraints. We should decide if the transform is worthy based only
1794 // on this instruction and its operands, but that may not work currently.
1795 //
1796 // Here, we are trying to avoid creating shuffles before reaching
1797 // the end of a chain of extract-insert pairs. This is complicated because
1798 // we do not generally form arbitrary shuffle masks in instcombine
1799 // (because those may codegen poorly), but collectShuffleElements() does
1800 // exactly that.
1801 //
1802 // The rules for determining what is an acceptable target-independent
1803 // shuffle mask are fuzzy because they evolve based on the backend's
1804 // capabilities and real-world impact.
1805 auto isShuffleRootCandidate = [](InsertElementInst &Insert) {
1806 if (!Insert.hasOneUse())
1807 return true;
1808 auto *InsertUser = dyn_cast<InsertElementInst>(Val: Insert.user_back());
1809 if (!InsertUser)
1810 return true;
1811 return false;
1812 };
1813
1814 // Try to form a shuffle from a chain of extract-insert ops.
1815 if (isShuffleRootCandidate(IE)) {
1816 bool Rerun = true;
1817 while (Rerun) {
1818 Rerun = false;
1819
1820 SmallVector<int, 16> Mask;
1821 ShuffleOps LR =
1822 collectShuffleElements(V: &IE, Mask, PermittedRHS: nullptr, IC&: *this, Rerun);
1823
1824 // The proposed shuffle may be trivial, in which case we shouldn't
1825 // perform the combine.
1826 if (LR.first != &IE && LR.second != &IE) {
1827 // We now have a shuffle of LHS, RHS, Mask.
1828 if (LR.second == nullptr)
1829 LR.second = PoisonValue::get(T: LR.first->getType());
1830 return new ShuffleVectorInst(LR.first, LR.second, Mask);
1831 }
1832 }
1833 }
1834 }
1835
1836 if (auto VecTy = dyn_cast<FixedVectorType>(Val: VecOp->getType())) {
1837 unsigned VWidth = VecTy->getNumElements();
1838 APInt PoisonElts(VWidth, 0);
1839 APInt AllOnesEltMask(APInt::getAllOnes(numBits: VWidth));
1840 if (Value *V = SimplifyDemandedVectorElts(V: &IE, DemandedElts: AllOnesEltMask,
1841 PoisonElts)) {
1842 if (V != &IE)
1843 return replaceInstUsesWith(I&: IE, V);
1844 return &IE;
1845 }
1846 }
1847
1848 if (Instruction *Shuf = foldConstantInsEltIntoShuffle(InsElt&: IE))
1849 return Shuf;
1850
1851 if (Instruction *NewInsElt = hoistInsEltConst(InsElt2&: IE, Builder))
1852 return NewInsElt;
1853
1854 if (Instruction *Broadcast = foldInsSequenceIntoSplat(InsElt&: IE))
1855 return Broadcast;
1856
1857 if (Instruction *Splat = foldInsEltIntoSplat(InsElt&: IE))
1858 return Splat;
1859
1860 if (Instruction *IdentityShuf = foldInsEltIntoIdentityShuffle(InsElt&: IE))
1861 return IdentityShuf;
1862
1863 if (Instruction *Ext = narrowInsElt(InsElt&: IE, Builder))
1864 return Ext;
1865
1866 if (Instruction *Ext = foldTruncInsEltPair(InsElt&: IE, IsBigEndian: DL.isBigEndian(), Builder))
1867 return Ext;
1868
1869 return nullptr;
1870}
1871
1872/// Return true if we can evaluate the specified expression tree if the vector
1873/// elements were shuffled in a different order.
1874static bool canEvaluateShuffled(Value *V, ArrayRef<int> Mask,
1875 unsigned Depth = 5) {
1876 // We can always reorder the elements of a constant.
1877 if (isa<Constant>(Val: V))
1878 return true;
1879
1880 // We won't reorder vector arguments. No IPO here.
1881 Instruction *I = dyn_cast<Instruction>(Val: V);
1882 if (!I) return false;
1883
1884 // Two users may expect different orders of the elements. Don't try it.
1885 if (!I->hasOneUse())
1886 return false;
1887
1888 if (Depth == 0) return false;
1889
1890 switch (I->getOpcode()) {
1891 case Instruction::UDiv:
1892 case Instruction::SDiv:
1893 case Instruction::URem:
1894 case Instruction::SRem:
1895 // Propagating an undefined shuffle mask element to integer div/rem is not
1896 // allowed because those opcodes can create immediate undefined behavior
1897 // from an undefined element in an operand.
1898 if (llvm::is_contained(Range&: Mask, Element: -1))
1899 return false;
1900 [[fallthrough]];
1901 case Instruction::Add:
1902 case Instruction::FAdd:
1903 case Instruction::Sub:
1904 case Instruction::FSub:
1905 case Instruction::Mul:
1906 case Instruction::FMul:
1907 case Instruction::FDiv:
1908 case Instruction::FRem:
1909 case Instruction::Shl:
1910 case Instruction::LShr:
1911 case Instruction::AShr:
1912 case Instruction::And:
1913 case Instruction::Or:
1914 case Instruction::Xor:
1915 case Instruction::ICmp:
1916 case Instruction::FCmp:
1917 case Instruction::Trunc:
1918 case Instruction::ZExt:
1919 case Instruction::SExt:
1920 case Instruction::FPToUI:
1921 case Instruction::FPToSI:
1922 case Instruction::UIToFP:
1923 case Instruction::SIToFP:
1924 case Instruction::FPTrunc:
1925 case Instruction::FPExt:
1926 case Instruction::GetElementPtr: {
1927 // Bail out if we would create longer vector ops. We could allow creating
1928 // longer vector ops, but that may result in more expensive codegen.
1929 Type *ITy = I->getType();
1930 if (ITy->isVectorTy() &&
1931 Mask.size() > cast<FixedVectorType>(Val: ITy)->getNumElements())
1932 return false;
1933 for (Value *Operand : I->operands()) {
1934 if (!canEvaluateShuffled(V: Operand, Mask, Depth: Depth - 1))
1935 return false;
1936 }
1937 return true;
1938 }
1939 case Instruction::InsertElement: {
1940 ConstantInt *CI = dyn_cast<ConstantInt>(Val: I->getOperand(i: 2));
1941 if (!CI) return false;
1942 int ElementNumber = CI->getLimitedValue();
1943
1944 // Verify that 'CI' does not occur twice in Mask. A single 'insertelement'
1945 // can't put an element into multiple indices.
1946 bool SeenOnce = false;
1947 for (int I : Mask) {
1948 if (I == ElementNumber) {
1949 if (SeenOnce)
1950 return false;
1951 SeenOnce = true;
1952 }
1953 }
1954 return canEvaluateShuffled(V: I->getOperand(i: 0), Mask, Depth: Depth - 1);
1955 }
1956 }
1957 return false;
1958}
1959
1960/// Rebuild a new instruction just like 'I' but with the new operands given.
1961/// In the event of type mismatch, the type of the operands is correct.
1962static Value *buildNew(Instruction *I, ArrayRef<Value*> NewOps,
1963 IRBuilderBase &Builder) {
1964 Builder.SetInsertPoint(I);
1965 switch (I->getOpcode()) {
1966 case Instruction::Add:
1967 case Instruction::FAdd:
1968 case Instruction::Sub:
1969 case Instruction::FSub:
1970 case Instruction::Mul:
1971 case Instruction::FMul:
1972 case Instruction::UDiv:
1973 case Instruction::SDiv:
1974 case Instruction::FDiv:
1975 case Instruction::URem:
1976 case Instruction::SRem:
1977 case Instruction::FRem:
1978 case Instruction::Shl:
1979 case Instruction::LShr:
1980 case Instruction::AShr:
1981 case Instruction::And:
1982 case Instruction::Or:
1983 case Instruction::Xor: {
1984 BinaryOperator *BO = cast<BinaryOperator>(Val: I);
1985 assert(NewOps.size() == 2 && "binary operator with #ops != 2");
1986 Value *New = Builder.CreateBinOp(Opc: cast<BinaryOperator>(Val: I)->getOpcode(),
1987 LHS: NewOps[0], RHS: NewOps[1]);
1988 if (auto *NewI = dyn_cast<Instruction>(Val: New)) {
1989 if (isa<OverflowingBinaryOperator>(Val: BO)) {
1990 NewI->setHasNoUnsignedWrap(BO->hasNoUnsignedWrap());
1991 NewI->setHasNoSignedWrap(BO->hasNoSignedWrap());
1992 }
1993 if (isa<PossiblyExactOperator>(Val: BO)) {
1994 NewI->setIsExact(BO->isExact());
1995 }
1996 if (isa<FPMathOperator>(Val: BO))
1997 NewI->copyFastMathFlags(I);
1998 }
1999 return New;
2000 }
2001 case Instruction::ICmp:
2002 assert(NewOps.size() == 2 && "icmp with #ops != 2");
2003 return Builder.CreateICmp(P: cast<ICmpInst>(Val: I)->getPredicate(), LHS: NewOps[0],
2004 RHS: NewOps[1]);
2005 case Instruction::FCmp:
2006 assert(NewOps.size() == 2 && "fcmp with #ops != 2");
2007 return Builder.CreateFCmp(P: cast<FCmpInst>(Val: I)->getPredicate(), LHS: NewOps[0],
2008 RHS: NewOps[1]);
2009 case Instruction::Trunc:
2010 case Instruction::ZExt:
2011 case Instruction::SExt:
2012 case Instruction::FPToUI:
2013 case Instruction::FPToSI:
2014 case Instruction::UIToFP:
2015 case Instruction::SIToFP:
2016 case Instruction::FPTrunc:
2017 case Instruction::FPExt: {
2018 // It's possible that the mask has a different number of elements from
2019 // the original cast. We recompute the destination type to match the mask.
2020 Type *DestTy = VectorType::get(
2021 ElementType: I->getType()->getScalarType(),
2022 EC: cast<VectorType>(Val: NewOps[0]->getType())->getElementCount());
2023 assert(NewOps.size() == 1 && "cast with #ops != 1");
2024 return Builder.CreateCast(Op: cast<CastInst>(Val: I)->getOpcode(), V: NewOps[0],
2025 DestTy);
2026 }
2027 case Instruction::GetElementPtr: {
2028 Value *Ptr = NewOps[0];
2029 ArrayRef<Value*> Idx = NewOps.slice(N: 1);
2030 return Builder.CreateGEP(Ty: cast<GEPOperator>(Val: I)->getSourceElementType(),
2031 Ptr, IdxList: Idx, Name: "",
2032 NW: cast<GEPOperator>(Val: I)->getNoWrapFlags());
2033 }
2034 }
2035 llvm_unreachable("failed to rebuild vector instructions");
2036}
2037
2038static Value *evaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask,
2039 IRBuilderBase &Builder) {
2040 // Mask.size() does not need to be equal to the number of vector elements.
2041
2042 assert(V->getType()->isVectorTy() && "can't reorder non-vector elements");
2043 Type *EltTy = V->getType()->getScalarType();
2044
2045 if (isa<PoisonValue>(Val: V))
2046 return PoisonValue::get(T: FixedVectorType::get(ElementType: EltTy, NumElts: Mask.size()));
2047
2048 if (match(V, P: m_Undef()))
2049 return UndefValue::get(T: FixedVectorType::get(ElementType: EltTy, NumElts: Mask.size()));
2050
2051 if (isa<ConstantAggregateZero>(Val: V))
2052 return ConstantAggregateZero::get(Ty: FixedVectorType::get(ElementType: EltTy, NumElts: Mask.size()));
2053
2054 if (Constant *C = dyn_cast<Constant>(Val: V))
2055 return ConstantExpr::getShuffleVector(V1: C, V2: PoisonValue::get(T: C->getType()),
2056 Mask);
2057
2058 Instruction *I = cast<Instruction>(Val: V);
2059 switch (I->getOpcode()) {
2060 case Instruction::Add:
2061 case Instruction::FAdd:
2062 case Instruction::Sub:
2063 case Instruction::FSub:
2064 case Instruction::Mul:
2065 case Instruction::FMul:
2066 case Instruction::UDiv:
2067 case Instruction::SDiv:
2068 case Instruction::FDiv:
2069 case Instruction::URem:
2070 case Instruction::SRem:
2071 case Instruction::FRem:
2072 case Instruction::Shl:
2073 case Instruction::LShr:
2074 case Instruction::AShr:
2075 case Instruction::And:
2076 case Instruction::Or:
2077 case Instruction::Xor:
2078 case Instruction::ICmp:
2079 case Instruction::FCmp:
2080 case Instruction::Trunc:
2081 case Instruction::ZExt:
2082 case Instruction::SExt:
2083 case Instruction::FPToUI:
2084 case Instruction::FPToSI:
2085 case Instruction::UIToFP:
2086 case Instruction::SIToFP:
2087 case Instruction::FPTrunc:
2088 case Instruction::FPExt:
2089 case Instruction::Select:
2090 case Instruction::GetElementPtr: {
2091 SmallVector<Value*, 8> NewOps;
2092 bool NeedsRebuild =
2093 (Mask.size() !=
2094 cast<FixedVectorType>(Val: I->getType())->getNumElements());
2095 for (int i = 0, e = I->getNumOperands(); i != e; ++i) {
2096 Value *V;
2097 // Recursively call evaluateInDifferentElementOrder on vector arguments
2098 // as well. E.g. GetElementPtr may have scalar operands even if the
2099 // return value is a vector, so we need to examine the operand type.
2100 if (I->getOperand(i)->getType()->isVectorTy())
2101 V = evaluateInDifferentElementOrder(V: I->getOperand(i), Mask, Builder);
2102 else
2103 V = I->getOperand(i);
2104 NewOps.push_back(Elt: V);
2105 NeedsRebuild |= (V != I->getOperand(i));
2106 }
2107 if (NeedsRebuild)
2108 return buildNew(I, NewOps, Builder);
2109 return I;
2110 }
2111 case Instruction::InsertElement: {
2112 int Element = cast<ConstantInt>(Val: I->getOperand(i: 2))->getLimitedValue();
2113
2114 // The insertelement was inserting at Element. Figure out which element
2115 // that becomes after shuffling. The answer is guaranteed to be unique
2116 // by CanEvaluateShuffled.
2117 bool Found = false;
2118 int Index = 0;
2119 for (int e = Mask.size(); Index != e; ++Index) {
2120 if (Mask[Index] == Element) {
2121 Found = true;
2122 break;
2123 }
2124 }
2125
2126 // If element is not in Mask, no need to handle the operand 1 (element to
2127 // be inserted). Just evaluate values in operand 0 according to Mask.
2128 if (!Found)
2129 return evaluateInDifferentElementOrder(V: I->getOperand(i: 0), Mask, Builder);
2130
2131 Value *V = evaluateInDifferentElementOrder(V: I->getOperand(i: 0), Mask,
2132 Builder);
2133 Builder.SetInsertPoint(I);
2134 return Builder.CreateInsertElement(Vec: V, NewElt: I->getOperand(i: 1), Idx: Index);
2135 }
2136 }
2137 llvm_unreachable("failed to reorder elements of vector instruction!");
2138}
2139
2140// Returns true if the shuffle is extracting a contiguous range of values from
2141// LHS, for example:
2142// +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
2143// Input: |AA|BB|CC|DD|EE|FF|GG|HH|II|JJ|KK|LL|MM|NN|OO|PP|
2144// Shuffles to: |EE|FF|GG|HH|
2145// +--+--+--+--+
2146static bool isShuffleExtractingFromLHS(ShuffleVectorInst &SVI,
2147 ArrayRef<int> Mask) {
2148 unsigned LHSElems =
2149 cast<FixedVectorType>(Val: SVI.getOperand(i_nocapture: 0)->getType())->getNumElements();
2150 unsigned MaskElems = Mask.size();
2151 unsigned BegIdx = Mask.front();
2152 unsigned EndIdx = Mask.back();
2153 if (BegIdx > EndIdx || EndIdx >= LHSElems || EndIdx - BegIdx != MaskElems - 1)
2154 return false;
2155 for (unsigned I = 0; I != MaskElems; ++I)
2156 if (static_cast<unsigned>(Mask[I]) != BegIdx + I)
2157 return false;
2158 return true;
2159}
2160
2161/// These are the ingredients in an alternate form binary operator as described
2162/// below.
2163struct BinopElts {
2164 BinaryOperator::BinaryOps Opcode;
2165 Value *Op0;
2166 Value *Op1;
2167 BinopElts(BinaryOperator::BinaryOps Opc = (BinaryOperator::BinaryOps)0,
2168 Value *V0 = nullptr, Value *V1 = nullptr) :
2169 Opcode(Opc), Op0(V0), Op1(V1) {}
2170 operator bool() const { return Opcode != 0; }
2171};
2172
2173/// Binops may be transformed into binops with different opcodes and operands.
2174/// Reverse the usual canonicalization to enable folds with the non-canonical
2175/// form of the binop. If a transform is possible, return the elements of the
2176/// new binop. If not, return invalid elements.
2177static BinopElts getAlternateBinop(BinaryOperator *BO, const DataLayout &DL) {
2178 Value *BO0 = BO->getOperand(i_nocapture: 0), *BO1 = BO->getOperand(i_nocapture: 1);
2179 Type *Ty = BO->getType();
2180 switch (BO->getOpcode()) {
2181 case Instruction::Shl: {
2182 // shl X, C --> mul X, (1 << C)
2183 Constant *C;
2184 if (match(V: BO1, P: m_ImmConstant(C))) {
2185 Constant *ShlOne = ConstantFoldBinaryOpOperands(
2186 Opcode: Instruction::Shl, LHS: ConstantInt::get(Ty, V: 1), RHS: C, DL);
2187 assert(ShlOne && "Constant folding of immediate constants failed");
2188 return {Instruction::Mul, BO0, ShlOne};
2189 }
2190 break;
2191 }
2192 case Instruction::Or: {
2193 // or disjoin X, C --> add X, C
2194 if (cast<PossiblyDisjointInst>(Val: BO)->isDisjoint())
2195 return {Instruction::Add, BO0, BO1};
2196 break;
2197 }
2198 case Instruction::Sub:
2199 // sub 0, X --> mul X, -1
2200 if (match(V: BO0, P: m_ZeroInt()))
2201 return {Instruction::Mul, BO1, ConstantInt::getAllOnesValue(Ty)};
2202 break;
2203 default:
2204 break;
2205 }
2206 return {};
2207}
2208
2209/// A select shuffle of a select shuffle with a shared operand can be reduced
2210/// to a single select shuffle. This is an obvious improvement in IR, and the
2211/// backend is expected to lower select shuffles efficiently.
2212static Instruction *foldSelectShuffleOfSelectShuffle(ShuffleVectorInst &Shuf) {
2213 assert(Shuf.isSelect() && "Must have select-equivalent shuffle");
2214
2215 Value *Op0 = Shuf.getOperand(i_nocapture: 0), *Op1 = Shuf.getOperand(i_nocapture: 1);
2216 SmallVector<int, 16> Mask;
2217 Shuf.getShuffleMask(Result&: Mask);
2218 unsigned NumElts = Mask.size();
2219
2220 // Canonicalize a select shuffle with common operand as Op1.
2221 auto *ShufOp = dyn_cast<ShuffleVectorInst>(Val: Op0);
2222 if (ShufOp && ShufOp->isSelect() &&
2223 (ShufOp->getOperand(i_nocapture: 0) == Op1 || ShufOp->getOperand(i_nocapture: 1) == Op1)) {
2224 std::swap(a&: Op0, b&: Op1);
2225 ShuffleVectorInst::commuteShuffleMask(Mask, InVecNumElts: NumElts);
2226 }
2227
2228 ShufOp = dyn_cast<ShuffleVectorInst>(Val: Op1);
2229 if (!ShufOp || !ShufOp->isSelect() ||
2230 (ShufOp->getOperand(i_nocapture: 0) != Op0 && ShufOp->getOperand(i_nocapture: 1) != Op0))
2231 return nullptr;
2232
2233 Value *X = ShufOp->getOperand(i_nocapture: 0), *Y = ShufOp->getOperand(i_nocapture: 1);
2234 SmallVector<int, 16> Mask1;
2235 ShufOp->getShuffleMask(Result&: Mask1);
2236 assert(Mask1.size() == NumElts && "Vector size changed with select shuffle");
2237
2238 // Canonicalize common operand (Op0) as X (first operand of first shuffle).
2239 if (Y == Op0) {
2240 std::swap(a&: X, b&: Y);
2241 ShuffleVectorInst::commuteShuffleMask(Mask: Mask1, InVecNumElts: NumElts);
2242 }
2243
2244 // If the mask chooses from X (operand 0), it stays the same.
2245 // If the mask chooses from the earlier shuffle, the other mask value is
2246 // transferred to the combined select shuffle:
2247 // shuf X, (shuf X, Y, M1), M --> shuf X, Y, M'
2248 SmallVector<int, 16> NewMask(NumElts);
2249 for (unsigned i = 0; i != NumElts; ++i)
2250 NewMask[i] = Mask[i] < (signed)NumElts ? Mask[i] : Mask1[i];
2251
2252 // A select mask with undef elements might look like an identity mask.
2253 assert((ShuffleVectorInst::isSelectMask(NewMask, NumElts) ||
2254 ShuffleVectorInst::isIdentityMask(NewMask, NumElts)) &&
2255 "Unexpected shuffle mask");
2256 return new ShuffleVectorInst(X, Y, NewMask);
2257}
2258
2259static Instruction *foldSelectShuffleWith1Binop(ShuffleVectorInst &Shuf,
2260 const SimplifyQuery &SQ) {
2261 assert(Shuf.isSelect() && "Must have select-equivalent shuffle");
2262
2263 // Are we shuffling together some value and that same value after it has been
2264 // modified by a binop with a constant?
2265 Value *Op0 = Shuf.getOperand(i_nocapture: 0), *Op1 = Shuf.getOperand(i_nocapture: 1);
2266 Constant *C;
2267 bool Op0IsBinop;
2268 if (match(V: Op0, P: m_BinOp(L: m_Specific(V: Op1), R: m_Constant(C))))
2269 Op0IsBinop = true;
2270 else if (match(V: Op1, P: m_BinOp(L: m_Specific(V: Op0), R: m_Constant(C))))
2271 Op0IsBinop = false;
2272 else
2273 return nullptr;
2274
2275 // The identity constant for a binop leaves a variable operand unchanged. For
2276 // a vector, this is a splat of something like 0, -1, or 1.
2277 // If there's no identity constant for this binop, we're done.
2278 auto *BO = cast<BinaryOperator>(Val: Op0IsBinop ? Op0 : Op1);
2279 BinaryOperator::BinaryOps BOpcode = BO->getOpcode();
2280 Constant *IdC = ConstantExpr::getBinOpIdentity(Opcode: BOpcode, Ty: Shuf.getType(), AllowRHSConstant: true);
2281 if (!IdC)
2282 return nullptr;
2283
2284 Value *X = Op0IsBinop ? Op1 : Op0;
2285
2286 // Prevent folding in the case the non-binop operand might have NaN values.
2287 // If X can have NaN elements then we have that the floating point math
2288 // operation in the transformed code may not preserve the exact NaN
2289 // bit-pattern -- e.g. `fadd sNaN, 0.0 -> qNaN`.
2290 // This makes the transformation incorrect since the original program would
2291 // have preserved the exact NaN bit-pattern.
2292 // Avoid the folding if X can have NaN elements.
2293 bool IsFloatingPointTy =
2294 Shuf.getType()->getElementType()->isFloatingPointTy();
2295 if (IsFloatingPointTy && !isKnownNeverNaN(V: X, SQ))
2296 return nullptr;
2297
2298 // Shuffle identity constants into the lanes that return the original value.
2299 // Example: shuf (mul X, {-1,-2,-3,-4}), X, {0,5,6,3} --> mul X, {-1,1,1,-4}
2300 // Example: shuf X, (add X, {-1,-2,-3,-4}), {0,1,6,7} --> add X, {0,0,-3,-4}
2301 // The existing binop constant vector remains in the same operand position.
2302 ArrayRef<int> Mask = Shuf.getShuffleMask();
2303 Constant *NewC = Op0IsBinop ? ConstantExpr::getShuffleVector(V1: C, V2: IdC, Mask) :
2304 ConstantExpr::getShuffleVector(V1: IdC, V2: C, Mask);
2305
2306 bool MightCreatePoisonOrUB =
2307 is_contained(Range&: Mask, Element: PoisonMaskElem) &&
2308 (Instruction::isIntDivRem(Opcode: BOpcode) || Instruction::isShift(Opcode: BOpcode));
2309 if (MightCreatePoisonOrUB)
2310 NewC = InstCombiner::getSafeVectorConstantForBinop(Opcode: BOpcode, In: NewC, IsRHSConstant: true);
2311
2312 // shuf (bop X, C), X, M --> bop X, C'
2313 // shuf X, (bop X, C), M --> bop X, C'
2314 BinaryOperator *NewBO = BinaryOperator::Create(Op: BOpcode, S1: X, S2: NewC);
2315 NewBO->copyIRFlags(V: BO);
2316
2317 // Drop noinf FMF if X can be Inf. If X can have Inf elements and noinf FMF is
2318 // set, the transformation may generate poison where the original program
2319 // would preserve the Inf value.
2320 if (IsFloatingPointTy && NewBO->hasNoInfs() && !isKnownNeverInfinity(V: X, SQ))
2321 NewBO->setHasNoInfs(false);
2322
2323 // An undef shuffle mask element may propagate as an undef constant element in
2324 // the new binop. That would produce poison where the original code might not.
2325 // If we already made a safe constant, then there's no danger.
2326 if (is_contained(Range&: Mask, Element: PoisonMaskElem) && !MightCreatePoisonOrUB)
2327 NewBO->dropPoisonGeneratingFlags();
2328 return NewBO;
2329}
2330
2331/// If we have an insert of a scalar to a non-zero element of an undefined
2332/// vector and then shuffle that value, that's the same as inserting to the zero
2333/// element and shuffling. Splatting from the zero element is recognized as the
2334/// canonical form of splat.
2335static Instruction *canonicalizeInsertSplat(ShuffleVectorInst &Shuf,
2336 InstCombiner::BuilderTy &Builder) {
2337 Value *Op0 = Shuf.getOperand(i_nocapture: 0), *Op1 = Shuf.getOperand(i_nocapture: 1);
2338 ArrayRef<int> Mask = Shuf.getShuffleMask();
2339 Value *X;
2340 uint64_t IndexC;
2341
2342 // Match a shuffle that is a splat to a non-zero element.
2343 if (!match(V: Op0, P: m_OneUse(SubPattern: m_InsertElt(Val: m_Poison(), Elt: m_Value(V&: X),
2344 Idx: m_ConstantInt(V&: IndexC)))) ||
2345 !match(V: Op1, P: m_Poison()) || match(Mask, P: m_ZeroMask()) || IndexC == 0)
2346 return nullptr;
2347
2348 // Insert into element 0 of a poison vector.
2349 PoisonValue *PoisonVec = PoisonValue::get(T: Shuf.getType());
2350 Value *NewIns = Builder.CreateInsertElement(Vec: PoisonVec, NewElt: X, Idx: (uint64_t)0);
2351
2352 // Splat from element 0. Any mask element that is poison remains poison.
2353 // For example:
2354 // shuf (inselt poison, X, 2), _, <2,2,undef>
2355 // --> shuf (inselt poison, X, 0), poison, <0,0,undef>
2356 unsigned NumMaskElts =
2357 cast<FixedVectorType>(Val: Shuf.getType())->getNumElements();
2358 SmallVector<int, 16> NewMask(NumMaskElts, 0);
2359 for (unsigned i = 0; i != NumMaskElts; ++i)
2360 if (Mask[i] == PoisonMaskElem)
2361 NewMask[i] = Mask[i];
2362
2363 return new ShuffleVectorInst(NewIns, NewMask);
2364}
2365
2366/// Try to fold shuffles that are the equivalent of a vector select.
2367Instruction *InstCombinerImpl::foldSelectShuffle(ShuffleVectorInst &Shuf) {
2368 if (!Shuf.isSelect())
2369 return nullptr;
2370
2371 // Canonicalize to choose from operand 0 first unless operand 1 is undefined.
2372 // Commuting undef to operand 0 conflicts with another canonicalization.
2373 unsigned NumElts = cast<FixedVectorType>(Val: Shuf.getType())->getNumElements();
2374 if (!match(V: Shuf.getOperand(i_nocapture: 1), P: m_Undef()) &&
2375 Shuf.getMaskValue(Elt: 0) >= (int)NumElts) {
2376 // TODO: Can we assert that both operands of a shuffle-select are not undef
2377 // (otherwise, it would have been folded by instsimplify?
2378 Shuf.commute();
2379 return &Shuf;
2380 }
2381
2382 if (Instruction *I = foldSelectShuffleOfSelectShuffle(Shuf))
2383 return I;
2384
2385 if (Instruction *I = foldSelectShuffleWith1Binop(
2386 Shuf, SQ: getSimplifyQuery().getWithInstruction(I: &Shuf)))
2387 return I;
2388
2389 BinaryOperator *B0, *B1;
2390 if (!match(V: Shuf.getOperand(i_nocapture: 0), P: m_BinOp(I&: B0)) ||
2391 !match(V: Shuf.getOperand(i_nocapture: 1), P: m_BinOp(I&: B1)))
2392 return nullptr;
2393
2394 // If one operand is "0 - X", allow that to be viewed as "X * -1"
2395 // (ConstantsAreOp1) by getAlternateBinop below. If the neg is not paired
2396 // with a multiply, we will exit because C0/C1 will not be set.
2397 Value *X, *Y;
2398 Constant *C0 = nullptr, *C1 = nullptr;
2399 bool ConstantsAreOp1;
2400 if (match(V: B0, P: m_BinOp(L: m_Constant(C&: C0), R: m_Value(V&: X))) &&
2401 match(V: B1, P: m_BinOp(L: m_Constant(C&: C1), R: m_Value(V&: Y))))
2402 ConstantsAreOp1 = false;
2403 else if (match(V: B0, P: m_CombineOr(Ps: m_BinOp(L: m_Value(V&: X), R: m_Constant(C&: C0)),
2404 Ps: m_Neg(V: m_Value(V&: X)))) &&
2405 match(V: B1, P: m_CombineOr(Ps: m_BinOp(L: m_Value(V&: Y), R: m_Constant(C&: C1)),
2406 Ps: m_Neg(V: m_Value(V&: Y)))))
2407 ConstantsAreOp1 = true;
2408 else
2409 return nullptr;
2410
2411 // We need matching binops to fold the lanes together.
2412 BinaryOperator::BinaryOps Opc0 = B0->getOpcode();
2413 BinaryOperator::BinaryOps Opc1 = B1->getOpcode();
2414 bool DropNSW = false;
2415 if (ConstantsAreOp1 && Opc0 != Opc1) {
2416 // TODO: We drop "nsw" if shift is converted into multiply because it may
2417 // not be correct when the shift amount is BitWidth - 1. We could examine
2418 // each vector element to determine if it is safe to keep that flag.
2419 if (Opc0 == Instruction::Shl || Opc1 == Instruction::Shl)
2420 DropNSW = true;
2421 if (BinopElts AltB0 = getAlternateBinop(BO: B0, DL)) {
2422 assert(isa<Constant>(AltB0.Op1) && "Expecting constant with alt binop");
2423 Opc0 = AltB0.Opcode;
2424 C0 = cast<Constant>(Val: AltB0.Op1);
2425 } else if (BinopElts AltB1 = getAlternateBinop(BO: B1, DL)) {
2426 assert(isa<Constant>(AltB1.Op1) && "Expecting constant with alt binop");
2427 Opc1 = AltB1.Opcode;
2428 C1 = cast<Constant>(Val: AltB1.Op1);
2429 }
2430 }
2431
2432 if (Opc0 != Opc1 || !C0 || !C1)
2433 return nullptr;
2434
2435 // The opcodes must be the same. Use a new name to make that clear.
2436 BinaryOperator::BinaryOps BOpc = Opc0;
2437
2438 // Select the constant elements needed for the single binop.
2439 ArrayRef<int> Mask = Shuf.getShuffleMask();
2440 Constant *NewC = ConstantExpr::getShuffleVector(V1: C0, V2: C1, Mask);
2441
2442 // We are moving a binop after a shuffle. When a shuffle has an undefined
2443 // mask element, the result is undefined, but it is not poison or undefined
2444 // behavior. That is not necessarily true for div/rem/shift.
2445 bool MightCreatePoisonOrUB =
2446 is_contained(Range&: Mask, Element: PoisonMaskElem) &&
2447 (Instruction::isIntDivRem(Opcode: BOpc) || Instruction::isShift(Opcode: BOpc));
2448 if (MightCreatePoisonOrUB)
2449 NewC = InstCombiner::getSafeVectorConstantForBinop(Opcode: BOpc, In: NewC,
2450 IsRHSConstant: ConstantsAreOp1);
2451
2452 Value *V;
2453 if (X == Y) {
2454 // Remove a binop and the shuffle by rearranging the constant:
2455 // shuffle (op V, C0), (op V, C1), M --> op V, C'
2456 // shuffle (op C0, V), (op C1, V), M --> op C', V
2457 V = X;
2458 } else {
2459 // If there are 2 different variable operands, we must create a new shuffle
2460 // (select) first, so check uses to ensure that we don't end up with more
2461 // instructions than we started with.
2462 if (!B0->hasOneUse() && !B1->hasOneUse())
2463 return nullptr;
2464
2465 // If we use the original shuffle mask and op1 is *variable*, we would be
2466 // putting an undef into operand 1 of div/rem/shift. This is either UB or
2467 // poison. We do not have to guard against UB when *constants* are op1
2468 // because safe constants guarantee that we do not overflow sdiv/srem (and
2469 // there's no danger for other opcodes).
2470 // TODO: To allow this case, create a new shuffle mask with no undefs.
2471 if (MightCreatePoisonOrUB && !ConstantsAreOp1)
2472 return nullptr;
2473
2474 // Note: In general, we do not create new shuffles in InstCombine because we
2475 // do not know if a target can lower an arbitrary shuffle optimally. In this
2476 // case, the shuffle uses the existing mask, so there is no additional risk.
2477
2478 // Select the variable vectors first, then perform the binop:
2479 // shuffle (op X, C0), (op Y, C1), M --> op (shuffle X, Y, M), C'
2480 // shuffle (op C0, X), (op C1, Y), M --> op C', (shuffle X, Y, M)
2481 V = Builder.CreateShuffleVector(V1: X, V2: Y, Mask);
2482 }
2483
2484 Value *NewBO = ConstantsAreOp1 ? Builder.CreateBinOp(Opc: BOpc, LHS: V, RHS: NewC) :
2485 Builder.CreateBinOp(Opc: BOpc, LHS: NewC, RHS: V);
2486
2487 // Flags are intersected from the 2 source binops. But there are 2 exceptions:
2488 // 1. If we changed an opcode, poison conditions might have changed.
2489 // 2. If the shuffle had undef mask elements, the new binop might have undefs
2490 // where the original code did not. But if we already made a safe constant,
2491 // then there's no danger.
2492 if (auto *NewI = dyn_cast<Instruction>(Val: NewBO)) {
2493 NewI->copyIRFlags(V: B0);
2494 NewI->andIRFlags(V: B1);
2495 if (DropNSW)
2496 NewI->setHasNoSignedWrap(false);
2497 if (is_contained(Range&: Mask, Element: PoisonMaskElem) && !MightCreatePoisonOrUB)
2498 NewI->dropPoisonGeneratingFlags();
2499 }
2500 return replaceInstUsesWith(I&: Shuf, V: NewBO);
2501}
2502
2503/// Convert a narrowing shuffle of a bitcasted vector into a vector truncate.
2504/// Example (little endian):
2505/// shuf (bitcast <4 x i16> X to <8 x i8>), <0, 2, 4, 6> --> trunc X to <4 x i8>
2506static Instruction *foldTruncShuffle(ShuffleVectorInst &Shuf,
2507 bool IsBigEndian) {
2508 // This must be a bitcasted shuffle of 1 vector integer operand.
2509 Type *DestType = Shuf.getType();
2510 Value *X;
2511 if (!match(V: Shuf.getOperand(i_nocapture: 0), P: m_BitCast(Op: m_Value(V&: X))) ||
2512 !match(V: Shuf.getOperand(i_nocapture: 1), P: m_Poison()) || !DestType->isIntOrIntVectorTy())
2513 return nullptr;
2514
2515 // The source type must have the same number of elements as the shuffle,
2516 // and the source element type must be larger than the shuffle element type.
2517 Type *SrcType = X->getType();
2518 if (!SrcType->isVectorTy() || !SrcType->isIntOrIntVectorTy() ||
2519 cast<FixedVectorType>(Val: SrcType)->getNumElements() !=
2520 cast<FixedVectorType>(Val: DestType)->getNumElements() ||
2521 SrcType->getScalarSizeInBits() % DestType->getScalarSizeInBits() != 0)
2522 return nullptr;
2523
2524 assert(Shuf.changesLength() && !Shuf.increasesLength() &&
2525 "Expected a shuffle that decreases length");
2526
2527 // Last, check that the mask chooses the correct low bits for each narrow
2528 // element in the result.
2529 uint64_t TruncRatio =
2530 SrcType->getScalarSizeInBits() / DestType->getScalarSizeInBits();
2531 ArrayRef<int> Mask = Shuf.getShuffleMask();
2532 for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
2533 if (Mask[i] == PoisonMaskElem)
2534 continue;
2535 uint64_t LSBIndex = IsBigEndian ? (i + 1) * TruncRatio - 1 : i * TruncRatio;
2536 assert(LSBIndex <= INT32_MAX && "Overflowed 32-bits");
2537 if (Mask[i] != (int)LSBIndex)
2538 return nullptr;
2539 }
2540
2541 return new TruncInst(X, DestType);
2542}
2543
2544/// Match a shuffle-select-shuffle pattern where the shuffles are widening and
2545/// narrowing (concatenating with poison and extracting back to the original
2546/// length). This allows replacing the wide select with a narrow select.
2547static Instruction *narrowVectorSelect(ShuffleVectorInst &Shuf,
2548 InstCombiner::BuilderTy &Builder) {
2549 // This must be a narrowing identity shuffle. It extracts the 1st N elements
2550 // of the 1st vector operand of a shuffle.
2551 if (!match(V: Shuf.getOperand(i_nocapture: 1), P: m_Poison()) || !Shuf.isIdentityWithExtract())
2552 return nullptr;
2553
2554 // The vector being shuffled must be a vector select that we can eliminate.
2555 // TODO: The one-use requirement could be eased if X and/or Y are constants.
2556 Value *Cond, *X, *Y;
2557 if (!match(V: Shuf.getOperand(i_nocapture: 0),
2558 P: m_OneUse(SubPattern: m_Select(C: m_Value(V&: Cond), L: m_Value(V&: X), R: m_Value(V&: Y)))))
2559 return nullptr;
2560
2561 // We need a narrow condition value. It must be extended with poison elements
2562 // and have the same number of elements as this shuffle.
2563 unsigned NarrowNumElts =
2564 cast<FixedVectorType>(Val: Shuf.getType())->getNumElements();
2565 Value *NarrowCond;
2566 if (!match(V: Cond, P: m_OneUse(SubPattern: m_Shuffle(v1: m_Value(V&: NarrowCond), v2: m_Poison()))) ||
2567 cast<FixedVectorType>(Val: NarrowCond->getType())->getNumElements() !=
2568 NarrowNumElts ||
2569 !cast<ShuffleVectorInst>(Val: Cond)->isIdentityWithPadding())
2570 return nullptr;
2571
2572 // shuf (sel (shuf NarrowCond, poison, WideMask), X, Y), poison, NarrowMask)
2573 // -->
2574 // sel NarrowCond, (shuf X, poison, NarrowMask), (shuf Y, poison, NarrowMask)
2575 Value *NarrowX = Builder.CreateShuffleVector(V: X, Mask: Shuf.getShuffleMask());
2576 Value *NarrowY = Builder.CreateShuffleVector(V: Y, Mask: Shuf.getShuffleMask());
2577 return SelectInst::Create(C: NarrowCond, S1: NarrowX, S2: NarrowY);
2578}
2579
2580/// Canonicalize FP negate/abs after shuffle.
2581static Instruction *foldShuffleOfUnaryOps(ShuffleVectorInst &Shuf,
2582 InstCombiner::BuilderTy &Builder) {
2583 auto *S0 = dyn_cast<Instruction>(Val: Shuf.getOperand(i_nocapture: 0));
2584 Value *X;
2585 if (!S0 || !match(V: S0, P: m_CombineOr(Ps: m_FNeg(X: m_Value(V&: X)), Ps: m_FAbs(Op0: m_Value(V&: X)))))
2586 return nullptr;
2587
2588 bool IsFNeg = S0->getOpcode() == Instruction::FNeg;
2589
2590 // Match 2-input (binary) shuffle.
2591 auto *S1 = dyn_cast<Instruction>(Val: Shuf.getOperand(i_nocapture: 1));
2592 Value *Y;
2593 if (!S1 || !match(V: S1, P: m_CombineOr(Ps: m_FNeg(X: m_Value(V&: Y)), Ps: m_FAbs(Op0: m_Value(V&: Y)))) ||
2594 S0->getOpcode() != S1->getOpcode() ||
2595 (!S0->hasOneUse() && !S1->hasOneUse()))
2596 return nullptr;
2597
2598 // shuf (fneg/fabs X), (fneg/fabs Y), Mask --> fneg/fabs (shuf X, Y, Mask)
2599 Value *NewShuf = Builder.CreateShuffleVector(V1: X, V2: Y, Mask: Shuf.getShuffleMask());
2600 Instruction *NewF;
2601 if (IsFNeg) {
2602 NewF = UnaryOperator::CreateFNeg(V: NewShuf);
2603 } else {
2604 Function *FAbs = Intrinsic::getOrInsertDeclaration(
2605 M: Shuf.getModule(), id: Intrinsic::fabs, OverloadTys: Shuf.getType());
2606 NewF = CallInst::Create(Func: FAbs, Args: {NewShuf});
2607 }
2608 NewF->copyIRFlags(V: S0);
2609 NewF->andIRFlags(V: S1);
2610 return NewF;
2611}
2612
2613/// Canonicalize casts after shuffle.
2614static Instruction *foldCastShuffle(ShuffleVectorInst &Shuf,
2615 InstCombiner::BuilderTy &Builder) {
2616 auto *Cast0 = dyn_cast<CastInst>(Val: Shuf.getOperand(i_nocapture: 0));
2617 if (!Cast0)
2618 return nullptr;
2619
2620 // TODO: Allow other opcodes? That would require easing the type restrictions
2621 // below here.
2622 CastInst::CastOps CastOpcode = Cast0->getOpcode();
2623 switch (CastOpcode) {
2624 case Instruction::SExt:
2625 case Instruction::ZExt:
2626 case Instruction::FPToSI:
2627 case Instruction::FPToUI:
2628 case Instruction::SIToFP:
2629 case Instruction::UIToFP:
2630 break;
2631 default:
2632 return nullptr;
2633 }
2634
2635 VectorType *CastSrcTy = cast<VectorType>(Val: Cast0->getSrcTy());
2636 VectorType *ShufTy = Shuf.getType();
2637 VectorType *ShufOpTy = cast<VectorType>(Val: Shuf.getOperand(i_nocapture: 0)->getType());
2638
2639 // TODO: Allow length-increasing shuffles?
2640 if (ShufTy->getElementCount().getKnownMinValue() >
2641 ShufOpTy->getElementCount().getKnownMinValue())
2642 return nullptr;
2643
2644 // shuffle (cast X), Poison, identity-with-extract-mask -->
2645 // cast (shuffle X, Poison, identity-with-extract-mask).
2646 if (isa<PoisonValue>(Val: Shuf.getOperand(i_nocapture: 1)) && Cast0->hasOneUse() &&
2647 Shuf.isIdentityWithExtract()) {
2648 auto *NewIns = Builder.CreateShuffleVector(V1: Cast0->getOperand(i_nocapture: 0),
2649 V2: PoisonValue::get(T: CastSrcTy),
2650 Mask: Shuf.getShuffleMask());
2651 return CastInst::Create(Cast0->getOpcode(), S: NewIns, Ty: Shuf.getType());
2652 }
2653
2654 auto *Cast1 = dyn_cast<CastInst>(Val: Shuf.getOperand(i_nocapture: 1));
2655 // Do we have 2 matching cast operands?
2656 if (!Cast1 || Cast0->getOpcode() != Cast1->getOpcode() ||
2657 Cast0->getSrcTy() != Cast1->getSrcTy())
2658 return nullptr;
2659
2660 // TODO: Allow element-size-decreasing casts (ex: fptosi float to i8)?
2661 assert(isa<FixedVectorType>(CastSrcTy) && isa<FixedVectorType>(ShufOpTy) &&
2662 "Expected fixed vector operands for casts and binary shuffle");
2663 if (CastSrcTy->getPrimitiveSizeInBits() > ShufOpTy->getPrimitiveSizeInBits())
2664 return nullptr;
2665
2666 // At least one of the operands must have only one use (the shuffle).
2667 if (!Cast0->hasOneUse() && !Cast1->hasOneUse())
2668 return nullptr;
2669
2670 // shuffle (cast X), (cast Y), Mask --> cast (shuffle X, Y, Mask)
2671 Value *X = Cast0->getOperand(i_nocapture: 0);
2672 Value *Y = Cast1->getOperand(i_nocapture: 0);
2673 Value *NewShuf = Builder.CreateShuffleVector(V1: X, V2: Y, Mask: Shuf.getShuffleMask());
2674 return CastInst::Create(CastOpcode, S: NewShuf, Ty: ShufTy);
2675}
2676
2677/// Try to fold an extract subvector operation.
2678static Instruction *foldIdentityExtractShuffle(ShuffleVectorInst &Shuf) {
2679 Value *Op0 = Shuf.getOperand(i_nocapture: 0), *Op1 = Shuf.getOperand(i_nocapture: 1);
2680 if (!Shuf.isIdentityWithExtract() || !match(V: Op1, P: m_Poison()))
2681 return nullptr;
2682
2683 // Check if we are extracting all bits of an inserted scalar:
2684 // extract-subvec (bitcast (inselt ?, X, 0) --> bitcast X to subvec type
2685 Value *X;
2686 if (match(V: Op0, P: m_BitCast(Op: m_InsertElt(Val: m_Value(), Elt: m_Value(V&: X), Idx: m_Zero()))) &&
2687 X->getType()->getPrimitiveSizeInBits() ==
2688 Shuf.getType()->getPrimitiveSizeInBits())
2689 return new BitCastInst(X, Shuf.getType());
2690
2691 // Try to combine 2 shuffles into 1 shuffle by concatenating a shuffle mask.
2692 Value *Y;
2693 ArrayRef<int> Mask;
2694 if (!match(V: Op0, P: m_Shuffle(v1: m_Value(V&: X), v2: m_Value(V&: Y), mask: m_Mask(Mask))))
2695 return nullptr;
2696
2697 // Be conservative with shuffle transforms. If we can't kill the 1st shuffle,
2698 // then combining may result in worse codegen.
2699 if (!Op0->hasOneUse())
2700 return nullptr;
2701
2702 // We are extracting a subvector from a shuffle. Remove excess elements from
2703 // the 1st shuffle mask to eliminate the extract.
2704 //
2705 // This transform is conservatively limited to identity extracts because we do
2706 // not allow arbitrary shuffle mask creation as a target-independent transform
2707 // (because we can't guarantee that will lower efficiently).
2708 //
2709 // If the extracting shuffle has an poison mask element, it transfers to the
2710 // new shuffle mask. Otherwise, copy the original mask element. Example:
2711 // shuf (shuf X, Y, <C0, C1, C2, poison, C4>), poison, <0, poison, 2, 3> -->
2712 // shuf X, Y, <C0, poison, C2, poison>
2713 unsigned NumElts = cast<FixedVectorType>(Val: Shuf.getType())->getNumElements();
2714 SmallVector<int, 16> NewMask(NumElts);
2715 assert(NumElts < Mask.size() &&
2716 "Identity with extract must have less elements than its inputs");
2717
2718 for (unsigned i = 0; i != NumElts; ++i) {
2719 int ExtractMaskElt = Shuf.getMaskValue(Elt: i);
2720 int MaskElt = Mask[i];
2721 NewMask[i] = ExtractMaskElt == PoisonMaskElem ? ExtractMaskElt : MaskElt;
2722 }
2723 return new ShuffleVectorInst(X, Y, NewMask);
2724}
2725
2726/// Try to replace a shuffle with an insertelement or try to replace a shuffle
2727/// operand with the operand of an insertelement.
2728static Instruction *foldShuffleWithInsert(ShuffleVectorInst &Shuf,
2729 InstCombinerImpl &IC) {
2730 Value *V0 = Shuf.getOperand(i_nocapture: 0), *V1 = Shuf.getOperand(i_nocapture: 1);
2731 SmallVector<int, 16> Mask;
2732 Shuf.getShuffleMask(Result&: Mask);
2733
2734 int NumElts = Mask.size();
2735 int InpNumElts = cast<FixedVectorType>(Val: V0->getType())->getNumElements();
2736
2737 // This is a specialization of a fold in SimplifyDemandedVectorElts. We may
2738 // not be able to handle it there if the insertelement has >1 use.
2739 // If the shuffle has an insertelement operand but does not choose the
2740 // inserted scalar element from that value, then we can replace that shuffle
2741 // operand with the source vector of the insertelement.
2742 Value *X;
2743 uint64_t IdxC;
2744 if (match(V: V0, P: m_InsertElt(Val: m_Value(V&: X), Elt: m_Value(), Idx: m_ConstantInt(V&: IdxC)))) {
2745 // shuf (inselt X, ?, IdxC), ?, Mask --> shuf X, ?, Mask
2746 if (!is_contained(Range&: Mask, Element: (int)IdxC))
2747 return IC.replaceOperand(I&: Shuf, OpNum: 0, V: X);
2748 }
2749 if (match(V: V1, P: m_InsertElt(Val: m_Value(V&: X), Elt: m_Value(), Idx: m_ConstantInt(V&: IdxC)))) {
2750 // Offset the index constant by the vector width because we are checking for
2751 // accesses to the 2nd vector input of the shuffle.
2752 IdxC += InpNumElts;
2753 // shuf ?, (inselt X, ?, IdxC), Mask --> shuf ?, X, Mask
2754 if (!is_contained(Range&: Mask, Element: (int)IdxC))
2755 return IC.replaceOperand(I&: Shuf, OpNum: 1, V: X);
2756 }
2757 // For the rest of the transform, the shuffle must not change vector sizes.
2758 // TODO: This restriction could be removed if the insert has only one use
2759 // (because the transform would require a new length-changing shuffle).
2760 if (NumElts != InpNumElts)
2761 return nullptr;
2762
2763 // shuffle (insert ?, Scalar, IndexC), V1, Mask --> insert V1, Scalar, IndexC'
2764 auto isShufflingScalarIntoOp1 = [&](Value *&Scalar, ConstantInt *&IndexC) {
2765 // We need an insertelement with a constant index.
2766 if (!match(V: V0, P: m_InsertElt(Val: m_Value(), Elt: m_Value(V&: Scalar),
2767 Idx: m_ConstantInt(CI&: IndexC))))
2768 return false;
2769
2770 // Test the shuffle mask to see if it splices the inserted scalar into the
2771 // operand 1 vector of the shuffle.
2772 int NewInsIndex = -1;
2773 for (int i = 0; i != NumElts; ++i) {
2774 // Ignore undef mask elements.
2775 if (Mask[i] == -1)
2776 continue;
2777
2778 // The shuffle takes elements of operand 1 without lane changes.
2779 if (Mask[i] == NumElts + i)
2780 continue;
2781
2782 // The shuffle must choose the inserted scalar exactly once.
2783 if (NewInsIndex != -1 || Mask[i] != IndexC->getSExtValue())
2784 return false;
2785
2786 // The shuffle is placing the inserted scalar into element i.
2787 NewInsIndex = i;
2788 }
2789
2790 assert(NewInsIndex != -1 && "Did not fold shuffle with unused operand?");
2791
2792 // Index is updated to the potentially translated insertion lane.
2793 IndexC = ConstantInt::get(Ty: IndexC->getIntegerType(), V: NewInsIndex);
2794 return true;
2795 };
2796
2797 // If the shuffle is unnecessary, insert the scalar operand directly into
2798 // operand 1 of the shuffle. Example:
2799 // shuffle (insert ?, S, 1), V1, <1, 5, 6, 7> --> insert V1, S, 0
2800 Value *Scalar;
2801 ConstantInt *IndexC;
2802 if (isShufflingScalarIntoOp1(Scalar, IndexC))
2803 return InsertElementInst::Create(Vec: V1, NewElt: Scalar, Idx: IndexC);
2804
2805 // Try again after commuting shuffle. Example:
2806 // shuffle V0, (insert ?, S, 0), <0, 1, 2, 4> -->
2807 // shuffle (insert ?, S, 0), V0, <4, 5, 6, 0> --> insert V0, S, 3
2808 std::swap(a&: V0, b&: V1);
2809 ShuffleVectorInst::commuteShuffleMask(Mask, InVecNumElts: NumElts);
2810 if (isShufflingScalarIntoOp1(Scalar, IndexC))
2811 return InsertElementInst::Create(Vec: V1, NewElt: Scalar, Idx: IndexC);
2812
2813 return nullptr;
2814}
2815
2816static Instruction *foldIdentityPaddedShuffles(ShuffleVectorInst &Shuf) {
2817 // Match the operands as identity with padding (also known as concatenation
2818 // with undef) shuffles of the same source type. The backend is expected to
2819 // recreate these concatenations from a shuffle of narrow operands.
2820 auto *Shuffle0 = dyn_cast<ShuffleVectorInst>(Val: Shuf.getOperand(i_nocapture: 0));
2821 auto *Shuffle1 = dyn_cast<ShuffleVectorInst>(Val: Shuf.getOperand(i_nocapture: 1));
2822 if (!Shuffle0 || !Shuffle0->isIdentityWithPadding() ||
2823 !Shuffle1 || !Shuffle1->isIdentityWithPadding())
2824 return nullptr;
2825
2826 // We limit this transform to power-of-2 types because we expect that the
2827 // backend can convert the simplified IR patterns to identical nodes as the
2828 // original IR.
2829 // TODO: If we can verify the same behavior for arbitrary types, the
2830 // power-of-2 checks can be removed.
2831 Value *X = Shuffle0->getOperand(i_nocapture: 0);
2832 Value *Y = Shuffle1->getOperand(i_nocapture: 0);
2833 if (X->getType() != Y->getType() ||
2834 !isPowerOf2_32(Value: cast<FixedVectorType>(Val: Shuf.getType())->getNumElements()) ||
2835 !isPowerOf2_32(
2836 Value: cast<FixedVectorType>(Val: Shuffle0->getType())->getNumElements()) ||
2837 !isPowerOf2_32(Value: cast<FixedVectorType>(Val: X->getType())->getNumElements()) ||
2838 match(V: X, P: m_Undef()) || match(V: Y, P: m_Undef()))
2839 return nullptr;
2840 assert(match(Shuffle0->getOperand(1), m_Undef()) &&
2841 match(Shuffle1->getOperand(1), m_Undef()) &&
2842 "Unexpected operand for identity shuffle");
2843
2844 // This is a shuffle of 2 widening shuffles. We can shuffle the narrow source
2845 // operands directly by adjusting the shuffle mask to account for the narrower
2846 // types:
2847 // shuf (widen X), (widen Y), Mask --> shuf X, Y, Mask'
2848 int NarrowElts = cast<FixedVectorType>(Val: X->getType())->getNumElements();
2849 int WideElts = cast<FixedVectorType>(Val: Shuffle0->getType())->getNumElements();
2850 assert(WideElts > NarrowElts && "Unexpected types for identity with padding");
2851
2852 ArrayRef<int> Mask = Shuf.getShuffleMask();
2853 SmallVector<int, 16> NewMask(Mask.size(), -1);
2854 for (int i = 0, e = Mask.size(); i != e; ++i) {
2855 if (Mask[i] == -1)
2856 continue;
2857
2858 // If this shuffle is choosing an undef element from 1 of the sources, that
2859 // element is undef.
2860 if (Mask[i] < WideElts) {
2861 if (Shuffle0->getMaskValue(Elt: Mask[i]) == -1)
2862 continue;
2863 } else {
2864 if (Shuffle1->getMaskValue(Elt: Mask[i] - WideElts) == -1)
2865 continue;
2866 }
2867
2868 // If this shuffle is choosing from the 1st narrow op, the mask element is
2869 // the same. If this shuffle is choosing from the 2nd narrow op, the mask
2870 // element is offset down to adjust for the narrow vector widths.
2871 if (Mask[i] < WideElts) {
2872 assert(Mask[i] < NarrowElts && "Unexpected shuffle mask");
2873 NewMask[i] = Mask[i];
2874 } else {
2875 assert(Mask[i] < (WideElts + NarrowElts) && "Unexpected shuffle mask");
2876 NewMask[i] = Mask[i] - (WideElts - NarrowElts);
2877 }
2878 }
2879 return new ShuffleVectorInst(X, Y, NewMask);
2880}
2881
2882// Splatting the first element of the result of a BinOp, where any of the
2883// BinOp's operands are the result of a first element splat can be simplified to
2884// splatting the first element of the result of the BinOp
2885Instruction *InstCombinerImpl::simplifyBinOpSplats(ShuffleVectorInst &SVI) {
2886 if (!match(V: SVI.getOperand(i_nocapture: 1), P: m_Poison()) ||
2887 !match(Mask: SVI.getShuffleMask(), P: m_ZeroMask()) ||
2888 !SVI.getOperand(i_nocapture: 0)->hasOneUse())
2889 return nullptr;
2890
2891 Value *Op0 = SVI.getOperand(i_nocapture: 0);
2892 Value *X, *Y;
2893 if (!match(V: Op0, P: m_BinOp(L: m_Shuffle(v1: m_Value(V&: X), v2: m_Poison(), mask: m_ZeroMask()),
2894 R: m_Value(V&: Y))) &&
2895 !match(V: Op0, P: m_BinOp(L: m_Value(V&: X),
2896 R: m_Shuffle(v1: m_Value(V&: Y), v2: m_Poison(), mask: m_ZeroMask()))))
2897 return nullptr;
2898 if (X->getType() != Y->getType())
2899 return nullptr;
2900
2901 auto *BinOp = cast<BinaryOperator>(Val: Op0);
2902 if (!isSafeToSpeculativelyExecuteWithVariableReplaced(I: BinOp))
2903 return nullptr;
2904
2905 Value *NewBO = Builder.CreateBinOp(Opc: BinOp->getOpcode(), LHS: X, RHS: Y);
2906 if (auto NewBOI = dyn_cast<Instruction>(Val: NewBO))
2907 NewBOI->copyIRFlags(V: BinOp);
2908
2909 return new ShuffleVectorInst(NewBO, SVI.getShuffleMask());
2910}
2911
2912Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
2913 Value *LHS = SVI.getOperand(i_nocapture: 0);
2914 Value *RHS = SVI.getOperand(i_nocapture: 1);
2915 SimplifyQuery ShufQuery = SQ.getWithInstruction(I: &SVI);
2916 if (auto *V = simplifyShuffleVectorInst(Op0: LHS, Op1: RHS, Mask: SVI.getShuffleMask(),
2917 RetTy: SVI.getType(), Q: ShufQuery))
2918 return replaceInstUsesWith(I&: SVI, V);
2919
2920 if (Instruction *I = simplifyBinOpSplats(SVI))
2921 return I;
2922
2923 // Canonicalize splat shuffle to use poison RHS. Handle this explicitly in
2924 // order to support scalable vectors.
2925 if (match(Mask: SVI.getShuffleMask(), P: m_ZeroMask()) && !isa<PoisonValue>(Val: RHS))
2926 return replaceOperand(I&: SVI, OpNum: 1, V: PoisonValue::get(T: RHS->getType()));
2927
2928 if (isa<ScalableVectorType>(Val: LHS->getType()))
2929 return nullptr;
2930
2931 unsigned VWidth = cast<FixedVectorType>(Val: SVI.getType())->getNumElements();
2932 unsigned LHSWidth = cast<FixedVectorType>(Val: LHS->getType())->getNumElements();
2933
2934 // shuffle (bitcast X), (bitcast Y), Mask --> bitcast (shuffle X, Y, Mask)
2935 //
2936 // if X and Y are of the same (vector) type, and the element size is not
2937 // changed by the bitcasts, we can distribute the bitcasts through the
2938 // shuffle, hopefully reducing the number of instructions. We make sure that
2939 // at least one bitcast only has one use, so we don't *increase* the number of
2940 // instructions here.
2941 Value *X, *Y;
2942 if (match(V: LHS, P: m_BitCast(Op: m_Value(V&: X))) && match(V: RHS, P: m_BitCast(Op: m_Value(V&: Y))) &&
2943 X->getType()->isVectorTy() && X->getType() == Y->getType() &&
2944 X->getType()->getScalarSizeInBits() ==
2945 SVI.getType()->getScalarSizeInBits() &&
2946 (LHS->hasOneUse() || RHS->hasOneUse())) {
2947 Value *V = Builder.CreateShuffleVector(V1: X, V2: Y, Mask: SVI.getShuffleMask(),
2948 Name: SVI.getName() + ".uncasted");
2949 return new BitCastInst(V, SVI.getType());
2950 }
2951
2952 ArrayRef<int> Mask = SVI.getShuffleMask();
2953
2954 // Peek through a bitcasted shuffle operand by scaling the mask. If the
2955 // simulated shuffle can simplify, then this shuffle is unnecessary:
2956 // shuf (bitcast X), undef, Mask --> bitcast X'
2957 // TODO: This could be extended to allow length-changing shuffles.
2958 // The transform might also be obsoleted if we allowed canonicalization
2959 // of bitcasted shuffles.
2960 if (match(V: LHS, P: m_BitCast(Op: m_Value(V&: X))) && match(V: RHS, P: m_Undef()) &&
2961 X->getType()->isVectorTy() && VWidth == LHSWidth) {
2962 // Try to create a scaled mask constant.
2963 auto *XType = cast<FixedVectorType>(Val: X->getType());
2964 unsigned XNumElts = XType->getNumElements();
2965 SmallVector<int, 16> ScaledMask;
2966 if (scaleShuffleMaskElts(NumDstElts: XNumElts, Mask, ScaledMask)) {
2967 // If the shuffled source vector simplifies, cast that value to this
2968 // shuffle's type.
2969 if (auto *V = simplifyShuffleVectorInst(Op0: X, Op1: UndefValue::get(T: XType),
2970 Mask: ScaledMask, RetTy: XType, Q: ShufQuery))
2971 return BitCastInst::Create(Instruction::BitCast, S: V, Ty: SVI.getType());
2972 }
2973 }
2974
2975 // shuffle x, x, mask --> shuffle x, undef, mask'
2976 if (LHS == RHS) {
2977 assert(!match(RHS, m_Undef()) &&
2978 "Shuffle with 2 undef ops not simplified?");
2979 return new ShuffleVectorInst(LHS, createUnaryMask(Mask, NumElts: LHSWidth));
2980 }
2981
2982 // shuffle undef, x, mask --> shuffle x, undef, mask'
2983 if (match(V: LHS, P: m_Undef())) {
2984 SVI.commute();
2985 return &SVI;
2986 }
2987
2988 if (Instruction *I = canonicalizeInsertSplat(Shuf&: SVI, Builder))
2989 return I;
2990
2991 if (Instruction *I = foldSelectShuffle(Shuf&: SVI))
2992 return I;
2993
2994 if (Instruction *I = foldTruncShuffle(Shuf&: SVI, IsBigEndian: DL.isBigEndian()))
2995 return I;
2996
2997 if (Instruction *I = narrowVectorSelect(Shuf&: SVI, Builder))
2998 return I;
2999
3000 if (Instruction *I = foldShuffleOfUnaryOps(Shuf&: SVI, Builder))
3001 return I;
3002
3003 if (Instruction *I = foldCastShuffle(Shuf&: SVI, Builder))
3004 return I;
3005
3006 APInt PoisonElts(VWidth, 0);
3007 APInt AllOnesEltMask(APInt::getAllOnes(numBits: VWidth));
3008 if (Value *V = SimplifyDemandedVectorElts(V: &SVI, DemandedElts: AllOnesEltMask, PoisonElts)) {
3009 if (V != &SVI)
3010 return replaceInstUsesWith(I&: SVI, V);
3011 return &SVI;
3012 }
3013
3014 if (Instruction *I = foldIdentityExtractShuffle(Shuf&: SVI))
3015 return I;
3016
3017 // These transforms have the potential to lose undef knowledge, so they are
3018 // intentionally placed after SimplifyDemandedVectorElts().
3019 if (Instruction *I = foldShuffleWithInsert(Shuf&: SVI, IC&: *this))
3020 return I;
3021 if (Instruction *I = foldIdentityPaddedShuffles(Shuf&: SVI))
3022 return I;
3023
3024 if (match(V: RHS, P: m_Constant())) {
3025 if (auto *SI = dyn_cast<SelectInst>(Val: LHS)) {
3026 // We cannot do this fold for elementwise select since ShuffleVector is
3027 // not elementwise.
3028 if (SI->getCondition()->getType()->isIntegerTy() &&
3029 (isa<PoisonValue>(Val: RHS) ||
3030 isGuaranteedNotToBePoison(V: SI->getCondition()))) {
3031 if (Instruction *I = FoldOpIntoSelect(Op&: SVI, SI))
3032 return I;
3033 }
3034 }
3035 if (auto *PN = dyn_cast<PHINode>(Val: LHS)) {
3036 if (Instruction *I = foldOpIntoPhi(I&: SVI, PN, /*AllowMultipleUses=*/true))
3037 return I;
3038 }
3039 }
3040
3041 if (match(V: RHS, P: m_Poison()) && canEvaluateShuffled(V: LHS, Mask)) {
3042 Value *V = evaluateInDifferentElementOrder(V: LHS, Mask, Builder);
3043 return replaceInstUsesWith(I&: SVI, V);
3044 }
3045
3046 // SROA generates shuffle+bitcast when the extracted sub-vector is bitcast to
3047 // a non-vector type. We can instead bitcast the original vector followed by
3048 // an extract of the desired element:
3049 //
3050 // %sroa = shufflevector <16 x i8> %in, <16 x i8> undef,
3051 // <4 x i32> <i32 0, i32 1, i32 2, i32 3>
3052 // %1 = bitcast <4 x i8> %sroa to i32
3053 // Becomes:
3054 // %bc = bitcast <16 x i8> %in to <4 x i32>
3055 // %ext = extractelement <4 x i32> %bc, i32 0
3056 //
3057 // If the shuffle is extracting a contiguous range of values from the input
3058 // vector then each use which is a bitcast of the extracted size can be
3059 // replaced. This will work if the vector types are compatible, and the begin
3060 // index is aligned to a value in the casted vector type. If the begin index
3061 // isn't aligned then we can shuffle the original vector (keeping the same
3062 // vector type) before extracting.
3063 //
3064 // This code will bail out if the target type is fundamentally incompatible
3065 // with vectors of the source type.
3066 //
3067 // Example of <16 x i8>, target type i32:
3068 // Index range [4,8): v-----------v Will work.
3069 // +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
3070 // <16 x i8>: | | | | | | | | | | | | | | | | |
3071 // <4 x i32>: | | | | |
3072 // +-----------+-----------+-----------+-----------+
3073 // Index range [6,10): ^-----------^ Needs an extra shuffle.
3074 // Target type i40: ^--------------^ Won't work, bail.
3075 bool MadeChange = false;
3076 if (isShuffleExtractingFromLHS(SVI, Mask)) {
3077 Value *V = LHS;
3078 unsigned MaskElems = Mask.size();
3079 auto *SrcTy = cast<FixedVectorType>(Val: V->getType());
3080 unsigned VecBitWidth = DL.getTypeSizeInBits(Ty: SrcTy);
3081 unsigned SrcElemBitWidth = DL.getTypeSizeInBits(Ty: SrcTy->getElementType());
3082 assert(SrcElemBitWidth && "vector elements must have a bitwidth");
3083 unsigned SrcNumElems = SrcTy->getNumElements();
3084 SmallVector<BitCastInst *, 8> BCs;
3085 DenseMap<Type *, Value *> NewBCs;
3086 for (User *U : SVI.users())
3087 if (BitCastInst *BC = dyn_cast<BitCastInst>(Val: U)) {
3088 // Only visit bitcasts that weren't previously handled.
3089 if (BC->use_empty())
3090 continue;
3091 // Prefer to combine bitcasts of bitcasts before attempting this fold.
3092 if (BC->hasOneUse()) {
3093 auto *BC2 = dyn_cast<BitCastInst>(Val: BC->user_back());
3094 if (BC2 && isEliminableCastPair(CI1: BC, CI2: BC2))
3095 continue;
3096 }
3097 BCs.push_back(Elt: BC);
3098 }
3099 for (BitCastInst *BC : BCs) {
3100 unsigned BegIdx = Mask.front();
3101 Type *TgtTy = BC->getDestTy();
3102 unsigned TgtElemBitWidth = DL.getTypeSizeInBits(Ty: TgtTy);
3103 if (!TgtElemBitWidth)
3104 continue;
3105 unsigned TgtNumElems = VecBitWidth / TgtElemBitWidth;
3106 bool VecBitWidthsEqual = VecBitWidth == TgtNumElems * TgtElemBitWidth;
3107 bool BegIsAligned = 0 == ((SrcElemBitWidth * BegIdx) % TgtElemBitWidth);
3108 if (!VecBitWidthsEqual)
3109 continue;
3110 if (!VectorType::isValidElementType(ElemTy: TgtTy))
3111 continue;
3112 auto *CastSrcTy = FixedVectorType::get(ElementType: TgtTy, NumElts: TgtNumElems);
3113 if (!BegIsAligned) {
3114 // Shuffle the input so [0,NumElements) contains the output, and
3115 // [NumElems,SrcNumElems) is undef.
3116 SmallVector<int, 16> ShuffleMask(SrcNumElems, -1);
3117 for (unsigned I = 0, E = MaskElems, Idx = BegIdx; I != E; ++Idx, ++I)
3118 ShuffleMask[I] = Idx;
3119 V = Builder.CreateShuffleVector(V, Mask: ShuffleMask,
3120 Name: SVI.getName() + ".extract");
3121 BegIdx = 0;
3122 }
3123 unsigned SrcElemsPerTgtElem = TgtElemBitWidth / SrcElemBitWidth;
3124 assert(SrcElemsPerTgtElem);
3125 BegIdx /= SrcElemsPerTgtElem;
3126 auto [It, Inserted] = NewBCs.try_emplace(Key: CastSrcTy);
3127 if (Inserted)
3128 It->second = Builder.CreateBitCast(V, DestTy: CastSrcTy, Name: SVI.getName() + ".bc");
3129 auto *Ext = Builder.CreateExtractElement(Vec: It->second, Idx: BegIdx,
3130 Name: SVI.getName() + ".extract");
3131 // The shufflevector isn't being replaced: the bitcast that used it
3132 // is. InstCombine will visit the newly-created instructions.
3133 replaceInstUsesWith(I&: *BC, V: Ext);
3134 MadeChange = true;
3135 }
3136 }
3137
3138 // If the LHS is a shufflevector itself, see if we can combine it with this
3139 // one without producing an unusual shuffle.
3140 // Cases that might be simplified:
3141 // 1.
3142 // x1=shuffle(v1,v2,mask1)
3143 // x=shuffle(x1,undef,mask)
3144 // ==>
3145 // x=shuffle(v1,undef,newMask)
3146 // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : -1
3147 // 2.
3148 // x1=shuffle(v1,undef,mask1)
3149 // x=shuffle(x1,x2,mask)
3150 // where v1.size() == mask1.size()
3151 // ==>
3152 // x=shuffle(v1,x2,newMask)
3153 // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : mask[i]
3154 // 3.
3155 // x2=shuffle(v2,undef,mask2)
3156 // x=shuffle(x1,x2,mask)
3157 // where v2.size() == mask2.size()
3158 // ==>
3159 // x=shuffle(x1,v2,newMask)
3160 // newMask[i] = (mask[i] < x1.size())
3161 // ? mask[i] : mask2[mask[i]-x1.size()]+x1.size()
3162 // 4.
3163 // x1=shuffle(v1,undef,mask1)
3164 // x2=shuffle(v2,undef,mask2)
3165 // x=shuffle(x1,x2,mask)
3166 // where v1.size() == v2.size()
3167 // ==>
3168 // x=shuffle(v1,v2,newMask)
3169 // newMask[i] = (mask[i] < x1.size())
3170 // ? mask1[mask[i]] : mask2[mask[i]-x1.size()]+v1.size()
3171 //
3172 // Here we are really conservative:
3173 // we are absolutely afraid of producing a shuffle mask not in the input
3174 // program, because the code gen may not be smart enough to turn a merged
3175 // shuffle into two specific shuffles: it may produce worse code. As such,
3176 // we only merge two shuffles if the result is either a splat or one of the
3177 // input shuffle masks. In this case, merging the shuffles just removes
3178 // one instruction, which we know is safe. This is good for things like
3179 // turning: (splat(splat)) -> splat, or
3180 // merge(V[0..n], V[n+1..2n]) -> V[0..2n]
3181 ShuffleVectorInst* LHSShuffle = dyn_cast<ShuffleVectorInst>(Val: LHS);
3182 ShuffleVectorInst* RHSShuffle = dyn_cast<ShuffleVectorInst>(Val: RHS);
3183 if (LHSShuffle)
3184 if (!match(V: LHSShuffle->getOperand(i_nocapture: 1), P: m_Poison()) &&
3185 !match(V: RHS, P: m_Poison()))
3186 LHSShuffle = nullptr;
3187 if (RHSShuffle)
3188 if (!match(V: RHSShuffle->getOperand(i_nocapture: 1), P: m_Poison()))
3189 RHSShuffle = nullptr;
3190 if (!LHSShuffle && !RHSShuffle)
3191 return MadeChange ? &SVI : nullptr;
3192
3193 Value* LHSOp0 = nullptr;
3194 Value* LHSOp1 = nullptr;
3195 Value* RHSOp0 = nullptr;
3196 unsigned LHSOp0Width = 0;
3197 unsigned RHSOp0Width = 0;
3198 if (LHSShuffle) {
3199 LHSOp0 = LHSShuffle->getOperand(i_nocapture: 0);
3200 LHSOp1 = LHSShuffle->getOperand(i_nocapture: 1);
3201 LHSOp0Width = cast<FixedVectorType>(Val: LHSOp0->getType())->getNumElements();
3202 }
3203 if (RHSShuffle) {
3204 RHSOp0 = RHSShuffle->getOperand(i_nocapture: 0);
3205 RHSOp0Width = cast<FixedVectorType>(Val: RHSOp0->getType())->getNumElements();
3206 }
3207 Value* newLHS = LHS;
3208 Value* newRHS = RHS;
3209 if (LHSShuffle) {
3210 // case 1
3211 if (match(V: RHS, P: m_Poison())) {
3212 newLHS = LHSOp0;
3213 newRHS = LHSOp1;
3214 }
3215 // case 2 or 4
3216 else if (LHSOp0Width == LHSWidth) {
3217 newLHS = LHSOp0;
3218 }
3219 }
3220 // case 3 or 4
3221 if (RHSShuffle && RHSOp0Width == LHSWidth) {
3222 newRHS = RHSOp0;
3223 }
3224 // case 4
3225 if (LHSOp0 == RHSOp0) {
3226 newLHS = LHSOp0;
3227 newRHS = nullptr;
3228 }
3229
3230 if (newLHS == LHS && newRHS == RHS)
3231 return MadeChange ? &SVI : nullptr;
3232
3233 ArrayRef<int> LHSMask;
3234 ArrayRef<int> RHSMask;
3235 if (newLHS != LHS)
3236 LHSMask = LHSShuffle->getShuffleMask();
3237 if (RHSShuffle && newRHS != RHS)
3238 RHSMask = RHSShuffle->getShuffleMask();
3239
3240 unsigned newLHSWidth = (newLHS != LHS) ? LHSOp0Width : LHSWidth;
3241 SmallVector<int, 16> newMask;
3242 bool isSplat = true;
3243 int SplatElt = -1;
3244 // Create a new mask for the new ShuffleVectorInst so that the new
3245 // ShuffleVectorInst is equivalent to the original one.
3246 for (unsigned i = 0; i < VWidth; ++i) {
3247 int eltMask;
3248 if (Mask[i] < 0) {
3249 // This element is a poison value.
3250 eltMask = -1;
3251 } else if (Mask[i] < (int)LHSWidth) {
3252 // This element is from left hand side vector operand.
3253 //
3254 // If LHS is going to be replaced (case 1, 2, or 4), calculate the
3255 // new mask value for the element.
3256 if (newLHS != LHS) {
3257 eltMask = LHSMask[Mask[i]];
3258 // If the value selected is an poison value, explicitly specify it
3259 // with a -1 mask value.
3260 if (eltMask >= (int)LHSOp0Width && isa<PoisonValue>(Val: LHSOp1))
3261 eltMask = -1;
3262 } else
3263 eltMask = Mask[i];
3264 } else {
3265 // This element is from right hand side vector operand
3266 //
3267 // If the value selected is a poison value, explicitly specify it
3268 // with a -1 mask value. (case 1)
3269 if (match(V: RHS, P: m_Poison()))
3270 eltMask = -1;
3271 // If RHS is going to be replaced (case 3 or 4), calculate the
3272 // new mask value for the element.
3273 else if (newRHS != RHS) {
3274 eltMask = RHSMask[Mask[i]-LHSWidth];
3275 // If the value selected is an poison value, explicitly specify it
3276 // with a -1 mask value.
3277 if (eltMask >= (int)RHSOp0Width) {
3278 assert(match(RHSShuffle->getOperand(1), m_Poison()) &&
3279 "should have been check above");
3280 eltMask = -1;
3281 }
3282 } else
3283 eltMask = Mask[i]-LHSWidth;
3284
3285 // If LHS's width is changed, shift the mask value accordingly.
3286 // If newRHS == nullptr, i.e. LHSOp0 == RHSOp0, we want to remap any
3287 // references from RHSOp0 to LHSOp0, so we don't need to shift the mask.
3288 // If newRHS == newLHS, we want to remap any references from newRHS to
3289 // newLHS so that we can properly identify splats that may occur due to
3290 // obfuscation across the two vectors.
3291 if (eltMask >= 0 && newRHS != nullptr && newLHS != newRHS)
3292 eltMask += newLHSWidth;
3293 }
3294
3295 // Check if this could still be a splat.
3296 if (eltMask >= 0) {
3297 if (SplatElt >= 0 && SplatElt != eltMask)
3298 isSplat = false;
3299 SplatElt = eltMask;
3300 }
3301
3302 newMask.push_back(Elt: eltMask);
3303 }
3304
3305 // If the result mask is equal to one of the original shuffle masks,
3306 // or is a splat, do the replacement.
3307 if (isSplat || newMask == LHSMask || newMask == RHSMask || newMask == Mask) {
3308 if (!newRHS)
3309 newRHS = PoisonValue::get(T: newLHS->getType());
3310 return new ShuffleVectorInst(newLHS, newRHS, newMask);
3311 }
3312
3313 return MadeChange ? &SVI : nullptr;
3314}
3315
3316/// Given the following de-interleaving shufflevectors and the consuming zexts:
3317/// ```
3318/// %f0 = shufflevector <8 x i32> %v, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
3319/// %f1 = shufflevector <8 x i32> %v, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
3320/// %z0 = zext <4 x i32> %f0 to <4 x i64>
3321/// %z1 = zext <4 x i32> %f1 to <4 x i64>
3322/// ```
3323/// We can actually bitcast the input value, `%v` first before replacing zexts
3324/// with simple arithmetics on this new bitcast:
3325/// ```
3326/// %bc = bitcast <8 x i32> %v to <4 x i64>
3327// %z0 = and <4 x i64> %bc, splat (i64 4294967295)
3328// %z1 = lshr <4 x i64> %bc, splat (i64 32)
3329/// ```
3330/// This transformation is almost always benefitial as shufflevector is more
3331/// expensive than normal arithmetics.
3332Instruction *
3333InstCombinerImpl::foldExtractionOfVectorDeinterleave(ZExtInst &RootZExt) {
3334 // This pattern involves bitcast that is not compatible with big endian.
3335 if (DL.isBigEndian())
3336 return nullptr;
3337
3338 // The actual value that got de-interleaved.
3339 Value *DIV;
3340
3341 using namespace PatternMatch;
3342 Instruction *SVI = nullptr, *DI = nullptr;
3343 if (!match(
3344 V: &RootZExt,
3345 P: m_ZExt(Op: m_CombineOr(
3346 Ps: m_ExtractValue(V: m_Instruction(I&: DI, P: m_Deinterleave2(Op: m_Value(V&: DIV)))),
3347 Ps: m_Instruction(I&: SVI, P: m_Shuffle(v1: m_Value(), v2: m_Value()))))))
3348 return nullptr;
3349
3350 auto isDeinterleaveShuffle =
3351 [](Instruction *I) -> std::pair<Value *, unsigned> {
3352 Value *V;
3353 ArrayRef<int> ShuffleMask;
3354 unsigned Index;
3355 if (match(V: I, P: m_Shuffle(v1: m_Value(V), v2: m_Undef(), mask: m_Mask(ShuffleMask))) &&
3356 isa<FixedVectorType>(Val: V->getType())) {
3357 unsigned NumInputElements =
3358 cast<VectorType>(Val: V->getType())->getElementCount().getFixedValue();
3359 if (ShuffleVectorInst::isDeInterleaveMaskOfFactor(Mask: ShuffleMask, Factor: 2,
3360 Index) &&
3361 Index < 2 &&
3362 ShuffleVectorInst::isSingleSourceMask(Mask: ShuffleMask,
3363 NumSrcElts: NumInputElements) &&
3364 ShuffleMask.size() * 2 == NumInputElements)
3365 return {V, Index};
3366 }
3367 return {nullptr, UINT_MAX};
3368 };
3369
3370 // Validate either the shufflevector or the vector.deinterleave2 and obtain
3371 // the value they're de-interleaving.
3372 if (SVI) {
3373 // We will find other shufflevectors later.
3374 DIV = isDeinterleaveShuffle(SVI).first;
3375 if (!DIV)
3376 return nullptr;
3377 } else {
3378 // We should already capture the value that got de-interleaved (i.e. DIV).
3379 assert(DI && DIV);
3380 if (!all_of(Range: DI->users(), P: [](User *Usr) -> bool {
3381 auto *EV = dyn_cast<ExtractValueInst>(Val: Usr);
3382 return EV && EV->getNumIndices() == 1;
3383 }))
3384 return nullptr;
3385 }
3386
3387 auto *InputVecTy = dyn_cast<VectorType>(Val: DIV->getType());
3388 if (!InputVecTy)
3389 return nullptr;
3390 auto *InElementTy = dyn_cast<IntegerType>(Val: InputVecTy->getElementType());
3391 if (!InElementTy)
3392 return nullptr;
3393 if (!InputVecTy->getElementCount().isKnownEven())
3394 return nullptr;
3395
3396 // {Field instruction, Field index}
3397 SmallVector<std::pair<Instruction *, unsigned>, 4> Fields;
3398 if (SVI) {
3399 for (auto *Usr : DIV->users()) {
3400 auto *FieldI = dyn_cast<Instruction>(Val: Usr);
3401 if (!FieldI)
3402 continue;
3403 auto [V, Index] = isDeinterleaveShuffle(FieldI);
3404 if (V != DIV)
3405 continue;
3406 assert(Index < 2);
3407 // Find the earliest field extraction instruction.
3408 if (FieldI->getParent() != SVI->getParent())
3409 continue;
3410 if (FieldI != SVI && FieldI->comesBefore(Other: SVI))
3411 SVI = FieldI;
3412 Fields.push_back(Elt: {FieldI, Index});
3413 }
3414 } else {
3415 // llvm.vector.deinterleave2.
3416 for (User *Field : DI->users()) {
3417 auto *FieldI = cast<ExtractValueInst>(Val: Field);
3418 unsigned FieldIdx = *FieldI->idx_begin();
3419 assert(FieldIdx < 2);
3420 Fields.push_back(Elt: {FieldI, FieldIdx});
3421 }
3422 }
3423
3424 // {field to be replaced, field index}
3425 SmallVector<std::pair<ZExtInst *, unsigned>, 4> FieldReplacements;
3426 // We commit the transformation only if all the field users can be replaced,
3427 // otherwise the primary de-interleaving construction, regardless of
3428 // llvm.vector.deinterleave2 or shufflevectors, will still be there.
3429 for (auto [Field, FieldIdx] : Fields) {
3430 for (User *FieldUsr : Field->users()) {
3431 auto *ZExt = dyn_cast<ZExtInst>(Val: FieldUsr);
3432 if (!ZExt)
3433 return nullptr;
3434 // Only if it's doubling the element size.
3435 if (ZExt->getDestTy() != ZExt->getSrcTy()->getExtendedType())
3436 return nullptr;
3437 FieldReplacements.push_back(Elt: {ZExt, FieldIdx});
3438 }
3439 }
3440
3441 // This will insert replacement instructions before all the fields users.
3442 Builder.SetInsertPoint(DI ? DI : SVI);
3443
3444 // Double the element size but half the vector length.
3445 auto *BitcastedTy = VectorType::getExtendedElementVectorType(VTy: InputVecTy);
3446 BitcastedTy = VectorType::getHalfElementsVectorType(VTy: BitcastedTy);
3447 // Since we're going to "merge" lanes via bitcast, we need to freeze any
3448 // potential poison lanes first.
3449 Value *Freeze = Builder.CreateFreeze(V: DIV);
3450 Value *Bitcast = Builder.CreateBitCast(V: Freeze, DestTy: BitcastedTy);
3451 unsigned InElementBitWidth = InElementTy->getBitWidth();
3452 auto Mask = APInt::getLowBitsSet(numBits: InElementBitWidth * 2, loBitsSet: InElementBitWidth);
3453 Value *NewField0 = Builder.CreateAnd(LHS: Bitcast, RHS: Mask);
3454 Value *NewField1 = Builder.CreateLShr(LHS: Bitcast, RHS: InElementBitWidth);
3455
3456 for (auto [I, Idx] : FieldReplacements) {
3457 assert(Idx < 2 && "unsupported field index");
3458 replaceInstUsesWith(I&: *I, V: Idx ? NewField1 : NewField0);
3459 // Make sure the old ZExt are in the worklist so that they
3460 // can be removed in the following iterations.
3461 addToWorklist(I);
3462 }
3463
3464 return &RootZExt;
3465}
3466