1//===- InstCombineVectorOps.cpp -------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements instcombine for ExtractElement, InsertElement and
10// ShuffleVector.
11//
12//===----------------------------------------------------------------------===//
13
14#include "InstCombineInternal.h"
15#include "llvm/ADT/APInt.h"
16#include "llvm/ADT/ArrayRef.h"
17#include "llvm/ADT/DenseMap.h"
18#include "llvm/ADT/STLExtras.h"
19#include "llvm/ADT/SmallBitVector.h"
20#include "llvm/ADT/SmallVector.h"
21#include "llvm/ADT/Statistic.h"
22#include "llvm/Analysis/InstructionSimplify.h"
23#include "llvm/Analysis/VectorUtils.h"
24#include "llvm/IR/BasicBlock.h"
25#include "llvm/IR/Constant.h"
26#include "llvm/IR/Constants.h"
27#include "llvm/IR/DerivedTypes.h"
28#include "llvm/IR/InstrTypes.h"
29#include "llvm/IR/Instruction.h"
30#include "llvm/IR/Instructions.h"
31#include "llvm/IR/Operator.h"
32#include "llvm/IR/PatternMatch.h"
33#include "llvm/IR/Type.h"
34#include "llvm/IR/User.h"
35#include "llvm/IR/Value.h"
36#include "llvm/Support/Casting.h"
37#include "llvm/Support/ErrorHandling.h"
38#include "llvm/Transforms/InstCombine/InstCombiner.h"
39#include <cassert>
40#include <cstdint>
41#include <iterator>
42#include <utility>
43
44#define DEBUG_TYPE "instcombine"
45
46using namespace llvm;
47using namespace PatternMatch;
48
49STATISTIC(NumAggregateReconstructionsSimplified,
50 "Number of aggregate reconstructions turned into reuse of the "
51 "original aggregate");
52
53/// Return true if the value is cheaper to scalarize than it is to leave as a
54/// vector operation. If the extract index \p EI is a constant integer then
55/// some operations may be cheap to scalarize.
56///
57/// FIXME: It's possible to create more instructions than previously existed.
58static bool cheapToScalarize(Value *V, Value *EI) {
59 ConstantInt *CEI = dyn_cast<ConstantInt>(Val: EI);
60
61 // If we can pick a scalar constant value out of a vector, that is free.
62 if (auto *C = dyn_cast<Constant>(Val: V))
63 return CEI || C->getSplatValue();
64
65 if (CEI && match(V, P: m_Intrinsic<Intrinsic::stepvector>())) {
66 ElementCount EC = cast<VectorType>(Val: V->getType())->getElementCount();
67 // Index needs to be lower than the minimum size of the vector, because
68 // for scalable vector, the vector size is known at run time.
69 return CEI->getValue().ult(RHS: EC.getKnownMinValue());
70 }
71
72 // An insertelement to the same constant index as our extract will simplify
73 // to the scalar inserted element. An insertelement to a different constant
74 // index is irrelevant to our extract.
75 if (match(V, P: m_InsertElt(Val: m_Value(), Elt: m_Value(), Idx: m_ConstantInt())))
76 return CEI;
77
78 if (match(V, P: m_OneUse(SubPattern: m_Load(Op: m_Value()))))
79 return true;
80
81 if (match(V, P: m_OneUse(SubPattern: m_UnOp())))
82 return true;
83
84 Value *V0, *V1;
85 if (match(V, P: m_OneUse(SubPattern: m_BinOp(L: m_Value(V&: V0), R: m_Value(V&: V1)))))
86 if (cheapToScalarize(V: V0, EI) || cheapToScalarize(V: V1, EI))
87 return true;
88
89 CmpPredicate UnusedPred;
90 if (match(V, P: m_OneUse(SubPattern: m_Cmp(Pred&: UnusedPred, L: m_Value(V&: V0), R: m_Value(V&: V1)))))
91 if (cheapToScalarize(V: V0, EI) || cheapToScalarize(V: V1, EI))
92 return true;
93
94 return false;
95}
96
97// If we have a PHI node with a vector type that is only used to feed
98// itself and be an operand of extractelement at a constant location,
99// try to replace the PHI of the vector type with a PHI of a scalar type.
100Instruction *InstCombinerImpl::scalarizePHI(ExtractElementInst &EI,
101 PHINode *PN) {
102 SmallVector<Instruction *, 2> Extracts;
103 // The users we want the PHI to have are:
104 // 1) The EI ExtractElement (we already know this)
105 // 2) Possibly more ExtractElements with the same index.
106 // 3) Another operand, which will feed back into the PHI.
107 Instruction *PHIUser = nullptr;
108 for (auto *U : PN->users()) {
109 if (ExtractElementInst *EU = dyn_cast<ExtractElementInst>(Val: U)) {
110 if (EI.getIndexOperand() == EU->getIndexOperand())
111 Extracts.push_back(Elt: EU);
112 else
113 return nullptr;
114 } else if (!PHIUser) {
115 PHIUser = cast<Instruction>(Val: U);
116 } else {
117 return nullptr;
118 }
119 }
120
121 if (!PHIUser)
122 return nullptr;
123
124 // Verify that this PHI user has one use, which is the PHI itself,
125 // and that it is a binary operation which is cheap to scalarize.
126 // otherwise return nullptr.
127 if (!PHIUser->hasOneUse() || !(PHIUser->user_back() == PN) ||
128 !(isa<BinaryOperator>(Val: PHIUser)) ||
129 !cheapToScalarize(V: PHIUser, EI: EI.getIndexOperand()))
130 return nullptr;
131
132 // Create a scalar PHI node that will replace the vector PHI node
133 // just before the current PHI node.
134 PHINode *scalarPHI = cast<PHINode>(Val: InsertNewInstWith(
135 New: PHINode::Create(Ty: EI.getType(), NumReservedValues: PN->getNumIncomingValues(), NameStr: ""), Old: PN->getIterator()));
136 // Scalarize each PHI operand.
137 for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) {
138 Value *PHIInVal = PN->getIncomingValue(i);
139 BasicBlock *inBB = PN->getIncomingBlock(i);
140 Value *Elt = EI.getIndexOperand();
141 // If the operand is the PHI induction variable:
142 if (PHIInVal == PHIUser) {
143 // Scalarize the binary operation. One operand is the
144 // scalar PHI, and the other is extracted from the other
145 // vector operand.
146 BinaryOperator *B0 = cast<BinaryOperator>(Val: PHIUser);
147 unsigned opId = (B0->getOperand(i_nocapture: 0) == PN) ? 1 : 0;
148 Value *Op = InsertNewInstWith(
149 New: ExtractElementInst::Create(Vec: B0->getOperand(i_nocapture: opId), Idx: Elt,
150 NameStr: B0->getOperand(i_nocapture: opId)->getName() + ".Elt"),
151 Old: B0->getIterator());
152 // Preserve operand order for binary operation to preserve semantics of
153 // non-commutative operations.
154 Value *FirstOp = (B0->getOperand(i_nocapture: 0) == PN) ? scalarPHI : Op;
155 Value *SecondOp = (B0->getOperand(i_nocapture: 0) == PN) ? Op : scalarPHI;
156 Value *newPHIUser =
157 InsertNewInstWith(New: BinaryOperator::CreateWithCopiedFlags(
158 Opc: B0->getOpcode(), V1: FirstOp, V2: SecondOp, CopyO: B0),
159 Old: B0->getIterator());
160 scalarPHI->addIncoming(V: newPHIUser, BB: inBB);
161 } else {
162 // Scalarize PHI input:
163 Instruction *newEI = ExtractElementInst::Create(Vec: PHIInVal, Idx: Elt, NameStr: "");
164 // Insert the new instruction into the predecessor basic block.
165 Instruction *pos = dyn_cast<Instruction>(Val: PHIInVal);
166 BasicBlock::iterator InsertPos;
167 if (pos && !isa<PHINode>(Val: pos)) {
168 InsertPos = ++pos->getIterator();
169 } else {
170 InsertPos = inBB->getFirstInsertionPt();
171 }
172
173 InsertNewInstWith(New: newEI, Old: InsertPos);
174
175 scalarPHI->addIncoming(V: newEI, BB: inBB);
176 }
177 }
178
179 for (auto *E : Extracts) {
180 replaceInstUsesWith(I&: *E, V: scalarPHI);
181 // Add old extract to worklist for DCE.
182 addToWorklist(I: E);
183 }
184
185 return &EI;
186}
187
188Instruction *InstCombinerImpl::foldBitcastExtElt(ExtractElementInst &Ext) {
189 Value *X;
190 uint64_t ExtIndexC;
191 if (!match(V: Ext.getVectorOperand(), P: m_BitCast(Op: m_Value(V&: X))) ||
192 !match(V: Ext.getIndexOperand(), P: m_ConstantInt(V&: ExtIndexC)))
193 return nullptr;
194
195 ElementCount NumElts =
196 cast<VectorType>(Val: Ext.getVectorOperandType())->getElementCount();
197 Type *DestTy = Ext.getType();
198 unsigned DestWidth = DestTy->getPrimitiveSizeInBits();
199 bool IsBigEndian = DL.isBigEndian();
200
201 // If we are casting an integer to vector and extracting a portion, that is
202 // a shift-right and truncate.
203 if (X->getType()->isIntegerTy()) {
204 assert(isa<FixedVectorType>(Ext.getVectorOperand()->getType()) &&
205 "Expected fixed vector type for bitcast from scalar integer");
206
207 // Big endian requires adjusting the extract index since MSB is at index 0.
208 // LittleEndian: extelt (bitcast i32 X to v4i8), 0 -> trunc i32 X to i8
209 // BigEndian: extelt (bitcast i32 X to v4i8), 0 -> trunc i32 (X >> 24) to i8
210 if (IsBigEndian)
211 ExtIndexC = NumElts.getKnownMinValue() - 1 - ExtIndexC;
212 unsigned ShiftAmountC = ExtIndexC * DestWidth;
213 if ((!ShiftAmountC ||
214 isDesirableIntType(BitWidth: X->getType()->getPrimitiveSizeInBits())) &&
215 Ext.getVectorOperand()->hasOneUse()) {
216 if (ShiftAmountC)
217 X = Builder.CreateLShr(LHS: X, RHS: ShiftAmountC, Name: "extelt.offset");
218 if (DestTy->isFloatingPointTy()) {
219 Type *DstIntTy = IntegerType::getIntNTy(C&: X->getContext(), N: DestWidth);
220 Value *Trunc = Builder.CreateTrunc(V: X, DestTy: DstIntTy);
221 return new BitCastInst(Trunc, DestTy);
222 }
223 return new TruncInst(X, DestTy);
224 }
225 }
226
227 if (!X->getType()->isVectorTy())
228 return nullptr;
229
230 // If this extractelement is using a bitcast from a vector of the same number
231 // of elements, see if we can find the source element from the source vector:
232 // extelt (bitcast VecX), IndexC --> bitcast X[IndexC]
233 auto *SrcTy = cast<VectorType>(Val: X->getType());
234 ElementCount NumSrcElts = SrcTy->getElementCount();
235 if (NumSrcElts == NumElts)
236 if (Value *Elt = findScalarElement(V: X, EltNo: ExtIndexC))
237 return new BitCastInst(Elt, DestTy);
238
239 assert(NumSrcElts.isScalable() == NumElts.isScalable() &&
240 "Src and Dst must be the same sort of vector type");
241
242 // If the source elements are wider than the destination, try to shift and
243 // truncate a subset of scalar bits of an insert op.
244 if (NumSrcElts.getKnownMinValue() < NumElts.getKnownMinValue()) {
245 Value *Scalar;
246 Value *Vec;
247 uint64_t InsIndexC;
248 if (!match(V: X, P: m_InsertElt(Val: m_Value(V&: Vec), Elt: m_Value(V&: Scalar),
249 Idx: m_ConstantInt(V&: InsIndexC))))
250 return nullptr;
251
252 // The extract must be from the subset of vector elements that we inserted
253 // into. Example: if we inserted element 1 of a <2 x i64> and we are
254 // extracting an i16 (narrowing ratio = 4), then this extract must be from 1
255 // of elements 4-7 of the bitcasted vector.
256 unsigned NarrowingRatio =
257 NumElts.getKnownMinValue() / NumSrcElts.getKnownMinValue();
258
259 if (ExtIndexC / NarrowingRatio != InsIndexC) {
260 // Remove insertelement, if we don't use the inserted element.
261 // extractelement (bitcast (insertelement (Vec, b)), a) ->
262 // extractelement (bitcast (Vec), a)
263 // FIXME: this should be removed to SimplifyDemandedVectorElts,
264 // once scale vectors are supported.
265 if (X->hasOneUse() && Ext.getVectorOperand()->hasOneUse()) {
266 Value *NewBC = Builder.CreateBitCast(V: Vec, DestTy: Ext.getVectorOperandType());
267 return ExtractElementInst::Create(Vec: NewBC, Idx: Ext.getIndexOperand());
268 }
269 return nullptr;
270 }
271
272 // We are extracting part of the original scalar. How that scalar is
273 // inserted into the vector depends on the endian-ness. Example:
274 // Vector Byte Elt Index: 0 1 2 3 4 5 6 7
275 // +--+--+--+--+--+--+--+--+
276 // inselt <2 x i32> V, <i32> S, 1: |V0|V1|V2|V3|S0|S1|S2|S3|
277 // extelt <4 x i16> V', 3: | |S2|S3|
278 // +--+--+--+--+--+--+--+--+
279 // If this is little-endian, S2|S3 are the MSB of the 32-bit 'S' value.
280 // If this is big-endian, S2|S3 are the LSB of the 32-bit 'S' value.
281 // In this example, we must right-shift little-endian. Big-endian is just a
282 // truncate.
283 unsigned Chunk = ExtIndexC % NarrowingRatio;
284 if (IsBigEndian)
285 Chunk = NarrowingRatio - 1 - Chunk;
286
287 // Bail out if this is an FP vector to FP vector sequence. That would take
288 // more instructions than we started with unless there is no shift, and it
289 // may not be handled as well in the backend.
290 bool NeedSrcBitcast = SrcTy->getScalarType()->isFloatingPointTy();
291 bool NeedDestBitcast = DestTy->isFloatingPointTy();
292 if (NeedSrcBitcast && NeedDestBitcast)
293 return nullptr;
294
295 unsigned SrcWidth = SrcTy->getScalarSizeInBits();
296 unsigned ShAmt = Chunk * DestWidth;
297
298 // TODO: This limitation is more strict than necessary. We could sum the
299 // number of new instructions and subtract the number eliminated to know if
300 // we can proceed.
301 if (!X->hasOneUse() || !Ext.getVectorOperand()->hasOneUse())
302 if (NeedSrcBitcast || NeedDestBitcast)
303 return nullptr;
304
305 if (NeedSrcBitcast) {
306 Type *SrcIntTy = IntegerType::getIntNTy(C&: Scalar->getContext(), N: SrcWidth);
307 Scalar = Builder.CreateBitCast(V: Scalar, DestTy: SrcIntTy);
308 }
309
310 if (ShAmt) {
311 // Bail out if we could end with more instructions than we started with.
312 if (!Ext.getVectorOperand()->hasOneUse())
313 return nullptr;
314 Scalar = Builder.CreateLShr(LHS: Scalar, RHS: ShAmt);
315 }
316
317 if (NeedDestBitcast) {
318 Type *DestIntTy = IntegerType::getIntNTy(C&: Scalar->getContext(), N: DestWidth);
319 return new BitCastInst(Builder.CreateTrunc(V: Scalar, DestTy: DestIntTy), DestTy);
320 }
321 return new TruncInst(Scalar, DestTy);
322 }
323
324 return nullptr;
325}
326
327/// Find elements of V demanded by UserInstr. If returns false, we were not able
328/// to determine all elements.
329static bool findDemandedEltsBySingleUser(Value *V, Instruction *UserInstr,
330 APInt &UnionUsedElts) {
331 unsigned VWidth = cast<FixedVectorType>(Val: V->getType())->getNumElements();
332
333 switch (UserInstr->getOpcode()) {
334 case Instruction::ExtractElement: {
335 ExtractElementInst *EEI = cast<ExtractElementInst>(Val: UserInstr);
336 assert(EEI->getVectorOperand() == V);
337 ConstantInt *EEIIndexC = dyn_cast<ConstantInt>(Val: EEI->getIndexOperand());
338 if (EEIIndexC && EEIIndexC->getValue().ult(RHS: VWidth)) {
339 UnionUsedElts.setBit(EEIIndexC->getZExtValue());
340 return true;
341 }
342 break;
343 }
344 case Instruction::ShuffleVector: {
345 ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(Val: UserInstr);
346 unsigned MaskNumElts =
347 cast<FixedVectorType>(Val: UserInstr->getType())->getNumElements();
348
349 for (auto I : llvm::seq(Size: MaskNumElts)) {
350 unsigned MaskVal = Shuffle->getMaskValue(Elt: I);
351 if (MaskVal == -1u || MaskVal >= 2 * VWidth)
352 continue;
353 if (Shuffle->getOperand(i_nocapture: 0) == V && (MaskVal < VWidth))
354 UnionUsedElts.setBit(MaskVal);
355 if (Shuffle->getOperand(i_nocapture: 1) == V &&
356 ((MaskVal >= VWidth) && (MaskVal < 2 * VWidth)))
357 UnionUsedElts.setBit(MaskVal - VWidth);
358 }
359 return true;
360 }
361 default:
362 break;
363 }
364
365 return false;
366}
367
368/// Find union of elements of V demanded by all its users.
369/// If it is known by querying findDemandedEltsBySingleUser that
370/// no user demands an element of V, then the corresponding bit
371/// remains unset in the returned value.
372static APInt findDemandedEltsByAllUsers(Value *V) {
373 unsigned VWidth = cast<FixedVectorType>(Val: V->getType())->getNumElements();
374
375 APInt UnionUsedElts(VWidth, 0);
376 for (const Use &U : V->uses()) {
377 if (Instruction *I = dyn_cast<Instruction>(Val: U.getUser())) {
378 if (!findDemandedEltsBySingleUser(V, UserInstr: I, UnionUsedElts))
379 return APInt::getAllOnes(numBits: VWidth);
380 } else {
381 UnionUsedElts = APInt::getAllOnes(numBits: VWidth);
382 break;
383 }
384
385 if (UnionUsedElts.isAllOnes())
386 break;
387 }
388
389 return UnionUsedElts;
390}
391
392/// Given a constant index for a extractelement or insertelement instruction,
393/// return it with the canonical type if it isn't already canonical. We
394/// arbitrarily pick 64 bit as our canonical type. The actual bitwidth doesn't
395/// matter, we just want a consistent type to simplify CSE.
396static ConstantInt *getPreferredVectorIndex(ConstantInt *IndexC) {
397 const unsigned IndexBW = IndexC->getBitWidth();
398 if (IndexBW == 64 || IndexC->getValue().getActiveBits() > 64)
399 return nullptr;
400 return ConstantInt::get(Context&: IndexC->getContext(),
401 V: IndexC->getValue().zextOrTrunc(width: 64));
402}
403
404Instruction *InstCombinerImpl::visitExtractElementInst(ExtractElementInst &EI) {
405 Value *SrcVec = EI.getVectorOperand();
406 Value *Index = EI.getIndexOperand();
407 if (Value *V = simplifyExtractElementInst(Vec: SrcVec, Idx: Index,
408 Q: SQ.getWithInstruction(I: &EI)))
409 return replaceInstUsesWith(I&: EI, V);
410
411 // extractelt (select %x, %vec1, %vec2), %const ->
412 // select %x, %vec1[%const], %vec2[%const]
413 // TODO: Support constant folding of multiple select operands:
414 // extractelt (select %x, %vec1, %vec2), (select %x, %c1, %c2)
415 // If the extractelement will for instance try to do out of bounds accesses
416 // because of the values of %c1 and/or %c2, the sequence could be optimized
417 // early. This is currently not possible because constant folding will reach
418 // an unreachable assertion if it doesn't find a constant operand.
419 if (SelectInst *SI = dyn_cast<SelectInst>(Val: EI.getVectorOperand()))
420 if (SI->getCondition()->getType()->isIntegerTy() &&
421 isa<Constant>(Val: EI.getIndexOperand()))
422 if (Instruction *R = FoldOpIntoSelect(Op&: EI, SI))
423 return R;
424
425 // If extracting a specified index from the vector, see if we can recursively
426 // find a previously computed scalar that was inserted into the vector.
427 auto *IndexC = dyn_cast<ConstantInt>(Val: Index);
428 bool HasKnownValidIndex = false;
429 if (IndexC) {
430 // Canonicalize type of constant indices to i64 to simplify CSE
431 if (auto *NewIdx = getPreferredVectorIndex(IndexC))
432 return replaceOperand(I&: EI, OpNum: 1, V: NewIdx);
433
434 ElementCount EC = EI.getVectorOperandType()->getElementCount();
435 unsigned NumElts = EC.getKnownMinValue();
436 HasKnownValidIndex = IndexC->getValue().ult(RHS: NumElts);
437
438 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: SrcVec)) {
439 Intrinsic::ID IID = II->getIntrinsicID();
440 // Index needs to be lower than the minimum size of the vector, because
441 // for scalable vector, the vector size is known at run time.
442 if (IID == Intrinsic::stepvector && IndexC->getValue().ult(RHS: NumElts)) {
443 Type *Ty = EI.getType();
444 unsigned BitWidth = Ty->getIntegerBitWidth();
445 Value *Idx;
446 // Return index when its value does not exceed the allowed limit
447 // for the element type of the vector.
448 // TODO: Truncate out-of-range values.
449 if (IndexC->getValue().getActiveBits() <= BitWidth)
450 Idx = ConstantInt::get(Ty, V: IndexC->getValue().zextOrTrunc(width: BitWidth));
451 else
452 return nullptr;
453 return replaceInstUsesWith(I&: EI, V: Idx);
454 }
455 }
456
457 // InstSimplify should handle cases where the index is invalid.
458 // For fixed-length vector, it's invalid to extract out-of-range element.
459 if (!EC.isScalable() && IndexC->getValue().uge(RHS: NumElts))
460 return nullptr;
461
462 if (Instruction *I = foldBitcastExtElt(Ext&: EI))
463 return I;
464
465 // If there's a vector PHI feeding a scalar use through this extractelement
466 // instruction, try to scalarize the PHI.
467 if (auto *Phi = dyn_cast<PHINode>(Val: SrcVec))
468 if (Instruction *ScalarPHI = scalarizePHI(EI, PN: Phi))
469 return ScalarPHI;
470 }
471
472 // If SrcVec is a subvector starting at index 0, extract from the
473 // wider source vector
474 Value *V;
475 if (match(V: SrcVec,
476 P: m_Intrinsic<Intrinsic::vector_extract>(Op0: m_Value(V), Op1: m_Zero())))
477 return ExtractElementInst::Create(Vec: V, Idx: Index);
478
479 // TODO come up with a n-ary matcher that subsumes both unary and
480 // binary matchers.
481 UnaryOperator *UO;
482 if (match(V: SrcVec, P: m_UnOp(I&: UO)) && cheapToScalarize(V: SrcVec, EI: Index)) {
483 // extelt (unop X), Index --> unop (extelt X, Index)
484 Value *X = UO->getOperand(i_nocapture: 0);
485 Value *E = Builder.CreateExtractElement(Vec: X, Idx: Index);
486 return UnaryOperator::CreateWithCopiedFlags(Opc: UO->getOpcode(), V: E, CopyO: UO);
487 }
488
489 // If the binop is not speculatable, we cannot hoist the extractelement if
490 // it may make the operand poison.
491 BinaryOperator *BO;
492 if (match(V: SrcVec, P: m_BinOp(I&: BO)) && cheapToScalarize(V: SrcVec, EI: Index) &&
493 (HasKnownValidIndex ||
494 isSafeToSpeculativelyExecuteWithVariableReplaced(I: BO))) {
495 // extelt (binop X, Y), Index --> binop (extelt X, Index), (extelt Y, Index)
496 Value *X = BO->getOperand(i_nocapture: 0), *Y = BO->getOperand(i_nocapture: 1);
497 Value *E0 = Builder.CreateExtractElement(Vec: X, Idx: Index);
498 Value *E1 = Builder.CreateExtractElement(Vec: Y, Idx: Index);
499 return BinaryOperator::CreateWithCopiedFlags(Opc: BO->getOpcode(), V1: E0, V2: E1, CopyO: BO);
500 }
501
502 Value *X, *Y;
503 CmpPredicate Pred;
504 if (match(V: SrcVec, P: m_Cmp(Pred, L: m_Value(V&: X), R: m_Value(V&: Y))) &&
505 cheapToScalarize(V: SrcVec, EI: Index)) {
506 // extelt (cmp X, Y), Index --> cmp (extelt X, Index), (extelt Y, Index)
507 Value *E0 = Builder.CreateExtractElement(Vec: X, Idx: Index);
508 Value *E1 = Builder.CreateExtractElement(Vec: Y, Idx: Index);
509 CmpInst *SrcCmpInst = cast<CmpInst>(Val: SrcVec);
510 return CmpInst::CreateWithCopiedFlags(Op: SrcCmpInst->getOpcode(), Pred, S1: E0, S2: E1,
511 FlagsSource: SrcCmpInst);
512 }
513
514 if (auto *I = dyn_cast<Instruction>(Val: SrcVec)) {
515 if (auto *IE = dyn_cast<InsertElementInst>(Val: I)) {
516 // instsimplify already handled the case where the indices are constants
517 // and equal by value, if both are constants, they must not be the same
518 // value, extract from the pre-inserted value instead.
519 if (isa<Constant>(Val: IE->getOperand(i_nocapture: 2)) && IndexC)
520 return replaceOperand(I&: EI, OpNum: 0, V: IE->getOperand(i_nocapture: 0));
521 } else if (auto *GEP = dyn_cast<GetElementPtrInst>(Val: I)) {
522 auto *VecType = cast<VectorType>(Val: GEP->getType());
523 ElementCount EC = VecType->getElementCount();
524 uint64_t IdxVal = IndexC ? IndexC->getZExtValue() : 0;
525 if (IndexC && IdxVal < EC.getKnownMinValue() && GEP->hasOneUse()) {
526 // Find out why we have a vector result - these are a few examples:
527 // 1. We have a scalar pointer and a vector of indices, or
528 // 2. We have a vector of pointers and a scalar index, or
529 // 3. We have a vector of pointers and a vector of indices, etc.
530 // Here we only consider combining when there is exactly one vector
531 // operand, since the optimization is less obviously a win due to
532 // needing more than one extractelements.
533
534 unsigned VectorOps =
535 llvm::count_if(Range: GEP->operands(), P: [](const Value *V) {
536 return isa<VectorType>(Val: V->getType());
537 });
538 if (VectorOps == 1) {
539 Value *NewPtr = GEP->getPointerOperand();
540 if (isa<VectorType>(Val: NewPtr->getType()))
541 NewPtr = Builder.CreateExtractElement(Vec: NewPtr, Idx: IndexC);
542
543 SmallVector<Value *> NewOps;
544 for (unsigned I = 1; I != GEP->getNumOperands(); ++I) {
545 Value *Op = GEP->getOperand(i_nocapture: I);
546 if (isa<VectorType>(Val: Op->getType()))
547 NewOps.push_back(Elt: Builder.CreateExtractElement(Vec: Op, Idx: IndexC));
548 else
549 NewOps.push_back(Elt: Op);
550 }
551
552 GetElementPtrInst *NewGEP = GetElementPtrInst::Create(
553 PointeeType: GEP->getSourceElementType(), Ptr: NewPtr, IdxList: NewOps);
554 NewGEP->setNoWrapFlags(GEP->getNoWrapFlags());
555 return NewGEP;
556 }
557 }
558 } else if (auto *SVI = dyn_cast<ShuffleVectorInst>(Val: I)) {
559 int SplatIndex = getSplatIndex(Mask: SVI->getShuffleMask());
560 // We know the all-0 splat must be reading from the first operand, even
561 // in the case of scalable vectors (vscale is always > 0).
562 if (SplatIndex == 0)
563 return ExtractElementInst::Create(Vec: SVI->getOperand(i_nocapture: 0),
564 Idx: Builder.getInt64(C: 0));
565
566 if (isa<FixedVectorType>(Val: SVI->getType())) {
567 std::optional<int> SrcIdx;
568 // getSplatIndex returns -1 to mean not-found.
569 if (SplatIndex != -1)
570 SrcIdx = SplatIndex;
571 else if (ConstantInt *CI = dyn_cast<ConstantInt>(Val: Index))
572 SrcIdx = SVI->getMaskValue(Elt: CI->getZExtValue());
573
574 if (SrcIdx) {
575 Value *Src;
576 unsigned LHSWidth =
577 cast<FixedVectorType>(Val: SVI->getOperand(i_nocapture: 0)->getType())
578 ->getNumElements();
579
580 if (*SrcIdx < 0)
581 return replaceInstUsesWith(I&: EI, V: PoisonValue::get(T: EI.getType()));
582 if (*SrcIdx < (int)LHSWidth)
583 Src = SVI->getOperand(i_nocapture: 0);
584 else {
585 *SrcIdx -= LHSWidth;
586 Src = SVI->getOperand(i_nocapture: 1);
587 }
588 Type *Int64Ty = Type::getInt64Ty(C&: EI.getContext());
589 return ExtractElementInst::Create(
590 Vec: Src, Idx: ConstantInt::get(Ty: Int64Ty, V: *SrcIdx, IsSigned: false));
591 }
592 }
593 } else if (auto *CI = dyn_cast<CastInst>(Val: I)) {
594 // Canonicalize extractelement(cast) -> cast(extractelement).
595 // Bitcasts can change the number of vector elements, and they cost
596 // nothing.
597 // If the CI has only one use, but that use is inside a loop, this
598 // canonicalization is not profitable because it would turn a vector
599 // operation into scalar operations inside the loop. Apply the transform
600 // when:
601 // - the index is constant and CI has one use, or
602 // - the CI and EI are in the same basic block, so the cast won't be sunk
603 // into a loop.
604 if (CI->hasOneUse() && (CI->getOpcode() != Instruction::BitCast) &&
605 (EI.getParent() == CI->getParent() || isa<ConstantInt>(Val: Index))) {
606 Value *EE = Builder.CreateExtractElement(Vec: CI->getOperand(i_nocapture: 0), Idx: Index);
607 return CastInst::Create(CI->getOpcode(), S: EE, Ty: EI.getType());
608 }
609 }
610 }
611
612 // Run demanded elements after other transforms as this can drop flags on
613 // binops. If there's two paths to the same final result, we prefer the
614 // one which doesn't force us to drop flags.
615 if (IndexC) {
616 ElementCount EC = EI.getVectorOperandType()->getElementCount();
617 unsigned NumElts = EC.getKnownMinValue();
618 // This instruction only demands the single element from the input vector.
619 // Skip for scalable type, the number of elements is unknown at
620 // compile-time.
621 if (!EC.isScalable() && NumElts != 1) {
622 // If the input vector has a single use, simplify it based on this use
623 // property.
624 if (SrcVec->hasOneUse()) {
625 APInt PoisonElts(NumElts, 0);
626 APInt DemandedElts(NumElts, 0);
627 DemandedElts.setBit(IndexC->getZExtValue());
628 if (Value *V =
629 SimplifyDemandedVectorElts(V: SrcVec, DemandedElts, PoisonElts))
630 return replaceOperand(I&: EI, OpNum: 0, V);
631 } else {
632 // If the input vector has multiple uses, simplify it based on a union
633 // of all elements used.
634 APInt DemandedElts = findDemandedEltsByAllUsers(V: SrcVec);
635 if (!DemandedElts.isAllOnes()) {
636 APInt PoisonElts(NumElts, 0);
637 if (Value *V = SimplifyDemandedVectorElts(
638 V: SrcVec, DemandedElts, PoisonElts, Depth: 0 /* Depth */,
639 AllowMultipleUsers: true /* AllowMultipleUsers */)) {
640 if (V != SrcVec) {
641 Worklist.addValue(V: SrcVec);
642 SrcVec->replaceAllUsesWith(V);
643 return &EI;
644 }
645 }
646 }
647 }
648 }
649 }
650 return nullptr;
651}
652
653/// If V is a shuffle of values that ONLY returns elements from either LHS or
654/// RHS, return the shuffle mask and true. Otherwise, return false.
655static bool collectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
656 SmallVectorImpl<int> &Mask) {
657 assert(LHS->getType() == RHS->getType() &&
658 "Invalid CollectSingleShuffleElements");
659 unsigned NumElts = cast<FixedVectorType>(Val: V->getType())->getNumElements();
660
661 if (match(V, P: m_Poison())) {
662 Mask.assign(NumElts, Elt: -1);
663 return true;
664 }
665
666 if (V == LHS) {
667 for (unsigned i = 0; i != NumElts; ++i)
668 Mask.push_back(Elt: i);
669 return true;
670 }
671
672 if (V == RHS) {
673 for (unsigned i = 0; i != NumElts; ++i)
674 Mask.push_back(Elt: i + NumElts);
675 return true;
676 }
677
678 if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(Val: V)) {
679 // If this is an insert of an extract from some other vector, include it.
680 Value *VecOp = IEI->getOperand(i_nocapture: 0);
681 Value *ScalarOp = IEI->getOperand(i_nocapture: 1);
682 Value *IdxOp = IEI->getOperand(i_nocapture: 2);
683
684 if (!isa<ConstantInt>(Val: IdxOp))
685 return false;
686 unsigned InsertedIdx = cast<ConstantInt>(Val: IdxOp)->getZExtValue();
687
688 if (isa<PoisonValue>(Val: ScalarOp)) { // inserting poison into vector.
689 // We can handle this if the vector we are inserting into is
690 // transitively ok.
691 if (collectSingleShuffleElements(V: VecOp, LHS, RHS, Mask)) {
692 // If so, update the mask to reflect the inserted poison.
693 Mask[InsertedIdx] = -1;
694 return true;
695 }
696 } else if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(Val: ScalarOp)){
697 if (isa<ConstantInt>(Val: EI->getOperand(i_nocapture: 1))) {
698 unsigned ExtractedIdx =
699 cast<ConstantInt>(Val: EI->getOperand(i_nocapture: 1))->getZExtValue();
700 unsigned NumLHSElts =
701 cast<FixedVectorType>(Val: LHS->getType())->getNumElements();
702
703 // This must be extracting from either LHS or RHS.
704 if (EI->getOperand(i_nocapture: 0) == LHS || EI->getOperand(i_nocapture: 0) == RHS) {
705 // We can handle this if the vector we are inserting into is
706 // transitively ok.
707 if (collectSingleShuffleElements(V: VecOp, LHS, RHS, Mask)) {
708 // If so, update the mask to reflect the inserted value.
709 if (EI->getOperand(i_nocapture: 0) == LHS) {
710 Mask[InsertedIdx % NumElts] = ExtractedIdx;
711 } else {
712 assert(EI->getOperand(0) == RHS);
713 Mask[InsertedIdx % NumElts] = ExtractedIdx + NumLHSElts;
714 }
715 return true;
716 }
717 }
718 }
719 }
720 }
721
722 return false;
723}
724
725/// If we have insertion into a vector that is wider than the vector that we
726/// are extracting from, try to widen the source vector to allow a single
727/// shufflevector to replace one or more insert/extract pairs.
728static bool replaceExtractElements(InsertElementInst *InsElt,
729 ExtractElementInst *ExtElt,
730 InstCombinerImpl &IC) {
731 auto *InsVecType = cast<FixedVectorType>(Val: InsElt->getType());
732 auto *ExtVecType = cast<FixedVectorType>(Val: ExtElt->getVectorOperandType());
733 unsigned NumInsElts = InsVecType->getNumElements();
734 unsigned NumExtElts = ExtVecType->getNumElements();
735
736 // The inserted-to vector must be wider than the extracted-from vector.
737 if (InsVecType->getElementType() != ExtVecType->getElementType() ||
738 NumExtElts >= NumInsElts)
739 return false;
740
741 Value *ExtVecOp = ExtElt->getVectorOperand();
742 // Bail out on constant vectors.
743 if (isa<ConstantData>(Val: ExtVecOp))
744 return false;
745
746 // Create a shuffle mask to widen the extended-from vector using poison
747 // values. The mask selects all of the values of the original vector followed
748 // by as many poison values as needed to create a vector of the same length
749 // as the inserted-to vector.
750 SmallVector<int, 16> ExtendMask;
751 for (unsigned i = 0; i < NumExtElts; ++i)
752 ExtendMask.push_back(Elt: i);
753 for (unsigned i = NumExtElts; i < NumInsElts; ++i)
754 ExtendMask.push_back(Elt: -1);
755
756 auto *ExtVecOpInst = dyn_cast<Instruction>(Val: ExtVecOp);
757 BasicBlock *InsertionBlock = (ExtVecOpInst && !isa<PHINode>(Val: ExtVecOpInst))
758 ? ExtVecOpInst->getParent()
759 : ExtElt->getParent();
760
761 // TODO: This restriction matches the basic block check below when creating
762 // new extractelement instructions. If that limitation is removed, this one
763 // could also be removed. But for now, we just bail out to ensure that we
764 // will replace the extractelement instruction that is feeding our
765 // insertelement instruction. This allows the insertelement to then be
766 // replaced by a shufflevector. If the insertelement is not replaced, we can
767 // induce infinite looping because there's an optimization for extractelement
768 // that will delete our widening shuffle. This would trigger another attempt
769 // here to create that shuffle, and we spin forever.
770 if (InsertionBlock != InsElt->getParent())
771 return false;
772
773 // TODO: This restriction matches the check in visitInsertElementInst() and
774 // prevents an infinite loop caused by not turning the extract/insert pair
775 // into a shuffle. We really should not need either check, but we're lacking
776 // folds for shufflevectors because we're afraid to generate shuffle masks
777 // that the backend can't handle.
778 if (InsElt->hasOneUse() && isa<InsertElementInst>(Val: InsElt->user_back()))
779 return false;
780
781 auto *WideVec = new ShuffleVectorInst(ExtVecOp, ExtendMask);
782
783 // Insert the new shuffle after the vector operand of the extract is defined
784 // (as long as it's not a PHI) or at the start of the basic block of the
785 // extract, so any subsequent extracts in the same basic block can use it.
786 // TODO: Insert before the earliest ExtractElementInst that is replaced.
787 if (ExtVecOpInst && !isa<PHINode>(Val: ExtVecOpInst))
788 WideVec->insertAfter(InsertPos: ExtVecOpInst->getIterator());
789 else
790 IC.InsertNewInstWith(New: WideVec, Old: ExtElt->getParent()->getFirstInsertionPt());
791
792 // Replace extracts from the original narrow vector with extracts from the new
793 // wide vector.
794 for (User *U : ExtVecOp->users()) {
795 ExtractElementInst *OldExt = dyn_cast<ExtractElementInst>(Val: U);
796 if (!OldExt || OldExt->getParent() != WideVec->getParent())
797 continue;
798 auto *NewExt = ExtractElementInst::Create(Vec: WideVec, Idx: OldExt->getOperand(i_nocapture: 1));
799 IC.InsertNewInstWith(New: NewExt, Old: OldExt->getIterator());
800 IC.replaceInstUsesWith(I&: *OldExt, V: NewExt);
801 // Add the old extracts to the worklist for DCE. We can't remove the
802 // extracts directly, because they may still be used by the calling code.
803 IC.addToWorklist(I: OldExt);
804 }
805
806 return true;
807}
808
809/// We are building a shuffle to create V, which is a sequence of insertelement,
810/// extractelement pairs. If PermittedRHS is set, then we must either use it or
811/// not rely on the second vector source. Return a std::pair containing the
812/// left and right vectors of the proposed shuffle (or 0), and set the Mask
813/// parameter as required.
814///
815/// Note: we intentionally don't try to fold earlier shuffles since they have
816/// often been chosen carefully to be efficiently implementable on the target.
817using ShuffleOps = std::pair<Value *, Value *>;
818
819static ShuffleOps collectShuffleElements(Value *V, SmallVectorImpl<int> &Mask,
820 Value *PermittedRHS,
821 InstCombinerImpl &IC, bool &Rerun) {
822 assert(V->getType()->isVectorTy() && "Invalid shuffle!");
823 unsigned NumElts = cast<FixedVectorType>(Val: V->getType())->getNumElements();
824
825 if (match(V, P: m_Poison())) {
826 Mask.assign(NumElts, Elt: -1);
827 return std::make_pair(
828 x: PermittedRHS ? PoisonValue::get(T: PermittedRHS->getType()) : V, y: nullptr);
829 }
830
831 if (isa<ConstantAggregateZero>(Val: V)) {
832 Mask.assign(NumElts, Elt: 0);
833 return std::make_pair(x&: V, y: nullptr);
834 }
835
836 if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(Val: V)) {
837 // If this is an insert of an extract from some other vector, include it.
838 Value *VecOp = IEI->getOperand(i_nocapture: 0);
839 Value *ScalarOp = IEI->getOperand(i_nocapture: 1);
840 Value *IdxOp = IEI->getOperand(i_nocapture: 2);
841
842 if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(Val: ScalarOp)) {
843 if (isa<ConstantInt>(Val: EI->getOperand(i_nocapture: 1)) && isa<ConstantInt>(Val: IdxOp)) {
844 unsigned ExtractedIdx =
845 cast<ConstantInt>(Val: EI->getOperand(i_nocapture: 1))->getZExtValue();
846 unsigned InsertedIdx = cast<ConstantInt>(Val: IdxOp)->getZExtValue();
847
848 // Either the extracted from or inserted into vector must be RHSVec,
849 // otherwise we'd end up with a shuffle of three inputs.
850 if (EI->getOperand(i_nocapture: 0) == PermittedRHS || PermittedRHS == nullptr) {
851 Value *RHS = EI->getOperand(i_nocapture: 0);
852 ShuffleOps LR = collectShuffleElements(V: VecOp, Mask, PermittedRHS: RHS, IC, Rerun);
853 assert(LR.second == nullptr || LR.second == RHS);
854
855 if (LR.first->getType() != RHS->getType()) {
856 // Although we are giving up for now, see if we can create extracts
857 // that match the inserts for another round of combining.
858 if (replaceExtractElements(InsElt: IEI, ExtElt: EI, IC))
859 Rerun = true;
860
861 // We tried our best, but we can't find anything compatible with RHS
862 // further up the chain. Return a trivial shuffle.
863 for (unsigned i = 0; i < NumElts; ++i)
864 Mask[i] = i;
865 return std::make_pair(x&: V, y: nullptr);
866 }
867
868 unsigned NumLHSElts =
869 cast<FixedVectorType>(Val: RHS->getType())->getNumElements();
870 Mask[InsertedIdx % NumElts] = NumLHSElts + ExtractedIdx;
871 return std::make_pair(x&: LR.first, y&: RHS);
872 }
873
874 if (VecOp == PermittedRHS) {
875 // We've gone as far as we can: anything on the other side of the
876 // extractelement will already have been converted into a shuffle.
877 unsigned NumLHSElts =
878 cast<FixedVectorType>(Val: EI->getOperand(i_nocapture: 0)->getType())
879 ->getNumElements();
880 for (unsigned i = 0; i != NumElts; ++i)
881 Mask.push_back(Elt: i == InsertedIdx ? ExtractedIdx : NumLHSElts + i);
882 return std::make_pair(x: EI->getOperand(i_nocapture: 0), y&: PermittedRHS);
883 }
884
885 // If this insertelement is a chain that comes from exactly these two
886 // vectors, return the vector and the effective shuffle.
887 if (EI->getOperand(i_nocapture: 0)->getType() == PermittedRHS->getType() &&
888 collectSingleShuffleElements(V: IEI, LHS: EI->getOperand(i_nocapture: 0), RHS: PermittedRHS,
889 Mask))
890 return std::make_pair(x: EI->getOperand(i_nocapture: 0), y&: PermittedRHS);
891 }
892 }
893 }
894
895 // Otherwise, we can't do anything fancy. Return an identity vector.
896 for (unsigned i = 0; i != NumElts; ++i)
897 Mask.push_back(Elt: i);
898 return std::make_pair(x&: V, y: nullptr);
899}
900
901/// Look for chain of insertvalue's that fully define an aggregate, and trace
902/// back the values inserted, see if they are all were extractvalue'd from
903/// the same source aggregate from the exact same element indexes.
904/// If they were, just reuse the source aggregate.
905/// This potentially deals with PHI indirections.
906Instruction *InstCombinerImpl::foldAggregateConstructionIntoAggregateReuse(
907 InsertValueInst &OrigIVI) {
908 Type *AggTy = OrigIVI.getType();
909 unsigned NumAggElts;
910 switch (AggTy->getTypeID()) {
911 case Type::StructTyID:
912 NumAggElts = AggTy->getStructNumElements();
913 break;
914 case Type::ArrayTyID:
915 NumAggElts = AggTy->getArrayNumElements();
916 break;
917 default:
918 llvm_unreachable("Unhandled aggregate type?");
919 }
920
921 // Arbitrary aggregate size cut-off. Motivation for limit of 2 is to be able
922 // to handle clang C++ exception struct (which is hardcoded as {i8*, i32}),
923 // FIXME: any interesting patterns to be caught with larger limit?
924 assert(NumAggElts > 0 && "Aggregate should have elements.");
925 if (NumAggElts > 2)
926 return nullptr;
927
928 static constexpr auto NotFound = std::nullopt;
929 static constexpr auto FoundMismatch = nullptr;
930
931 // Try to find a value of each element of an aggregate.
932 // FIXME: deal with more complex, not one-dimensional, aggregate types
933 SmallVector<std::optional<Instruction *>, 2> AggElts(NumAggElts, NotFound);
934
935 // Do we know values for each element of the aggregate?
936 auto KnowAllElts = [&AggElts]() {
937 return !llvm::is_contained(Range&: AggElts, Element: NotFound);
938 };
939
940 int Depth = 0;
941
942 // Arbitrary `insertvalue` visitation depth limit. Let's be okay with
943 // every element being overwritten twice, which should never happen.
944 static const int DepthLimit = 2 * NumAggElts;
945
946 // Recurse up the chain of `insertvalue` aggregate operands until either we've
947 // reconstructed full initializer or can't visit any more `insertvalue`'s.
948 for (InsertValueInst *CurrIVI = &OrigIVI;
949 Depth < DepthLimit && CurrIVI && !KnowAllElts();
950 CurrIVI = dyn_cast<InsertValueInst>(Val: CurrIVI->getAggregateOperand()),
951 ++Depth) {
952 auto *InsertedValue =
953 dyn_cast<Instruction>(Val: CurrIVI->getInsertedValueOperand());
954 if (!InsertedValue)
955 return nullptr; // Inserted value must be produced by an instruction.
956
957 ArrayRef<unsigned int> Indices = CurrIVI->getIndices();
958
959 // Don't bother with more than single-level aggregates.
960 if (Indices.size() != 1)
961 return nullptr; // FIXME: deal with more complex aggregates?
962
963 // Now, we may have already previously recorded the value for this element
964 // of an aggregate. If we did, that means the CurrIVI will later be
965 // overwritten with the already-recorded value. But if not, let's record it!
966 std::optional<Instruction *> &Elt = AggElts[Indices.front()];
967 Elt = Elt.value_or(u&: InsertedValue);
968
969 // FIXME: should we handle chain-terminating undef base operand?
970 }
971
972 // Was that sufficient to deduce the full initializer for the aggregate?
973 if (!KnowAllElts())
974 return nullptr; // Give up then.
975
976 // We now want to find the source[s] of the aggregate elements we've found.
977 // And with "source" we mean the original aggregate[s] from which
978 // the inserted elements were extracted. This may require PHI translation.
979
980 enum class AggregateDescription {
981 /// When analyzing the value that was inserted into an aggregate, we did
982 /// not manage to find defining `extractvalue` instruction to analyze.
983 NotFound,
984 /// When analyzing the value that was inserted into an aggregate, we did
985 /// manage to find defining `extractvalue` instruction[s], and everything
986 /// matched perfectly - aggregate type, element insertion/extraction index.
987 Found,
988 /// When analyzing the value that was inserted into an aggregate, we did
989 /// manage to find defining `extractvalue` instruction, but there was
990 /// a mismatch: either the source type from which the extraction was didn't
991 /// match the aggregate type into which the insertion was,
992 /// or the extraction/insertion channels mismatched,
993 /// or different elements had different source aggregates.
994 FoundMismatch
995 };
996 auto Describe = [](std::optional<Value *> SourceAggregate) {
997 if (SourceAggregate == NotFound)
998 return AggregateDescription::NotFound;
999 if (*SourceAggregate == FoundMismatch)
1000 return AggregateDescription::FoundMismatch;
1001 return AggregateDescription::Found;
1002 };
1003
1004 // If an aggregate element is defined in UseBB, we can't use it in PredBB.
1005 bool EltDefinedInUseBB = false;
1006
1007 // Given the value \p Elt that was being inserted into element \p EltIdx of an
1008 // aggregate AggTy, see if \p Elt was originally defined by an
1009 // appropriate extractvalue (same element index, same aggregate type).
1010 // If found, return the source aggregate from which the extraction was.
1011 // If \p PredBB is provided, does PHI translation of an \p Elt first.
1012 auto FindSourceAggregate =
1013 [&](Instruction *Elt, unsigned EltIdx, std::optional<BasicBlock *> UseBB,
1014 std::optional<BasicBlock *> PredBB) -> std::optional<Value *> {
1015 // For now(?), only deal with, at most, a single level of PHI indirection.
1016 if (UseBB && PredBB) {
1017 Elt = dyn_cast<Instruction>(Val: Elt->DoPHITranslation(CurBB: *UseBB, PredBB: *PredBB));
1018 if (Elt && Elt->getParent() == *UseBB)
1019 EltDefinedInUseBB = true;
1020 }
1021 // FIXME: deal with multiple levels of PHI indirection?
1022
1023 // Did we find an extraction?
1024 auto *EVI = dyn_cast_or_null<ExtractValueInst>(Val: Elt);
1025 if (!EVI)
1026 return NotFound;
1027
1028 Value *SourceAggregate = EVI->getAggregateOperand();
1029
1030 // Is the extraction from the same type into which the insertion was?
1031 if (SourceAggregate->getType() != AggTy)
1032 return FoundMismatch;
1033 // And the element index doesn't change between extraction and insertion?
1034 if (EVI->getNumIndices() != 1 || EltIdx != EVI->getIndices().front())
1035 return FoundMismatch;
1036
1037 return SourceAggregate; // AggregateDescription::Found
1038 };
1039
1040 // Given elements AggElts that were constructing an aggregate OrigIVI,
1041 // see if we can find appropriate source aggregate for each of the elements,
1042 // and see it's the same aggregate for each element. If so, return it.
1043 auto FindCommonSourceAggregate =
1044 [&](std::optional<BasicBlock *> UseBB,
1045 std::optional<BasicBlock *> PredBB) -> std::optional<Value *> {
1046 std::optional<Value *> SourceAggregate;
1047
1048 for (auto I : enumerate(First&: AggElts)) {
1049 assert(Describe(SourceAggregate) != AggregateDescription::FoundMismatch &&
1050 "We don't store nullptr in SourceAggregate!");
1051 assert((Describe(SourceAggregate) == AggregateDescription::Found) ==
1052 (I.index() != 0) &&
1053 "SourceAggregate should be valid after the first element,");
1054
1055 // For this element, is there a plausible source aggregate?
1056 // FIXME: we could special-case undef element, IFF we know that in the
1057 // source aggregate said element isn't poison.
1058 std::optional<Value *> SourceAggregateForElement =
1059 FindSourceAggregate(*I.value(), I.index(), UseBB, PredBB);
1060
1061 // Okay, what have we found? Does that correlate with previous findings?
1062
1063 // Regardless of whether or not we have previously found source
1064 // aggregate for previous elements (if any), if we didn't find one for
1065 // this element, passthrough whatever we have just found.
1066 if (Describe(SourceAggregateForElement) != AggregateDescription::Found)
1067 return SourceAggregateForElement;
1068
1069 // Okay, we have found source aggregate for this element.
1070 // Let's see what we already know from previous elements, if any.
1071 switch (Describe(SourceAggregate)) {
1072 case AggregateDescription::NotFound:
1073 // This is apparently the first element that we have examined.
1074 SourceAggregate = SourceAggregateForElement; // Record the aggregate!
1075 continue; // Great, now look at next element.
1076 case AggregateDescription::Found:
1077 // We have previously already successfully examined other elements.
1078 // Is this the same source aggregate we've found for other elements?
1079 if (*SourceAggregateForElement != *SourceAggregate)
1080 return FoundMismatch;
1081 continue; // Still the same aggregate, look at next element.
1082 case AggregateDescription::FoundMismatch:
1083 llvm_unreachable("Can't happen. We would have early-exited then.");
1084 };
1085 }
1086
1087 assert(Describe(SourceAggregate) == AggregateDescription::Found &&
1088 "Must be a valid Value");
1089 return *SourceAggregate;
1090 };
1091
1092 std::optional<Value *> SourceAggregate;
1093
1094 // Can we find the source aggregate without looking at predecessors?
1095 SourceAggregate = FindCommonSourceAggregate(/*UseBB=*/std::nullopt,
1096 /*PredBB=*/std::nullopt);
1097 if (Describe(SourceAggregate) != AggregateDescription::NotFound) {
1098 if (Describe(SourceAggregate) == AggregateDescription::FoundMismatch)
1099 return nullptr; // Conflicting source aggregates!
1100 ++NumAggregateReconstructionsSimplified;
1101 return replaceInstUsesWith(I&: OrigIVI, V: *SourceAggregate);
1102 }
1103
1104 // Okay, apparently we need to look at predecessors.
1105
1106 // We should be smart about picking the "use" basic block, which will be the
1107 // merge point for aggregate, where we'll insert the final PHI that will be
1108 // used instead of OrigIVI. Basic block of OrigIVI is *not* the right choice.
1109 // We should look in which blocks each of the AggElts is being defined,
1110 // they all should be defined in the same basic block.
1111 BasicBlock *UseBB = nullptr;
1112
1113 for (const std::optional<Instruction *> &I : AggElts) {
1114 BasicBlock *BB = (*I)->getParent();
1115 // If it's the first instruction we've encountered, record the basic block.
1116 if (!UseBB) {
1117 UseBB = BB;
1118 continue;
1119 }
1120 // Otherwise, this must be the same basic block we've seen previously.
1121 if (UseBB != BB)
1122 return nullptr;
1123 }
1124
1125 // If *all* of the elements are basic-block-independent, meaning they are
1126 // either function arguments, or constant expressions, then if we didn't
1127 // handle them without predecessor-aware handling, we won't handle them now.
1128 if (!UseBB)
1129 return nullptr;
1130
1131 // If we didn't manage to find source aggregate without looking at
1132 // predecessors, and there are no predecessors to look at, then we're done.
1133 if (pred_empty(BB: UseBB))
1134 return nullptr;
1135
1136 // Arbitrary predecessor count limit.
1137 static const int PredCountLimit = 64;
1138
1139 // Cache the (non-uniqified!) list of predecessors in a vector,
1140 // checking the limit at the same time for efficiency.
1141 SmallVector<BasicBlock *, 4> Preds; // May have duplicates!
1142 for (BasicBlock *Pred : predecessors(BB: UseBB)) {
1143 // Don't bother if there are too many predecessors.
1144 if (Preds.size() >= PredCountLimit) // FIXME: only count duplicates once?
1145 return nullptr;
1146 Preds.emplace_back(Args&: Pred);
1147 }
1148
1149 // For each predecessor, what is the source aggregate,
1150 // from which all the elements were originally extracted from?
1151 // Note that we want for the map to have stable iteration order!
1152 SmallMapVector<BasicBlock *, Value *, 4> SourceAggregates;
1153 bool FoundSrcAgg = false;
1154 for (BasicBlock *Pred : Preds) {
1155 std::pair<decltype(SourceAggregates)::iterator, bool> IV =
1156 SourceAggregates.try_emplace(Key: Pred);
1157 // Did we already evaluate this predecessor?
1158 if (!IV.second)
1159 continue;
1160
1161 // Let's hope that when coming from predecessor Pred, all elements of the
1162 // aggregate produced by OrigIVI must have been originally extracted from
1163 // the same aggregate. Is that so? Can we find said original aggregate?
1164 SourceAggregate = FindCommonSourceAggregate(UseBB, Pred);
1165 if (Describe(SourceAggregate) == AggregateDescription::Found) {
1166 FoundSrcAgg = true;
1167 IV.first->second = *SourceAggregate;
1168 } else {
1169 // If UseBB is the single successor of Pred, we can add InsertValue to
1170 // Pred.
1171 auto *BI = dyn_cast<BranchInst>(Val: Pred->getTerminator());
1172 if (!BI || !BI->isUnconditional())
1173 return nullptr;
1174 }
1175 }
1176
1177 if (!FoundSrcAgg)
1178 return nullptr;
1179
1180 // Do some sanity check if we need to add insertvalue into predecessors.
1181 auto OrigBB = OrigIVI.getParent();
1182 for (auto &It : SourceAggregates) {
1183 if (Describe(It.second) == AggregateDescription::Found)
1184 continue;
1185
1186 // Element is defined in UseBB, so it can't be used in predecessors.
1187 if (EltDefinedInUseBB)
1188 return nullptr;
1189
1190 // Do this transformation cross loop boundary may cause dead loop. So we
1191 // should avoid this situation. But LoopInfo is not generally available, we
1192 // must be conservative here.
1193 // If OrigIVI is in UseBB and it's the only successor of PredBB, PredBB
1194 // can't be in inner loop.
1195 if (UseBB != OrigBB)
1196 return nullptr;
1197
1198 // Avoid constructing constant aggregate because constant value may expose
1199 // more optimizations.
1200 bool ConstAgg = true;
1201 for (auto Val : AggElts) {
1202 Value *Elt = (*Val)->DoPHITranslation(CurBB: UseBB, PredBB: It.first);
1203 if (!isa<Constant>(Val: Elt)) {
1204 ConstAgg = false;
1205 break;
1206 }
1207 }
1208 if (ConstAgg)
1209 return nullptr;
1210 }
1211
1212 // For predecessors without appropriate source aggregate, create one in the
1213 // predecessor.
1214 for (auto &It : SourceAggregates) {
1215 if (Describe(It.second) == AggregateDescription::Found)
1216 continue;
1217
1218 BasicBlock *Pred = It.first;
1219 Builder.SetInsertPoint(Pred->getTerminator());
1220 Value *V = PoisonValue::get(T: AggTy);
1221 for (auto [Idx, Val] : enumerate(First&: AggElts)) {
1222 Value *Elt = (*Val)->DoPHITranslation(CurBB: UseBB, PredBB: Pred);
1223 V = Builder.CreateInsertValue(Agg: V, Val: Elt, Idxs: Idx);
1224 }
1225
1226 It.second = V;
1227 }
1228
1229 // All good! Now we just need to thread the source aggregates here.
1230 // Note that we have to insert the new PHI here, ourselves, because we can't
1231 // rely on InstCombinerImpl::run() inserting it into the right basic block.
1232 // Note that the same block can be a predecessor more than once,
1233 // and we need to preserve that invariant for the PHI node.
1234 BuilderTy::InsertPointGuard Guard(Builder);
1235 Builder.SetInsertPoint(TheBB: UseBB, IP: UseBB->getFirstNonPHIIt());
1236 auto *PHI =
1237 Builder.CreatePHI(Ty: AggTy, NumReservedValues: Preds.size(), Name: OrigIVI.getName() + ".merged");
1238 for (BasicBlock *Pred : Preds)
1239 PHI->addIncoming(V: SourceAggregates[Pred], BB: Pred);
1240
1241 ++NumAggregateReconstructionsSimplified;
1242 return replaceInstUsesWith(I&: OrigIVI, V: PHI);
1243}
1244
1245/// Try to find redundant insertvalue instructions, like the following ones:
1246/// %0 = insertvalue { i8, i32 } undef, i8 %x, 0
1247/// %1 = insertvalue { i8, i32 } %0, i8 %y, 0
1248/// Here the second instruction inserts values at the same indices, as the
1249/// first one, making the first one redundant.
1250/// It should be transformed to:
1251/// %0 = insertvalue { i8, i32 } undef, i8 %y, 0
1252Instruction *InstCombinerImpl::visitInsertValueInst(InsertValueInst &I) {
1253 if (Value *V = simplifyInsertValueInst(
1254 Agg: I.getAggregateOperand(), Val: I.getInsertedValueOperand(), Idxs: I.getIndices(),
1255 Q: SQ.getWithInstruction(I: &I)))
1256 return replaceInstUsesWith(I, V);
1257
1258 bool IsRedundant = false;
1259 ArrayRef<unsigned int> FirstIndices = I.getIndices();
1260
1261 // If there is a chain of insertvalue instructions (each of them except the
1262 // last one has only one use and it's another insertvalue insn from this
1263 // chain), check if any of the 'children' uses the same indices as the first
1264 // instruction. In this case, the first one is redundant.
1265 Value *V = &I;
1266 unsigned Depth = 0;
1267 while (V->hasOneUse() && Depth < 10) {
1268 User *U = V->user_back();
1269 auto UserInsInst = dyn_cast<InsertValueInst>(Val: U);
1270 if (!UserInsInst || U->getOperand(i: 0) != V)
1271 break;
1272 if (UserInsInst->getIndices() == FirstIndices) {
1273 IsRedundant = true;
1274 break;
1275 }
1276 V = UserInsInst;
1277 Depth++;
1278 }
1279
1280 if (IsRedundant)
1281 return replaceInstUsesWith(I, V: I.getOperand(i_nocapture: 0));
1282
1283 if (Instruction *NewI = foldAggregateConstructionIntoAggregateReuse(OrigIVI&: I))
1284 return NewI;
1285
1286 return nullptr;
1287}
1288
1289static bool isShuffleEquivalentToSelect(ShuffleVectorInst &Shuf) {
1290 // Can not analyze scalable type, the number of elements is not a compile-time
1291 // constant.
1292 if (isa<ScalableVectorType>(Val: Shuf.getOperand(i_nocapture: 0)->getType()))
1293 return false;
1294
1295 int MaskSize = Shuf.getShuffleMask().size();
1296 int VecSize =
1297 cast<FixedVectorType>(Val: Shuf.getOperand(i_nocapture: 0)->getType())->getNumElements();
1298
1299 // A vector select does not change the size of the operands.
1300 if (MaskSize != VecSize)
1301 return false;
1302
1303 // Each mask element must be undefined or choose a vector element from one of
1304 // the source operands without crossing vector lanes.
1305 for (int i = 0; i != MaskSize; ++i) {
1306 int Elt = Shuf.getMaskValue(Elt: i);
1307 if (Elt != -1 && Elt != i && Elt != i + VecSize)
1308 return false;
1309 }
1310
1311 return true;
1312}
1313
1314/// Turn a chain of inserts that splats a value into an insert + shuffle:
1315/// insertelt(insertelt(insertelt(insertelt X, %k, 0), %k, 1), %k, 2) ... ->
1316/// shufflevector(insertelt(X, %k, 0), poison, zero)
1317static Instruction *foldInsSequenceIntoSplat(InsertElementInst &InsElt) {
1318 // We are interested in the last insert in a chain. So if this insert has a
1319 // single user and that user is an insert, bail.
1320 if (InsElt.hasOneUse() && isa<InsertElementInst>(Val: InsElt.user_back()))
1321 return nullptr;
1322
1323 VectorType *VecTy = InsElt.getType();
1324 // Can not handle scalable type, the number of elements is not a compile-time
1325 // constant.
1326 if (isa<ScalableVectorType>(Val: VecTy))
1327 return nullptr;
1328 unsigned NumElements = cast<FixedVectorType>(Val: VecTy)->getNumElements();
1329
1330 // Do not try to do this for a one-element vector, since that's a nop,
1331 // and will cause an inf-loop.
1332 if (NumElements == 1)
1333 return nullptr;
1334
1335 Value *SplatVal = InsElt.getOperand(i_nocapture: 1);
1336 InsertElementInst *CurrIE = &InsElt;
1337 SmallBitVector ElementPresent(NumElements, false);
1338 InsertElementInst *FirstIE = nullptr;
1339
1340 // Walk the chain backwards, keeping track of which indices we inserted into,
1341 // until we hit something that isn't an insert of the splatted value.
1342 while (CurrIE) {
1343 auto *Idx = dyn_cast<ConstantInt>(Val: CurrIE->getOperand(i_nocapture: 2));
1344 if (!Idx || CurrIE->getOperand(i_nocapture: 1) != SplatVal)
1345 return nullptr;
1346
1347 auto *NextIE = dyn_cast<InsertElementInst>(Val: CurrIE->getOperand(i_nocapture: 0));
1348 // Check none of the intermediate steps have any additional uses, except
1349 // for the root insertelement instruction, which can be re-used, if it
1350 // inserts at position 0.
1351 if (CurrIE != &InsElt &&
1352 (!CurrIE->hasOneUse() && (NextIE != nullptr || !Idx->isZero())))
1353 return nullptr;
1354
1355 ElementPresent[Idx->getZExtValue()] = true;
1356 FirstIE = CurrIE;
1357 CurrIE = NextIE;
1358 }
1359
1360 // If this is just a single insertelement (not a sequence), we are done.
1361 if (FirstIE == &InsElt)
1362 return nullptr;
1363
1364 // If we are not inserting into a poison vector, make sure we've seen an
1365 // insert into every element.
1366 // TODO: If the base vector is not undef, it might be better to create a splat
1367 // and then a select-shuffle (blend) with the base vector.
1368 if (!match(V: FirstIE->getOperand(i_nocapture: 0), P: m_Poison()))
1369 if (!ElementPresent.all())
1370 return nullptr;
1371
1372 // Create the insert + shuffle.
1373 Type *Int64Ty = Type::getInt64Ty(C&: InsElt.getContext());
1374 PoisonValue *PoisonVec = PoisonValue::get(T: VecTy);
1375 Constant *Zero = ConstantInt::get(Ty: Int64Ty, V: 0);
1376 if (!cast<ConstantInt>(Val: FirstIE->getOperand(i_nocapture: 2))->isZero())
1377 FirstIE = InsertElementInst::Create(Vec: PoisonVec, NewElt: SplatVal, Idx: Zero, NameStr: "",
1378 InsertBefore: InsElt.getIterator());
1379
1380 // Splat from element 0, but replace absent elements with poison in the mask.
1381 SmallVector<int, 16> Mask(NumElements, 0);
1382 for (unsigned i = 0; i != NumElements; ++i)
1383 if (!ElementPresent[i])
1384 Mask[i] = -1;
1385
1386 return new ShuffleVectorInst(FirstIE, Mask);
1387}
1388
1389/// Try to fold an insert element into an existing splat shuffle by changing
1390/// the shuffle's mask to include the index of this insert element.
1391static Instruction *foldInsEltIntoSplat(InsertElementInst &InsElt) {
1392 // Check if the vector operand of this insert is a canonical splat shuffle.
1393 auto *Shuf = dyn_cast<ShuffleVectorInst>(Val: InsElt.getOperand(i_nocapture: 0));
1394 if (!Shuf || !Shuf->isZeroEltSplat())
1395 return nullptr;
1396
1397 // Bail out early if shuffle is scalable type. The number of elements in
1398 // shuffle mask is unknown at compile-time.
1399 if (isa<ScalableVectorType>(Val: Shuf->getType()))
1400 return nullptr;
1401
1402 // Check for a constant insertion index.
1403 uint64_t IdxC;
1404 if (!match(V: InsElt.getOperand(i_nocapture: 2), P: m_ConstantInt(V&: IdxC)))
1405 return nullptr;
1406
1407 // Check if the splat shuffle's input is the same as this insert's scalar op.
1408 Value *X = InsElt.getOperand(i_nocapture: 1);
1409 Value *Op0 = Shuf->getOperand(i_nocapture: 0);
1410 if (!match(V: Op0, P: m_InsertElt(Val: m_Undef(), Elt: m_Specific(V: X), Idx: m_ZeroInt())))
1411 return nullptr;
1412
1413 // Replace the shuffle mask element at the index of this insert with a zero.
1414 // For example:
1415 // inselt (shuf (inselt undef, X, 0), _, <0,undef,0,undef>), X, 1
1416 // --> shuf (inselt undef, X, 0), poison, <0,0,0,undef>
1417 unsigned NumMaskElts =
1418 cast<FixedVectorType>(Val: Shuf->getType())->getNumElements();
1419 SmallVector<int, 16> NewMask(NumMaskElts);
1420 for (unsigned i = 0; i != NumMaskElts; ++i)
1421 NewMask[i] = i == IdxC ? 0 : Shuf->getMaskValue(Elt: i);
1422
1423 return new ShuffleVectorInst(Op0, NewMask);
1424}
1425
1426/// Try to fold an extract+insert element into an existing identity shuffle by
1427/// changing the shuffle's mask to include the index of this insert element.
1428static Instruction *foldInsEltIntoIdentityShuffle(InsertElementInst &InsElt) {
1429 // Check if the vector operand of this insert is an identity shuffle.
1430 auto *Shuf = dyn_cast<ShuffleVectorInst>(Val: InsElt.getOperand(i_nocapture: 0));
1431 if (!Shuf || !match(V: Shuf->getOperand(i_nocapture: 1), P: m_Poison()) ||
1432 !(Shuf->isIdentityWithExtract() || Shuf->isIdentityWithPadding()))
1433 return nullptr;
1434
1435 // Bail out early if shuffle is scalable type. The number of elements in
1436 // shuffle mask is unknown at compile-time.
1437 if (isa<ScalableVectorType>(Val: Shuf->getType()))
1438 return nullptr;
1439
1440 // Check for a constant insertion index.
1441 uint64_t IdxC;
1442 if (!match(V: InsElt.getOperand(i_nocapture: 2), P: m_ConstantInt(V&: IdxC)))
1443 return nullptr;
1444
1445 // Check if this insert's scalar op is extracted from the identity shuffle's
1446 // input vector.
1447 Value *Scalar = InsElt.getOperand(i_nocapture: 1);
1448 Value *X = Shuf->getOperand(i_nocapture: 0);
1449 if (!match(V: Scalar, P: m_ExtractElt(Val: m_Specific(V: X), Idx: m_SpecificInt(V: IdxC))))
1450 return nullptr;
1451
1452 // Replace the shuffle mask element at the index of this extract+insert with
1453 // that same index value.
1454 // For example:
1455 // inselt (shuf X, IdMask), (extelt X, IdxC), IdxC --> shuf X, IdMask'
1456 unsigned NumMaskElts =
1457 cast<FixedVectorType>(Val: Shuf->getType())->getNumElements();
1458 SmallVector<int, 16> NewMask(NumMaskElts);
1459 ArrayRef<int> OldMask = Shuf->getShuffleMask();
1460 for (unsigned i = 0; i != NumMaskElts; ++i) {
1461 if (i != IdxC) {
1462 // All mask elements besides the inserted element remain the same.
1463 NewMask[i] = OldMask[i];
1464 } else if (OldMask[i] == (int)IdxC) {
1465 // If the mask element was already set, there's nothing to do
1466 // (demanded elements analysis may unset it later).
1467 return nullptr;
1468 } else {
1469 assert(OldMask[i] == PoisonMaskElem &&
1470 "Unexpected shuffle mask element for identity shuffle");
1471 NewMask[i] = IdxC;
1472 }
1473 }
1474
1475 return new ShuffleVectorInst(X, Shuf->getOperand(i_nocapture: 1), NewMask);
1476}
1477
1478/// If we have an insertelement instruction feeding into another insertelement
1479/// and the 2nd is inserting a constant into the vector, canonicalize that
1480/// constant insertion before the insertion of a variable:
1481///
1482/// insertelement (insertelement X, Y, IdxC1), ScalarC, IdxC2 -->
1483/// insertelement (insertelement X, ScalarC, IdxC2), Y, IdxC1
1484///
1485/// This has the potential of eliminating the 2nd insertelement instruction
1486/// via constant folding of the scalar constant into a vector constant.
1487static Instruction *hoistInsEltConst(InsertElementInst &InsElt2,
1488 InstCombiner::BuilderTy &Builder) {
1489 auto *InsElt1 = dyn_cast<InsertElementInst>(Val: InsElt2.getOperand(i_nocapture: 0));
1490 if (!InsElt1 || !InsElt1->hasOneUse())
1491 return nullptr;
1492
1493 Value *X, *Y;
1494 Constant *ScalarC;
1495 ConstantInt *IdxC1, *IdxC2;
1496 if (match(V: InsElt1->getOperand(i_nocapture: 0), P: m_Value(V&: X)) &&
1497 match(V: InsElt1->getOperand(i_nocapture: 1), P: m_Value(V&: Y)) && !isa<Constant>(Val: Y) &&
1498 match(V: InsElt1->getOperand(i_nocapture: 2), P: m_ConstantInt(CI&: IdxC1)) &&
1499 match(V: InsElt2.getOperand(i_nocapture: 1), P: m_Constant(C&: ScalarC)) &&
1500 match(V: InsElt2.getOperand(i_nocapture: 2), P: m_ConstantInt(CI&: IdxC2)) && IdxC1 != IdxC2) {
1501 Value *NewInsElt1 = Builder.CreateInsertElement(Vec: X, NewElt: ScalarC, Idx: IdxC2);
1502 return InsertElementInst::Create(Vec: NewInsElt1, NewElt: Y, Idx: IdxC1);
1503 }
1504
1505 return nullptr;
1506}
1507
1508/// insertelt (shufflevector X, CVec, Mask|insertelt X, C1, CIndex1), C, CIndex
1509/// --> shufflevector X, CVec', Mask'
1510static Instruction *foldConstantInsEltIntoShuffle(InsertElementInst &InsElt) {
1511 auto *Inst = dyn_cast<Instruction>(Val: InsElt.getOperand(i_nocapture: 0));
1512 // Bail out if the parent has more than one use. In that case, we'd be
1513 // replacing the insertelt with a shuffle, and that's not a clear win.
1514 if (!Inst || !Inst->hasOneUse())
1515 return nullptr;
1516 if (auto *Shuf = dyn_cast<ShuffleVectorInst>(Val: InsElt.getOperand(i_nocapture: 0))) {
1517 // The shuffle must have a constant vector operand. The insertelt must have
1518 // a constant scalar being inserted at a constant position in the vector.
1519 Constant *ShufConstVec, *InsEltScalar;
1520 uint64_t InsEltIndex;
1521 if (!match(V: Shuf->getOperand(i_nocapture: 1), P: m_Constant(C&: ShufConstVec)) ||
1522 !match(V: InsElt.getOperand(i_nocapture: 1), P: m_Constant(C&: InsEltScalar)) ||
1523 !match(V: InsElt.getOperand(i_nocapture: 2), P: m_ConstantInt(V&: InsEltIndex)))
1524 return nullptr;
1525
1526 // Adding an element to an arbitrary shuffle could be expensive, but a
1527 // shuffle that selects elements from vectors without crossing lanes is
1528 // assumed cheap.
1529 // If we're just adding a constant into that shuffle, it will still be
1530 // cheap.
1531 if (!isShuffleEquivalentToSelect(Shuf&: *Shuf))
1532 return nullptr;
1533
1534 // From the above 'select' check, we know that the mask has the same number
1535 // of elements as the vector input operands. We also know that each constant
1536 // input element is used in its lane and can not be used more than once by
1537 // the shuffle. Therefore, replace the constant in the shuffle's constant
1538 // vector with the insertelt constant. Replace the constant in the shuffle's
1539 // mask vector with the insertelt index plus the length of the vector
1540 // (because the constant vector operand of a shuffle is always the 2nd
1541 // operand).
1542 ArrayRef<int> Mask = Shuf->getShuffleMask();
1543 unsigned NumElts = Mask.size();
1544 SmallVector<Constant *, 16> NewShufElts(NumElts);
1545 SmallVector<int, 16> NewMaskElts(NumElts);
1546 for (unsigned I = 0; I != NumElts; ++I) {
1547 if (I == InsEltIndex) {
1548 NewShufElts[I] = InsEltScalar;
1549 NewMaskElts[I] = InsEltIndex + NumElts;
1550 } else {
1551 // Copy over the existing values.
1552 NewShufElts[I] = ShufConstVec->getAggregateElement(Elt: I);
1553 NewMaskElts[I] = Mask[I];
1554 }
1555
1556 // Bail if we failed to find an element.
1557 if (!NewShufElts[I])
1558 return nullptr;
1559 }
1560
1561 // Create new operands for a shuffle that includes the constant of the
1562 // original insertelt. The old shuffle will be dead now.
1563 return new ShuffleVectorInst(Shuf->getOperand(i_nocapture: 0),
1564 ConstantVector::get(V: NewShufElts), NewMaskElts);
1565 } else if (auto *IEI = dyn_cast<InsertElementInst>(Val: Inst)) {
1566 // Transform sequences of insertelements ops with constant data/indexes into
1567 // a single shuffle op.
1568 // Can not handle scalable type, the number of elements needed to create
1569 // shuffle mask is not a compile-time constant.
1570 if (isa<ScalableVectorType>(Val: InsElt.getType()))
1571 return nullptr;
1572 unsigned NumElts =
1573 cast<FixedVectorType>(Val: InsElt.getType())->getNumElements();
1574
1575 uint64_t InsertIdx[2];
1576 Constant *Val[2];
1577 if (!match(V: InsElt.getOperand(i_nocapture: 2), P: m_ConstantInt(V&: InsertIdx[0])) ||
1578 !match(V: InsElt.getOperand(i_nocapture: 1), P: m_Constant(C&: Val[0])) ||
1579 !match(V: IEI->getOperand(i_nocapture: 2), P: m_ConstantInt(V&: InsertIdx[1])) ||
1580 !match(V: IEI->getOperand(i_nocapture: 1), P: m_Constant(C&: Val[1])))
1581 return nullptr;
1582 SmallVector<Constant *, 16> Values(NumElts);
1583 SmallVector<int, 16> Mask(NumElts);
1584 auto ValI = std::begin(arr&: Val);
1585 // Generate new constant vector and mask.
1586 // We have 2 values/masks from the insertelements instructions. Insert them
1587 // into new value/mask vectors.
1588 for (uint64_t I : InsertIdx) {
1589 if (!Values[I]) {
1590 Values[I] = *ValI;
1591 Mask[I] = NumElts + I;
1592 }
1593 ++ValI;
1594 }
1595 // Remaining values are filled with 'poison' values.
1596 for (unsigned I = 0; I < NumElts; ++I) {
1597 if (!Values[I]) {
1598 Values[I] = PoisonValue::get(T: InsElt.getType()->getElementType());
1599 Mask[I] = I;
1600 }
1601 }
1602 // Create new operands for a shuffle that includes the constant of the
1603 // original insertelt.
1604 return new ShuffleVectorInst(IEI->getOperand(i_nocapture: 0),
1605 ConstantVector::get(V: Values), Mask);
1606 }
1607 return nullptr;
1608}
1609
1610/// If both the base vector and the inserted element are extended from the same
1611/// type, do the insert element in the narrow source type followed by extend.
1612/// TODO: This can be extended to include other cast opcodes, but particularly
1613/// if we create a wider insertelement, make sure codegen is not harmed.
1614static Instruction *narrowInsElt(InsertElementInst &InsElt,
1615 InstCombiner::BuilderTy &Builder) {
1616 // We are creating a vector extend. If the original vector extend has another
1617 // use, that would mean we end up with 2 vector extends, so avoid that.
1618 // TODO: We could ease the use-clause to "if at least one op has one use"
1619 // (assuming that the source types match - see next TODO comment).
1620 Value *Vec = InsElt.getOperand(i_nocapture: 0);
1621 if (!Vec->hasOneUse())
1622 return nullptr;
1623
1624 Value *Scalar = InsElt.getOperand(i_nocapture: 1);
1625 Value *X, *Y;
1626 CastInst::CastOps CastOpcode;
1627 if (match(V: Vec, P: m_FPExt(Op: m_Value(V&: X))) && match(V: Scalar, P: m_FPExt(Op: m_Value(V&: Y))))
1628 CastOpcode = Instruction::FPExt;
1629 else if (match(V: Vec, P: m_SExt(Op: m_Value(V&: X))) && match(V: Scalar, P: m_SExt(Op: m_Value(V&: Y))))
1630 CastOpcode = Instruction::SExt;
1631 else if (match(V: Vec, P: m_ZExt(Op: m_Value(V&: X))) && match(V: Scalar, P: m_ZExt(Op: m_Value(V&: Y))))
1632 CastOpcode = Instruction::ZExt;
1633 else
1634 return nullptr;
1635
1636 // TODO: We can allow mismatched types by creating an intermediate cast.
1637 if (X->getType()->getScalarType() != Y->getType())
1638 return nullptr;
1639
1640 // inselt (ext X), (ext Y), Index --> ext (inselt X, Y, Index)
1641 Value *NewInsElt = Builder.CreateInsertElement(Vec: X, NewElt: Y, Idx: InsElt.getOperand(i_nocapture: 2));
1642 return CastInst::Create(CastOpcode, S: NewInsElt, Ty: InsElt.getType());
1643}
1644
1645/// If we are inserting 2 halves of a value into adjacent elements of a vector,
1646/// try to convert to a single insert with appropriate bitcasts.
1647static Instruction *foldTruncInsEltPair(InsertElementInst &InsElt,
1648 bool IsBigEndian,
1649 InstCombiner::BuilderTy &Builder) {
1650 Value *VecOp = InsElt.getOperand(i_nocapture: 0);
1651 Value *ScalarOp = InsElt.getOperand(i_nocapture: 1);
1652 Value *IndexOp = InsElt.getOperand(i_nocapture: 2);
1653
1654 // Pattern depends on endian because we expect lower index is inserted first.
1655 // Big endian:
1656 // inselt (inselt BaseVec, (trunc (lshr X, BW/2), Index0), (trunc X), Index1
1657 // Little endian:
1658 // inselt (inselt BaseVec, (trunc X), Index0), (trunc (lshr X, BW/2)), Index1
1659 // Note: It is not safe to do this transform with an arbitrary base vector
1660 // because the bitcast of that vector to fewer/larger elements could
1661 // allow poison to spill into an element that was not poison before.
1662 // TODO: Detect smaller fractions of the scalar.
1663 // TODO: One-use checks are conservative.
1664 auto *VTy = dyn_cast<FixedVectorType>(Val: InsElt.getType());
1665 Value *Scalar0, *BaseVec;
1666 uint64_t Index0, Index1;
1667 if (!VTy || (VTy->getNumElements() & 1) ||
1668 !match(V: IndexOp, P: m_ConstantInt(V&: Index1)) ||
1669 !match(V: VecOp, P: m_InsertElt(Val: m_Value(V&: BaseVec), Elt: m_Value(V&: Scalar0),
1670 Idx: m_ConstantInt(V&: Index0))) ||
1671 !match(V: BaseVec, P: m_Undef()))
1672 return nullptr;
1673
1674 // The first insert must be to the index one less than this one, and
1675 // the first insert must be to an even index.
1676 if (Index0 + 1 != Index1 || Index0 & 1)
1677 return nullptr;
1678
1679 // For big endian, the high half of the value should be inserted first.
1680 // For little endian, the low half of the value should be inserted first.
1681 Value *X;
1682 uint64_t ShAmt;
1683 if (IsBigEndian) {
1684 if (!match(V: ScalarOp, P: m_Trunc(Op: m_Value(V&: X))) ||
1685 !match(V: Scalar0, P: m_Trunc(Op: m_LShr(L: m_Specific(V: X), R: m_ConstantInt(V&: ShAmt)))))
1686 return nullptr;
1687 } else {
1688 if (!match(V: Scalar0, P: m_Trunc(Op: m_Value(V&: X))) ||
1689 !match(V: ScalarOp, P: m_Trunc(Op: m_LShr(L: m_Specific(V: X), R: m_ConstantInt(V&: ShAmt)))))
1690 return nullptr;
1691 }
1692
1693 Type *SrcTy = X->getType();
1694 unsigned ScalarWidth = SrcTy->getScalarSizeInBits();
1695 unsigned VecEltWidth = VTy->getScalarSizeInBits();
1696 if (ScalarWidth != VecEltWidth * 2 || ShAmt != VecEltWidth)
1697 return nullptr;
1698
1699 // Bitcast the base vector to a vector type with the source element type.
1700 Type *CastTy = FixedVectorType::get(ElementType: SrcTy, NumElts: VTy->getNumElements() / 2);
1701 Value *CastBaseVec = Builder.CreateBitCast(V: BaseVec, DestTy: CastTy);
1702
1703 // Scale the insert index for a vector with half as many elements.
1704 // bitcast (inselt (bitcast BaseVec), X, NewIndex)
1705 uint64_t NewIndex = IsBigEndian ? Index1 / 2 : Index0 / 2;
1706 Value *NewInsert = Builder.CreateInsertElement(Vec: CastBaseVec, NewElt: X, Idx: NewIndex);
1707 return new BitCastInst(NewInsert, VTy);
1708}
1709
1710Instruction *InstCombinerImpl::visitInsertElementInst(InsertElementInst &IE) {
1711 Value *VecOp = IE.getOperand(i_nocapture: 0);
1712 Value *ScalarOp = IE.getOperand(i_nocapture: 1);
1713 Value *IdxOp = IE.getOperand(i_nocapture: 2);
1714
1715 if (auto *V = simplifyInsertElementInst(
1716 Vec: VecOp, Elt: ScalarOp, Idx: IdxOp, Q: SQ.getWithInstruction(I: &IE)))
1717 return replaceInstUsesWith(I&: IE, V);
1718
1719 // Canonicalize type of constant indices to i64 to simplify CSE
1720 if (auto *IndexC = dyn_cast<ConstantInt>(Val: IdxOp)) {
1721 if (auto *NewIdx = getPreferredVectorIndex(IndexC))
1722 return replaceOperand(I&: IE, OpNum: 2, V: NewIdx);
1723
1724 Value *BaseVec, *OtherScalar;
1725 uint64_t OtherIndexVal;
1726 if (match(V: VecOp, P: m_OneUse(SubPattern: m_InsertElt(Val: m_Value(V&: BaseVec),
1727 Elt: m_Value(V&: OtherScalar),
1728 Idx: m_ConstantInt(V&: OtherIndexVal)))) &&
1729 !isa<Constant>(Val: OtherScalar) && OtherIndexVal > IndexC->getZExtValue()) {
1730 Value *NewIns = Builder.CreateInsertElement(Vec: BaseVec, NewElt: ScalarOp, Idx: IdxOp);
1731 return InsertElementInst::Create(Vec: NewIns, NewElt: OtherScalar,
1732 Idx: Builder.getInt64(C: OtherIndexVal));
1733 }
1734 }
1735
1736 // If the scalar is bitcast and inserted into undef, do the insert in the
1737 // source type followed by bitcast.
1738 // TODO: Generalize for insert into any constant, not just undef?
1739 Value *ScalarSrc;
1740 if (match(V: VecOp, P: m_Undef()) &&
1741 match(V: ScalarOp, P: m_OneUse(SubPattern: m_BitCast(Op: m_Value(V&: ScalarSrc)))) &&
1742 (ScalarSrc->getType()->isIntegerTy() ||
1743 ScalarSrc->getType()->isFloatingPointTy())) {
1744 // inselt undef, (bitcast ScalarSrc), IdxOp -->
1745 // bitcast (inselt undef, ScalarSrc, IdxOp)
1746 Type *ScalarTy = ScalarSrc->getType();
1747 Type *VecTy = VectorType::get(ElementType: ScalarTy, EC: IE.getType()->getElementCount());
1748 Constant *NewUndef = isa<PoisonValue>(Val: VecOp) ? PoisonValue::get(T: VecTy)
1749 : UndefValue::get(T: VecTy);
1750 Value *NewInsElt = Builder.CreateInsertElement(Vec: NewUndef, NewElt: ScalarSrc, Idx: IdxOp);
1751 return new BitCastInst(NewInsElt, IE.getType());
1752 }
1753
1754 // If the vector and scalar are both bitcast from the same element type, do
1755 // the insert in that source type followed by bitcast.
1756 Value *VecSrc;
1757 if (match(V: VecOp, P: m_BitCast(Op: m_Value(V&: VecSrc))) &&
1758 match(V: ScalarOp, P: m_BitCast(Op: m_Value(V&: ScalarSrc))) &&
1759 (VecOp->hasOneUse() || ScalarOp->hasOneUse()) &&
1760 VecSrc->getType()->isVectorTy() && !ScalarSrc->getType()->isVectorTy() &&
1761 cast<VectorType>(Val: VecSrc->getType())->getElementType() ==
1762 ScalarSrc->getType()) {
1763 // inselt (bitcast VecSrc), (bitcast ScalarSrc), IdxOp -->
1764 // bitcast (inselt VecSrc, ScalarSrc, IdxOp)
1765 Value *NewInsElt = Builder.CreateInsertElement(Vec: VecSrc, NewElt: ScalarSrc, Idx: IdxOp);
1766 return new BitCastInst(NewInsElt, IE.getType());
1767 }
1768
1769 // If the inserted element was extracted from some other fixed-length vector
1770 // and both indexes are valid constants, try to turn this into a shuffle.
1771 // Can not handle scalable vector type, the number of elements needed to
1772 // create shuffle mask is not a compile-time constant.
1773 uint64_t InsertedIdx, ExtractedIdx;
1774 Value *ExtVecOp;
1775 if (isa<FixedVectorType>(Val: IE.getType()) &&
1776 match(V: IdxOp, P: m_ConstantInt(V&: InsertedIdx)) &&
1777 match(V: ScalarOp,
1778 P: m_ExtractElt(Val: m_Value(V&: ExtVecOp), Idx: m_ConstantInt(V&: ExtractedIdx))) &&
1779 isa<FixedVectorType>(Val: ExtVecOp->getType()) &&
1780 ExtractedIdx <
1781 cast<FixedVectorType>(Val: ExtVecOp->getType())->getNumElements()) {
1782 // TODO: Looking at the user(s) to determine if this insert is a
1783 // fold-to-shuffle opportunity does not match the usual instcombine
1784 // constraints. We should decide if the transform is worthy based only
1785 // on this instruction and its operands, but that may not work currently.
1786 //
1787 // Here, we are trying to avoid creating shuffles before reaching
1788 // the end of a chain of extract-insert pairs. This is complicated because
1789 // we do not generally form arbitrary shuffle masks in instcombine
1790 // (because those may codegen poorly), but collectShuffleElements() does
1791 // exactly that.
1792 //
1793 // The rules for determining what is an acceptable target-independent
1794 // shuffle mask are fuzzy because they evolve based on the backend's
1795 // capabilities and real-world impact.
1796 auto isShuffleRootCandidate = [](InsertElementInst &Insert) {
1797 if (!Insert.hasOneUse())
1798 return true;
1799 auto *InsertUser = dyn_cast<InsertElementInst>(Val: Insert.user_back());
1800 if (!InsertUser)
1801 return true;
1802 return false;
1803 };
1804
1805 // Try to form a shuffle from a chain of extract-insert ops.
1806 if (isShuffleRootCandidate(IE)) {
1807 bool Rerun = true;
1808 while (Rerun) {
1809 Rerun = false;
1810
1811 SmallVector<int, 16> Mask;
1812 ShuffleOps LR =
1813 collectShuffleElements(V: &IE, Mask, PermittedRHS: nullptr, IC&: *this, Rerun);
1814
1815 // The proposed shuffle may be trivial, in which case we shouldn't
1816 // perform the combine.
1817 if (LR.first != &IE && LR.second != &IE) {
1818 // We now have a shuffle of LHS, RHS, Mask.
1819 if (LR.second == nullptr)
1820 LR.second = PoisonValue::get(T: LR.first->getType());
1821 return new ShuffleVectorInst(LR.first, LR.second, Mask);
1822 }
1823 }
1824 }
1825 }
1826
1827 if (auto VecTy = dyn_cast<FixedVectorType>(Val: VecOp->getType())) {
1828 unsigned VWidth = VecTy->getNumElements();
1829 APInt PoisonElts(VWidth, 0);
1830 APInt AllOnesEltMask(APInt::getAllOnes(numBits: VWidth));
1831 if (Value *V = SimplifyDemandedVectorElts(V: &IE, DemandedElts: AllOnesEltMask,
1832 PoisonElts)) {
1833 if (V != &IE)
1834 return replaceInstUsesWith(I&: IE, V);
1835 return &IE;
1836 }
1837 }
1838
1839 if (Instruction *Shuf = foldConstantInsEltIntoShuffle(InsElt&: IE))
1840 return Shuf;
1841
1842 if (Instruction *NewInsElt = hoistInsEltConst(InsElt2&: IE, Builder))
1843 return NewInsElt;
1844
1845 if (Instruction *Broadcast = foldInsSequenceIntoSplat(InsElt&: IE))
1846 return Broadcast;
1847
1848 if (Instruction *Splat = foldInsEltIntoSplat(InsElt&: IE))
1849 return Splat;
1850
1851 if (Instruction *IdentityShuf = foldInsEltIntoIdentityShuffle(InsElt&: IE))
1852 return IdentityShuf;
1853
1854 if (Instruction *Ext = narrowInsElt(InsElt&: IE, Builder))
1855 return Ext;
1856
1857 if (Instruction *Ext = foldTruncInsEltPair(InsElt&: IE, IsBigEndian: DL.isBigEndian(), Builder))
1858 return Ext;
1859
1860 return nullptr;
1861}
1862
1863/// Return true if we can evaluate the specified expression tree if the vector
1864/// elements were shuffled in a different order.
1865static bool canEvaluateShuffled(Value *V, ArrayRef<int> Mask,
1866 unsigned Depth = 5) {
1867 // We can always reorder the elements of a constant.
1868 if (isa<Constant>(Val: V))
1869 return true;
1870
1871 // We won't reorder vector arguments. No IPO here.
1872 Instruction *I = dyn_cast<Instruction>(Val: V);
1873 if (!I) return false;
1874
1875 // Two users may expect different orders of the elements. Don't try it.
1876 if (!I->hasOneUse())
1877 return false;
1878
1879 if (Depth == 0) return false;
1880
1881 switch (I->getOpcode()) {
1882 case Instruction::UDiv:
1883 case Instruction::SDiv:
1884 case Instruction::URem:
1885 case Instruction::SRem:
1886 // Propagating an undefined shuffle mask element to integer div/rem is not
1887 // allowed because those opcodes can create immediate undefined behavior
1888 // from an undefined element in an operand.
1889 if (llvm::is_contained(Range&: Mask, Element: -1))
1890 return false;
1891 [[fallthrough]];
1892 case Instruction::Add:
1893 case Instruction::FAdd:
1894 case Instruction::Sub:
1895 case Instruction::FSub:
1896 case Instruction::Mul:
1897 case Instruction::FMul:
1898 case Instruction::FDiv:
1899 case Instruction::FRem:
1900 case Instruction::Shl:
1901 case Instruction::LShr:
1902 case Instruction::AShr:
1903 case Instruction::And:
1904 case Instruction::Or:
1905 case Instruction::Xor:
1906 case Instruction::ICmp:
1907 case Instruction::FCmp:
1908 case Instruction::Trunc:
1909 case Instruction::ZExt:
1910 case Instruction::SExt:
1911 case Instruction::FPToUI:
1912 case Instruction::FPToSI:
1913 case Instruction::UIToFP:
1914 case Instruction::SIToFP:
1915 case Instruction::FPTrunc:
1916 case Instruction::FPExt:
1917 case Instruction::GetElementPtr: {
1918 // Bail out if we would create longer vector ops. We could allow creating
1919 // longer vector ops, but that may result in more expensive codegen.
1920 Type *ITy = I->getType();
1921 if (ITy->isVectorTy() &&
1922 Mask.size() > cast<FixedVectorType>(Val: ITy)->getNumElements())
1923 return false;
1924 for (Value *Operand : I->operands()) {
1925 if (!canEvaluateShuffled(V: Operand, Mask, Depth: Depth - 1))
1926 return false;
1927 }
1928 return true;
1929 }
1930 case Instruction::InsertElement: {
1931 ConstantInt *CI = dyn_cast<ConstantInt>(Val: I->getOperand(i: 2));
1932 if (!CI) return false;
1933 int ElementNumber = CI->getLimitedValue();
1934
1935 // Verify that 'CI' does not occur twice in Mask. A single 'insertelement'
1936 // can't put an element into multiple indices.
1937 bool SeenOnce = false;
1938 for (int I : Mask) {
1939 if (I == ElementNumber) {
1940 if (SeenOnce)
1941 return false;
1942 SeenOnce = true;
1943 }
1944 }
1945 return canEvaluateShuffled(V: I->getOperand(i: 0), Mask, Depth: Depth - 1);
1946 }
1947 }
1948 return false;
1949}
1950
1951/// Rebuild a new instruction just like 'I' but with the new operands given.
1952/// In the event of type mismatch, the type of the operands is correct.
1953static Value *buildNew(Instruction *I, ArrayRef<Value*> NewOps,
1954 IRBuilderBase &Builder) {
1955 Builder.SetInsertPoint(I);
1956 switch (I->getOpcode()) {
1957 case Instruction::Add:
1958 case Instruction::FAdd:
1959 case Instruction::Sub:
1960 case Instruction::FSub:
1961 case Instruction::Mul:
1962 case Instruction::FMul:
1963 case Instruction::UDiv:
1964 case Instruction::SDiv:
1965 case Instruction::FDiv:
1966 case Instruction::URem:
1967 case Instruction::SRem:
1968 case Instruction::FRem:
1969 case Instruction::Shl:
1970 case Instruction::LShr:
1971 case Instruction::AShr:
1972 case Instruction::And:
1973 case Instruction::Or:
1974 case Instruction::Xor: {
1975 BinaryOperator *BO = cast<BinaryOperator>(Val: I);
1976 assert(NewOps.size() == 2 && "binary operator with #ops != 2");
1977 Value *New = Builder.CreateBinOp(Opc: cast<BinaryOperator>(Val: I)->getOpcode(),
1978 LHS: NewOps[0], RHS: NewOps[1]);
1979 if (auto *NewI = dyn_cast<Instruction>(Val: New)) {
1980 if (isa<OverflowingBinaryOperator>(Val: BO)) {
1981 NewI->setHasNoUnsignedWrap(BO->hasNoUnsignedWrap());
1982 NewI->setHasNoSignedWrap(BO->hasNoSignedWrap());
1983 }
1984 if (isa<PossiblyExactOperator>(Val: BO)) {
1985 NewI->setIsExact(BO->isExact());
1986 }
1987 if (isa<FPMathOperator>(Val: BO))
1988 NewI->copyFastMathFlags(I);
1989 }
1990 return New;
1991 }
1992 case Instruction::ICmp:
1993 assert(NewOps.size() == 2 && "icmp with #ops != 2");
1994 return Builder.CreateICmp(P: cast<ICmpInst>(Val: I)->getPredicate(), LHS: NewOps[0],
1995 RHS: NewOps[1]);
1996 case Instruction::FCmp:
1997 assert(NewOps.size() == 2 && "fcmp with #ops != 2");
1998 return Builder.CreateFCmp(P: cast<FCmpInst>(Val: I)->getPredicate(), LHS: NewOps[0],
1999 RHS: NewOps[1]);
2000 case Instruction::Trunc:
2001 case Instruction::ZExt:
2002 case Instruction::SExt:
2003 case Instruction::FPToUI:
2004 case Instruction::FPToSI:
2005 case Instruction::UIToFP:
2006 case Instruction::SIToFP:
2007 case Instruction::FPTrunc:
2008 case Instruction::FPExt: {
2009 // It's possible that the mask has a different number of elements from
2010 // the original cast. We recompute the destination type to match the mask.
2011 Type *DestTy = VectorType::get(
2012 ElementType: I->getType()->getScalarType(),
2013 EC: cast<VectorType>(Val: NewOps[0]->getType())->getElementCount());
2014 assert(NewOps.size() == 1 && "cast with #ops != 1");
2015 return Builder.CreateCast(Op: cast<CastInst>(Val: I)->getOpcode(), V: NewOps[0],
2016 DestTy);
2017 }
2018 case Instruction::GetElementPtr: {
2019 Value *Ptr = NewOps[0];
2020 ArrayRef<Value*> Idx = NewOps.slice(N: 1);
2021 return Builder.CreateGEP(Ty: cast<GEPOperator>(Val: I)->getSourceElementType(),
2022 Ptr, IdxList: Idx, Name: "",
2023 NW: cast<GEPOperator>(Val: I)->getNoWrapFlags());
2024 }
2025 }
2026 llvm_unreachable("failed to rebuild vector instructions");
2027}
2028
2029static Value *evaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask,
2030 IRBuilderBase &Builder) {
2031 // Mask.size() does not need to be equal to the number of vector elements.
2032
2033 assert(V->getType()->isVectorTy() && "can't reorder non-vector elements");
2034 Type *EltTy = V->getType()->getScalarType();
2035
2036 if (isa<PoisonValue>(Val: V))
2037 return PoisonValue::get(T: FixedVectorType::get(ElementType: EltTy, NumElts: Mask.size()));
2038
2039 if (match(V, P: m_Undef()))
2040 return UndefValue::get(T: FixedVectorType::get(ElementType: EltTy, NumElts: Mask.size()));
2041
2042 if (isa<ConstantAggregateZero>(Val: V))
2043 return ConstantAggregateZero::get(Ty: FixedVectorType::get(ElementType: EltTy, NumElts: Mask.size()));
2044
2045 if (Constant *C = dyn_cast<Constant>(Val: V))
2046 return ConstantExpr::getShuffleVector(V1: C, V2: PoisonValue::get(T: C->getType()),
2047 Mask);
2048
2049 Instruction *I = cast<Instruction>(Val: V);
2050 switch (I->getOpcode()) {
2051 case Instruction::Add:
2052 case Instruction::FAdd:
2053 case Instruction::Sub:
2054 case Instruction::FSub:
2055 case Instruction::Mul:
2056 case Instruction::FMul:
2057 case Instruction::UDiv:
2058 case Instruction::SDiv:
2059 case Instruction::FDiv:
2060 case Instruction::URem:
2061 case Instruction::SRem:
2062 case Instruction::FRem:
2063 case Instruction::Shl:
2064 case Instruction::LShr:
2065 case Instruction::AShr:
2066 case Instruction::And:
2067 case Instruction::Or:
2068 case Instruction::Xor:
2069 case Instruction::ICmp:
2070 case Instruction::FCmp:
2071 case Instruction::Trunc:
2072 case Instruction::ZExt:
2073 case Instruction::SExt:
2074 case Instruction::FPToUI:
2075 case Instruction::FPToSI:
2076 case Instruction::UIToFP:
2077 case Instruction::SIToFP:
2078 case Instruction::FPTrunc:
2079 case Instruction::FPExt:
2080 case Instruction::Select:
2081 case Instruction::GetElementPtr: {
2082 SmallVector<Value*, 8> NewOps;
2083 bool NeedsRebuild =
2084 (Mask.size() !=
2085 cast<FixedVectorType>(Val: I->getType())->getNumElements());
2086 for (int i = 0, e = I->getNumOperands(); i != e; ++i) {
2087 Value *V;
2088 // Recursively call evaluateInDifferentElementOrder on vector arguments
2089 // as well. E.g. GetElementPtr may have scalar operands even if the
2090 // return value is a vector, so we need to examine the operand type.
2091 if (I->getOperand(i)->getType()->isVectorTy())
2092 V = evaluateInDifferentElementOrder(V: I->getOperand(i), Mask, Builder);
2093 else
2094 V = I->getOperand(i);
2095 NewOps.push_back(Elt: V);
2096 NeedsRebuild |= (V != I->getOperand(i));
2097 }
2098 if (NeedsRebuild)
2099 return buildNew(I, NewOps, Builder);
2100 return I;
2101 }
2102 case Instruction::InsertElement: {
2103 int Element = cast<ConstantInt>(Val: I->getOperand(i: 2))->getLimitedValue();
2104
2105 // The insertelement was inserting at Element. Figure out which element
2106 // that becomes after shuffling. The answer is guaranteed to be unique
2107 // by CanEvaluateShuffled.
2108 bool Found = false;
2109 int Index = 0;
2110 for (int e = Mask.size(); Index != e; ++Index) {
2111 if (Mask[Index] == Element) {
2112 Found = true;
2113 break;
2114 }
2115 }
2116
2117 // If element is not in Mask, no need to handle the operand 1 (element to
2118 // be inserted). Just evaluate values in operand 0 according to Mask.
2119 if (!Found)
2120 return evaluateInDifferentElementOrder(V: I->getOperand(i: 0), Mask, Builder);
2121
2122 Value *V = evaluateInDifferentElementOrder(V: I->getOperand(i: 0), Mask,
2123 Builder);
2124 Builder.SetInsertPoint(I);
2125 return Builder.CreateInsertElement(Vec: V, NewElt: I->getOperand(i: 1), Idx: Index);
2126 }
2127 }
2128 llvm_unreachable("failed to reorder elements of vector instruction!");
2129}
2130
2131// Returns true if the shuffle is extracting a contiguous range of values from
2132// LHS, for example:
2133// +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
2134// Input: |AA|BB|CC|DD|EE|FF|GG|HH|II|JJ|KK|LL|MM|NN|OO|PP|
2135// Shuffles to: |EE|FF|GG|HH|
2136// +--+--+--+--+
2137static bool isShuffleExtractingFromLHS(ShuffleVectorInst &SVI,
2138 ArrayRef<int> Mask) {
2139 unsigned LHSElems =
2140 cast<FixedVectorType>(Val: SVI.getOperand(i_nocapture: 0)->getType())->getNumElements();
2141 unsigned MaskElems = Mask.size();
2142 unsigned BegIdx = Mask.front();
2143 unsigned EndIdx = Mask.back();
2144 if (BegIdx > EndIdx || EndIdx >= LHSElems || EndIdx - BegIdx != MaskElems - 1)
2145 return false;
2146 for (unsigned I = 0; I != MaskElems; ++I)
2147 if (static_cast<unsigned>(Mask[I]) != BegIdx + I)
2148 return false;
2149 return true;
2150}
2151
2152/// These are the ingredients in an alternate form binary operator as described
2153/// below.
2154struct BinopElts {
2155 BinaryOperator::BinaryOps Opcode;
2156 Value *Op0;
2157 Value *Op1;
2158 BinopElts(BinaryOperator::BinaryOps Opc = (BinaryOperator::BinaryOps)0,
2159 Value *V0 = nullptr, Value *V1 = nullptr) :
2160 Opcode(Opc), Op0(V0), Op1(V1) {}
2161 operator bool() const { return Opcode != 0; }
2162};
2163
2164/// Binops may be transformed into binops with different opcodes and operands.
2165/// Reverse the usual canonicalization to enable folds with the non-canonical
2166/// form of the binop. If a transform is possible, return the elements of the
2167/// new binop. If not, return invalid elements.
2168static BinopElts getAlternateBinop(BinaryOperator *BO, const DataLayout &DL) {
2169 Value *BO0 = BO->getOperand(i_nocapture: 0), *BO1 = BO->getOperand(i_nocapture: 1);
2170 Type *Ty = BO->getType();
2171 switch (BO->getOpcode()) {
2172 case Instruction::Shl: {
2173 // shl X, C --> mul X, (1 << C)
2174 Constant *C;
2175 if (match(V: BO1, P: m_ImmConstant(C))) {
2176 Constant *ShlOne = ConstantFoldBinaryOpOperands(
2177 Opcode: Instruction::Shl, LHS: ConstantInt::get(Ty, V: 1), RHS: C, DL);
2178 assert(ShlOne && "Constant folding of immediate constants failed");
2179 return {Instruction::Mul, BO0, ShlOne};
2180 }
2181 break;
2182 }
2183 case Instruction::Or: {
2184 // or disjoin X, C --> add X, C
2185 if (cast<PossiblyDisjointInst>(Val: BO)->isDisjoint())
2186 return {Instruction::Add, BO0, BO1};
2187 break;
2188 }
2189 case Instruction::Sub:
2190 // sub 0, X --> mul X, -1
2191 if (match(V: BO0, P: m_ZeroInt()))
2192 return {Instruction::Mul, BO1, ConstantInt::getAllOnesValue(Ty)};
2193 break;
2194 default:
2195 break;
2196 }
2197 return {};
2198}
2199
2200/// A select shuffle of a select shuffle with a shared operand can be reduced
2201/// to a single select shuffle. This is an obvious improvement in IR, and the
2202/// backend is expected to lower select shuffles efficiently.
2203static Instruction *foldSelectShuffleOfSelectShuffle(ShuffleVectorInst &Shuf) {
2204 assert(Shuf.isSelect() && "Must have select-equivalent shuffle");
2205
2206 Value *Op0 = Shuf.getOperand(i_nocapture: 0), *Op1 = Shuf.getOperand(i_nocapture: 1);
2207 SmallVector<int, 16> Mask;
2208 Shuf.getShuffleMask(Result&: Mask);
2209 unsigned NumElts = Mask.size();
2210
2211 // Canonicalize a select shuffle with common operand as Op1.
2212 auto *ShufOp = dyn_cast<ShuffleVectorInst>(Val: Op0);
2213 if (ShufOp && ShufOp->isSelect() &&
2214 (ShufOp->getOperand(i_nocapture: 0) == Op1 || ShufOp->getOperand(i_nocapture: 1) == Op1)) {
2215 std::swap(a&: Op0, b&: Op1);
2216 ShuffleVectorInst::commuteShuffleMask(Mask, InVecNumElts: NumElts);
2217 }
2218
2219 ShufOp = dyn_cast<ShuffleVectorInst>(Val: Op1);
2220 if (!ShufOp || !ShufOp->isSelect() ||
2221 (ShufOp->getOperand(i_nocapture: 0) != Op0 && ShufOp->getOperand(i_nocapture: 1) != Op0))
2222 return nullptr;
2223
2224 Value *X = ShufOp->getOperand(i_nocapture: 0), *Y = ShufOp->getOperand(i_nocapture: 1);
2225 SmallVector<int, 16> Mask1;
2226 ShufOp->getShuffleMask(Result&: Mask1);
2227 assert(Mask1.size() == NumElts && "Vector size changed with select shuffle");
2228
2229 // Canonicalize common operand (Op0) as X (first operand of first shuffle).
2230 if (Y == Op0) {
2231 std::swap(a&: X, b&: Y);
2232 ShuffleVectorInst::commuteShuffleMask(Mask: Mask1, InVecNumElts: NumElts);
2233 }
2234
2235 // If the mask chooses from X (operand 0), it stays the same.
2236 // If the mask chooses from the earlier shuffle, the other mask value is
2237 // transferred to the combined select shuffle:
2238 // shuf X, (shuf X, Y, M1), M --> shuf X, Y, M'
2239 SmallVector<int, 16> NewMask(NumElts);
2240 for (unsigned i = 0; i != NumElts; ++i)
2241 NewMask[i] = Mask[i] < (signed)NumElts ? Mask[i] : Mask1[i];
2242
2243 // A select mask with undef elements might look like an identity mask.
2244 assert((ShuffleVectorInst::isSelectMask(NewMask, NumElts) ||
2245 ShuffleVectorInst::isIdentityMask(NewMask, NumElts)) &&
2246 "Unexpected shuffle mask");
2247 return new ShuffleVectorInst(X, Y, NewMask);
2248}
2249
2250static Instruction *foldSelectShuffleWith1Binop(ShuffleVectorInst &Shuf,
2251 const SimplifyQuery &SQ) {
2252 assert(Shuf.isSelect() && "Must have select-equivalent shuffle");
2253
2254 // Are we shuffling together some value and that same value after it has been
2255 // modified by a binop with a constant?
2256 Value *Op0 = Shuf.getOperand(i_nocapture: 0), *Op1 = Shuf.getOperand(i_nocapture: 1);
2257 Constant *C;
2258 bool Op0IsBinop;
2259 if (match(V: Op0, P: m_BinOp(L: m_Specific(V: Op1), R: m_Constant(C))))
2260 Op0IsBinop = true;
2261 else if (match(V: Op1, P: m_BinOp(L: m_Specific(V: Op0), R: m_Constant(C))))
2262 Op0IsBinop = false;
2263 else
2264 return nullptr;
2265
2266 // The identity constant for a binop leaves a variable operand unchanged. For
2267 // a vector, this is a splat of something like 0, -1, or 1.
2268 // If there's no identity constant for this binop, we're done.
2269 auto *BO = cast<BinaryOperator>(Val: Op0IsBinop ? Op0 : Op1);
2270 BinaryOperator::BinaryOps BOpcode = BO->getOpcode();
2271 Constant *IdC = ConstantExpr::getBinOpIdentity(Opcode: BOpcode, Ty: Shuf.getType(), AllowRHSConstant: true);
2272 if (!IdC)
2273 return nullptr;
2274
2275 Value *X = Op0IsBinop ? Op1 : Op0;
2276
2277 // Prevent folding in the case the non-binop operand might have NaN values.
2278 // If X can have NaN elements then we have that the floating point math
2279 // operation in the transformed code may not preserve the exact NaN
2280 // bit-pattern -- e.g. `fadd sNaN, 0.0 -> qNaN`.
2281 // This makes the transformation incorrect since the original program would
2282 // have preserved the exact NaN bit-pattern.
2283 // Avoid the folding if X can have NaN elements.
2284 if (Shuf.getType()->getElementType()->isFloatingPointTy() &&
2285 !isKnownNeverNaN(V: X, SQ))
2286 return nullptr;
2287
2288 // Shuffle identity constants into the lanes that return the original value.
2289 // Example: shuf (mul X, {-1,-2,-3,-4}), X, {0,5,6,3} --> mul X, {-1,1,1,-4}
2290 // Example: shuf X, (add X, {-1,-2,-3,-4}), {0,1,6,7} --> add X, {0,0,-3,-4}
2291 // The existing binop constant vector remains in the same operand position.
2292 ArrayRef<int> Mask = Shuf.getShuffleMask();
2293 Constant *NewC = Op0IsBinop ? ConstantExpr::getShuffleVector(V1: C, V2: IdC, Mask) :
2294 ConstantExpr::getShuffleVector(V1: IdC, V2: C, Mask);
2295
2296 bool MightCreatePoisonOrUB =
2297 is_contained(Range&: Mask, Element: PoisonMaskElem) &&
2298 (Instruction::isIntDivRem(Opcode: BOpcode) || Instruction::isShift(Opcode: BOpcode));
2299 if (MightCreatePoisonOrUB)
2300 NewC = InstCombiner::getSafeVectorConstantForBinop(Opcode: BOpcode, In: NewC, IsRHSConstant: true);
2301
2302 // shuf (bop X, C), X, M --> bop X, C'
2303 // shuf X, (bop X, C), M --> bop X, C'
2304 Instruction *NewBO = BinaryOperator::Create(Op: BOpcode, S1: X, S2: NewC);
2305 NewBO->copyIRFlags(V: BO);
2306
2307 // An undef shuffle mask element may propagate as an undef constant element in
2308 // the new binop. That would produce poison where the original code might not.
2309 // If we already made a safe constant, then there's no danger.
2310 if (is_contained(Range&: Mask, Element: PoisonMaskElem) && !MightCreatePoisonOrUB)
2311 NewBO->dropPoisonGeneratingFlags();
2312 return NewBO;
2313}
2314
2315/// If we have an insert of a scalar to a non-zero element of an undefined
2316/// vector and then shuffle that value, that's the same as inserting to the zero
2317/// element and shuffling. Splatting from the zero element is recognized as the
2318/// canonical form of splat.
2319static Instruction *canonicalizeInsertSplat(ShuffleVectorInst &Shuf,
2320 InstCombiner::BuilderTy &Builder) {
2321 Value *Op0 = Shuf.getOperand(i_nocapture: 0), *Op1 = Shuf.getOperand(i_nocapture: 1);
2322 ArrayRef<int> Mask = Shuf.getShuffleMask();
2323 Value *X;
2324 uint64_t IndexC;
2325
2326 // Match a shuffle that is a splat to a non-zero element.
2327 if (!match(V: Op0, P: m_OneUse(SubPattern: m_InsertElt(Val: m_Poison(), Elt: m_Value(V&: X),
2328 Idx: m_ConstantInt(V&: IndexC)))) ||
2329 !match(V: Op1, P: m_Poison()) || match(Mask, P: m_ZeroMask()) || IndexC == 0)
2330 return nullptr;
2331
2332 // Insert into element 0 of a poison vector.
2333 PoisonValue *PoisonVec = PoisonValue::get(T: Shuf.getType());
2334 Value *NewIns = Builder.CreateInsertElement(Vec: PoisonVec, NewElt: X, Idx: (uint64_t)0);
2335
2336 // Splat from element 0. Any mask element that is poison remains poison.
2337 // For example:
2338 // shuf (inselt poison, X, 2), _, <2,2,undef>
2339 // --> shuf (inselt poison, X, 0), poison, <0,0,undef>
2340 unsigned NumMaskElts =
2341 cast<FixedVectorType>(Val: Shuf.getType())->getNumElements();
2342 SmallVector<int, 16> NewMask(NumMaskElts, 0);
2343 for (unsigned i = 0; i != NumMaskElts; ++i)
2344 if (Mask[i] == PoisonMaskElem)
2345 NewMask[i] = Mask[i];
2346
2347 return new ShuffleVectorInst(NewIns, NewMask);
2348}
2349
2350/// Try to fold shuffles that are the equivalent of a vector select.
2351Instruction *InstCombinerImpl::foldSelectShuffle(ShuffleVectorInst &Shuf) {
2352 if (!Shuf.isSelect())
2353 return nullptr;
2354
2355 // Canonicalize to choose from operand 0 first unless operand 1 is undefined.
2356 // Commuting undef to operand 0 conflicts with another canonicalization.
2357 unsigned NumElts = cast<FixedVectorType>(Val: Shuf.getType())->getNumElements();
2358 if (!match(V: Shuf.getOperand(i_nocapture: 1), P: m_Undef()) &&
2359 Shuf.getMaskValue(Elt: 0) >= (int)NumElts) {
2360 // TODO: Can we assert that both operands of a shuffle-select are not undef
2361 // (otherwise, it would have been folded by instsimplify?
2362 Shuf.commute();
2363 return &Shuf;
2364 }
2365
2366 if (Instruction *I = foldSelectShuffleOfSelectShuffle(Shuf))
2367 return I;
2368
2369 if (Instruction *I = foldSelectShuffleWith1Binop(
2370 Shuf, SQ: getSimplifyQuery().getWithInstruction(I: &Shuf)))
2371 return I;
2372
2373 BinaryOperator *B0, *B1;
2374 if (!match(V: Shuf.getOperand(i_nocapture: 0), P: m_BinOp(I&: B0)) ||
2375 !match(V: Shuf.getOperand(i_nocapture: 1), P: m_BinOp(I&: B1)))
2376 return nullptr;
2377
2378 // If one operand is "0 - X", allow that to be viewed as "X * -1"
2379 // (ConstantsAreOp1) by getAlternateBinop below. If the neg is not paired
2380 // with a multiply, we will exit because C0/C1 will not be set.
2381 Value *X, *Y;
2382 Constant *C0 = nullptr, *C1 = nullptr;
2383 bool ConstantsAreOp1;
2384 if (match(V: B0, P: m_BinOp(L: m_Constant(C&: C0), R: m_Value(V&: X))) &&
2385 match(V: B1, P: m_BinOp(L: m_Constant(C&: C1), R: m_Value(V&: Y))))
2386 ConstantsAreOp1 = false;
2387 else if (match(V: B0, P: m_CombineOr(L: m_BinOp(L: m_Value(V&: X), R: m_Constant(C&: C0)),
2388 R: m_Neg(V: m_Value(V&: X)))) &&
2389 match(V: B1, P: m_CombineOr(L: m_BinOp(L: m_Value(V&: Y), R: m_Constant(C&: C1)),
2390 R: m_Neg(V: m_Value(V&: Y)))))
2391 ConstantsAreOp1 = true;
2392 else
2393 return nullptr;
2394
2395 // We need matching binops to fold the lanes together.
2396 BinaryOperator::BinaryOps Opc0 = B0->getOpcode();
2397 BinaryOperator::BinaryOps Opc1 = B1->getOpcode();
2398 bool DropNSW = false;
2399 if (ConstantsAreOp1 && Opc0 != Opc1) {
2400 // TODO: We drop "nsw" if shift is converted into multiply because it may
2401 // not be correct when the shift amount is BitWidth - 1. We could examine
2402 // each vector element to determine if it is safe to keep that flag.
2403 if (Opc0 == Instruction::Shl || Opc1 == Instruction::Shl)
2404 DropNSW = true;
2405 if (BinopElts AltB0 = getAlternateBinop(BO: B0, DL)) {
2406 assert(isa<Constant>(AltB0.Op1) && "Expecting constant with alt binop");
2407 Opc0 = AltB0.Opcode;
2408 C0 = cast<Constant>(Val: AltB0.Op1);
2409 } else if (BinopElts AltB1 = getAlternateBinop(BO: B1, DL)) {
2410 assert(isa<Constant>(AltB1.Op1) && "Expecting constant with alt binop");
2411 Opc1 = AltB1.Opcode;
2412 C1 = cast<Constant>(Val: AltB1.Op1);
2413 }
2414 }
2415
2416 if (Opc0 != Opc1 || !C0 || !C1)
2417 return nullptr;
2418
2419 // The opcodes must be the same. Use a new name to make that clear.
2420 BinaryOperator::BinaryOps BOpc = Opc0;
2421
2422 // Select the constant elements needed for the single binop.
2423 ArrayRef<int> Mask = Shuf.getShuffleMask();
2424 Constant *NewC = ConstantExpr::getShuffleVector(V1: C0, V2: C1, Mask);
2425
2426 // We are moving a binop after a shuffle. When a shuffle has an undefined
2427 // mask element, the result is undefined, but it is not poison or undefined
2428 // behavior. That is not necessarily true for div/rem/shift.
2429 bool MightCreatePoisonOrUB =
2430 is_contained(Range&: Mask, Element: PoisonMaskElem) &&
2431 (Instruction::isIntDivRem(Opcode: BOpc) || Instruction::isShift(Opcode: BOpc));
2432 if (MightCreatePoisonOrUB)
2433 NewC = InstCombiner::getSafeVectorConstantForBinop(Opcode: BOpc, In: NewC,
2434 IsRHSConstant: ConstantsAreOp1);
2435
2436 Value *V;
2437 if (X == Y) {
2438 // Remove a binop and the shuffle by rearranging the constant:
2439 // shuffle (op V, C0), (op V, C1), M --> op V, C'
2440 // shuffle (op C0, V), (op C1, V), M --> op C', V
2441 V = X;
2442 } else {
2443 // If there are 2 different variable operands, we must create a new shuffle
2444 // (select) first, so check uses to ensure that we don't end up with more
2445 // instructions than we started with.
2446 if (!B0->hasOneUse() && !B1->hasOneUse())
2447 return nullptr;
2448
2449 // If we use the original shuffle mask and op1 is *variable*, we would be
2450 // putting an undef into operand 1 of div/rem/shift. This is either UB or
2451 // poison. We do not have to guard against UB when *constants* are op1
2452 // because safe constants guarantee that we do not overflow sdiv/srem (and
2453 // there's no danger for other opcodes).
2454 // TODO: To allow this case, create a new shuffle mask with no undefs.
2455 if (MightCreatePoisonOrUB && !ConstantsAreOp1)
2456 return nullptr;
2457
2458 // Note: In general, we do not create new shuffles in InstCombine because we
2459 // do not know if a target can lower an arbitrary shuffle optimally. In this
2460 // case, the shuffle uses the existing mask, so there is no additional risk.
2461
2462 // Select the variable vectors first, then perform the binop:
2463 // shuffle (op X, C0), (op Y, C1), M --> op (shuffle X, Y, M), C'
2464 // shuffle (op C0, X), (op C1, Y), M --> op C', (shuffle X, Y, M)
2465 V = Builder.CreateShuffleVector(V1: X, V2: Y, Mask);
2466 }
2467
2468 Value *NewBO = ConstantsAreOp1 ? Builder.CreateBinOp(Opc: BOpc, LHS: V, RHS: NewC) :
2469 Builder.CreateBinOp(Opc: BOpc, LHS: NewC, RHS: V);
2470
2471 // Flags are intersected from the 2 source binops. But there are 2 exceptions:
2472 // 1. If we changed an opcode, poison conditions might have changed.
2473 // 2. If the shuffle had undef mask elements, the new binop might have undefs
2474 // where the original code did not. But if we already made a safe constant,
2475 // then there's no danger.
2476 if (auto *NewI = dyn_cast<Instruction>(Val: NewBO)) {
2477 NewI->copyIRFlags(V: B0);
2478 NewI->andIRFlags(V: B1);
2479 if (DropNSW)
2480 NewI->setHasNoSignedWrap(false);
2481 if (is_contained(Range&: Mask, Element: PoisonMaskElem) && !MightCreatePoisonOrUB)
2482 NewI->dropPoisonGeneratingFlags();
2483 }
2484 return replaceInstUsesWith(I&: Shuf, V: NewBO);
2485}
2486
2487/// Convert a narrowing shuffle of a bitcasted vector into a vector truncate.
2488/// Example (little endian):
2489/// shuf (bitcast <4 x i16> X to <8 x i8>), <0, 2, 4, 6> --> trunc X to <4 x i8>
2490static Instruction *foldTruncShuffle(ShuffleVectorInst &Shuf,
2491 bool IsBigEndian) {
2492 // This must be a bitcasted shuffle of 1 vector integer operand.
2493 Type *DestType = Shuf.getType();
2494 Value *X;
2495 if (!match(V: Shuf.getOperand(i_nocapture: 0), P: m_BitCast(Op: m_Value(V&: X))) ||
2496 !match(V: Shuf.getOperand(i_nocapture: 1), P: m_Poison()) || !DestType->isIntOrIntVectorTy())
2497 return nullptr;
2498
2499 // The source type must have the same number of elements as the shuffle,
2500 // and the source element type must be larger than the shuffle element type.
2501 Type *SrcType = X->getType();
2502 if (!SrcType->isVectorTy() || !SrcType->isIntOrIntVectorTy() ||
2503 cast<FixedVectorType>(Val: SrcType)->getNumElements() !=
2504 cast<FixedVectorType>(Val: DestType)->getNumElements() ||
2505 SrcType->getScalarSizeInBits() % DestType->getScalarSizeInBits() != 0)
2506 return nullptr;
2507
2508 assert(Shuf.changesLength() && !Shuf.increasesLength() &&
2509 "Expected a shuffle that decreases length");
2510
2511 // Last, check that the mask chooses the correct low bits for each narrow
2512 // element in the result.
2513 uint64_t TruncRatio =
2514 SrcType->getScalarSizeInBits() / DestType->getScalarSizeInBits();
2515 ArrayRef<int> Mask = Shuf.getShuffleMask();
2516 for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
2517 if (Mask[i] == PoisonMaskElem)
2518 continue;
2519 uint64_t LSBIndex = IsBigEndian ? (i + 1) * TruncRatio - 1 : i * TruncRatio;
2520 assert(LSBIndex <= INT32_MAX && "Overflowed 32-bits");
2521 if (Mask[i] != (int)LSBIndex)
2522 return nullptr;
2523 }
2524
2525 return new TruncInst(X, DestType);
2526}
2527
2528/// Match a shuffle-select-shuffle pattern where the shuffles are widening and
2529/// narrowing (concatenating with poison and extracting back to the original
2530/// length). This allows replacing the wide select with a narrow select.
2531static Instruction *narrowVectorSelect(ShuffleVectorInst &Shuf,
2532 InstCombiner::BuilderTy &Builder) {
2533 // This must be a narrowing identity shuffle. It extracts the 1st N elements
2534 // of the 1st vector operand of a shuffle.
2535 if (!match(V: Shuf.getOperand(i_nocapture: 1), P: m_Poison()) || !Shuf.isIdentityWithExtract())
2536 return nullptr;
2537
2538 // The vector being shuffled must be a vector select that we can eliminate.
2539 // TODO: The one-use requirement could be eased if X and/or Y are constants.
2540 Value *Cond, *X, *Y;
2541 if (!match(V: Shuf.getOperand(i_nocapture: 0),
2542 P: m_OneUse(SubPattern: m_Select(C: m_Value(V&: Cond), L: m_Value(V&: X), R: m_Value(V&: Y)))))
2543 return nullptr;
2544
2545 // We need a narrow condition value. It must be extended with poison elements
2546 // and have the same number of elements as this shuffle.
2547 unsigned NarrowNumElts =
2548 cast<FixedVectorType>(Val: Shuf.getType())->getNumElements();
2549 Value *NarrowCond;
2550 if (!match(V: Cond, P: m_OneUse(SubPattern: m_Shuffle(v1: m_Value(V&: NarrowCond), v2: m_Poison()))) ||
2551 cast<FixedVectorType>(Val: NarrowCond->getType())->getNumElements() !=
2552 NarrowNumElts ||
2553 !cast<ShuffleVectorInst>(Val: Cond)->isIdentityWithPadding())
2554 return nullptr;
2555
2556 // shuf (sel (shuf NarrowCond, poison, WideMask), X, Y), poison, NarrowMask)
2557 // -->
2558 // sel NarrowCond, (shuf X, poison, NarrowMask), (shuf Y, poison, NarrowMask)
2559 Value *NarrowX = Builder.CreateShuffleVector(V: X, Mask: Shuf.getShuffleMask());
2560 Value *NarrowY = Builder.CreateShuffleVector(V: Y, Mask: Shuf.getShuffleMask());
2561 return SelectInst::Create(C: NarrowCond, S1: NarrowX, S2: NarrowY);
2562}
2563
2564/// Canonicalize FP negate/abs after shuffle.
2565static Instruction *foldShuffleOfUnaryOps(ShuffleVectorInst &Shuf,
2566 InstCombiner::BuilderTy &Builder) {
2567 auto *S0 = dyn_cast<Instruction>(Val: Shuf.getOperand(i_nocapture: 0));
2568 Value *X;
2569 if (!S0 || !match(V: S0, P: m_CombineOr(L: m_FNeg(X: m_Value(V&: X)), R: m_FAbs(Op0: m_Value(V&: X)))))
2570 return nullptr;
2571
2572 bool IsFNeg = S0->getOpcode() == Instruction::FNeg;
2573
2574 // Match 2-input (binary) shuffle.
2575 auto *S1 = dyn_cast<Instruction>(Val: Shuf.getOperand(i_nocapture: 1));
2576 Value *Y;
2577 if (!S1 || !match(V: S1, P: m_CombineOr(L: m_FNeg(X: m_Value(V&: Y)), R: m_FAbs(Op0: m_Value(V&: Y)))) ||
2578 S0->getOpcode() != S1->getOpcode() ||
2579 (!S0->hasOneUse() && !S1->hasOneUse()))
2580 return nullptr;
2581
2582 // shuf (fneg/fabs X), (fneg/fabs Y), Mask --> fneg/fabs (shuf X, Y, Mask)
2583 Value *NewShuf = Builder.CreateShuffleVector(V1: X, V2: Y, Mask: Shuf.getShuffleMask());
2584 Instruction *NewF;
2585 if (IsFNeg) {
2586 NewF = UnaryOperator::CreateFNeg(V: NewShuf);
2587 } else {
2588 Function *FAbs = Intrinsic::getOrInsertDeclaration(
2589 M: Shuf.getModule(), id: Intrinsic::fabs, Tys: Shuf.getType());
2590 NewF = CallInst::Create(Func: FAbs, Args: {NewShuf});
2591 }
2592 NewF->copyIRFlags(V: S0);
2593 NewF->andIRFlags(V: S1);
2594 return NewF;
2595}
2596
2597/// Canonicalize casts after shuffle.
2598static Instruction *foldCastShuffle(ShuffleVectorInst &Shuf,
2599 InstCombiner::BuilderTy &Builder) {
2600 auto *Cast0 = dyn_cast<CastInst>(Val: Shuf.getOperand(i_nocapture: 0));
2601 if (!Cast0)
2602 return nullptr;
2603
2604 // TODO: Allow other opcodes? That would require easing the type restrictions
2605 // below here.
2606 CastInst::CastOps CastOpcode = Cast0->getOpcode();
2607 switch (CastOpcode) {
2608 case Instruction::SExt:
2609 case Instruction::ZExt:
2610 case Instruction::FPToSI:
2611 case Instruction::FPToUI:
2612 case Instruction::SIToFP:
2613 case Instruction::UIToFP:
2614 break;
2615 default:
2616 return nullptr;
2617 }
2618
2619 VectorType *CastSrcTy = cast<VectorType>(Val: Cast0->getSrcTy());
2620 VectorType *ShufTy = Shuf.getType();
2621 VectorType *ShufOpTy = cast<VectorType>(Val: Shuf.getOperand(i_nocapture: 0)->getType());
2622
2623 // TODO: Allow length-increasing shuffles?
2624 if (ShufTy->getElementCount().getKnownMinValue() >
2625 ShufOpTy->getElementCount().getKnownMinValue())
2626 return nullptr;
2627
2628 // shuffle (cast X), Poison, identity-with-extract-mask -->
2629 // cast (shuffle X, Poison, identity-with-extract-mask).
2630 if (isa<PoisonValue>(Val: Shuf.getOperand(i_nocapture: 1)) && Cast0->hasOneUse() &&
2631 Shuf.isIdentityWithExtract()) {
2632 auto *NewIns = Builder.CreateShuffleVector(V1: Cast0->getOperand(i_nocapture: 0),
2633 V2: PoisonValue::get(T: CastSrcTy),
2634 Mask: Shuf.getShuffleMask());
2635 return CastInst::Create(Cast0->getOpcode(), S: NewIns, Ty: Shuf.getType());
2636 }
2637
2638 auto *Cast1 = dyn_cast<CastInst>(Val: Shuf.getOperand(i_nocapture: 1));
2639 // Do we have 2 matching cast operands?
2640 if (!Cast1 || Cast0->getOpcode() != Cast1->getOpcode() ||
2641 Cast0->getSrcTy() != Cast1->getSrcTy())
2642 return nullptr;
2643
2644 // TODO: Allow element-size-decreasing casts (ex: fptosi float to i8)?
2645 assert(isa<FixedVectorType>(CastSrcTy) && isa<FixedVectorType>(ShufOpTy) &&
2646 "Expected fixed vector operands for casts and binary shuffle");
2647 if (CastSrcTy->getPrimitiveSizeInBits() > ShufOpTy->getPrimitiveSizeInBits())
2648 return nullptr;
2649
2650 // At least one of the operands must have only one use (the shuffle).
2651 if (!Cast0->hasOneUse() && !Cast1->hasOneUse())
2652 return nullptr;
2653
2654 // shuffle (cast X), (cast Y), Mask --> cast (shuffle X, Y, Mask)
2655 Value *X = Cast0->getOperand(i_nocapture: 0);
2656 Value *Y = Cast1->getOperand(i_nocapture: 0);
2657 Value *NewShuf = Builder.CreateShuffleVector(V1: X, V2: Y, Mask: Shuf.getShuffleMask());
2658 return CastInst::Create(CastOpcode, S: NewShuf, Ty: ShufTy);
2659}
2660
2661/// Try to fold an extract subvector operation.
2662static Instruction *foldIdentityExtractShuffle(ShuffleVectorInst &Shuf) {
2663 Value *Op0 = Shuf.getOperand(i_nocapture: 0), *Op1 = Shuf.getOperand(i_nocapture: 1);
2664 if (!Shuf.isIdentityWithExtract() || !match(V: Op1, P: m_Poison()))
2665 return nullptr;
2666
2667 // Check if we are extracting all bits of an inserted scalar:
2668 // extract-subvec (bitcast (inselt ?, X, 0) --> bitcast X to subvec type
2669 Value *X;
2670 if (match(V: Op0, P: m_BitCast(Op: m_InsertElt(Val: m_Value(), Elt: m_Value(V&: X), Idx: m_Zero()))) &&
2671 X->getType()->getPrimitiveSizeInBits() ==
2672 Shuf.getType()->getPrimitiveSizeInBits())
2673 return new BitCastInst(X, Shuf.getType());
2674
2675 // Try to combine 2 shuffles into 1 shuffle by concatenating a shuffle mask.
2676 Value *Y;
2677 ArrayRef<int> Mask;
2678 if (!match(V: Op0, P: m_Shuffle(v1: m_Value(V&: X), v2: m_Value(V&: Y), mask: m_Mask(Mask))))
2679 return nullptr;
2680
2681 // Be conservative with shuffle transforms. If we can't kill the 1st shuffle,
2682 // then combining may result in worse codegen.
2683 if (!Op0->hasOneUse())
2684 return nullptr;
2685
2686 // We are extracting a subvector from a shuffle. Remove excess elements from
2687 // the 1st shuffle mask to eliminate the extract.
2688 //
2689 // This transform is conservatively limited to identity extracts because we do
2690 // not allow arbitrary shuffle mask creation as a target-independent transform
2691 // (because we can't guarantee that will lower efficiently).
2692 //
2693 // If the extracting shuffle has an poison mask element, it transfers to the
2694 // new shuffle mask. Otherwise, copy the original mask element. Example:
2695 // shuf (shuf X, Y, <C0, C1, C2, poison, C4>), poison, <0, poison, 2, 3> -->
2696 // shuf X, Y, <C0, poison, C2, poison>
2697 unsigned NumElts = cast<FixedVectorType>(Val: Shuf.getType())->getNumElements();
2698 SmallVector<int, 16> NewMask(NumElts);
2699 assert(NumElts < Mask.size() &&
2700 "Identity with extract must have less elements than its inputs");
2701
2702 for (unsigned i = 0; i != NumElts; ++i) {
2703 int ExtractMaskElt = Shuf.getMaskValue(Elt: i);
2704 int MaskElt = Mask[i];
2705 NewMask[i] = ExtractMaskElt == PoisonMaskElem ? ExtractMaskElt : MaskElt;
2706 }
2707 return new ShuffleVectorInst(X, Y, NewMask);
2708}
2709
2710/// Try to replace a shuffle with an insertelement or try to replace a shuffle
2711/// operand with the operand of an insertelement.
2712static Instruction *foldShuffleWithInsert(ShuffleVectorInst &Shuf,
2713 InstCombinerImpl &IC) {
2714 Value *V0 = Shuf.getOperand(i_nocapture: 0), *V1 = Shuf.getOperand(i_nocapture: 1);
2715 SmallVector<int, 16> Mask;
2716 Shuf.getShuffleMask(Result&: Mask);
2717
2718 int NumElts = Mask.size();
2719 int InpNumElts = cast<FixedVectorType>(Val: V0->getType())->getNumElements();
2720
2721 // This is a specialization of a fold in SimplifyDemandedVectorElts. We may
2722 // not be able to handle it there if the insertelement has >1 use.
2723 // If the shuffle has an insertelement operand but does not choose the
2724 // inserted scalar element from that value, then we can replace that shuffle
2725 // operand with the source vector of the insertelement.
2726 Value *X;
2727 uint64_t IdxC;
2728 if (match(V: V0, P: m_InsertElt(Val: m_Value(V&: X), Elt: m_Value(), Idx: m_ConstantInt(V&: IdxC)))) {
2729 // shuf (inselt X, ?, IdxC), ?, Mask --> shuf X, ?, Mask
2730 if (!is_contained(Range&: Mask, Element: (int)IdxC))
2731 return IC.replaceOperand(I&: Shuf, OpNum: 0, V: X);
2732 }
2733 if (match(V: V1, P: m_InsertElt(Val: m_Value(V&: X), Elt: m_Value(), Idx: m_ConstantInt(V&: IdxC)))) {
2734 // Offset the index constant by the vector width because we are checking for
2735 // accesses to the 2nd vector input of the shuffle.
2736 IdxC += InpNumElts;
2737 // shuf ?, (inselt X, ?, IdxC), Mask --> shuf ?, X, Mask
2738 if (!is_contained(Range&: Mask, Element: (int)IdxC))
2739 return IC.replaceOperand(I&: Shuf, OpNum: 1, V: X);
2740 }
2741 // For the rest of the transform, the shuffle must not change vector sizes.
2742 // TODO: This restriction could be removed if the insert has only one use
2743 // (because the transform would require a new length-changing shuffle).
2744 if (NumElts != InpNumElts)
2745 return nullptr;
2746
2747 // shuffle (insert ?, Scalar, IndexC), V1, Mask --> insert V1, Scalar, IndexC'
2748 auto isShufflingScalarIntoOp1 = [&](Value *&Scalar, ConstantInt *&IndexC) {
2749 // We need an insertelement with a constant index.
2750 if (!match(V: V0, P: m_InsertElt(Val: m_Value(), Elt: m_Value(V&: Scalar),
2751 Idx: m_ConstantInt(CI&: IndexC))))
2752 return false;
2753
2754 // Test the shuffle mask to see if it splices the inserted scalar into the
2755 // operand 1 vector of the shuffle.
2756 int NewInsIndex = -1;
2757 for (int i = 0; i != NumElts; ++i) {
2758 // Ignore undef mask elements.
2759 if (Mask[i] == -1)
2760 continue;
2761
2762 // The shuffle takes elements of operand 1 without lane changes.
2763 if (Mask[i] == NumElts + i)
2764 continue;
2765
2766 // The shuffle must choose the inserted scalar exactly once.
2767 if (NewInsIndex != -1 || Mask[i] != IndexC->getSExtValue())
2768 return false;
2769
2770 // The shuffle is placing the inserted scalar into element i.
2771 NewInsIndex = i;
2772 }
2773
2774 assert(NewInsIndex != -1 && "Did not fold shuffle with unused operand?");
2775
2776 // Index is updated to the potentially translated insertion lane.
2777 IndexC = ConstantInt::get(Ty: IndexC->getIntegerType(), V: NewInsIndex);
2778 return true;
2779 };
2780
2781 // If the shuffle is unnecessary, insert the scalar operand directly into
2782 // operand 1 of the shuffle. Example:
2783 // shuffle (insert ?, S, 1), V1, <1, 5, 6, 7> --> insert V1, S, 0
2784 Value *Scalar;
2785 ConstantInt *IndexC;
2786 if (isShufflingScalarIntoOp1(Scalar, IndexC))
2787 return InsertElementInst::Create(Vec: V1, NewElt: Scalar, Idx: IndexC);
2788
2789 // Try again after commuting shuffle. Example:
2790 // shuffle V0, (insert ?, S, 0), <0, 1, 2, 4> -->
2791 // shuffle (insert ?, S, 0), V0, <4, 5, 6, 0> --> insert V0, S, 3
2792 std::swap(a&: V0, b&: V1);
2793 ShuffleVectorInst::commuteShuffleMask(Mask, InVecNumElts: NumElts);
2794 if (isShufflingScalarIntoOp1(Scalar, IndexC))
2795 return InsertElementInst::Create(Vec: V1, NewElt: Scalar, Idx: IndexC);
2796
2797 return nullptr;
2798}
2799
2800static Instruction *foldIdentityPaddedShuffles(ShuffleVectorInst &Shuf) {
2801 // Match the operands as identity with padding (also known as concatenation
2802 // with undef) shuffles of the same source type. The backend is expected to
2803 // recreate these concatenations from a shuffle of narrow operands.
2804 auto *Shuffle0 = dyn_cast<ShuffleVectorInst>(Val: Shuf.getOperand(i_nocapture: 0));
2805 auto *Shuffle1 = dyn_cast<ShuffleVectorInst>(Val: Shuf.getOperand(i_nocapture: 1));
2806 if (!Shuffle0 || !Shuffle0->isIdentityWithPadding() ||
2807 !Shuffle1 || !Shuffle1->isIdentityWithPadding())
2808 return nullptr;
2809
2810 // We limit this transform to power-of-2 types because we expect that the
2811 // backend can convert the simplified IR patterns to identical nodes as the
2812 // original IR.
2813 // TODO: If we can verify the same behavior for arbitrary types, the
2814 // power-of-2 checks can be removed.
2815 Value *X = Shuffle0->getOperand(i_nocapture: 0);
2816 Value *Y = Shuffle1->getOperand(i_nocapture: 0);
2817 if (X->getType() != Y->getType() ||
2818 !isPowerOf2_32(Value: cast<FixedVectorType>(Val: Shuf.getType())->getNumElements()) ||
2819 !isPowerOf2_32(
2820 Value: cast<FixedVectorType>(Val: Shuffle0->getType())->getNumElements()) ||
2821 !isPowerOf2_32(Value: cast<FixedVectorType>(Val: X->getType())->getNumElements()) ||
2822 match(V: X, P: m_Undef()) || match(V: Y, P: m_Undef()))
2823 return nullptr;
2824 assert(match(Shuffle0->getOperand(1), m_Undef()) &&
2825 match(Shuffle1->getOperand(1), m_Undef()) &&
2826 "Unexpected operand for identity shuffle");
2827
2828 // This is a shuffle of 2 widening shuffles. We can shuffle the narrow source
2829 // operands directly by adjusting the shuffle mask to account for the narrower
2830 // types:
2831 // shuf (widen X), (widen Y), Mask --> shuf X, Y, Mask'
2832 int NarrowElts = cast<FixedVectorType>(Val: X->getType())->getNumElements();
2833 int WideElts = cast<FixedVectorType>(Val: Shuffle0->getType())->getNumElements();
2834 assert(WideElts > NarrowElts && "Unexpected types for identity with padding");
2835
2836 ArrayRef<int> Mask = Shuf.getShuffleMask();
2837 SmallVector<int, 16> NewMask(Mask.size(), -1);
2838 for (int i = 0, e = Mask.size(); i != e; ++i) {
2839 if (Mask[i] == -1)
2840 continue;
2841
2842 // If this shuffle is choosing an undef element from 1 of the sources, that
2843 // element is undef.
2844 if (Mask[i] < WideElts) {
2845 if (Shuffle0->getMaskValue(Elt: Mask[i]) == -1)
2846 continue;
2847 } else {
2848 if (Shuffle1->getMaskValue(Elt: Mask[i] - WideElts) == -1)
2849 continue;
2850 }
2851
2852 // If this shuffle is choosing from the 1st narrow op, the mask element is
2853 // the same. If this shuffle is choosing from the 2nd narrow op, the mask
2854 // element is offset down to adjust for the narrow vector widths.
2855 if (Mask[i] < WideElts) {
2856 assert(Mask[i] < NarrowElts && "Unexpected shuffle mask");
2857 NewMask[i] = Mask[i];
2858 } else {
2859 assert(Mask[i] < (WideElts + NarrowElts) && "Unexpected shuffle mask");
2860 NewMask[i] = Mask[i] - (WideElts - NarrowElts);
2861 }
2862 }
2863 return new ShuffleVectorInst(X, Y, NewMask);
2864}
2865
2866// Splatting the first element of the result of a BinOp, where any of the
2867// BinOp's operands are the result of a first element splat can be simplified to
2868// splatting the first element of the result of the BinOp
2869Instruction *InstCombinerImpl::simplifyBinOpSplats(ShuffleVectorInst &SVI) {
2870 if (!match(V: SVI.getOperand(i_nocapture: 1), P: m_Poison()) ||
2871 !match(Mask: SVI.getShuffleMask(), P: m_ZeroMask()) ||
2872 !SVI.getOperand(i_nocapture: 0)->hasOneUse())
2873 return nullptr;
2874
2875 Value *Op0 = SVI.getOperand(i_nocapture: 0);
2876 Value *X, *Y;
2877 if (!match(V: Op0, P: m_BinOp(L: m_Shuffle(v1: m_Value(V&: X), v2: m_Poison(), mask: m_ZeroMask()),
2878 R: m_Value(V&: Y))) &&
2879 !match(V: Op0, P: m_BinOp(L: m_Value(V&: X),
2880 R: m_Shuffle(v1: m_Value(V&: Y), v2: m_Poison(), mask: m_ZeroMask()))))
2881 return nullptr;
2882 if (X->getType() != Y->getType())
2883 return nullptr;
2884
2885 auto *BinOp = cast<BinaryOperator>(Val: Op0);
2886 if (!isSafeToSpeculativelyExecuteWithVariableReplaced(I: BinOp))
2887 return nullptr;
2888
2889 Value *NewBO = Builder.CreateBinOp(Opc: BinOp->getOpcode(), LHS: X, RHS: Y);
2890 if (auto NewBOI = dyn_cast<Instruction>(Val: NewBO))
2891 NewBOI->copyIRFlags(V: BinOp);
2892
2893 return new ShuffleVectorInst(NewBO, SVI.getShuffleMask());
2894}
2895
2896Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
2897 Value *LHS = SVI.getOperand(i_nocapture: 0);
2898 Value *RHS = SVI.getOperand(i_nocapture: 1);
2899 SimplifyQuery ShufQuery = SQ.getWithInstruction(I: &SVI);
2900 if (auto *V = simplifyShuffleVectorInst(Op0: LHS, Op1: RHS, Mask: SVI.getShuffleMask(),
2901 RetTy: SVI.getType(), Q: ShufQuery))
2902 return replaceInstUsesWith(I&: SVI, V);
2903
2904 if (Instruction *I = simplifyBinOpSplats(SVI))
2905 return I;
2906
2907 // Canonicalize splat shuffle to use poison RHS. Handle this explicitly in
2908 // order to support scalable vectors.
2909 if (match(Mask: SVI.getShuffleMask(), P: m_ZeroMask()) && !isa<PoisonValue>(Val: RHS))
2910 return replaceOperand(I&: SVI, OpNum: 1, V: PoisonValue::get(T: RHS->getType()));
2911
2912 if (isa<ScalableVectorType>(Val: LHS->getType()))
2913 return nullptr;
2914
2915 unsigned VWidth = cast<FixedVectorType>(Val: SVI.getType())->getNumElements();
2916 unsigned LHSWidth = cast<FixedVectorType>(Val: LHS->getType())->getNumElements();
2917
2918 // shuffle (bitcast X), (bitcast Y), Mask --> bitcast (shuffle X, Y, Mask)
2919 //
2920 // if X and Y are of the same (vector) type, and the element size is not
2921 // changed by the bitcasts, we can distribute the bitcasts through the
2922 // shuffle, hopefully reducing the number of instructions. We make sure that
2923 // at least one bitcast only has one use, so we don't *increase* the number of
2924 // instructions here.
2925 Value *X, *Y;
2926 if (match(V: LHS, P: m_BitCast(Op: m_Value(V&: X))) && match(V: RHS, P: m_BitCast(Op: m_Value(V&: Y))) &&
2927 X->getType()->isVectorTy() && X->getType() == Y->getType() &&
2928 X->getType()->getScalarSizeInBits() ==
2929 SVI.getType()->getScalarSizeInBits() &&
2930 (LHS->hasOneUse() || RHS->hasOneUse())) {
2931 Value *V = Builder.CreateShuffleVector(V1: X, V2: Y, Mask: SVI.getShuffleMask(),
2932 Name: SVI.getName() + ".uncasted");
2933 return new BitCastInst(V, SVI.getType());
2934 }
2935
2936 ArrayRef<int> Mask = SVI.getShuffleMask();
2937
2938 // Peek through a bitcasted shuffle operand by scaling the mask. If the
2939 // simulated shuffle can simplify, then this shuffle is unnecessary:
2940 // shuf (bitcast X), undef, Mask --> bitcast X'
2941 // TODO: This could be extended to allow length-changing shuffles.
2942 // The transform might also be obsoleted if we allowed canonicalization
2943 // of bitcasted shuffles.
2944 if (match(V: LHS, P: m_BitCast(Op: m_Value(V&: X))) && match(V: RHS, P: m_Undef()) &&
2945 X->getType()->isVectorTy() && VWidth == LHSWidth) {
2946 // Try to create a scaled mask constant.
2947 auto *XType = cast<FixedVectorType>(Val: X->getType());
2948 unsigned XNumElts = XType->getNumElements();
2949 SmallVector<int, 16> ScaledMask;
2950 if (scaleShuffleMaskElts(NumDstElts: XNumElts, Mask, ScaledMask)) {
2951 // If the shuffled source vector simplifies, cast that value to this
2952 // shuffle's type.
2953 if (auto *V = simplifyShuffleVectorInst(Op0: X, Op1: UndefValue::get(T: XType),
2954 Mask: ScaledMask, RetTy: XType, Q: ShufQuery))
2955 return BitCastInst::Create(Instruction::BitCast, S: V, Ty: SVI.getType());
2956 }
2957 }
2958
2959 // shuffle x, x, mask --> shuffle x, undef, mask'
2960 if (LHS == RHS) {
2961 assert(!match(RHS, m_Undef()) &&
2962 "Shuffle with 2 undef ops not simplified?");
2963 return new ShuffleVectorInst(LHS, createUnaryMask(Mask, NumElts: LHSWidth));
2964 }
2965
2966 // shuffle undef, x, mask --> shuffle x, undef, mask'
2967 if (match(V: LHS, P: m_Undef())) {
2968 SVI.commute();
2969 return &SVI;
2970 }
2971
2972 if (Instruction *I = canonicalizeInsertSplat(Shuf&: SVI, Builder))
2973 return I;
2974
2975 if (Instruction *I = foldSelectShuffle(Shuf&: SVI))
2976 return I;
2977
2978 if (Instruction *I = foldTruncShuffle(Shuf&: SVI, IsBigEndian: DL.isBigEndian()))
2979 return I;
2980
2981 if (Instruction *I = narrowVectorSelect(Shuf&: SVI, Builder))
2982 return I;
2983
2984 if (Instruction *I = foldShuffleOfUnaryOps(Shuf&: SVI, Builder))
2985 return I;
2986
2987 if (Instruction *I = foldCastShuffle(Shuf&: SVI, Builder))
2988 return I;
2989
2990 APInt PoisonElts(VWidth, 0);
2991 APInt AllOnesEltMask(APInt::getAllOnes(numBits: VWidth));
2992 if (Value *V = SimplifyDemandedVectorElts(V: &SVI, DemandedElts: AllOnesEltMask, PoisonElts)) {
2993 if (V != &SVI)
2994 return replaceInstUsesWith(I&: SVI, V);
2995 return &SVI;
2996 }
2997
2998 if (Instruction *I = foldIdentityExtractShuffle(Shuf&: SVI))
2999 return I;
3000
3001 // These transforms have the potential to lose undef knowledge, so they are
3002 // intentionally placed after SimplifyDemandedVectorElts().
3003 if (Instruction *I = foldShuffleWithInsert(Shuf&: SVI, IC&: *this))
3004 return I;
3005 if (Instruction *I = foldIdentityPaddedShuffles(Shuf&: SVI))
3006 return I;
3007
3008 if (match(V: RHS, P: m_Constant())) {
3009 if (auto *SI = dyn_cast<SelectInst>(Val: LHS)) {
3010 // We cannot do this fold for elementwise select since ShuffleVector is
3011 // not elementwise.
3012 if (SI->getCondition()->getType()->isIntegerTy() &&
3013 (isa<PoisonValue>(Val: RHS) ||
3014 isGuaranteedNotToBePoison(V: SI->getCondition()))) {
3015 if (Instruction *I = FoldOpIntoSelect(Op&: SVI, SI))
3016 return I;
3017 }
3018 }
3019 if (auto *PN = dyn_cast<PHINode>(Val: LHS)) {
3020 if (Instruction *I = foldOpIntoPhi(I&: SVI, PN, /*AllowMultipleUses=*/true))
3021 return I;
3022 }
3023 }
3024
3025 if (match(V: RHS, P: m_Poison()) && canEvaluateShuffled(V: LHS, Mask)) {
3026 Value *V = evaluateInDifferentElementOrder(V: LHS, Mask, Builder);
3027 return replaceInstUsesWith(I&: SVI, V);
3028 }
3029
3030 // SROA generates shuffle+bitcast when the extracted sub-vector is bitcast to
3031 // a non-vector type. We can instead bitcast the original vector followed by
3032 // an extract of the desired element:
3033 //
3034 // %sroa = shufflevector <16 x i8> %in, <16 x i8> undef,
3035 // <4 x i32> <i32 0, i32 1, i32 2, i32 3>
3036 // %1 = bitcast <4 x i8> %sroa to i32
3037 // Becomes:
3038 // %bc = bitcast <16 x i8> %in to <4 x i32>
3039 // %ext = extractelement <4 x i32> %bc, i32 0
3040 //
3041 // If the shuffle is extracting a contiguous range of values from the input
3042 // vector then each use which is a bitcast of the extracted size can be
3043 // replaced. This will work if the vector types are compatible, and the begin
3044 // index is aligned to a value in the casted vector type. If the begin index
3045 // isn't aligned then we can shuffle the original vector (keeping the same
3046 // vector type) before extracting.
3047 //
3048 // This code will bail out if the target type is fundamentally incompatible
3049 // with vectors of the source type.
3050 //
3051 // Example of <16 x i8>, target type i32:
3052 // Index range [4,8): v-----------v Will work.
3053 // +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
3054 // <16 x i8>: | | | | | | | | | | | | | | | | |
3055 // <4 x i32>: | | | | |
3056 // +-----------+-----------+-----------+-----------+
3057 // Index range [6,10): ^-----------^ Needs an extra shuffle.
3058 // Target type i40: ^--------------^ Won't work, bail.
3059 bool MadeChange = false;
3060 if (isShuffleExtractingFromLHS(SVI, Mask)) {
3061 Value *V = LHS;
3062 unsigned MaskElems = Mask.size();
3063 auto *SrcTy = cast<FixedVectorType>(Val: V->getType());
3064 unsigned VecBitWidth = DL.getTypeSizeInBits(Ty: SrcTy);
3065 unsigned SrcElemBitWidth = DL.getTypeSizeInBits(Ty: SrcTy->getElementType());
3066 assert(SrcElemBitWidth && "vector elements must have a bitwidth");
3067 unsigned SrcNumElems = SrcTy->getNumElements();
3068 SmallVector<BitCastInst *, 8> BCs;
3069 DenseMap<Type *, Value *> NewBCs;
3070 for (User *U : SVI.users())
3071 if (BitCastInst *BC = dyn_cast<BitCastInst>(Val: U)) {
3072 // Only visit bitcasts that weren't previously handled.
3073 if (BC->use_empty())
3074 continue;
3075 // Prefer to combine bitcasts of bitcasts before attempting this fold.
3076 if (BC->hasOneUse()) {
3077 auto *BC2 = dyn_cast<BitCastInst>(Val: BC->user_back());
3078 if (BC2 && isEliminableCastPair(CI1: BC, CI2: BC2))
3079 continue;
3080 }
3081 BCs.push_back(Elt: BC);
3082 }
3083 for (BitCastInst *BC : BCs) {
3084 unsigned BegIdx = Mask.front();
3085 Type *TgtTy = BC->getDestTy();
3086 unsigned TgtElemBitWidth = DL.getTypeSizeInBits(Ty: TgtTy);
3087 if (!TgtElemBitWidth)
3088 continue;
3089 unsigned TgtNumElems = VecBitWidth / TgtElemBitWidth;
3090 bool VecBitWidthsEqual = VecBitWidth == TgtNumElems * TgtElemBitWidth;
3091 bool BegIsAligned = 0 == ((SrcElemBitWidth * BegIdx) % TgtElemBitWidth);
3092 if (!VecBitWidthsEqual)
3093 continue;
3094 if (!VectorType::isValidElementType(ElemTy: TgtTy))
3095 continue;
3096 auto *CastSrcTy = FixedVectorType::get(ElementType: TgtTy, NumElts: TgtNumElems);
3097 if (!BegIsAligned) {
3098 // Shuffle the input so [0,NumElements) contains the output, and
3099 // [NumElems,SrcNumElems) is undef.
3100 SmallVector<int, 16> ShuffleMask(SrcNumElems, -1);
3101 for (unsigned I = 0, E = MaskElems, Idx = BegIdx; I != E; ++Idx, ++I)
3102 ShuffleMask[I] = Idx;
3103 V = Builder.CreateShuffleVector(V, Mask: ShuffleMask,
3104 Name: SVI.getName() + ".extract");
3105 BegIdx = 0;
3106 }
3107 unsigned SrcElemsPerTgtElem = TgtElemBitWidth / SrcElemBitWidth;
3108 assert(SrcElemsPerTgtElem);
3109 BegIdx /= SrcElemsPerTgtElem;
3110 auto [It, Inserted] = NewBCs.try_emplace(Key: CastSrcTy);
3111 if (Inserted)
3112 It->second = Builder.CreateBitCast(V, DestTy: CastSrcTy, Name: SVI.getName() + ".bc");
3113 auto *Ext = Builder.CreateExtractElement(Vec: It->second, Idx: BegIdx,
3114 Name: SVI.getName() + ".extract");
3115 // The shufflevector isn't being replaced: the bitcast that used it
3116 // is. InstCombine will visit the newly-created instructions.
3117 replaceInstUsesWith(I&: *BC, V: Ext);
3118 MadeChange = true;
3119 }
3120 }
3121
3122 // If the LHS is a shufflevector itself, see if we can combine it with this
3123 // one without producing an unusual shuffle.
3124 // Cases that might be simplified:
3125 // 1.
3126 // x1=shuffle(v1,v2,mask1)
3127 // x=shuffle(x1,undef,mask)
3128 // ==>
3129 // x=shuffle(v1,undef,newMask)
3130 // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : -1
3131 // 2.
3132 // x1=shuffle(v1,undef,mask1)
3133 // x=shuffle(x1,x2,mask)
3134 // where v1.size() == mask1.size()
3135 // ==>
3136 // x=shuffle(v1,x2,newMask)
3137 // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : mask[i]
3138 // 3.
3139 // x2=shuffle(v2,undef,mask2)
3140 // x=shuffle(x1,x2,mask)
3141 // where v2.size() == mask2.size()
3142 // ==>
3143 // x=shuffle(x1,v2,newMask)
3144 // newMask[i] = (mask[i] < x1.size())
3145 // ? mask[i] : mask2[mask[i]-x1.size()]+x1.size()
3146 // 4.
3147 // x1=shuffle(v1,undef,mask1)
3148 // x2=shuffle(v2,undef,mask2)
3149 // x=shuffle(x1,x2,mask)
3150 // where v1.size() == v2.size()
3151 // ==>
3152 // x=shuffle(v1,v2,newMask)
3153 // newMask[i] = (mask[i] < x1.size())
3154 // ? mask1[mask[i]] : mask2[mask[i]-x1.size()]+v1.size()
3155 //
3156 // Here we are really conservative:
3157 // we are absolutely afraid of producing a shuffle mask not in the input
3158 // program, because the code gen may not be smart enough to turn a merged
3159 // shuffle into two specific shuffles: it may produce worse code. As such,
3160 // we only merge two shuffles if the result is either a splat or one of the
3161 // input shuffle masks. In this case, merging the shuffles just removes
3162 // one instruction, which we know is safe. This is good for things like
3163 // turning: (splat(splat)) -> splat, or
3164 // merge(V[0..n], V[n+1..2n]) -> V[0..2n]
3165 ShuffleVectorInst* LHSShuffle = dyn_cast<ShuffleVectorInst>(Val: LHS);
3166 ShuffleVectorInst* RHSShuffle = dyn_cast<ShuffleVectorInst>(Val: RHS);
3167 if (LHSShuffle)
3168 if (!match(V: LHSShuffle->getOperand(i_nocapture: 1), P: m_Poison()) &&
3169 !match(V: RHS, P: m_Poison()))
3170 LHSShuffle = nullptr;
3171 if (RHSShuffle)
3172 if (!match(V: RHSShuffle->getOperand(i_nocapture: 1), P: m_Poison()))
3173 RHSShuffle = nullptr;
3174 if (!LHSShuffle && !RHSShuffle)
3175 return MadeChange ? &SVI : nullptr;
3176
3177 Value* LHSOp0 = nullptr;
3178 Value* LHSOp1 = nullptr;
3179 Value* RHSOp0 = nullptr;
3180 unsigned LHSOp0Width = 0;
3181 unsigned RHSOp0Width = 0;
3182 if (LHSShuffle) {
3183 LHSOp0 = LHSShuffle->getOperand(i_nocapture: 0);
3184 LHSOp1 = LHSShuffle->getOperand(i_nocapture: 1);
3185 LHSOp0Width = cast<FixedVectorType>(Val: LHSOp0->getType())->getNumElements();
3186 }
3187 if (RHSShuffle) {
3188 RHSOp0 = RHSShuffle->getOperand(i_nocapture: 0);
3189 RHSOp0Width = cast<FixedVectorType>(Val: RHSOp0->getType())->getNumElements();
3190 }
3191 Value* newLHS = LHS;
3192 Value* newRHS = RHS;
3193 if (LHSShuffle) {
3194 // case 1
3195 if (match(V: RHS, P: m_Poison())) {
3196 newLHS = LHSOp0;
3197 newRHS = LHSOp1;
3198 }
3199 // case 2 or 4
3200 else if (LHSOp0Width == LHSWidth) {
3201 newLHS = LHSOp0;
3202 }
3203 }
3204 // case 3 or 4
3205 if (RHSShuffle && RHSOp0Width == LHSWidth) {
3206 newRHS = RHSOp0;
3207 }
3208 // case 4
3209 if (LHSOp0 == RHSOp0) {
3210 newLHS = LHSOp0;
3211 newRHS = nullptr;
3212 }
3213
3214 if (newLHS == LHS && newRHS == RHS)
3215 return MadeChange ? &SVI : nullptr;
3216
3217 ArrayRef<int> LHSMask;
3218 ArrayRef<int> RHSMask;
3219 if (newLHS != LHS)
3220 LHSMask = LHSShuffle->getShuffleMask();
3221 if (RHSShuffle && newRHS != RHS)
3222 RHSMask = RHSShuffle->getShuffleMask();
3223
3224 unsigned newLHSWidth = (newLHS != LHS) ? LHSOp0Width : LHSWidth;
3225 SmallVector<int, 16> newMask;
3226 bool isSplat = true;
3227 int SplatElt = -1;
3228 // Create a new mask for the new ShuffleVectorInst so that the new
3229 // ShuffleVectorInst is equivalent to the original one.
3230 for (unsigned i = 0; i < VWidth; ++i) {
3231 int eltMask;
3232 if (Mask[i] < 0) {
3233 // This element is a poison value.
3234 eltMask = -1;
3235 } else if (Mask[i] < (int)LHSWidth) {
3236 // This element is from left hand side vector operand.
3237 //
3238 // If LHS is going to be replaced (case 1, 2, or 4), calculate the
3239 // new mask value for the element.
3240 if (newLHS != LHS) {
3241 eltMask = LHSMask[Mask[i]];
3242 // If the value selected is an poison value, explicitly specify it
3243 // with a -1 mask value.
3244 if (eltMask >= (int)LHSOp0Width && isa<PoisonValue>(Val: LHSOp1))
3245 eltMask = -1;
3246 } else
3247 eltMask = Mask[i];
3248 } else {
3249 // This element is from right hand side vector operand
3250 //
3251 // If the value selected is a poison value, explicitly specify it
3252 // with a -1 mask value. (case 1)
3253 if (match(V: RHS, P: m_Poison()))
3254 eltMask = -1;
3255 // If RHS is going to be replaced (case 3 or 4), calculate the
3256 // new mask value for the element.
3257 else if (newRHS != RHS) {
3258 eltMask = RHSMask[Mask[i]-LHSWidth];
3259 // If the value selected is an poison value, explicitly specify it
3260 // with a -1 mask value.
3261 if (eltMask >= (int)RHSOp0Width) {
3262 assert(match(RHSShuffle->getOperand(1), m_Poison()) &&
3263 "should have been check above");
3264 eltMask = -1;
3265 }
3266 } else
3267 eltMask = Mask[i]-LHSWidth;
3268
3269 // If LHS's width is changed, shift the mask value accordingly.
3270 // If newRHS == nullptr, i.e. LHSOp0 == RHSOp0, we want to remap any
3271 // references from RHSOp0 to LHSOp0, so we don't need to shift the mask.
3272 // If newRHS == newLHS, we want to remap any references from newRHS to
3273 // newLHS so that we can properly identify splats that may occur due to
3274 // obfuscation across the two vectors.
3275 if (eltMask >= 0 && newRHS != nullptr && newLHS != newRHS)
3276 eltMask += newLHSWidth;
3277 }
3278
3279 // Check if this could still be a splat.
3280 if (eltMask >= 0) {
3281 if (SplatElt >= 0 && SplatElt != eltMask)
3282 isSplat = false;
3283 SplatElt = eltMask;
3284 }
3285
3286 newMask.push_back(Elt: eltMask);
3287 }
3288
3289 // If the result mask is equal to one of the original shuffle masks,
3290 // or is a splat, do the replacement.
3291 if (isSplat || newMask == LHSMask || newMask == RHSMask || newMask == Mask) {
3292 if (!newRHS)
3293 newRHS = PoisonValue::get(T: newLHS->getType());
3294 return new ShuffleVectorInst(newLHS, newRHS, newMask);
3295 }
3296
3297 return MadeChange ? &SVI : nullptr;
3298}
3299