1//===- InstCombineCasts.cpp -----------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the visit functions for cast operations.
10//
11//===----------------------------------------------------------------------===//
12
13#include "InstCombineInternal.h"
14#include "llvm/ADT/SetVector.h"
15#include "llvm/Analysis/ConstantFolding.h"
16#include "llvm/IR/DataLayout.h"
17#include "llvm/IR/DebugInfo.h"
18#include "llvm/IR/PatternMatch.h"
19#include "llvm/Support/KnownBits.h"
20#include "llvm/Transforms/InstCombine/InstCombiner.h"
21#include <optional>
22
23using namespace llvm;
24using namespace PatternMatch;
25
26#define DEBUG_TYPE "instcombine"
27
28/// Given an expression that CanEvaluateTruncated or CanEvaluateSExtd returns
29/// true for, actually insert the code to evaluate the expression.
30Value *InstCombinerImpl::EvaluateInDifferentType(Value *V, Type *Ty,
31 bool isSigned) {
32 if (Constant *C = dyn_cast<Constant>(Val: V))
33 return ConstantFoldIntegerCast(C, DestTy: Ty, IsSigned: isSigned, DL);
34
35 // Otherwise, it must be an instruction.
36 Instruction *I = cast<Instruction>(Val: V);
37 Instruction *Res = nullptr;
38 unsigned Opc = I->getOpcode();
39 switch (Opc) {
40 case Instruction::Add:
41 case Instruction::Sub:
42 case Instruction::Mul:
43 case Instruction::And:
44 case Instruction::Or:
45 case Instruction::Xor:
46 case Instruction::AShr:
47 case Instruction::LShr:
48 case Instruction::Shl:
49 case Instruction::UDiv:
50 case Instruction::URem: {
51 Value *LHS = EvaluateInDifferentType(V: I->getOperand(i: 0), Ty, isSigned);
52 Value *RHS = EvaluateInDifferentType(V: I->getOperand(i: 1), Ty, isSigned);
53 Res = BinaryOperator::Create(Op: (Instruction::BinaryOps)Opc, S1: LHS, S2: RHS);
54 if (Opc == Instruction::LShr || Opc == Instruction::AShr)
55 Res->setIsExact(I->isExact());
56 break;
57 }
58 case Instruction::Trunc:
59 case Instruction::ZExt:
60 case Instruction::SExt:
61 // If the source type of the cast is the type we're trying for then we can
62 // just return the source. There's no need to insert it because it is not
63 // new.
64 if (I->getOperand(i: 0)->getType() == Ty)
65 return I->getOperand(i: 0);
66
67 // Otherwise, must be the same type of cast, so just reinsert a new one.
68 // This also handles the case of zext(trunc(x)) -> zext(x).
69 Res = CastInst::CreateIntegerCast(S: I->getOperand(i: 0), Ty,
70 isSigned: Opc == Instruction::SExt);
71 break;
72 case Instruction::Select: {
73 Value *True = EvaluateInDifferentType(V: I->getOperand(i: 1), Ty, isSigned);
74 Value *False = EvaluateInDifferentType(V: I->getOperand(i: 2), Ty, isSigned);
75 Res = SelectInst::Create(C: I->getOperand(i: 0), S1: True, S2: False);
76 break;
77 }
78 case Instruction::PHI: {
79 PHINode *OPN = cast<PHINode>(Val: I);
80 PHINode *NPN = PHINode::Create(Ty, NumReservedValues: OPN->getNumIncomingValues());
81 for (unsigned i = 0, e = OPN->getNumIncomingValues(); i != e; ++i) {
82 Value *V =
83 EvaluateInDifferentType(V: OPN->getIncomingValue(i), Ty, isSigned);
84 NPN->addIncoming(V, BB: OPN->getIncomingBlock(i));
85 }
86 Res = NPN;
87 break;
88 }
89 case Instruction::FPToUI:
90 case Instruction::FPToSI:
91 Res = CastInst::Create(
92 static_cast<Instruction::CastOps>(Opc), S: I->getOperand(i: 0), Ty);
93 break;
94 case Instruction::Call:
95 if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: I)) {
96 switch (II->getIntrinsicID()) {
97 default:
98 llvm_unreachable("Unsupported call!");
99 case Intrinsic::vscale: {
100 Function *Fn = Intrinsic::getOrInsertDeclaration(
101 M: I->getModule(), id: Intrinsic::vscale, Tys: {Ty});
102 Res = CallInst::Create(Ty: Fn->getFunctionType(), F: Fn);
103 break;
104 }
105 }
106 }
107 break;
108 case Instruction::ShuffleVector: {
109 auto *ScalarTy = cast<VectorType>(Val: Ty)->getElementType();
110 auto *VTy = cast<VectorType>(Val: I->getOperand(i: 0)->getType());
111 auto *FixedTy = VectorType::get(ElementType: ScalarTy, EC: VTy->getElementCount());
112 Value *Op0 = EvaluateInDifferentType(V: I->getOperand(i: 0), Ty: FixedTy, isSigned);
113 Value *Op1 = EvaluateInDifferentType(V: I->getOperand(i: 1), Ty: FixedTy, isSigned);
114 Res = new ShuffleVectorInst(Op0, Op1,
115 cast<ShuffleVectorInst>(Val: I)->getShuffleMask());
116 break;
117 }
118 default:
119 // TODO: Can handle more cases here.
120 llvm_unreachable("Unreachable!");
121 }
122
123 Res->takeName(V: I);
124 return InsertNewInstWith(New: Res, Old: I->getIterator());
125}
126
127Instruction::CastOps
128InstCombinerImpl::isEliminableCastPair(const CastInst *CI1,
129 const CastInst *CI2) {
130 Type *SrcTy = CI1->getSrcTy();
131 Type *MidTy = CI1->getDestTy();
132 Type *DstTy = CI2->getDestTy();
133
134 Instruction::CastOps firstOp = CI1->getOpcode();
135 Instruction::CastOps secondOp = CI2->getOpcode();
136 Type *SrcIntPtrTy =
137 SrcTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(SrcTy) : nullptr;
138 Type *MidIntPtrTy =
139 MidTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(MidTy) : nullptr;
140 Type *DstIntPtrTy =
141 DstTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(DstTy) : nullptr;
142 unsigned Res = CastInst::isEliminableCastPair(firstOpcode: firstOp, secondOpcode: secondOp, SrcTy, MidTy,
143 DstTy, SrcIntPtrTy, MidIntPtrTy,
144 DstIntPtrTy);
145
146 // We don't want to form an inttoptr or ptrtoint that converts to an integer
147 // type that differs from the pointer size.
148 if ((Res == Instruction::IntToPtr && SrcTy != DstIntPtrTy) ||
149 (Res == Instruction::PtrToInt && DstTy != SrcIntPtrTy))
150 Res = 0;
151
152 return Instruction::CastOps(Res);
153}
154
155/// Implement the transforms common to all CastInst visitors.
156Instruction *InstCombinerImpl::commonCastTransforms(CastInst &CI) {
157 Value *Src = CI.getOperand(i_nocapture: 0);
158 Type *Ty = CI.getType();
159
160 if (auto *SrcC = dyn_cast<Constant>(Val: Src))
161 if (Constant *Res = ConstantFoldCastOperand(Opcode: CI.getOpcode(), C: SrcC, DestTy: Ty, DL))
162 return replaceInstUsesWith(I&: CI, V: Res);
163
164 // Try to eliminate a cast of a cast.
165 if (auto *CSrc = dyn_cast<CastInst>(Val: Src)) { // A->B->C cast
166 if (Instruction::CastOps NewOpc = isEliminableCastPair(CI1: CSrc, CI2: &CI)) {
167 // The first cast (CSrc) is eliminable so we need to fix up or replace
168 // the second cast (CI). CSrc will then have a good chance of being dead.
169 auto *Res = CastInst::Create(NewOpc, S: CSrc->getOperand(i_nocapture: 0), Ty);
170 // Point debug users of the dying cast to the new one.
171 if (CSrc->hasOneUse())
172 replaceAllDbgUsesWith(From&: *CSrc, To&: *Res, DomPoint&: CI, DT);
173 return Res;
174 }
175 }
176
177 if (auto *Sel = dyn_cast<SelectInst>(Val: Src)) {
178 // We are casting a select. Try to fold the cast into the select if the
179 // select does not have a compare instruction with matching operand types
180 // or the select is likely better done in a narrow type.
181 // Creating a select with operands that are different sizes than its
182 // condition may inhibit other folds and lead to worse codegen.
183 auto *Cmp = dyn_cast<CmpInst>(Val: Sel->getCondition());
184 if (!Cmp || Cmp->getOperand(i_nocapture: 0)->getType() != Sel->getType() ||
185 (CI.getOpcode() == Instruction::Trunc &&
186 shouldChangeType(From: CI.getSrcTy(), To: CI.getType()))) {
187
188 // If it's a bitcast involving vectors, make sure it has the same number
189 // of elements on both sides.
190 if (CI.getOpcode() != Instruction::BitCast ||
191 match(V: &CI, P: m_ElementWiseBitCast(Op: m_Value()))) {
192 if (Instruction *NV = FoldOpIntoSelect(Op&: CI, SI: Sel)) {
193 replaceAllDbgUsesWith(From&: *Sel, To&: *NV, DomPoint&: CI, DT);
194 return NV;
195 }
196 }
197 }
198 }
199
200 // If we are casting a PHI, then fold the cast into the PHI.
201 if (auto *PN = dyn_cast<PHINode>(Val: Src)) {
202 // Don't do this if it would create a PHI node with an illegal type from a
203 // legal type.
204 if (!Src->getType()->isIntegerTy() || !CI.getType()->isIntegerTy() ||
205 shouldChangeType(From: CI.getSrcTy(), To: CI.getType()))
206 if (Instruction *NV = foldOpIntoPhi(I&: CI, PN))
207 return NV;
208 }
209
210 // Canonicalize a unary shuffle after the cast if neither operation changes
211 // the size or element size of the input vector.
212 // TODO: We could allow size-changing ops if that doesn't harm codegen.
213 // cast (shuffle X, Mask) --> shuffle (cast X), Mask
214 Value *X;
215 ArrayRef<int> Mask;
216 if (match(V: Src, P: m_OneUse(SubPattern: m_Shuffle(v1: m_Value(V&: X), v2: m_Undef(), mask: m_Mask(Mask))))) {
217 // TODO: Allow scalable vectors?
218 auto *SrcTy = dyn_cast<FixedVectorType>(Val: X->getType());
219 auto *DestTy = dyn_cast<FixedVectorType>(Val: Ty);
220 if (SrcTy && DestTy &&
221 SrcTy->getNumElements() == DestTy->getNumElements() &&
222 SrcTy->getPrimitiveSizeInBits() == DestTy->getPrimitiveSizeInBits()) {
223 Value *CastX = Builder.CreateCast(Op: CI.getOpcode(), V: X, DestTy);
224 return new ShuffleVectorInst(CastX, Mask);
225 }
226 }
227
228 return nullptr;
229}
230
231/// Constants and extensions/truncates from the destination type are always
232/// free to be evaluated in that type. This is a helper for canEvaluate*.
233static bool canAlwaysEvaluateInType(Value *V, Type *Ty) {
234 if (isa<Constant>(Val: V))
235 return match(V, P: m_ImmConstant());
236
237 Value *X;
238 if ((match(V, P: m_ZExtOrSExt(Op: m_Value(V&: X))) || match(V, P: m_Trunc(Op: m_Value(V&: X)))) &&
239 X->getType() == Ty)
240 return true;
241
242 return false;
243}
244
245/// Filter out values that we can not evaluate in the destination type for free.
246/// This is a helper for canEvaluate*.
247static bool canNotEvaluateInType(Value *V, Type *Ty) {
248 if (!isa<Instruction>(Val: V))
249 return true;
250 // We don't extend or shrink something that has multiple uses -- doing so
251 // would require duplicating the instruction which isn't profitable.
252 if (!V->hasOneUse())
253 return true;
254
255 return false;
256}
257
258/// Return true if we can evaluate the specified expression tree as type Ty
259/// instead of its larger type, and arrive with the same value.
260/// This is used by code that tries to eliminate truncates.
261///
262/// Ty will always be a type smaller than V. We should return true if trunc(V)
263/// can be computed by computing V in the smaller type. If V is an instruction,
264/// then trunc(inst(x,y)) can be computed as inst(trunc(x),trunc(y)), which only
265/// makes sense if x and y can be efficiently truncated.
266///
267/// This function works on both vectors and scalars.
268///
269static bool canEvaluateTruncated(Value *V, Type *Ty, InstCombinerImpl &IC,
270 Instruction *CxtI) {
271 if (canAlwaysEvaluateInType(V, Ty))
272 return true;
273 if (canNotEvaluateInType(V, Ty))
274 return false;
275
276 auto *I = cast<Instruction>(Val: V);
277 Type *OrigTy = V->getType();
278 switch (I->getOpcode()) {
279 case Instruction::Add:
280 case Instruction::Sub:
281 case Instruction::Mul:
282 case Instruction::And:
283 case Instruction::Or:
284 case Instruction::Xor:
285 // These operators can all arbitrarily be extended or truncated.
286 return canEvaluateTruncated(V: I->getOperand(i: 0), Ty, IC, CxtI) &&
287 canEvaluateTruncated(V: I->getOperand(i: 1), Ty, IC, CxtI);
288
289 case Instruction::UDiv:
290 case Instruction::URem: {
291 // UDiv and URem can be truncated if all the truncated bits are zero.
292 uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
293 uint32_t BitWidth = Ty->getScalarSizeInBits();
294 assert(BitWidth < OrigBitWidth && "Unexpected bitwidths!");
295 APInt Mask = APInt::getBitsSetFrom(numBits: OrigBitWidth, loBit: BitWidth);
296 // Do not preserve the original context instruction. Simplifying div/rem
297 // based on later context may introduce a trap.
298 if (IC.MaskedValueIsZero(V: I->getOperand(i: 0), Mask, CxtI: I) &&
299 IC.MaskedValueIsZero(V: I->getOperand(i: 1), Mask, CxtI: I)) {
300 return canEvaluateTruncated(V: I->getOperand(i: 0), Ty, IC, CxtI: I) &&
301 canEvaluateTruncated(V: I->getOperand(i: 1), Ty, IC, CxtI: I);
302 }
303 break;
304 }
305 case Instruction::Shl: {
306 // If we are truncating the result of this SHL, and if it's a shift of an
307 // inrange amount, we can always perform a SHL in a smaller type.
308 uint32_t BitWidth = Ty->getScalarSizeInBits();
309 KnownBits AmtKnownBits =
310 llvm::computeKnownBits(V: I->getOperand(i: 1), DL: IC.getDataLayout());
311 if (AmtKnownBits.getMaxValue().ult(RHS: BitWidth))
312 return canEvaluateTruncated(V: I->getOperand(i: 0), Ty, IC, CxtI) &&
313 canEvaluateTruncated(V: I->getOperand(i: 1), Ty, IC, CxtI);
314 break;
315 }
316 case Instruction::LShr: {
317 // If this is a truncate of a logical shr, we can truncate it to a smaller
318 // lshr iff we know that the bits we would otherwise be shifting in are
319 // already zeros.
320 // TODO: It is enough to check that the bits we would be shifting in are
321 // zero - use AmtKnownBits.getMaxValue().
322 uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
323 uint32_t BitWidth = Ty->getScalarSizeInBits();
324 KnownBits AmtKnownBits = IC.computeKnownBits(V: I->getOperand(i: 1), CxtI);
325 APInt MaxShiftAmt = AmtKnownBits.getMaxValue();
326 APInt ShiftedBits = APInt::getBitsSetFrom(numBits: OrigBitWidth, loBit: BitWidth);
327 if (MaxShiftAmt.ult(RHS: BitWidth)) {
328 // If the only user is a trunc then we can narrow the shift if any new
329 // MSBs are not going to be used.
330 if (auto *Trunc = dyn_cast<TruncInst>(Val: V->user_back())) {
331 auto DemandedBits = Trunc->getType()->getScalarSizeInBits();
332 if ((MaxShiftAmt + DemandedBits).ule(RHS: BitWidth))
333 return canEvaluateTruncated(V: I->getOperand(i: 0), Ty, IC, CxtI) &&
334 canEvaluateTruncated(V: I->getOperand(i: 1), Ty, IC, CxtI);
335 }
336 if (IC.MaskedValueIsZero(V: I->getOperand(i: 0), Mask: ShiftedBits, CxtI))
337 return canEvaluateTruncated(V: I->getOperand(i: 0), Ty, IC, CxtI) &&
338 canEvaluateTruncated(V: I->getOperand(i: 1), Ty, IC, CxtI);
339 }
340 break;
341 }
342 case Instruction::AShr: {
343 // If this is a truncate of an arithmetic shr, we can truncate it to a
344 // smaller ashr iff we know that all the bits from the sign bit of the
345 // original type and the sign bit of the truncate type are similar.
346 // TODO: It is enough to check that the bits we would be shifting in are
347 // similar to sign bit of the truncate type.
348 uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
349 uint32_t BitWidth = Ty->getScalarSizeInBits();
350 KnownBits AmtKnownBits =
351 llvm::computeKnownBits(V: I->getOperand(i: 1), DL: IC.getDataLayout());
352 unsigned ShiftedBits = OrigBitWidth - BitWidth;
353 if (AmtKnownBits.getMaxValue().ult(RHS: BitWidth) &&
354 ShiftedBits < IC.ComputeNumSignBits(Op: I->getOperand(i: 0), CxtI))
355 return canEvaluateTruncated(V: I->getOperand(i: 0), Ty, IC, CxtI) &&
356 canEvaluateTruncated(V: I->getOperand(i: 1), Ty, IC, CxtI);
357 break;
358 }
359 case Instruction::Trunc:
360 // trunc(trunc(x)) -> trunc(x)
361 return true;
362 case Instruction::ZExt:
363 case Instruction::SExt:
364 // trunc(ext(x)) -> ext(x) if the source type is smaller than the new dest
365 // trunc(ext(x)) -> trunc(x) if the source type is larger than the new dest
366 return true;
367 case Instruction::Select: {
368 SelectInst *SI = cast<SelectInst>(Val: I);
369 return canEvaluateTruncated(V: SI->getTrueValue(), Ty, IC, CxtI) &&
370 canEvaluateTruncated(V: SI->getFalseValue(), Ty, IC, CxtI);
371 }
372 case Instruction::PHI: {
373 // We can change a phi if we can change all operands. Note that we never
374 // get into trouble with cyclic PHIs here because we only consider
375 // instructions with a single use.
376 PHINode *PN = cast<PHINode>(Val: I);
377 for (Value *IncValue : PN->incoming_values())
378 if (!canEvaluateTruncated(V: IncValue, Ty, IC, CxtI))
379 return false;
380 return true;
381 }
382 case Instruction::FPToUI:
383 case Instruction::FPToSI: {
384 // If the integer type can hold the max FP value, it is safe to cast
385 // directly to that type. Otherwise, we may create poison via overflow
386 // that did not exist in the original code.
387 Type *InputTy = I->getOperand(i: 0)->getType()->getScalarType();
388 const fltSemantics &Semantics = InputTy->getFltSemantics();
389 uint32_t MinBitWidth =
390 APFloatBase::semanticsIntSizeInBits(Semantics,
391 I->getOpcode() == Instruction::FPToSI);
392 return Ty->getScalarSizeInBits() >= MinBitWidth;
393 }
394 case Instruction::ShuffleVector:
395 return canEvaluateTruncated(V: I->getOperand(i: 0), Ty, IC, CxtI) &&
396 canEvaluateTruncated(V: I->getOperand(i: 1), Ty, IC, CxtI);
397 default:
398 // TODO: Can handle more cases here.
399 break;
400 }
401
402 return false;
403}
404
405/// Given a vector that is bitcast to an integer, optionally logically
406/// right-shifted, and truncated, convert it to an extractelement.
407/// Example (big endian):
408/// trunc (lshr (bitcast <4 x i32> %X to i128), 32) to i32
409/// --->
410/// extractelement <4 x i32> %X, 1
411static Instruction *foldVecTruncToExtElt(TruncInst &Trunc,
412 InstCombinerImpl &IC) {
413 Value *TruncOp = Trunc.getOperand(i_nocapture: 0);
414 Type *DestType = Trunc.getType();
415 if (!TruncOp->hasOneUse() || !isa<IntegerType>(Val: DestType))
416 return nullptr;
417
418 Value *VecInput = nullptr;
419 ConstantInt *ShiftVal = nullptr;
420 if (!match(V: TruncOp, P: m_CombineOr(L: m_BitCast(Op: m_Value(V&: VecInput)),
421 R: m_LShr(L: m_BitCast(Op: m_Value(V&: VecInput)),
422 R: m_ConstantInt(CI&: ShiftVal)))) ||
423 !isa<VectorType>(Val: VecInput->getType()))
424 return nullptr;
425
426 VectorType *VecType = cast<VectorType>(Val: VecInput->getType());
427 unsigned VecWidth = VecType->getPrimitiveSizeInBits();
428 unsigned DestWidth = DestType->getPrimitiveSizeInBits();
429 unsigned ShiftAmount = ShiftVal ? ShiftVal->getZExtValue() : 0;
430
431 if ((VecWidth % DestWidth != 0) || (ShiftAmount % DestWidth != 0))
432 return nullptr;
433
434 // If the element type of the vector doesn't match the result type,
435 // bitcast it to a vector type that we can extract from.
436 unsigned NumVecElts = VecWidth / DestWidth;
437 if (VecType->getElementType() != DestType) {
438 VecType = FixedVectorType::get(ElementType: DestType, NumElts: NumVecElts);
439 VecInput = IC.Builder.CreateBitCast(V: VecInput, DestTy: VecType, Name: "bc");
440 }
441
442 unsigned Elt = ShiftAmount / DestWidth;
443 if (IC.getDataLayout().isBigEndian())
444 Elt = NumVecElts - 1 - Elt;
445
446 return ExtractElementInst::Create(Vec: VecInput, Idx: IC.Builder.getInt32(C: Elt));
447}
448
449/// Whenever an element is extracted from a vector, optionally shifted down, and
450/// then truncated, canonicalize by converting it to a bitcast followed by an
451/// extractelement.
452///
453/// Examples (little endian):
454/// trunc (extractelement <4 x i64> %X, 0) to i32
455/// --->
456/// extractelement <8 x i32> (bitcast <4 x i64> %X to <8 x i32>), i32 0
457///
458/// trunc (lshr (extractelement <4 x i32> %X, 0), 8) to i8
459/// --->
460/// extractelement <16 x i8> (bitcast <4 x i32> %X to <16 x i8>), i32 1
461static Instruction *foldVecExtTruncToExtElt(TruncInst &Trunc,
462 InstCombinerImpl &IC) {
463 Value *Src = Trunc.getOperand(i_nocapture: 0);
464 Type *SrcType = Src->getType();
465 Type *DstType = Trunc.getType();
466
467 // Only attempt this if we have simple aliasing of the vector elements.
468 // A badly fit destination size would result in an invalid cast.
469 unsigned SrcBits = SrcType->getScalarSizeInBits();
470 unsigned DstBits = DstType->getScalarSizeInBits();
471 unsigned TruncRatio = SrcBits / DstBits;
472 if ((SrcBits % DstBits) != 0)
473 return nullptr;
474
475 Value *VecOp;
476 ConstantInt *Cst;
477 const APInt *ShiftAmount = nullptr;
478 if (!match(V: Src, P: m_OneUse(SubPattern: m_ExtractElt(Val: m_Value(V&: VecOp), Idx: m_ConstantInt(CI&: Cst)))) &&
479 !match(V: Src,
480 P: m_OneUse(SubPattern: m_LShr(L: m_ExtractElt(Val: m_Value(V&: VecOp), Idx: m_ConstantInt(CI&: Cst)),
481 R: m_APInt(Res&: ShiftAmount)))))
482 return nullptr;
483
484 auto *VecOpTy = cast<VectorType>(Val: VecOp->getType());
485 auto VecElts = VecOpTy->getElementCount();
486
487 uint64_t BitCastNumElts = VecElts.getKnownMinValue() * TruncRatio;
488 uint64_t VecOpIdx = Cst->getZExtValue();
489 uint64_t NewIdx = IC.getDataLayout().isBigEndian()
490 ? (VecOpIdx + 1) * TruncRatio - 1
491 : VecOpIdx * TruncRatio;
492
493 // Adjust index by the whole number of truncated elements.
494 if (ShiftAmount) {
495 // Check shift amount is in range and shifts a whole number of truncated
496 // elements.
497 if (ShiftAmount->uge(RHS: SrcBits) || ShiftAmount->urem(RHS: DstBits) != 0)
498 return nullptr;
499
500 uint64_t IdxOfs = ShiftAmount->udiv(RHS: DstBits).getZExtValue();
501 NewIdx = IC.getDataLayout().isBigEndian() ? (NewIdx - IdxOfs)
502 : (NewIdx + IdxOfs);
503 }
504
505 assert(BitCastNumElts <= std::numeric_limits<uint32_t>::max() &&
506 NewIdx <= std::numeric_limits<uint32_t>::max() && "overflow 32-bits");
507
508 auto *BitCastTo =
509 VectorType::get(ElementType: DstType, NumElements: BitCastNumElts, Scalable: VecElts.isScalable());
510 Value *BitCast = IC.Builder.CreateBitCast(V: VecOp, DestTy: BitCastTo);
511 return ExtractElementInst::Create(Vec: BitCast, Idx: IC.Builder.getInt32(C: NewIdx));
512}
513
514/// Funnel/Rotate left/right may occur in a wider type than necessary because of
515/// type promotion rules. Try to narrow the inputs and convert to funnel shift.
516Instruction *InstCombinerImpl::narrowFunnelShift(TruncInst &Trunc) {
517 assert((isa<VectorType>(Trunc.getSrcTy()) ||
518 shouldChangeType(Trunc.getSrcTy(), Trunc.getType())) &&
519 "Don't narrow to an illegal scalar type");
520
521 // Bail out on strange types. It is possible to handle some of these patterns
522 // even with non-power-of-2 sizes, but it is not a likely scenario.
523 Type *DestTy = Trunc.getType();
524 unsigned NarrowWidth = DestTy->getScalarSizeInBits();
525 unsigned WideWidth = Trunc.getSrcTy()->getScalarSizeInBits();
526 if (!isPowerOf2_32(Value: NarrowWidth))
527 return nullptr;
528
529 // First, find an or'd pair of opposite shifts:
530 // trunc (or (lshr ShVal0, ShAmt0), (shl ShVal1, ShAmt1))
531 BinaryOperator *Or0, *Or1;
532 if (!match(V: Trunc.getOperand(i_nocapture: 0), P: m_OneUse(SubPattern: m_Or(L: m_BinOp(I&: Or0), R: m_BinOp(I&: Or1)))))
533 return nullptr;
534
535 Value *ShVal0, *ShVal1, *ShAmt0, *ShAmt1;
536 if (!match(V: Or0, P: m_OneUse(SubPattern: m_LogicalShift(L: m_Value(V&: ShVal0), R: m_Value(V&: ShAmt0)))) ||
537 !match(V: Or1, P: m_OneUse(SubPattern: m_LogicalShift(L: m_Value(V&: ShVal1), R: m_Value(V&: ShAmt1)))) ||
538 Or0->getOpcode() == Or1->getOpcode())
539 return nullptr;
540
541 // Canonicalize to or(shl(ShVal0, ShAmt0), lshr(ShVal1, ShAmt1)).
542 if (Or0->getOpcode() == BinaryOperator::LShr) {
543 std::swap(a&: Or0, b&: Or1);
544 std::swap(a&: ShVal0, b&: ShVal1);
545 std::swap(a&: ShAmt0, b&: ShAmt1);
546 }
547 assert(Or0->getOpcode() == BinaryOperator::Shl &&
548 Or1->getOpcode() == BinaryOperator::LShr &&
549 "Illegal or(shift,shift) pair");
550
551 // Match the shift amount operands for a funnel/rotate pattern. This always
552 // matches a subtraction on the R operand.
553 auto matchShiftAmount = [&](Value *L, Value *R, unsigned Width) -> Value * {
554 // The shift amounts may add up to the narrow bit width:
555 // (shl ShVal0, L) | (lshr ShVal1, Width - L)
556 // If this is a funnel shift (different operands are shifted), then the
557 // shift amount can not over-shift (create poison) in the narrow type.
558 unsigned MaxShiftAmountWidth = Log2_32(Value: NarrowWidth);
559 APInt HiBitMask = ~APInt::getLowBitsSet(numBits: WideWidth, loBitsSet: MaxShiftAmountWidth);
560 if (ShVal0 == ShVal1 || MaskedValueIsZero(V: L, Mask: HiBitMask))
561 if (match(V: R, P: m_OneUse(SubPattern: m_Sub(L: m_SpecificInt(V: Width), R: m_Specific(V: L)))))
562 return L;
563
564 // The following patterns currently only work for rotation patterns.
565 // TODO: Add more general funnel-shift compatible patterns.
566 if (ShVal0 != ShVal1)
567 return nullptr;
568
569 // The shift amount may be masked with negation:
570 // (shl ShVal0, (X & (Width - 1))) | (lshr ShVal1, ((-X) & (Width - 1)))
571 Value *X;
572 unsigned Mask = Width - 1;
573 if (match(V: L, P: m_And(L: m_Value(V&: X), R: m_SpecificInt(V: Mask))) &&
574 match(V: R, P: m_And(L: m_Neg(V: m_Specific(V: X)), R: m_SpecificInt(V: Mask))))
575 return X;
576
577 // Same as above, but the shift amount may be extended after masking:
578 if (match(V: L, P: m_ZExt(Op: m_And(L: m_Value(V&: X), R: m_SpecificInt(V: Mask)))) &&
579 match(V: R, P: m_ZExt(Op: m_And(L: m_Neg(V: m_Specific(V: X)), R: m_SpecificInt(V: Mask)))))
580 return X;
581
582 return nullptr;
583 };
584
585 Value *ShAmt = matchShiftAmount(ShAmt0, ShAmt1, NarrowWidth);
586 bool IsFshl = true; // Sub on LSHR.
587 if (!ShAmt) {
588 ShAmt = matchShiftAmount(ShAmt1, ShAmt0, NarrowWidth);
589 IsFshl = false; // Sub on SHL.
590 }
591 if (!ShAmt)
592 return nullptr;
593
594 // The right-shifted value must have high zeros in the wide type (for example
595 // from 'zext', 'and' or 'shift'). High bits of the left-shifted value are
596 // truncated, so those do not matter.
597 APInt HiBitMask = APInt::getHighBitsSet(numBits: WideWidth, hiBitsSet: WideWidth - NarrowWidth);
598 if (!MaskedValueIsZero(V: ShVal1, Mask: HiBitMask, CxtI: &Trunc))
599 return nullptr;
600
601 // Adjust the width of ShAmt for narrowed funnel shift operation:
602 // - Zero-extend if ShAmt is narrower than the destination type.
603 // - Truncate if ShAmt is wider, discarding non-significant high-order bits.
604 // This prepares ShAmt for llvm.fshl.i8(trunc(ShVal), trunc(ShVal),
605 // zext/trunc(ShAmt)).
606 Value *NarrowShAmt = Builder.CreateZExtOrTrunc(V: ShAmt, DestTy);
607
608 Value *X, *Y;
609 X = Y = Builder.CreateTrunc(V: ShVal0, DestTy);
610 if (ShVal0 != ShVal1)
611 Y = Builder.CreateTrunc(V: ShVal1, DestTy);
612 Intrinsic::ID IID = IsFshl ? Intrinsic::fshl : Intrinsic::fshr;
613 Function *F =
614 Intrinsic::getOrInsertDeclaration(M: Trunc.getModule(), id: IID, Tys: DestTy);
615 return CallInst::Create(Func: F, Args: {X, Y, NarrowShAmt});
616}
617
618/// Try to narrow the width of math or bitwise logic instructions by pulling a
619/// truncate ahead of binary operators.
620Instruction *InstCombinerImpl::narrowBinOp(TruncInst &Trunc) {
621 Type *SrcTy = Trunc.getSrcTy();
622 Type *DestTy = Trunc.getType();
623 unsigned SrcWidth = SrcTy->getScalarSizeInBits();
624 unsigned DestWidth = DestTy->getScalarSizeInBits();
625
626 if (!isa<VectorType>(Val: SrcTy) && !shouldChangeType(From: SrcTy, To: DestTy))
627 return nullptr;
628
629 BinaryOperator *BinOp;
630 if (!match(V: Trunc.getOperand(i_nocapture: 0), P: m_OneUse(SubPattern: m_BinOp(I&: BinOp))))
631 return nullptr;
632
633 Value *BinOp0 = BinOp->getOperand(i_nocapture: 0);
634 Value *BinOp1 = BinOp->getOperand(i_nocapture: 1);
635 switch (BinOp->getOpcode()) {
636 case Instruction::And:
637 case Instruction::Or:
638 case Instruction::Xor:
639 case Instruction::Add:
640 case Instruction::Sub:
641 case Instruction::Mul: {
642 Constant *C;
643 if (match(V: BinOp0, P: m_Constant(C))) {
644 // trunc (binop C, X) --> binop (trunc C', X)
645 Constant *NarrowC = ConstantExpr::getTrunc(C, Ty: DestTy);
646 Value *TruncX = Builder.CreateTrunc(V: BinOp1, DestTy);
647 return BinaryOperator::Create(Op: BinOp->getOpcode(), S1: NarrowC, S2: TruncX);
648 }
649 if (match(V: BinOp1, P: m_Constant(C))) {
650 // trunc (binop X, C) --> binop (trunc X, C')
651 Constant *NarrowC = ConstantExpr::getTrunc(C, Ty: DestTy);
652 Value *TruncX = Builder.CreateTrunc(V: BinOp0, DestTy);
653 return BinaryOperator::Create(Op: BinOp->getOpcode(), S1: TruncX, S2: NarrowC);
654 }
655 Value *X;
656 if (match(V: BinOp0, P: m_ZExtOrSExt(Op: m_Value(V&: X))) && X->getType() == DestTy) {
657 // trunc (binop (ext X), Y) --> binop X, (trunc Y)
658 Value *NarrowOp1 = Builder.CreateTrunc(V: BinOp1, DestTy);
659 return BinaryOperator::Create(Op: BinOp->getOpcode(), S1: X, S2: NarrowOp1);
660 }
661 if (match(V: BinOp1, P: m_ZExtOrSExt(Op: m_Value(V&: X))) && X->getType() == DestTy) {
662 // trunc (binop Y, (ext X)) --> binop (trunc Y), X
663 Value *NarrowOp0 = Builder.CreateTrunc(V: BinOp0, DestTy);
664 return BinaryOperator::Create(Op: BinOp->getOpcode(), S1: NarrowOp0, S2: X);
665 }
666 break;
667 }
668 case Instruction::LShr:
669 case Instruction::AShr: {
670 // trunc (*shr (trunc A), C) --> trunc(*shr A, C)
671 Value *A;
672 Constant *C;
673 if (match(V: BinOp0, P: m_Trunc(Op: m_Value(V&: A))) && match(V: BinOp1, P: m_Constant(C))) {
674 unsigned MaxShiftAmt = SrcWidth - DestWidth;
675 // If the shift is small enough, all zero/sign bits created by the shift
676 // are removed by the trunc.
677 if (match(V: C, P: m_SpecificInt_ICMP(Predicate: ICmpInst::ICMP_ULE,
678 Threshold: APInt(SrcWidth, MaxShiftAmt)))) {
679 auto *OldShift = cast<Instruction>(Val: Trunc.getOperand(i_nocapture: 0));
680 bool IsExact = OldShift->isExact();
681 if (Constant *ShAmt = ConstantFoldIntegerCast(C, DestTy: A->getType(),
682 /*IsSigned*/ true, DL)) {
683 ShAmt = Constant::mergeUndefsWith(C: ShAmt, Other: C);
684 Value *Shift =
685 OldShift->getOpcode() == Instruction::AShr
686 ? Builder.CreateAShr(LHS: A, RHS: ShAmt, Name: OldShift->getName(), isExact: IsExact)
687 : Builder.CreateLShr(LHS: A, RHS: ShAmt, Name: OldShift->getName(), isExact: IsExact);
688 return CastInst::CreateTruncOrBitCast(S: Shift, Ty: DestTy);
689 }
690 }
691 }
692 break;
693 }
694 default: break;
695 }
696
697 if (Instruction *NarrowOr = narrowFunnelShift(Trunc))
698 return NarrowOr;
699
700 return nullptr;
701}
702
703/// Try to narrow the width of a splat shuffle. This could be generalized to any
704/// shuffle with a constant operand, but we limit the transform to avoid
705/// creating a shuffle type that targets may not be able to lower effectively.
706static Instruction *shrinkSplatShuffle(TruncInst &Trunc,
707 InstCombiner::BuilderTy &Builder) {
708 auto *Shuf = dyn_cast<ShuffleVectorInst>(Val: Trunc.getOperand(i_nocapture: 0));
709 if (Shuf && Shuf->hasOneUse() && match(V: Shuf->getOperand(i_nocapture: 1), P: m_Undef()) &&
710 all_equal(Range: Shuf->getShuffleMask()) &&
711 Shuf->getType() == Shuf->getOperand(i_nocapture: 0)->getType()) {
712 // trunc (shuf X, Undef, SplatMask) --> shuf (trunc X), Poison, SplatMask
713 // trunc (shuf X, Poison, SplatMask) --> shuf (trunc X), Poison, SplatMask
714 Value *NarrowOp = Builder.CreateTrunc(V: Shuf->getOperand(i_nocapture: 0), DestTy: Trunc.getType());
715 return new ShuffleVectorInst(NarrowOp, Shuf->getShuffleMask());
716 }
717
718 return nullptr;
719}
720
721/// Try to narrow the width of an insert element. This could be generalized for
722/// any vector constant, but we limit the transform to insertion into undef to
723/// avoid potential backend problems from unsupported insertion widths. This
724/// could also be extended to handle the case of inserting a scalar constant
725/// into a vector variable.
726static Instruction *shrinkInsertElt(CastInst &Trunc,
727 InstCombiner::BuilderTy &Builder) {
728 Instruction::CastOps Opcode = Trunc.getOpcode();
729 assert((Opcode == Instruction::Trunc || Opcode == Instruction::FPTrunc) &&
730 "Unexpected instruction for shrinking");
731
732 auto *InsElt = dyn_cast<InsertElementInst>(Val: Trunc.getOperand(i_nocapture: 0));
733 if (!InsElt || !InsElt->hasOneUse())
734 return nullptr;
735
736 Type *DestTy = Trunc.getType();
737 Type *DestScalarTy = DestTy->getScalarType();
738 Value *VecOp = InsElt->getOperand(i_nocapture: 0);
739 Value *ScalarOp = InsElt->getOperand(i_nocapture: 1);
740 Value *Index = InsElt->getOperand(i_nocapture: 2);
741
742 if (match(V: VecOp, P: m_Undef())) {
743 // trunc (inselt undef, X, Index) --> inselt undef, (trunc X), Index
744 // fptrunc (inselt undef, X, Index) --> inselt undef, (fptrunc X), Index
745 UndefValue *NarrowUndef = UndefValue::get(T: DestTy);
746 Value *NarrowOp = Builder.CreateCast(Op: Opcode, V: ScalarOp, DestTy: DestScalarTy);
747 return InsertElementInst::Create(Vec: NarrowUndef, NewElt: NarrowOp, Idx: Index);
748 }
749
750 return nullptr;
751}
752
753Instruction *InstCombinerImpl::visitTrunc(TruncInst &Trunc) {
754 if (Instruction *Result = commonCastTransforms(CI&: Trunc))
755 return Result;
756
757 Value *Src = Trunc.getOperand(i_nocapture: 0);
758 Type *DestTy = Trunc.getType(), *SrcTy = Src->getType();
759 unsigned DestWidth = DestTy->getScalarSizeInBits();
760 unsigned SrcWidth = SrcTy->getScalarSizeInBits();
761
762 // Attempt to truncate the entire input expression tree to the destination
763 // type. Only do this if the dest type is a simple type, don't convert the
764 // expression tree to something weird like i93 unless the source is also
765 // strange.
766 if ((DestTy->isVectorTy() || shouldChangeType(From: SrcTy, To: DestTy)) &&
767 canEvaluateTruncated(V: Src, Ty: DestTy, IC&: *this, CxtI: &Trunc)) {
768
769 // If this cast is a truncate, evaluting in a different type always
770 // eliminates the cast, so it is always a win.
771 LLVM_DEBUG(
772 dbgs() << "ICE: EvaluateInDifferentType converting expression type"
773 " to avoid cast: "
774 << Trunc << '\n');
775 Value *Res = EvaluateInDifferentType(V: Src, Ty: DestTy, isSigned: false);
776 assert(Res->getType() == DestTy);
777 return replaceInstUsesWith(I&: Trunc, V: Res);
778 }
779
780 // For integer types, check if we can shorten the entire input expression to
781 // DestWidth * 2, which won't allow removing the truncate, but reducing the
782 // width may enable further optimizations, e.g. allowing for larger
783 // vectorization factors.
784 if (auto *DestITy = dyn_cast<IntegerType>(Val: DestTy)) {
785 if (DestWidth * 2 < SrcWidth) {
786 auto *NewDestTy = DestITy->getExtendedType();
787 if (shouldChangeType(From: SrcTy, To: NewDestTy) &&
788 canEvaluateTruncated(V: Src, Ty: NewDestTy, IC&: *this, CxtI: &Trunc)) {
789 LLVM_DEBUG(
790 dbgs() << "ICE: EvaluateInDifferentType converting expression type"
791 " to reduce the width of operand of"
792 << Trunc << '\n');
793 Value *Res = EvaluateInDifferentType(V: Src, Ty: NewDestTy, isSigned: false);
794 return new TruncInst(Res, DestTy);
795 }
796 }
797 }
798
799 // See if we can simplify any instructions used by the input whose sole
800 // purpose is to compute bits we don't care about.
801 if (SimplifyDemandedInstructionBits(Inst&: Trunc))
802 return &Trunc;
803
804 if (DestWidth == 1) {
805 Value *Zero = Constant::getNullValue(Ty: SrcTy);
806
807 Value *X;
808 const APInt *C1;
809 Constant *C2;
810 if (match(V: Src, P: m_OneUse(SubPattern: m_Shr(L: m_Shl(L: m_Power2(V&: C1), R: m_Value(V&: X)),
811 R: m_ImmConstant(C&: C2))))) {
812 // trunc ((C1 << X) >> C2) to i1 --> X == (C2-cttz(C1)), where C1 is pow2
813 Constant *Log2C1 = ConstantInt::get(Ty: SrcTy, V: C1->exactLogBase2());
814 Constant *CmpC = ConstantExpr::getSub(C1: C2, C2: Log2C1);
815 return new ICmpInst(ICmpInst::ICMP_EQ, X, CmpC);
816 }
817
818 if (match(V: Src, P: m_Shr(L: m_Value(V&: X), R: m_SpecificInt(V: SrcWidth - 1)))) {
819 // trunc (ashr X, BW-1) to i1 --> icmp slt X, 0
820 // trunc (lshr X, BW-1) to i1 --> icmp slt X, 0
821 return new ICmpInst(ICmpInst::ICMP_SLT, X, Zero);
822 }
823
824 Constant *C;
825 if (match(V: Src, P: m_OneUse(SubPattern: m_LShr(L: m_Value(V&: X), R: m_ImmConstant(C))))) {
826 // trunc (lshr X, C) to i1 --> icmp ne (and X, C'), 0
827 Constant *One = ConstantInt::get(Ty: SrcTy, V: APInt(SrcWidth, 1));
828 Value *MaskC = Builder.CreateShl(LHS: One, RHS: C);
829 Value *And = Builder.CreateAnd(LHS: X, RHS: MaskC);
830 return new ICmpInst(ICmpInst::ICMP_NE, And, Zero);
831 }
832 if (match(V: Src, P: m_OneUse(SubPattern: m_c_Or(L: m_LShr(L: m_Value(V&: X), R: m_ImmConstant(C)),
833 R: m_Deferred(V: X))))) {
834 // trunc (or (lshr X, C), X) to i1 --> icmp ne (and X, C'), 0
835 Constant *One = ConstantInt::get(Ty: SrcTy, V: APInt(SrcWidth, 1));
836 Value *MaskC = Builder.CreateShl(LHS: One, RHS: C);
837 Value *And = Builder.CreateAnd(LHS: X, RHS: Builder.CreateOr(LHS: MaskC, RHS: One));
838 return new ICmpInst(ICmpInst::ICMP_NE, And, Zero);
839 }
840
841 {
842 const APInt *C;
843 if (match(V: Src, P: m_Shl(L: m_APInt(Res&: C), R: m_Value(V&: X))) && (*C)[0] == 1) {
844 // trunc (C << X) to i1 --> X == 0, where C is odd
845 return new ICmpInst(ICmpInst::Predicate::ICMP_EQ, X, Zero);
846 }
847 }
848
849 if (Trunc.hasNoUnsignedWrap() || Trunc.hasNoSignedWrap()) {
850 Value *X, *Y;
851 if (match(V: Src, P: m_Xor(L: m_Value(V&: X), R: m_Value(V&: Y))))
852 return new ICmpInst(ICmpInst::ICMP_NE, X, Y);
853 }
854 }
855
856 Value *A, *B;
857 Constant *C;
858 if (match(V: Src, P: m_LShr(L: m_SExt(Op: m_Value(V&: A)), R: m_Constant(C)))) {
859 unsigned AWidth = A->getType()->getScalarSizeInBits();
860 unsigned MaxShiftAmt = SrcWidth - std::max(a: DestWidth, b: AWidth);
861 auto *OldSh = cast<Instruction>(Val: Src);
862 bool IsExact = OldSh->isExact();
863
864 // If the shift is small enough, all zero bits created by the shift are
865 // removed by the trunc.
866 if (match(V: C, P: m_SpecificInt_ICMP(Predicate: ICmpInst::ICMP_ULE,
867 Threshold: APInt(SrcWidth, MaxShiftAmt)))) {
868 auto GetNewShAmt = [&](unsigned Width) {
869 Constant *MaxAmt = ConstantInt::get(Ty: SrcTy, V: Width - 1, IsSigned: false);
870 Constant *Cmp =
871 ConstantFoldCompareInstOperands(Predicate: ICmpInst::ICMP_ULT, LHS: C, RHS: MaxAmt, DL);
872 Constant *ShAmt = ConstantFoldSelectInstruction(Cond: Cmp, V1: C, V2: MaxAmt);
873 return ConstantFoldCastOperand(Opcode: Instruction::Trunc, C: ShAmt, DestTy: A->getType(),
874 DL);
875 };
876
877 // trunc (lshr (sext A), C) --> ashr A, C
878 if (A->getType() == DestTy) {
879 Constant *ShAmt = GetNewShAmt(DestWidth);
880 ShAmt = Constant::mergeUndefsWith(C: ShAmt, Other: C);
881 return IsExact ? BinaryOperator::CreateExactAShr(V1: A, V2: ShAmt)
882 : BinaryOperator::CreateAShr(V1: A, V2: ShAmt);
883 }
884 // The types are mismatched, so create a cast after shifting:
885 // trunc (lshr (sext A), C) --> sext/trunc (ashr A, C)
886 if (Src->hasOneUse()) {
887 Constant *ShAmt = GetNewShAmt(AWidth);
888 Value *Shift = Builder.CreateAShr(LHS: A, RHS: ShAmt, Name: "", isExact: IsExact);
889 return CastInst::CreateIntegerCast(S: Shift, Ty: DestTy, isSigned: true);
890 }
891 }
892 // TODO: Mask high bits with 'and'.
893 }
894
895 if (Instruction *I = narrowBinOp(Trunc))
896 return I;
897
898 if (Instruction *I = shrinkSplatShuffle(Trunc, Builder))
899 return I;
900
901 if (Instruction *I = shrinkInsertElt(Trunc, Builder))
902 return I;
903
904 if (Src->hasOneUse() &&
905 (isa<VectorType>(Val: SrcTy) || shouldChangeType(From: SrcTy, To: DestTy))) {
906 // Transform "trunc (shl X, cst)" -> "shl (trunc X), cst" so long as the
907 // dest type is native and cst < dest size.
908 if (match(V: Src, P: m_Shl(L: m_Value(V&: A), R: m_Constant(C))) &&
909 !match(V: A, P: m_Shr(L: m_Value(), R: m_Constant()))) {
910 // Skip shifts of shift by constants. It undoes a combine in
911 // FoldShiftByConstant and is the extend in reg pattern.
912 APInt Threshold = APInt(C->getType()->getScalarSizeInBits(), DestWidth);
913 if (match(V: C, P: m_SpecificInt_ICMP(Predicate: ICmpInst::ICMP_ULT, Threshold))) {
914 Value *NewTrunc = Builder.CreateTrunc(V: A, DestTy, Name: A->getName() + ".tr");
915 return BinaryOperator::Create(Op: Instruction::Shl, S1: NewTrunc,
916 S2: ConstantExpr::getTrunc(C, Ty: DestTy));
917 }
918 }
919 }
920
921 if (Instruction *I = foldVecTruncToExtElt(Trunc, IC&: *this))
922 return I;
923
924 if (Instruction *I = foldVecExtTruncToExtElt(Trunc, IC&: *this))
925 return I;
926
927 // trunc (ctlz_i32(zext(A), B) --> add(ctlz_i16(A, B), C)
928 if (match(V: Src, P: m_OneUse(SubPattern: m_Intrinsic<Intrinsic::ctlz>(Op0: m_ZExt(Op: m_Value(V&: A)),
929 Op1: m_Value(V&: B))))) {
930 unsigned AWidth = A->getType()->getScalarSizeInBits();
931 if (AWidth == DestWidth && AWidth > Log2_32(Value: SrcWidth)) {
932 Value *WidthDiff = ConstantInt::get(Ty: A->getType(), V: SrcWidth - AWidth);
933 Value *NarrowCtlz =
934 Builder.CreateIntrinsic(ID: Intrinsic::ctlz, Types: {Trunc.getType()}, Args: {A, B});
935 return BinaryOperator::CreateAdd(V1: NarrowCtlz, V2: WidthDiff);
936 }
937 }
938
939 if (match(V: Src, P: m_VScale())) {
940 if (Trunc.getFunction() &&
941 Trunc.getFunction()->hasFnAttribute(Kind: Attribute::VScaleRange)) {
942 Attribute Attr =
943 Trunc.getFunction()->getFnAttribute(Kind: Attribute::VScaleRange);
944 if (std::optional<unsigned> MaxVScale = Attr.getVScaleRangeMax())
945 if (Log2_32(Value: *MaxVScale) < DestWidth)
946 return replaceInstUsesWith(I&: Trunc, V: Builder.CreateVScale(Ty: DestTy));
947 }
948 }
949
950 if (DestWidth == 1 &&
951 (Trunc.hasNoUnsignedWrap() || Trunc.hasNoSignedWrap()) &&
952 isKnownNonZero(V: Src, Q: SQ.getWithInstruction(I: &Trunc)))
953 return replaceInstUsesWith(I&: Trunc, V: ConstantInt::getTrue(Ty: DestTy));
954
955 bool Changed = false;
956 if (!Trunc.hasNoSignedWrap() &&
957 ComputeMaxSignificantBits(Op: Src, CxtI: &Trunc) <= DestWidth) {
958 Trunc.setHasNoSignedWrap(true);
959 Changed = true;
960 }
961 if (!Trunc.hasNoUnsignedWrap() &&
962 MaskedValueIsZero(V: Src, Mask: APInt::getBitsSetFrom(numBits: SrcWidth, loBit: DestWidth),
963 CxtI: &Trunc)) {
964 Trunc.setHasNoUnsignedWrap(true);
965 Changed = true;
966 }
967
968 return Changed ? &Trunc : nullptr;
969}
970
971Instruction *InstCombinerImpl::transformZExtICmp(ICmpInst *Cmp,
972 ZExtInst &Zext) {
973 // If we are just checking for a icmp eq of a single bit and zext'ing it
974 // to an integer, then shift the bit to the appropriate place and then
975 // cast to integer to avoid the comparison.
976
977 // FIXME: This set of transforms does not check for extra uses and/or creates
978 // an extra instruction (an optional final cast is not included
979 // in the transform comments). We may also want to favor icmp over
980 // shifts in cases of equal instructions because icmp has better
981 // analysis in general (invert the transform).
982
983 const APInt *Op1CV;
984 if (match(V: Cmp->getOperand(i_nocapture: 1), P: m_APInt(Res&: Op1CV))) {
985
986 // zext (x <s 0) to i32 --> x>>u31 true if signbit set.
987 if (Cmp->getPredicate() == ICmpInst::ICMP_SLT && Op1CV->isZero()) {
988 Value *In = Cmp->getOperand(i_nocapture: 0);
989 Value *Sh = ConstantInt::get(Ty: In->getType(),
990 V: In->getType()->getScalarSizeInBits() - 1);
991 In = Builder.CreateLShr(LHS: In, RHS: Sh, Name: In->getName() + ".lobit");
992 if (In->getType() != Zext.getType())
993 In = Builder.CreateIntCast(V: In, DestTy: Zext.getType(), isSigned: false /*ZExt*/);
994
995 return replaceInstUsesWith(I&: Zext, V: In);
996 }
997
998 // zext (X == 0) to i32 --> X^1 iff X has only the low bit set.
999 // zext (X == 0) to i32 --> (X>>1)^1 iff X has only the 2nd bit set.
1000 // zext (X != 0) to i32 --> X iff X has only the low bit set.
1001 // zext (X != 0) to i32 --> X>>1 iff X has only the 2nd bit set.
1002
1003 if (Op1CV->isZero() && Cmp->isEquality()) {
1004 // Exactly 1 possible 1? But not the high-bit because that is
1005 // canonicalized to this form.
1006 KnownBits Known = computeKnownBits(V: Cmp->getOperand(i_nocapture: 0), CxtI: &Zext);
1007 APInt KnownZeroMask(~Known.Zero);
1008 uint32_t ShAmt = KnownZeroMask.logBase2();
1009 bool IsExpectShAmt = KnownZeroMask.isPowerOf2() &&
1010 (Zext.getType()->getScalarSizeInBits() != ShAmt + 1);
1011 if (IsExpectShAmt &&
1012 (Cmp->getOperand(i_nocapture: 0)->getType() == Zext.getType() ||
1013 Cmp->getPredicate() == ICmpInst::ICMP_NE || ShAmt == 0)) {
1014 Value *In = Cmp->getOperand(i_nocapture: 0);
1015 if (ShAmt) {
1016 // Perform a logical shr by shiftamt.
1017 // Insert the shift to put the result in the low bit.
1018 In = Builder.CreateLShr(LHS: In, RHS: ConstantInt::get(Ty: In->getType(), V: ShAmt),
1019 Name: In->getName() + ".lobit");
1020 }
1021
1022 // Toggle the low bit for "X == 0".
1023 if (Cmp->getPredicate() == ICmpInst::ICMP_EQ)
1024 In = Builder.CreateXor(LHS: In, RHS: ConstantInt::get(Ty: In->getType(), V: 1));
1025
1026 if (Zext.getType() == In->getType())
1027 return replaceInstUsesWith(I&: Zext, V: In);
1028
1029 Value *IntCast = Builder.CreateIntCast(V: In, DestTy: Zext.getType(), isSigned: false);
1030 return replaceInstUsesWith(I&: Zext, V: IntCast);
1031 }
1032 }
1033 }
1034
1035 if (Cmp->isEquality()) {
1036 // Test if a bit is clear/set using a shifted-one mask:
1037 // zext (icmp eq (and X, (1 << ShAmt)), 0) --> and (lshr (not X), ShAmt), 1
1038 // zext (icmp ne (and X, (1 << ShAmt)), 0) --> and (lshr X, ShAmt), 1
1039 Value *X, *ShAmt;
1040 if (Cmp->hasOneUse() && match(V: Cmp->getOperand(i_nocapture: 1), P: m_ZeroInt()) &&
1041 match(V: Cmp->getOperand(i_nocapture: 0),
1042 P: m_OneUse(SubPattern: m_c_And(L: m_Shl(L: m_One(), R: m_Value(V&: ShAmt)), R: m_Value(V&: X))))) {
1043 auto *And = cast<BinaryOperator>(Val: Cmp->getOperand(i_nocapture: 0));
1044 Value *Shift = And->getOperand(i_nocapture: X == And->getOperand(i_nocapture: 0) ? 1 : 0);
1045 if (Zext.getType() == And->getType() ||
1046 Cmp->getPredicate() != ICmpInst::ICMP_EQ || Shift->hasOneUse()) {
1047 if (Cmp->getPredicate() == ICmpInst::ICMP_EQ)
1048 X = Builder.CreateNot(V: X);
1049 Value *Lshr = Builder.CreateLShr(LHS: X, RHS: ShAmt);
1050 Value *And1 =
1051 Builder.CreateAnd(LHS: Lshr, RHS: ConstantInt::get(Ty: X->getType(), V: 1));
1052 return replaceInstUsesWith(
1053 I&: Zext, V: Builder.CreateZExtOrTrunc(V: And1, DestTy: Zext.getType()));
1054 }
1055 }
1056 }
1057
1058 return nullptr;
1059}
1060
1061/// Determine if the specified value can be computed in the specified wider type
1062/// and produce the same low bits. If not, return false.
1063///
1064/// If this function returns true, it can also return a non-zero number of bits
1065/// (in BitsToClear) which indicates that the value it computes is correct for
1066/// the zero extend, but that the additional BitsToClear bits need to be zero'd
1067/// out. For example, to promote something like:
1068///
1069/// %B = trunc i64 %A to i32
1070/// %C = lshr i32 %B, 8
1071/// %E = zext i32 %C to i64
1072///
1073/// CanEvaluateZExtd for the 'lshr' will return true, and BitsToClear will be
1074/// set to 8 to indicate that the promoted value needs to have bits 24-31
1075/// cleared in addition to bits 32-63. Since an 'and' will be generated to
1076/// clear the top bits anyway, doing this has no extra cost.
1077///
1078/// This function works on both vectors and scalars.
1079static bool canEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear,
1080 InstCombinerImpl &IC, Instruction *CxtI) {
1081 BitsToClear = 0;
1082 if (canAlwaysEvaluateInType(V, Ty))
1083 return true;
1084 if (canNotEvaluateInType(V, Ty))
1085 return false;
1086
1087 auto *I = cast<Instruction>(Val: V);
1088 unsigned Tmp;
1089 switch (I->getOpcode()) {
1090 case Instruction::ZExt: // zext(zext(x)) -> zext(x).
1091 case Instruction::SExt: // zext(sext(x)) -> sext(x).
1092 case Instruction::Trunc: // zext(trunc(x)) -> trunc(x) or zext(x)
1093 return true;
1094 case Instruction::And:
1095 case Instruction::Or:
1096 case Instruction::Xor:
1097 case Instruction::Add:
1098 case Instruction::Sub:
1099 case Instruction::Mul:
1100 if (!canEvaluateZExtd(V: I->getOperand(i: 0), Ty, BitsToClear, IC, CxtI) ||
1101 !canEvaluateZExtd(V: I->getOperand(i: 1), Ty, BitsToClear&: Tmp, IC, CxtI))
1102 return false;
1103 // These can all be promoted if neither operand has 'bits to clear'.
1104 if (BitsToClear == 0 && Tmp == 0)
1105 return true;
1106
1107 // If the operation is an AND/OR/XOR and the bits to clear are zero in the
1108 // other side, BitsToClear is ok.
1109 if (Tmp == 0 && I->isBitwiseLogicOp()) {
1110 // We use MaskedValueIsZero here for generality, but the case we care
1111 // about the most is constant RHS.
1112 unsigned VSize = V->getType()->getScalarSizeInBits();
1113 if (IC.MaskedValueIsZero(V: I->getOperand(i: 1),
1114 Mask: APInt::getHighBitsSet(numBits: VSize, hiBitsSet: BitsToClear),
1115 CxtI)) {
1116 // If this is an And instruction and all of the BitsToClear are
1117 // known to be zero we can reset BitsToClear.
1118 if (I->getOpcode() == Instruction::And)
1119 BitsToClear = 0;
1120 return true;
1121 }
1122 }
1123
1124 // Otherwise, we don't know how to analyze this BitsToClear case yet.
1125 return false;
1126
1127 case Instruction::Shl: {
1128 // We can promote shl(x, cst) if we can promote x. Since shl overwrites the
1129 // upper bits we can reduce BitsToClear by the shift amount.
1130 const APInt *Amt;
1131 if (match(V: I->getOperand(i: 1), P: m_APInt(Res&: Amt))) {
1132 if (!canEvaluateZExtd(V: I->getOperand(i: 0), Ty, BitsToClear, IC, CxtI))
1133 return false;
1134 uint64_t ShiftAmt = Amt->getZExtValue();
1135 BitsToClear = ShiftAmt < BitsToClear ? BitsToClear - ShiftAmt : 0;
1136 return true;
1137 }
1138 return false;
1139 }
1140 case Instruction::LShr: {
1141 // We can promote lshr(x, cst) if we can promote x. This requires the
1142 // ultimate 'and' to clear out the high zero bits we're clearing out though.
1143 const APInt *Amt;
1144 if (match(V: I->getOperand(i: 1), P: m_APInt(Res&: Amt))) {
1145 if (!canEvaluateZExtd(V: I->getOperand(i: 0), Ty, BitsToClear, IC, CxtI))
1146 return false;
1147 BitsToClear += Amt->getZExtValue();
1148 if (BitsToClear > V->getType()->getScalarSizeInBits())
1149 BitsToClear = V->getType()->getScalarSizeInBits();
1150 return true;
1151 }
1152 // Cannot promote variable LSHR.
1153 return false;
1154 }
1155 case Instruction::Select:
1156 if (!canEvaluateZExtd(V: I->getOperand(i: 1), Ty, BitsToClear&: Tmp, IC, CxtI) ||
1157 !canEvaluateZExtd(V: I->getOperand(i: 2), Ty, BitsToClear, IC, CxtI) ||
1158 // TODO: If important, we could handle the case when the BitsToClear are
1159 // known zero in the disagreeing side.
1160 Tmp != BitsToClear)
1161 return false;
1162 return true;
1163
1164 case Instruction::PHI: {
1165 // We can change a phi if we can change all operands. Note that we never
1166 // get into trouble with cyclic PHIs here because we only consider
1167 // instructions with a single use.
1168 PHINode *PN = cast<PHINode>(Val: I);
1169 if (!canEvaluateZExtd(V: PN->getIncomingValue(i: 0), Ty, BitsToClear, IC, CxtI))
1170 return false;
1171 for (unsigned i = 1, e = PN->getNumIncomingValues(); i != e; ++i)
1172 if (!canEvaluateZExtd(V: PN->getIncomingValue(i), Ty, BitsToClear&: Tmp, IC, CxtI) ||
1173 // TODO: If important, we could handle the case when the BitsToClear
1174 // are known zero in the disagreeing input.
1175 Tmp != BitsToClear)
1176 return false;
1177 return true;
1178 }
1179 case Instruction::Call:
1180 // llvm.vscale() can always be executed in larger type, because the
1181 // value is automatically zero-extended.
1182 if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: I))
1183 if (II->getIntrinsicID() == Intrinsic::vscale)
1184 return true;
1185 return false;
1186 default:
1187 // TODO: Can handle more cases here.
1188 return false;
1189 }
1190}
1191
1192Instruction *InstCombinerImpl::visitZExt(ZExtInst &Zext) {
1193 // If this zero extend is only used by a truncate, let the truncate be
1194 // eliminated before we try to optimize this zext.
1195 if (Zext.hasOneUse() && isa<TruncInst>(Val: Zext.user_back()) &&
1196 !isa<Constant>(Val: Zext.getOperand(i_nocapture: 0)))
1197 return nullptr;
1198
1199 // If one of the common conversion will work, do it.
1200 if (Instruction *Result = commonCastTransforms(CI&: Zext))
1201 return Result;
1202
1203 Value *Src = Zext.getOperand(i_nocapture: 0);
1204 Type *SrcTy = Src->getType(), *DestTy = Zext.getType();
1205
1206 // zext nneg bool x -> 0
1207 if (SrcTy->isIntOrIntVectorTy(BitWidth: 1) && Zext.hasNonNeg())
1208 return replaceInstUsesWith(I&: Zext, V: Constant::getNullValue(Ty: Zext.getType()));
1209
1210 // Try to extend the entire expression tree to the wide destination type.
1211 unsigned BitsToClear;
1212 if (shouldChangeType(From: SrcTy, To: DestTy) &&
1213 canEvaluateZExtd(V: Src, Ty: DestTy, BitsToClear, IC&: *this, CxtI: &Zext)) {
1214 assert(BitsToClear <= SrcTy->getScalarSizeInBits() &&
1215 "Can't clear more bits than in SrcTy");
1216
1217 // Okay, we can transform this! Insert the new expression now.
1218 LLVM_DEBUG(
1219 dbgs() << "ICE: EvaluateInDifferentType converting expression type"
1220 " to avoid zero extend: "
1221 << Zext << '\n');
1222 Value *Res = EvaluateInDifferentType(V: Src, Ty: DestTy, isSigned: false);
1223 assert(Res->getType() == DestTy);
1224
1225 // Preserve debug values referring to Src if the zext is its last use.
1226 if (auto *SrcOp = dyn_cast<Instruction>(Val: Src))
1227 if (SrcOp->hasOneUse())
1228 replaceAllDbgUsesWith(From&: *SrcOp, To&: *Res, DomPoint&: Zext, DT);
1229
1230 uint32_t SrcBitsKept = SrcTy->getScalarSizeInBits() - BitsToClear;
1231 uint32_t DestBitSize = DestTy->getScalarSizeInBits();
1232
1233 // If the high bits are already filled with zeros, just replace this
1234 // cast with the result.
1235 if (MaskedValueIsZero(
1236 V: Res, Mask: APInt::getHighBitsSet(numBits: DestBitSize, hiBitsSet: DestBitSize - SrcBitsKept),
1237 CxtI: &Zext))
1238 return replaceInstUsesWith(I&: Zext, V: Res);
1239
1240 // We need to emit an AND to clear the high bits.
1241 Constant *C = ConstantInt::get(Ty: Res->getType(),
1242 V: APInt::getLowBitsSet(numBits: DestBitSize, loBitsSet: SrcBitsKept));
1243 return BinaryOperator::CreateAnd(V1: Res, V2: C);
1244 }
1245
1246 // If this is a TRUNC followed by a ZEXT then we are dealing with integral
1247 // types and if the sizes are just right we can convert this into a logical
1248 // 'and' which will be much cheaper than the pair of casts.
1249 if (auto *CSrc = dyn_cast<TruncInst>(Val: Src)) { // A->B->C cast
1250 // TODO: Subsume this into EvaluateInDifferentType.
1251
1252 // Get the sizes of the types involved. We know that the intermediate type
1253 // will be smaller than A or C, but don't know the relation between A and C.
1254 Value *A = CSrc->getOperand(i_nocapture: 0);
1255 unsigned SrcSize = A->getType()->getScalarSizeInBits();
1256 unsigned MidSize = CSrc->getType()->getScalarSizeInBits();
1257 unsigned DstSize = DestTy->getScalarSizeInBits();
1258 // If we're actually extending zero bits, then if
1259 // SrcSize < DstSize: zext(a & mask)
1260 // SrcSize == DstSize: a & mask
1261 // SrcSize > DstSize: trunc(a) & mask
1262 if (SrcSize < DstSize) {
1263 APInt AndValue(APInt::getLowBitsSet(numBits: SrcSize, loBitsSet: MidSize));
1264 Constant *AndConst = ConstantInt::get(Ty: A->getType(), V: AndValue);
1265 Value *And = Builder.CreateAnd(LHS: A, RHS: AndConst, Name: CSrc->getName() + ".mask");
1266 return new ZExtInst(And, DestTy);
1267 }
1268
1269 if (SrcSize == DstSize) {
1270 APInt AndValue(APInt::getLowBitsSet(numBits: SrcSize, loBitsSet: MidSize));
1271 return BinaryOperator::CreateAnd(V1: A, V2: ConstantInt::get(Ty: A->getType(),
1272 V: AndValue));
1273 }
1274 if (SrcSize > DstSize) {
1275 Value *Trunc = Builder.CreateTrunc(V: A, DestTy);
1276 APInt AndValue(APInt::getLowBitsSet(numBits: DstSize, loBitsSet: MidSize));
1277 return BinaryOperator::CreateAnd(V1: Trunc,
1278 V2: ConstantInt::get(Ty: Trunc->getType(),
1279 V: AndValue));
1280 }
1281 }
1282
1283 if (auto *Cmp = dyn_cast<ICmpInst>(Val: Src))
1284 return transformZExtICmp(Cmp, Zext);
1285
1286 // zext(trunc(X) & C) -> (X & zext(C)).
1287 Constant *C;
1288 Value *X;
1289 if (match(V: Src, P: m_OneUse(SubPattern: m_And(L: m_Trunc(Op: m_Value(V&: X)), R: m_Constant(C)))) &&
1290 X->getType() == DestTy)
1291 return BinaryOperator::CreateAnd(V1: X, V2: Builder.CreateZExt(V: C, DestTy));
1292
1293 // zext((trunc(X) & C) ^ C) -> ((X & zext(C)) ^ zext(C)).
1294 Value *And;
1295 if (match(V: Src, P: m_OneUse(SubPattern: m_Xor(L: m_Value(V&: And), R: m_Constant(C)))) &&
1296 match(V: And, P: m_OneUse(SubPattern: m_And(L: m_Trunc(Op: m_Value(V&: X)), R: m_Specific(V: C)))) &&
1297 X->getType() == DestTy) {
1298 Value *ZC = Builder.CreateZExt(V: C, DestTy);
1299 return BinaryOperator::CreateXor(V1: Builder.CreateAnd(LHS: X, RHS: ZC), V2: ZC);
1300 }
1301
1302 // If we are truncating, masking, and then zexting back to the original type,
1303 // that's just a mask. This is not handled by canEvaluateZextd if the
1304 // intermediate values have extra uses. This could be generalized further for
1305 // a non-constant mask operand.
1306 // zext (and (trunc X), C) --> and X, (zext C)
1307 if (match(V: Src, P: m_And(L: m_Trunc(Op: m_Value(V&: X)), R: m_Constant(C))) &&
1308 X->getType() == DestTy) {
1309 Value *ZextC = Builder.CreateZExt(V: C, DestTy);
1310 return BinaryOperator::CreateAnd(V1: X, V2: ZextC);
1311 }
1312
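  // zext (vscale) --> vscale of the wider type, when the function's
  // vscale_range attribute guarantees that vscale fits in the narrow source
  // type.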
1313 if (match(V: Src, P: m_VScale())) {
1314 if (Zext.getFunction() &&
1315 Zext.getFunction()->hasFnAttribute(Kind: Attribute::VScaleRange)) {
1316 Attribute Attr =
1317 Zext.getFunction()->getFnAttribute(Kind: Attribute::VScaleRange);
1318 if (std::optional<unsigned> MaxVScale = Attr.getVScaleRangeMax()) {
1319 unsigned TypeWidth = Src->getType()->getScalarSizeInBits();
1320 if (Log2_32(Value: *MaxVScale) < TypeWidth)
1321 return replaceInstUsesWith(I&: Zext, V: Builder.CreateVScale(Ty: DestTy));
1322 }
1323 }
1324 }
1325
1326 if (!Zext.hasNonNeg()) {
1327 // If this zero extend is only used as the shift amount of a shift, add the
    // nneg flag: a negative source would give a shift amount of at least the
    // bit width, which would make the shift poison anyway.
1328 if (Zext.hasOneUse() &&
1329 SrcTy->getScalarSizeInBits() >
1330 Log2_64_Ceil(Value: DestTy->getScalarSizeInBits()) &&
1331 match(V: Zext.user_back(), P: m_Shift(L: m_Value(), R: m_Specific(V: &Zext)))) {
1332 Zext.setNonNeg();
1333 return &Zext;
1334 }
1335
1336 if (isKnownNonNegative(V: Src, SQ: SQ.getWithInstruction(I: &Zext))) {
1337 Zext.setNonNeg();
1338 return &Zext;
1339 }
1340 }
1341
1342 return nullptr;
1343}
1344
1345/// Transform (sext icmp) to bitwise / integer operations to eliminate the icmp.
1346Instruction *InstCombinerImpl::transformSExtICmp(ICmpInst *Cmp,
1347 SExtInst &Sext) {
1348 Value *Op0 = Cmp->getOperand(i_nocapture: 0), *Op1 = Cmp->getOperand(i_nocapture: 1);
1349 ICmpInst::Predicate Pred = Cmp->getPredicate();
1350
1351 // Don't bother if Op1 isn't of vector or integer type.
1352 if (!Op1->getType()->isIntOrIntVectorTy())
1353 return nullptr;
1354
1355 if (Pred == ICmpInst::ICMP_SLT && match(V: Op1, P: m_ZeroInt())) {
1356 // sext (x <s 0) --> ashr x, 31 (all ones if negative)
1357 Value *Sh = ConstantInt::get(Ty: Op0->getType(),
1358 V: Op0->getType()->getScalarSizeInBits() - 1);
1359 Value *In = Builder.CreateAShr(LHS: Op0, RHS: Sh, Name: Op0->getName() + ".lobit");
1360 if (In->getType() != Sext.getType())
1361 In = Builder.CreateIntCast(V: In, DestTy: Sext.getType(), isSigned: true /*SExt*/);
1362
1363 return replaceInstUsesWith(I&: Sext, V: In);
1364 }
1365
1366 if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Val: Op1)) {
1367 // If we know that only one bit of the LHS of the icmp can be set and we
1368 // have an equality comparison with zero or a power of 2, we can transform
1369 // the icmp and sext into bitwise/integer operations.
1370 if (Cmp->hasOneUse() &&
1371 Cmp->isEquality() && (Op1C->isZero() || Op1C->getValue().isPowerOf2())){
1372 KnownBits Known = computeKnownBits(V: Op0, CxtI: &Sext);
1373
1374 APInt KnownZeroMask(~Known.Zero);
1375 if (KnownZeroMask.isPowerOf2()) {
1376 Value *In = Cmp->getOperand(i_nocapture: 0);
1377
1378 // If the icmp tests for a known zero bit we can constant fold it.
1379 if (!Op1C->isZero() && Op1C->getValue() != KnownZeroMask) {
1380 Value *V = Pred == ICmpInst::ICMP_NE ?
1381 ConstantInt::getAllOnesValue(Ty: Sext.getType()) :
1382 ConstantInt::getNullValue(Ty: Sext.getType());
1383 return replaceInstUsesWith(I&: Sext, V);
1384 }
1385
1386 if (!Op1C->isZero() == (Pred == ICmpInst::ICMP_NE)) {
1387 // sext ((x & 2^n) == 0) -> (x >> n) - 1
1388 // sext ((x & 2^n) != 2^n) -> (x >> n) - 1
1389 unsigned ShiftAmt = KnownZeroMask.countr_zero();
1390 // Perform a right shift to place the desired bit in the LSB.
1391 if (ShiftAmt)
1392 In = Builder.CreateLShr(LHS: In,
1393 RHS: ConstantInt::get(Ty: In->getType(), V: ShiftAmt));
1394
1395 // At this point "In" is either 1 or 0. Subtract 1 to turn
1396 // {1, 0} -> {0, -1}.
1397 In = Builder.CreateAdd(LHS: In,
1398 RHS: ConstantInt::getAllOnesValue(Ty: In->getType()),
1399 Name: "sext");
1400 } else {
1401 // sext ((x & 2^n) != 0) -> (x << bitwidth-n) a>> bitwidth-1
1402 // sext ((x & 2^n) == 2^n) -> (x << bitwidth-n) a>> bitwidth-1
1403 unsigned ShiftAmt = KnownZeroMask.countl_zero();
1404 // Perform a left shift to place the desired bit in the MSB.
1405 if (ShiftAmt)
1406 In = Builder.CreateShl(LHS: In,
1407 RHS: ConstantInt::get(Ty: In->getType(), V: ShiftAmt));
1408
1409 // Distribute the bit over the whole bit width.
1410 In = Builder.CreateAShr(LHS: In, RHS: ConstantInt::get(Ty: In->getType(),
1411 V: KnownZeroMask.getBitWidth() - 1), Name: "sext");
1412 }
1413
1414 if (Sext.getType() == In->getType())
1415 return replaceInstUsesWith(I&: Sext, V: In);
1416 return CastInst::CreateIntegerCast(S: In, Ty: Sext.getType(), isSigned: true/*SExt*/);
1417 }
1418 }
1419 }
1420
1421 return nullptr;
1422}
1423
1424/// Return true if we can take the specified value and return it as type Ty
1425/// without inserting any new casts and without changing the value of the common
1426/// low bits. This is used by code that tries to promote integer operations to a
1427/// wider type when doing so will allow us to eliminate the extension.
1428///
1429/// This function works on both vectors and scalars.
1430///
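/// For example, with X of type i64, (add (trunc X to i8), 1) can be evaluated
/// as (add X, 1): the low 8 bits of the result are unchanged, and visitSExt
/// re-creates the high bits with a shl/ashr pair when they are needed.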
1431static bool canEvaluateSExtd(Value *V, Type *Ty) {
1432 assert(V->getType()->getScalarSizeInBits() < Ty->getScalarSizeInBits() &&
1433 "Can't sign extend type to a smaller type");
1434 if (canAlwaysEvaluateInType(V, Ty))
1435 return true;
1436 if (canNotEvaluateInType(V, Ty))
1437 return false;
1438
1439 auto *I = cast<Instruction>(Val: V);
1440 switch (I->getOpcode()) {
1441 case Instruction::SExt: // sext(sext(x)) -> sext(x)
1442 case Instruction::ZExt: // sext(zext(x)) -> zext(x)
1443 case Instruction::Trunc: // sext(trunc(x)) -> trunc(x) or sext(x)
1444 return true;
1445 case Instruction::And:
1446 case Instruction::Or:
1447 case Instruction::Xor:
1448 case Instruction::Add:
1449 case Instruction::Sub:
1450 case Instruction::Mul:
1451 // These operators can all arbitrarily be extended if their inputs can.
1452 return canEvaluateSExtd(V: I->getOperand(i: 0), Ty) &&
1453 canEvaluateSExtd(V: I->getOperand(i: 1), Ty);
1454
1455 //case Instruction::Shl: TODO
1456 //case Instruction::LShr: TODO
1457
1458 case Instruction::Select:
1459 return canEvaluateSExtd(V: I->getOperand(i: 1), Ty) &&
1460 canEvaluateSExtd(V: I->getOperand(i: 2), Ty);
1461
1462 case Instruction::PHI: {
1463 // We can change a phi if we can change all operands. Note that we never
1464 // get into trouble with cyclic PHIs here because we only consider
1465 // instructions with a single use.
1466 PHINode *PN = cast<PHINode>(Val: I);
1467 for (Value *IncValue : PN->incoming_values())
1468 if (!canEvaluateSExtd(V: IncValue, Ty)) return false;
1469 return true;
1470 }
1471 default:
1472 // TODO: Can handle more cases here.
1473 break;
1474 }
1475
1476 return false;
1477}
1478
1479Instruction *InstCombinerImpl::visitSExt(SExtInst &Sext) {
1480 // If this sign extend is only used by a truncate, let the truncate be
1481 // eliminated before we try to optimize this sext.
1482 if (Sext.hasOneUse() && isa<TruncInst>(Val: Sext.user_back()))
1483 return nullptr;
1484
1485 if (Instruction *I = commonCastTransforms(CI&: Sext))
1486 return I;
1487
1488 Value *Src = Sext.getOperand(i_nocapture: 0);
1489 Type *SrcTy = Src->getType(), *DestTy = Sext.getType();
1490 unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
1491 unsigned DestBitSize = DestTy->getScalarSizeInBits();
1492
1493 // If the value being extended is zero or positive, use a zext instead.
1494 if (isKnownNonNegative(V: Src, SQ: SQ.getWithInstruction(I: &Sext))) {
1495 auto CI = CastInst::Create(Instruction::ZExt, S: Src, Ty: DestTy);
1496 CI->setNonNeg(true);
1497 return CI;
1498 }
1499
1500 // Try to extend the entire expression tree to the wide destination type.
1501 if (shouldChangeType(From: SrcTy, To: DestTy) && canEvaluateSExtd(V: Src, Ty: DestTy)) {
1502 // Okay, we can transform this! Insert the new expression now.
1503 LLVM_DEBUG(
1504 dbgs() << "ICE: EvaluateInDifferentType converting expression type"
1505 " to avoid sign extend: "
1506 << Sext << '\n');
1507 Value *Res = EvaluateInDifferentType(V: Src, Ty: DestTy, isSigned: true);
1508 assert(Res->getType() == DestTy);
1509
1510 // If the high bits are already filled with the sign bit, just replace this
1511 // cast with the result.
1512 if (ComputeNumSignBits(Op: Res, CxtI: &Sext) > DestBitSize - SrcBitSize)
1513 return replaceInstUsesWith(I&: Sext, V: Res);
1514
1515 // We need to emit a shl + ashr to do the sign extend.
1516 Value *ShAmt = ConstantInt::get(Ty: DestTy, V: DestBitSize-SrcBitSize);
1517 return BinaryOperator::CreateAShr(V1: Builder.CreateShl(LHS: Res, RHS: ShAmt, Name: "sext"),
1518 V2: ShAmt);
1519 }
1520
1521 Value *X;
1522 if (match(V: Src, P: m_Trunc(Op: m_Value(V&: X)))) {
1523 // If the input has more sign bits than bits truncated, then convert
1524 // directly to the final type.
1525 unsigned XBitSize = X->getType()->getScalarSizeInBits();
1526 if (ComputeNumSignBits(Op: X, CxtI: &Sext) > XBitSize - SrcBitSize)
1527 return CastInst::CreateIntegerCast(S: X, Ty: DestTy, /* isSigned */ true);
1528
1529 // If input is a trunc from the destination type, then convert into shifts.
1530 if (Src->hasOneUse() && X->getType() == DestTy) {
1531 // sext (trunc X) --> ashr (shl X, C), C
1532 Constant *ShAmt = ConstantInt::get(Ty: DestTy, V: DestBitSize - SrcBitSize);
1533 return BinaryOperator::CreateAShr(V1: Builder.CreateShl(LHS: X, RHS: ShAmt), V2: ShAmt);
1534 }
1535
1536 // If we are replacing shifted-in high zero bits with sign bits, convert
1537 // the logic shift to arithmetic shift and eliminate the cast to
1538 // intermediate type:
1539 // sext (trunc (lshr Y, C)) --> sext/trunc (ashr Y, C)
1540 Value *Y;
1541 if (Src->hasOneUse() &&
1542 match(V: X, P: m_LShr(L: m_Value(V&: Y),
1543 R: m_SpecificIntAllowPoison(V: XBitSize - SrcBitSize)))) {
1544 Value *Ashr = Builder.CreateAShr(LHS: Y, RHS: XBitSize - SrcBitSize);
1545 return CastInst::CreateIntegerCast(S: Ashr, Ty: DestTy, /* isSigned */ true);
1546 }
1547 }
1548
1549 if (auto *Cmp = dyn_cast<ICmpInst>(Val: Src))
1550 return transformSExtICmp(Cmp, Sext);
1551
1552 // If the input is a shl/ashr pair of the same constant, then this is a sign
1553 // extension from a smaller value. If we could trust arbitrary bitwidth
1554 // integers, we could turn this into a truncate to the smaller bit width and
1555 // then use a sext for the whole extension. Since we don't, look deeper and
1556 // check for a truncate. If the source and dest are the same type, eliminate the
1557 // trunc and extend and just do shifts. For example, turn:
1558 // %a = trunc i32 %i to i8
1559 // %b = shl i8 %a, C
1560 // %c = ashr i8 %b, C
1561 // %d = sext i8 %c to i32
1562 // into:
1563 // %a = shl i32 %i, 32-(8-C)
1564 // %d = ashr i32 %a, 32-(8-C)
1565 Value *A = nullptr;
1566 // TODO: Eventually this could be subsumed by EvaluateInDifferentType.
1567 Constant *BA = nullptr, *CA = nullptr;
1568 if (match(V: Src, P: m_AShr(L: m_Shl(L: m_Trunc(Op: m_Value(V&: A)), R: m_Constant(C&: BA)),
1569 R: m_ImmConstant(C&: CA))) &&
1570 BA->isElementWiseEqual(Y: CA) && A->getType() == DestTy) {
1571 Constant *WideCurrShAmt =
1572 ConstantFoldCastOperand(Opcode: Instruction::SExt, C: CA, DestTy, DL);
1573 assert(WideCurrShAmt && "Constant folding of ImmConstant cannot fail");
1574 Constant *NumLowbitsLeft = ConstantExpr::getSub(
1575 C1: ConstantInt::get(Ty: DestTy, V: SrcTy->getScalarSizeInBits()), C2: WideCurrShAmt);
1576 Constant *NewShAmt = ConstantExpr::getSub(
1577 C1: ConstantInt::get(Ty: DestTy, V: DestTy->getScalarSizeInBits()),
1578 C2: NumLowbitsLeft);
1579 NewShAmt =
1580 Constant::mergeUndefsWith(C: Constant::mergeUndefsWith(C: NewShAmt, Other: BA), Other: CA);
1581 A = Builder.CreateShl(LHS: A, RHS: NewShAmt, Name: Sext.getName());
1582 return BinaryOperator::CreateAShr(V1: A, V2: NewShAmt);
1583 }
1584
1585 // Splatting a bit of constant-index across a value:
1586 // sext (ashr (trunc iN X to iM), M-1) to iN --> ashr (shl X, N-M), N-1
1587 // If the dest type is different, use a cast (adjust use check).
1588 if (match(V: Src, P: m_OneUse(SubPattern: m_AShr(L: m_Trunc(Op: m_Value(V&: X)),
1589 R: m_SpecificInt(V: SrcBitSize - 1))))) {
1590 Type *XTy = X->getType();
1591 unsigned XBitSize = XTy->getScalarSizeInBits();
1592 Constant *ShlAmtC = ConstantInt::get(Ty: XTy, V: XBitSize - SrcBitSize);
1593 Constant *AshrAmtC = ConstantInt::get(Ty: XTy, V: XBitSize - 1);
1594 if (XTy == DestTy)
1595 return BinaryOperator::CreateAShr(V1: Builder.CreateShl(LHS: X, RHS: ShlAmtC),
1596 V2: AshrAmtC);
1597 if (cast<BinaryOperator>(Val: Src)->getOperand(i_nocapture: 0)->hasOneUse()) {
1598 Value *Ashr = Builder.CreateAShr(LHS: Builder.CreateShl(LHS: X, RHS: ShlAmtC), RHS: AshrAmtC);
1599 return CastInst::CreateIntegerCast(S: Ashr, Ty: DestTy, /* isSigned */ true);
1600 }
1601 }
1602
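  // sext (vscale) --> vscale of the wider type, when the function's
  // vscale_range attribute guarantees that the sign bit of the narrow source
  // type is never set.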
1603 if (match(V: Src, P: m_VScale())) {
1604 if (Sext.getFunction() &&
1605 Sext.getFunction()->hasFnAttribute(Kind: Attribute::VScaleRange)) {
1606 Attribute Attr =
1607 Sext.getFunction()->getFnAttribute(Kind: Attribute::VScaleRange);
1608 if (std::optional<unsigned> MaxVScale = Attr.getVScaleRangeMax())
1609 if (Log2_32(Value: *MaxVScale) < (SrcBitSize - 1))
1610 return replaceInstUsesWith(I&: Sext, V: Builder.CreateVScale(Ty: DestTy));
1611 }
1612 }
1613
1614 return nullptr;
1615}
1616
1617/// Return true if the specified floating-point constant fits in the given FP
1618/// semantics without changing its value.
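/// For example, the double constant 2.0 fits in half, but 0.1 (which is
/// already a rounded value as a double) loses information when narrowed
/// further.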
1619static bool fitsInFPType(ConstantFP *CFP, const fltSemantics &Sem) {
1620 bool losesInfo;
1621 APFloat F = CFP->getValueAPF();
1622 (void)F.convert(ToSemantics: Sem, RM: APFloat::rmNearestTiesToEven, losesInfo: &losesInfo);
1623 return !losesInfo;
1624}
1625
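// Find a narrower FP type that can hold CFP exactly (trying bfloat or half
// first, then float, then double); returns nullptr if CFP cannot be shrunk.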
1626static Type *shrinkFPConstant(ConstantFP *CFP, bool PreferBFloat) {
1627 if (CFP->getType() == Type::getPPC_FP128Ty(C&: CFP->getContext()))
1628 return nullptr; // No constant folding of this.
1629 // See if the value can be truncated to bfloat and then reextended.
1630 if (PreferBFloat && fitsInFPType(CFP, Sem: APFloat::BFloat()))
1631 return Type::getBFloatTy(C&: CFP->getContext());
1632 // See if the value can be truncated to half and then reextended.
1633 if (!PreferBFloat && fitsInFPType(CFP, Sem: APFloat::IEEEhalf()))
1634 return Type::getHalfTy(C&: CFP->getContext());
1635 // See if the value can be truncated to float and then reextended.
1636 if (fitsInFPType(CFP, Sem: APFloat::IEEEsingle()))
1637 return Type::getFloatTy(C&: CFP->getContext());
1638 if (CFP->getType()->isDoubleTy())
1639 return nullptr; // Won't shrink.
1640 if (fitsInFPType(CFP, Sem: APFloat::IEEEdouble()))
1641 return Type::getDoubleTy(C&: CFP->getContext());
1642 // Don't try to shrink to various long double types.
1643 return nullptr;
1644}
1645
1646// Determine if this is a vector of ConstantFPs and if so, return the minimal
1647// type we can safely truncate all elements to.
1648static Type *shrinkFPConstantVector(Value *V, bool PreferBFloat) {
1649 auto *CV = dyn_cast<Constant>(Val: V);
1650 auto *CVVTy = dyn_cast<FixedVectorType>(Val: V->getType());
1651 if (!CV || !CVVTy)
1652 return nullptr;
1653
1654 Type *MinType = nullptr;
1655
1656 unsigned NumElts = CVVTy->getNumElements();
1657
1658 // For fixed-width vectors we find the minimal type by looking
1659 // through the constant values of the vector.
1660 for (unsigned i = 0; i != NumElts; ++i) {
1661 if (isa<UndefValue>(Val: CV->getAggregateElement(Elt: i)))
1662 continue;
1663
1664 auto *CFP = dyn_cast_or_null<ConstantFP>(Val: CV->getAggregateElement(Elt: i));
1665 if (!CFP)
1666 return nullptr;
1667
1668 Type *T = shrinkFPConstant(CFP, PreferBFloat);
1669 if (!T)
1670 return nullptr;
1671
1672 // If we haven't found a type yet or this type has a larger mantissa than
1673 // our previous type, this is our new minimal type.
1674 if (!MinType || T->getFPMantissaWidth() > MinType->getFPMantissaWidth())
1675 MinType = T;
1676 }
1677
1678 // Make a vector type from the minimal type.
1679 return MinType ? FixedVectorType::get(ElementType: MinType, NumElts) : nullptr;
1680}
1681
1682/// Find the minimum FP type we can safely truncate to.
1683static Type *getMinimumFPType(Value *V, bool PreferBFloat) {
1684 if (auto *FPExt = dyn_cast<FPExtInst>(Val: V))
1685 return FPExt->getOperand(i_nocapture: 0)->getType();
1686
1687 // If this value is a constant, return the constant in the smallest FP type
1688 // that can accurately represent it. This allows us to turn
1689 // (float)((double)X+2.0) into X+2.0f.
1690 if (auto *CFP = dyn_cast<ConstantFP>(Val: V))
1691 if (Type *T = shrinkFPConstant(CFP, PreferBFloat))
1692 return T;
1693
1694 // Try to shrink scalable and fixed splat vectors.
1695 if (auto *FPC = dyn_cast<Constant>(Val: V))
1696 if (isa<VectorType>(Val: V->getType()))
1697 if (auto *Splat = dyn_cast_or_null<ConstantFP>(Val: FPC->getSplatValue()))
1698 if (Type *T = shrinkFPConstant(CFP: Splat, PreferBFloat))
1699 return T;
1700
1701 // Try to shrink a vector of FP constants. This returns nullptr on scalable
1702 // vectors.
1703 if (Type *T = shrinkFPConstantVector(V, PreferBFloat))
1704 return T;
1705
1706 return V->getType();
1707}
1708
1709/// Return true if the cast from integer to FP can be proven to be exact for all
1710/// possible inputs (the conversion does not lose any precision).
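/// For example, (sitofp i16 %x to double) is always exact because double's
/// 53-bit significand covers every 16-bit signed value, while
/// (uitofp i32 %x to float) may round because float has only 24 significand
/// bits.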
1711static bool isKnownExactCastIntToFP(CastInst &I, InstCombinerImpl &IC) {
1712 CastInst::CastOps Opcode = I.getOpcode();
1713 assert((Opcode == CastInst::SIToFP || Opcode == CastInst::UIToFP) &&
1714 "Unexpected cast");
1715 Value *Src = I.getOperand(i_nocapture: 0);
1716 Type *SrcTy = Src->getType();
1717 Type *FPTy = I.getType();
1718 bool IsSigned = Opcode == Instruction::SIToFP;
1719 int SrcSize = (int)SrcTy->getScalarSizeInBits() - IsSigned;
1720
1721 // Easy case - if the source integer type has fewer bits than the FP mantissa,
1722 // then the cast must be exact.
1723 int DestNumSigBits = FPTy->getFPMantissaWidth();
1724 if (SrcSize <= DestNumSigBits)
1725 return true;
1726
1727 // Cast from FP to integer and back to FP is independent of the intermediate
1728 // integer width because of poison on overflow.
1729 Value *F;
1730 if (match(V: Src, P: m_FPToSI(Op: m_Value(V&: F))) || match(V: Src, P: m_FPToUI(Op: m_Value(V&: F)))) {
1731 // If this is uitofp (fptosi F), the source needs an extra bit to avoid
1732 // potential rounding of negative FP input values.
1733 int SrcNumSigBits = F->getType()->getFPMantissaWidth();
1734 if (!IsSigned && match(V: Src, P: m_FPToSI(Op: m_Value())))
1735 SrcNumSigBits++;
1736
1737 // [su]itofp (fpto[su]i F) --> exact if the source type has no more
1738 // significant bits than the destination (and make sure neither type is
1739 // weird -- ppc_fp128).
1740 if (SrcNumSigBits > 0 && DestNumSigBits > 0 &&
1741 SrcNumSigBits <= DestNumSigBits)
1742 return true;
1743 }
1744
1745 // TODO:
1746 // Try harder to find if the source integer type has fewer significant bits.
1747 // For example, compute number of sign bits.
1748 KnownBits SrcKnown = IC.computeKnownBits(V: Src, CxtI: &I);
1749 int SigBits = (int)SrcTy->getScalarSizeInBits() -
1750 SrcKnown.countMinLeadingZeros() -
1751 SrcKnown.countMinTrailingZeros();
1752 if (SigBits <= DestNumSigBits)
1753 return true;
1754
1755 return false;
1756}
1757
1758Instruction *InstCombinerImpl::visitFPTrunc(FPTruncInst &FPT) {
1759 if (Instruction *I = commonCastTransforms(CI&: FPT))
1760 return I;
1761
1762 // If we have fptrunc(OpI (fpextend x), (fpextend y)), we would like to
1763 // simplify this expression to avoid one or more of the trunc/extend
1764 // operations if we can do so without changing the numerical results.
1765 //
1766 // The exact manner in which the widths of the operands interact to limit
1767 // what we can and cannot do safely varies from operation to operation, and
1768 // is explained below in the various case statements.
1769 Type *Ty = FPT.getType();
1770 auto *BO = dyn_cast<BinaryOperator>(Val: FPT.getOperand(i_nocapture: 0));
1771 if (BO && BO->hasOneUse()) {
1772 Type *LHSMinType =
1773 getMinimumFPType(V: BO->getOperand(i_nocapture: 0), /*PreferBFloat=*/Ty->isBFloatTy());
1774 Type *RHSMinType =
1775 getMinimumFPType(V: BO->getOperand(i_nocapture: 1), /*PreferBFloat=*/Ty->isBFloatTy());
1776 unsigned OpWidth = BO->getType()->getFPMantissaWidth();
1777 unsigned LHSWidth = LHSMinType->getFPMantissaWidth();
1778 unsigned RHSWidth = RHSMinType->getFPMantissaWidth();
1779 unsigned SrcWidth = std::max(a: LHSWidth, b: RHSWidth);
1780 unsigned DstWidth = Ty->getFPMantissaWidth();
1781 switch (BO->getOpcode()) {
1782 default: break;
1783 case Instruction::FAdd:
1784 case Instruction::FSub:
1785 // For addition and subtraction, the infinitely precise result can
1786 // essentially be arbitrarily wide; proving that double rounding
1787 // will not occur because the result of OpI is exact (as we will for
1788 // FMul, for example) is hopeless. However, we *can* nonetheless
1789 // frequently know that double rounding cannot occur (or that it is
1790 // innocuous) by taking advantage of the specific structure of
1791 // infinitely-precise results that admit double rounding.
1792 //
1793 // Specifically, if OpWidth >= 2*DstWidth+1 and DstWidth is sufficient
1794 // to represent both sources, we can guarantee that the double
1795 // rounding is innocuous (See p50 of Figueroa's 2000 PhD thesis,
1796 // "A Rigorous Framework for Fully Supporting the IEEE Standard ..."
1797 // for proof of this fact).
1798 //
1799 // Note: Figueroa does not consider the case where DstFormat !=
1800 // SrcFormat. It's possible (likely even!) that this analysis
1801 // could be tightened for those cases, but they are rare (the main
1802 // case of interest here is (float)((double)float + float)).
1803 if (OpWidth >= 2*DstWidth+1 && DstWidth >= SrcWidth) {
1804 Value *LHS = Builder.CreateFPTrunc(V: BO->getOperand(i_nocapture: 0), DestTy: Ty);
1805 Value *RHS = Builder.CreateFPTrunc(V: BO->getOperand(i_nocapture: 1), DestTy: Ty);
1806 Instruction *RI = BinaryOperator::Create(Op: BO->getOpcode(), S1: LHS, S2: RHS);
1807 RI->copyFastMathFlags(I: BO);
1808 return RI;
1809 }
1810 break;
1811 case Instruction::FMul:
1812 // For multiplication, the infinitely precise result has at most
1813 // LHSWidth + RHSWidth significant bits; if OpWidth is sufficient
1814 // that such a value can be exactly represented, then no double
1815 // rounding can possibly occur; we can safely perform the operation
1816 // in the destination format if it can represent both sources.
1817 if (OpWidth >= LHSWidth + RHSWidth && DstWidth >= SrcWidth) {
1818 Value *LHS = Builder.CreateFPTrunc(V: BO->getOperand(i_nocapture: 0), DestTy: Ty);
1819 Value *RHS = Builder.CreateFPTrunc(V: BO->getOperand(i_nocapture: 1), DestTy: Ty);
1820 return BinaryOperator::CreateFMulFMF(V1: LHS, V2: RHS, FMFSource: BO);
1821 }
1822 break;
1823 case Instruction::FDiv:
1824 // For division, we again use the bound from Figueroa's
1825 // dissertation. I am entirely certain that this bound can be
1826 // tightened in the unbalanced operand case by an analysis based on
1827 // the diophantine rational approximation bound, but the well-known
1828 // condition used here is a good conservative first pass.
1829 // TODO: Tighten bound via rigorous analysis of the unbalanced case.
1830 if (OpWidth >= 2*DstWidth && DstWidth >= SrcWidth) {
1831 Value *LHS = Builder.CreateFPTrunc(V: BO->getOperand(i_nocapture: 0), DestTy: Ty);
1832 Value *RHS = Builder.CreateFPTrunc(V: BO->getOperand(i_nocapture: 1), DestTy: Ty);
1833 return BinaryOperator::CreateFDivFMF(V1: LHS, V2: RHS, FMFSource: BO);
1834 }
1835 break;
1836 case Instruction::FRem: {
1837 // Remainder is straightforward. Remainder is always exact, so the
1838 // type of OpI doesn't enter into things at all. We simply evaluate
1839 // in whichever source type is larger, then convert to the
1840 // destination type.
1841 if (SrcWidth == OpWidth)
1842 break;
1843 Value *LHS, *RHS;
1844 if (LHSWidth == SrcWidth) {
1845 LHS = Builder.CreateFPTrunc(V: BO->getOperand(i_nocapture: 0), DestTy: LHSMinType);
1846 RHS = Builder.CreateFPTrunc(V: BO->getOperand(i_nocapture: 1), DestTy: LHSMinType);
1847 } else {
1848 LHS = Builder.CreateFPTrunc(V: BO->getOperand(i_nocapture: 0), DestTy: RHSMinType);
1849 RHS = Builder.CreateFPTrunc(V: BO->getOperand(i_nocapture: 1), DestTy: RHSMinType);
1850 }
1851
1852 Value *ExactResult = Builder.CreateFRemFMF(L: LHS, R: RHS, FMFSource: BO);
1853 return CastInst::CreateFPCast(S: ExactResult, Ty);
1854 }
1855 }
1856 }
1857
1858 // (fptrunc (fneg x)) -> (fneg (fptrunc x))
1859 Value *X;
1860 Instruction *Op = dyn_cast<Instruction>(Val: FPT.getOperand(i_nocapture: 0));
1861 if (Op && Op->hasOneUse()) {
1862 FastMathFlags FMF = FPT.getFastMathFlags();
1863 if (auto *FPMO = dyn_cast<FPMathOperator>(Val: Op))
1864 FMF &= FPMO->getFastMathFlags();
1865
1866 if (match(V: Op, P: m_FNeg(X: m_Value(V&: X)))) {
1867 Value *InnerTrunc = Builder.CreateFPTruncFMF(V: X, DestTy: Ty, FMFSource: FMF);
1868 Value *Neg = Builder.CreateFNegFMF(V: InnerTrunc, FMFSource: FMF);
1869 return replaceInstUsesWith(I&: FPT, V: Neg);
1870 }
1871
1872 // If we are truncating a select that has an extended operand, we can
1873 // narrow the other operand and do the select as a narrow op.
1874 Value *Cond, *X, *Y;
1875 if (match(V: Op, P: m_Select(C: m_Value(V&: Cond), L: m_FPExt(Op: m_Value(V&: X)), R: m_Value(V&: Y))) &&
1876 X->getType() == Ty) {
1877 // fptrunc (select Cond, (fpext X), Y) --> select Cond, X, (fptrunc Y)
1878 Value *NarrowY = Builder.CreateFPTruncFMF(V: Y, DestTy: Ty, FMFSource: FMF);
1879 Value *Sel =
1880 Builder.CreateSelectFMF(C: Cond, True: X, False: NarrowY, FMFSource: FMF, Name: "narrow.sel", MDFrom: Op);
1881 return replaceInstUsesWith(I&: FPT, V: Sel);
1882 }
1883 if (match(V: Op, P: m_Select(C: m_Value(V&: Cond), L: m_Value(V&: Y), R: m_FPExt(Op: m_Value(V&: X)))) &&
1884 X->getType() == Ty) {
1885 // fptrunc (select Cond, Y, (fpext X)) --> select Cond, (fptrunc Y), X
1886 Value *NarrowY = Builder.CreateFPTruncFMF(V: Y, DestTy: Ty, FMFSource: FMF);
1887 Value *Sel =
1888 Builder.CreateSelectFMF(C: Cond, True: NarrowY, False: X, FMFSource: FMF, Name: "narrow.sel", MDFrom: Op);
1889 return replaceInstUsesWith(I&: FPT, V: Sel);
1890 }
1891 }
1892
1893 if (auto *II = dyn_cast<IntrinsicInst>(Val: FPT.getOperand(i_nocapture: 0))) {
1894 switch (II->getIntrinsicID()) {
1895 default: break;
1896 case Intrinsic::ceil:
1897 case Intrinsic::fabs:
1898 case Intrinsic::floor:
1899 case Intrinsic::nearbyint:
1900 case Intrinsic::rint:
1901 case Intrinsic::round:
1902 case Intrinsic::roundeven:
1903 case Intrinsic::trunc: {
1904 Value *Src = II->getArgOperand(i: 0);
1905 if (!Src->hasOneUse())
1906 break;
1907
1908 // Except for fabs, this transformation requires the input of the unary FP
1909 // operation to be itself an fpext from the type to which we're
1910 // truncating.
1911 if (II->getIntrinsicID() != Intrinsic::fabs) {
1912 FPExtInst *FPExtSrc = dyn_cast<FPExtInst>(Val: Src);
1913 if (!FPExtSrc || FPExtSrc->getSrcTy() != Ty)
1914 break;
1915 }
1916
1917 // Do unary FP operation on smaller type.
1918 // (fptrunc (fabs x)) -> (fabs (fptrunc x))
1919 Value *InnerTrunc = Builder.CreateFPTrunc(V: Src, DestTy: Ty);
1920 Function *Overload = Intrinsic::getOrInsertDeclaration(
1921 M: FPT.getModule(), id: II->getIntrinsicID(), Tys: Ty);
1922 SmallVector<OperandBundleDef, 1> OpBundles;
1923 II->getOperandBundlesAsDefs(Defs&: OpBundles);
1924 CallInst *NewCI =
1925 CallInst::Create(Func: Overload, Args: {InnerTrunc}, Bundles: OpBundles, NameStr: II->getName());
1926 // A normal value may be converted to an infinity, which means we cannot
1927 // propagate ninf from the intrinsic; propagate the FMF from the fptrunc instead.
1928 NewCI->copyFastMathFlags(I: &FPT);
1929 return NewCI;
1930 }
1931 }
1932 }
1933
1934 if (Instruction *I = shrinkInsertElt(Trunc&: FPT, Builder))
1935 return I;
1936
1937 Value *Src = FPT.getOperand(i_nocapture: 0);
1938 if (isa<SIToFPInst>(Val: Src) || isa<UIToFPInst>(Val: Src)) {
1939 auto *FPCast = cast<CastInst>(Val: Src);
1940 if (isKnownExactCastIntToFP(I&: *FPCast, IC&: *this))
1941 return CastInst::Create(FPCast->getOpcode(), S: FPCast->getOperand(i_nocapture: 0), Ty);
1942 }
1943
1944 return nullptr;
1945}
1946
1947Instruction *InstCombinerImpl::visitFPExt(CastInst &FPExt) {
1948 // If the source operand is a cast from integer to FP and known exact, then
1949 // cast the integer operand directly to the destination type.
1950 Type *Ty = FPExt.getType();
1951 Value *Src = FPExt.getOperand(i_nocapture: 0);
1952 if (isa<SIToFPInst>(Val: Src) || isa<UIToFPInst>(Val: Src)) {
1953 auto *FPCast = cast<CastInst>(Val: Src);
1954 if (isKnownExactCastIntToFP(I&: *FPCast, IC&: *this))
1955 return CastInst::Create(FPCast->getOpcode(), S: FPCast->getOperand(i_nocapture: 0), Ty);
1956 }
1957
1958 return commonCastTransforms(CI&: FPExt);
1959}
1960
1961/// fpto{s/u}i({u/s}itofp(X)) --> X or zext(X) or sext(X) or trunc(X)
1962/// This is safe if the intermediate type has enough bits in its mantissa to
1963/// accurately represent all values of X. For example, this won't work with
1964/// i64 -> float -> i64.
1965Instruction *InstCombinerImpl::foldItoFPtoI(CastInst &FI) {
1966 if (!isa<UIToFPInst>(Val: FI.getOperand(i_nocapture: 0)) && !isa<SIToFPInst>(Val: FI.getOperand(i_nocapture: 0)))
1967 return nullptr;
1968
1969 auto *OpI = cast<CastInst>(Val: FI.getOperand(i_nocapture: 0));
1970 Value *X = OpI->getOperand(i_nocapture: 0);
1971 Type *XType = X->getType();
1972 Type *DestType = FI.getType();
1973 bool IsOutputSigned = isa<FPToSIInst>(Val: FI);
1974
1975 // Since we can assume the conversion won't overflow, our decision as to
1976 // whether the input will fit in the float should depend on the minimum
1977 // of the input range and output range.
1978
1979 // This means this is also safe for a signed input and unsigned output, since
1980 // a negative input would lead to undefined behavior.
1981 if (!isKnownExactCastIntToFP(I&: *OpI, IC&: *this)) {
1982 // The first cast may not round exactly based on the source integer width
1983 // and FP width, but the overflow UB rules can still allow this to fold.
1984 // If the destination type is narrow, that means the intermediate FP value
1985 // must be large enough to hold the source value exactly.
1986 // For example, (uint8_t)(float)(uint32_t)16777217 is undefined behavior.
1987 int OutputSize = (int)DestType->getScalarSizeInBits();
1988 if (OutputSize > OpI->getType()->getFPMantissaWidth())
1989 return nullptr;
1990 }
1991
1992 if (DestType->getScalarSizeInBits() > XType->getScalarSizeInBits()) {
1993 bool IsInputSigned = isa<SIToFPInst>(Val: OpI);
1994 if (IsInputSigned && IsOutputSigned)
1995 return new SExtInst(X, DestType);
1996 return new ZExtInst(X, DestType);
1997 }
1998 if (DestType->getScalarSizeInBits() < XType->getScalarSizeInBits())
1999 return new TruncInst(X, DestType);
2000
2001 assert(XType == DestType && "Unexpected types for int to FP to int casts");
2002 return replaceInstUsesWith(I&: FI, V: X);
2003}
2004
2005static Instruction *foldFPtoI(Instruction &FI, InstCombiner &IC) {
2006 // fpto{u/s}i non-norm --> 0
2007 FPClassTest Mask =
2008 FI.getOpcode() == Instruction::FPToUI ? fcPosNormal : fcNormal;
2009 KnownFPClass FPClass = computeKnownFPClass(
2010 V: FI.getOperand(i: 0), InterestedClasses: Mask, SQ: IC.getSimplifyQuery().getWithInstruction(I: &FI));
2011 if (FPClass.isKnownNever(Mask))
2012 return IC.replaceInstUsesWith(I&: FI, V: ConstantInt::getNullValue(Ty: FI.getType()));
2013
2014 return nullptr;
2015}
2016
2017Instruction *InstCombinerImpl::visitFPToUI(FPToUIInst &FI) {
2018 if (Instruction *I = foldItoFPtoI(FI))
2019 return I;
2020
2021 if (Instruction *I = foldFPtoI(FI, IC&: *this))
2022 return I;
2023
2024 return commonCastTransforms(CI&: FI);
2025}
2026
2027Instruction *InstCombinerImpl::visitFPToSI(FPToSIInst &FI) {
2028 if (Instruction *I = foldItoFPtoI(FI))
2029 return I;
2030
2031 if (Instruction *I = foldFPtoI(FI, IC&: *this))
2032 return I;
2033
2034 return commonCastTransforms(CI&: FI);
2035}
2036
2037Instruction *InstCombinerImpl::visitUIToFP(CastInst &CI) {
2038 if (Instruction *R = commonCastTransforms(CI))
2039 return R;
2040 if (!CI.hasNonNeg() && isKnownNonNegative(V: CI.getOperand(i_nocapture: 0), SQ)) {
2041 CI.setNonNeg();
2042 return &CI;
2043 }
2044 return nullptr;
2045}
2046
2047Instruction *InstCombinerImpl::visitSIToFP(CastInst &CI) {
2048 if (Instruction *R = commonCastTransforms(CI))
2049 return R;
2050 if (isKnownNonNegative(V: CI.getOperand(i_nocapture: 0), SQ)) {
2051 auto *UI =
2052 CastInst::Create(Instruction::UIToFP, S: CI.getOperand(i_nocapture: 0), Ty: CI.getType());
2053 UI->setNonNeg(true);
2054 return UI;
2055 }
2056 return nullptr;
2057}
2058
2059Instruction *InstCombinerImpl::visitIntToPtr(IntToPtrInst &CI) {
2060 // If the source integer type is not the intptr_t type for this target, do a
2061 // trunc or zext to the intptr_t type, then inttoptr of it. This allows the
2062 // cast to be exposed to other transforms.
2063 unsigned AS = CI.getAddressSpace();
2064 if (CI.getOperand(i_nocapture: 0)->getType()->getScalarSizeInBits() !=
2065 DL.getPointerSizeInBits(AS)) {
2066 Type *Ty = CI.getOperand(i_nocapture: 0)->getType()->getWithNewType(
2067 EltTy: DL.getIntPtrType(C&: CI.getContext(), AddressSpace: AS));
2068 Value *P = Builder.CreateZExtOrTrunc(V: CI.getOperand(i_nocapture: 0), DestTy: Ty);
2069 return new IntToPtrInst(P, CI.getType());
2070 }
2071
2072 if (Instruction *I = commonCastTransforms(CI))
2073 return I;
2074
2075 return nullptr;
2076}
2077
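/// Try to fold (ptrtoint (gep ...)) into pure integer arithmetic: if the base
/// of a chain of one-use GEPs is (inttoptr X) or a null pointer, the result
/// can be computed as X (or 0) plus the accumulated GEP offsets.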
2078Value *InstCombinerImpl::foldPtrToIntOfGEP(Type *IntTy, Value *Ptr) {
2079 // Look through chain of one-use GEPs.
2080 Type *PtrTy = Ptr->getType();
2081 SmallVector<GEPOperator *> GEPs;
2082 while (true) {
2083 auto *GEP = dyn_cast<GEPOperator>(Val: Ptr);
2084 if (!GEP || !GEP->hasOneUse())
2085 break;
2086 GEPs.push_back(Elt: GEP);
2087 Ptr = GEP->getPointerOperand();
2088 }
2089
2090 // Don't handle the case where the GEP converts from a pointer to a vector.
2091 if (GEPs.empty() || PtrTy != Ptr->getType())
2092 return nullptr;
2093
2094 // Check whether we know the integer value of the base pointer.
2095 Value *Res;
2096 Type *IdxTy = DL.getIndexType(PtrTy);
2097 if (match(V: Ptr, P: m_OneUse(SubPattern: m_IntToPtr(Op: m_Value(V&: Res)))) &&
2098 Res->getType() == IntTy && IntTy == IdxTy) {
2099 // pass
2100 } else if (isa<ConstantPointerNull>(Val: Ptr)) {
2101 Res = Constant::getNullValue(Ty: IdxTy);
2102 } else {
2103 return nullptr;
2104 }
2105
2106 // Perform the entire operation on integers instead.
2107 for (GEPOperator *GEP : reverse(C&: GEPs)) {
2108 Value *Offset = EmitGEPOffset(GEP);
2109 Res = Builder.CreateAdd(LHS: Res, RHS: Offset, Name: "", HasNUW: GEP->hasNoUnsignedWrap());
2110 }
2111 return Builder.CreateZExtOrTrunc(V: Res, DestTy: IntTy);
2112}
2113
2114Instruction *InstCombinerImpl::visitPtrToInt(PtrToIntInst &CI) {
2115 // If the destination integer type is not the intptr_t type for this target,
2116 // do a ptrtoint to intptr_t then do a trunc or zext. This allows the cast
2117 // to be exposed to other transforms.
2118 Value *SrcOp = CI.getPointerOperand();
2119 Type *SrcTy = SrcOp->getType();
2120 Type *Ty = CI.getType();
2121 unsigned AS = CI.getPointerAddressSpace();
2122 unsigned TySize = Ty->getScalarSizeInBits();
2123 unsigned PtrSize = DL.getPointerSizeInBits(AS);
2124 if (TySize != PtrSize) {
2125 Type *IntPtrTy =
2126 SrcTy->getWithNewType(EltTy: DL.getIntPtrType(C&: CI.getContext(), AddressSpace: AS));
2127 Value *P = Builder.CreatePtrToInt(V: SrcOp, DestTy: IntPtrTy);
2128 return CastInst::CreateIntegerCast(S: P, Ty, /*isSigned=*/false);
2129 }
2130
2131 // (ptrtoint (ptrmask P, M))
2132 // -> (and (ptrtoint P), M)
2133 // This is generally beneficial as `and` is better supported than `ptrmask`.
2134 Value *Ptr, *Mask;
2135 if (match(V: SrcOp, P: m_OneUse(SubPattern: m_Intrinsic<Intrinsic::ptrmask>(Op0: m_Value(V&: Ptr),
2136 Op1: m_Value(V&: Mask)))) &&
2137 Mask->getType() == Ty)
2138 return BinaryOperator::CreateAnd(V1: Builder.CreatePtrToInt(V: Ptr, DestTy: Ty), V2: Mask);
2139
2140 if (Value *V = foldPtrToIntOfGEP(IntTy: Ty, Ptr: SrcOp))
2141 return replaceInstUsesWith(I&: CI, V);
2142
2143 Value *Vec, *Scalar, *Index;
2144 if (match(V: SrcOp, P: m_OneUse(SubPattern: m_InsertElt(Val: m_IntToPtr(Op: m_Value(V&: Vec)),
2145 Elt: m_Value(V&: Scalar), Idx: m_Value(V&: Index)))) &&
2146 Vec->getType() == Ty) {
2147 assert(Vec->getType()->getScalarSizeInBits() == PtrSize && "Wrong type");
2148 // Convert the scalar to int followed by insert to eliminate one cast:
2149 // p2i (ins (i2p Vec), Scalar, Index) --> ins Vec, (p2i Scalar), Index
2150 Value *NewCast = Builder.CreatePtrToInt(V: Scalar, DestTy: Ty->getScalarType());
2151 return InsertElementInst::Create(Vec, NewElt: NewCast, Idx: Index);
2152 }
2153
2154 return commonCastTransforms(CI);
2155}
2156
2157/// This input value (which is known to have vector type) is being zero extended
2158/// or truncated to the specified vector type. Since the zext/trunc is done
2159/// using an integer type, we have a (bitcast(cast(bitcast))) pattern, and
2160/// endianness will impact which end of the vector is extended or
2161/// truncated.
2162///
2163/// A vector is always stored with index 0 at the lowest address, which
2164/// corresponds to the most significant bits for a big endian stored integer and
2165/// the least significant bits for little endian. A trunc/zext of an integer
2166/// impacts the big end of the integer. Thus, we need to add/remove elements at
2167/// the front of the vector for big endian targets, and the back of the vector
2168/// for little endian targets.
2169///
2170/// Try to replace it with a shuffle (and vector/vector bitcast) if possible.
2171///
2172/// The source and destination vector types may have different element types.
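/// For example, on a little-endian target:
///   bitcast (zext (bitcast <2 x i32> %v to i64) to i128) to <4 x i32>
///     --> shufflevector <2 x i32> %v, <2 x i32> zeroinitializer,
///                       <4 x i32> <i32 0, i32 1, i32 2, i32 2>
/// On a big-endian target the zero elements are inserted at the front instead.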
2173static Instruction *
2174optimizeVectorResizeWithIntegerBitCasts(Value *InVal, VectorType *DestTy,
2175 InstCombinerImpl &IC) {
2176 // We can only do this optimization if the output is a multiple of the input
2177 // element size, or the input is a multiple of the output element size.
2178 // Convert the input type to have the same element type as the output.
2179 VectorType *SrcTy = cast<VectorType>(Val: InVal->getType());
2180
2181 if (SrcTy->getElementType() != DestTy->getElementType()) {
2182 // The input types don't need to be identical, but for now they must be the
2183 // same size. There is no specific reason we couldn't handle things like
2184 // <4 x i16> -> <4 x i32> by bitcasting to <2 x i32> but haven't gotten
2185 // there yet.
2186 if (SrcTy->getElementType()->getPrimitiveSizeInBits() !=
2187 DestTy->getElementType()->getPrimitiveSizeInBits())
2188 return nullptr;
2189
2190 SrcTy =
2191 FixedVectorType::get(ElementType: DestTy->getElementType(),
2192 NumElts: cast<FixedVectorType>(Val: SrcTy)->getNumElements());
2193 InVal = IC.Builder.CreateBitCast(V: InVal, DestTy: SrcTy);
2194 }
2195
2196 bool IsBigEndian = IC.getDataLayout().isBigEndian();
2197 unsigned SrcElts = cast<FixedVectorType>(Val: SrcTy)->getNumElements();
2198 unsigned DestElts = cast<FixedVectorType>(Val: DestTy)->getNumElements();
2199
2200 assert(SrcElts != DestElts && "Element counts should be different.");
2201
2202 // Now that the element types match, get the shuffle mask and RHS of the
2203 // shuffle to use, which depends on whether we're increasing or decreasing the
2204 // size of the input.
2205 auto ShuffleMaskStorage = llvm::to_vector<16>(Range: llvm::seq<int>(Begin: 0, End: SrcElts));
2206 ArrayRef<int> ShuffleMask;
2207 Value *V2;
2208
2209 if (SrcElts > DestElts) {
2210 // If we're shrinking the number of elements (rewriting an integer
2211 // truncate), just shuffle in the elements corresponding to the least
2212 // significant bits from the input and use poison as the second shuffle
2213 // input.
2214 V2 = PoisonValue::get(T: SrcTy);
2215 // Make sure the shuffle mask selects the "least significant bits" by
2216 // keeping elements from the back of the src vector for big endian, and from the
2217 // front for little endian.
2218 ShuffleMask = ShuffleMaskStorage;
2219 if (IsBigEndian)
2220 ShuffleMask = ShuffleMask.take_back(N: DestElts);
2221 else
2222 ShuffleMask = ShuffleMask.take_front(N: DestElts);
2223 } else {
2224 // If we're increasing the number of elements (rewriting an integer zext),
2225 // shuffle in all of the elements from InVal. Fill the rest of the result
2226 // elements with zeros from a constant zero.
2227 V2 = Constant::getNullValue(Ty: SrcTy);
2228 // Use first elt from V2 when indicating zero in the shuffle mask.
2229 uint32_t NullElt = SrcElts;
2230 // Extend with null values in the "most significant bits" by adding elements
2231 // in front of the src vector for big endian, and at the back for little
2232 // endian.
2233 unsigned DeltaElts = DestElts - SrcElts;
2234 if (IsBigEndian)
2235 ShuffleMaskStorage.insert(I: ShuffleMaskStorage.begin(), NumToInsert: DeltaElts, Elt: NullElt);
2236 else
2237 ShuffleMaskStorage.append(NumInputs: DeltaElts, Elt: NullElt);
2238 ShuffleMask = ShuffleMaskStorage;
2239 }
2240
2241 return new ShuffleVectorInst(InVal, V2, ShuffleMask);
2242}
2243
2244static bool isMultipleOfTypeSize(unsigned Value, Type *Ty) {
2245 return Value % Ty->getPrimitiveSizeInBits() == 0;
2246}
2247
2248static unsigned getTypeSizeIndex(unsigned Value, Type *Ty) {
2249 return Value / Ty->getPrimitiveSizeInBits();
2250}
2251
2252/// V is a value which is inserted into a vector of VecEltTy.
2253/// Look through the value to see if we can decompose it into
2254/// insertions into the vector. See the example in the comment for
2255/// optimizeIntegerToVectorInsertions for the pattern this handles.
2256/// The type of V is always a non-zero multiple of VecEltTy's size.
2257/// Shift is the number of bits between the lsb of V and the lsb of
2258/// the vector.
2259///
2260/// This returns false if the pattern can't be matched or true if it can,
2261/// filling in Elements with the elements found here.
2262static bool collectInsertionElements(Value *V, unsigned Shift,
2263 SmallVectorImpl<Value *> &Elements,
2264 Type *VecEltTy, bool isBigEndian) {
2265 assert(isMultipleOfTypeSize(Shift, VecEltTy) &&
2266 "Shift should be a multiple of the element type size");
2267
2268 // Undef values never contribute useful bits to the result.
2269 if (isa<UndefValue>(Val: V)) return true;
2270
2271 // If we got down to a value of the right type, we win; try inserting it into
2272 // the right element.
2273 if (V->getType() == VecEltTy) {
2274 // Inserting null doesn't actually insert any elements.
2275 if (Constant *C = dyn_cast<Constant>(Val: V))
2276 if (C->isNullValue())
2277 return true;
2278
2279 unsigned ElementIndex = getTypeSizeIndex(Value: Shift, Ty: VecEltTy);
2280 if (isBigEndian)
2281 ElementIndex = Elements.size() - ElementIndex - 1;
2282
2283 // Fail if multiple elements are inserted into this slot.
2284 if (Elements[ElementIndex])
2285 return false;
2286
2287 Elements[ElementIndex] = V;
2288 return true;
2289 }
2290
2291 if (Constant *C = dyn_cast<Constant>(Val: V)) {
2292 // Figure out the # elements this provides, and bitcast it or slice it up
2293 // as required.
2294 unsigned NumElts = getTypeSizeIndex(Value: C->getType()->getPrimitiveSizeInBits(),
2295 Ty: VecEltTy);
2296 // If the constant is the size of a vector element, we just need to bitcast
2297 // it to the right type so it gets properly inserted.
2298 if (NumElts == 1)
2299 return collectInsertionElements(V: ConstantExpr::getBitCast(C, Ty: VecEltTy),
2300 Shift, Elements, VecEltTy, isBigEndian);
2301
2302 // Okay, this is a constant that covers multiple elements. Slice it up into
2303 // pieces and insert each element-sized piece into the vector.
2304 if (!isa<IntegerType>(Val: C->getType()))
2305 C = ConstantExpr::getBitCast(C, Ty: IntegerType::get(C&: V->getContext(),
2306 NumBits: C->getType()->getPrimitiveSizeInBits()));
2307 unsigned ElementSize = VecEltTy->getPrimitiveSizeInBits();
2308 Type *ElementIntTy = IntegerType::get(C&: C->getContext(), NumBits: ElementSize);
2309
2310 for (unsigned i = 0; i != NumElts; ++i) {
2311 unsigned ShiftI = i * ElementSize;
2312 Constant *Piece = ConstantFoldBinaryInstruction(
2313 Opcode: Instruction::LShr, V1: C, V2: ConstantInt::get(Ty: C->getType(), V: ShiftI));
2314 if (!Piece)
2315 return false;
2316
2317 Piece = ConstantExpr::getTrunc(C: Piece, Ty: ElementIntTy);
2318 if (!collectInsertionElements(V: Piece, Shift: ShiftI + Shift, Elements, VecEltTy,
2319 isBigEndian))
2320 return false;
2321 }
2322 return true;
2323 }
2324
2325 if (!V->hasOneUse()) return false;
2326
2327 Instruction *I = dyn_cast<Instruction>(Val: V);
2328 if (!I) return false;
2329 switch (I->getOpcode()) {
2330 default: return false; // Unhandled case.
2331 case Instruction::BitCast:
2332 if (I->getOperand(i: 0)->getType()->isVectorTy())
2333 return false;
2334 return collectInsertionElements(V: I->getOperand(i: 0), Shift, Elements, VecEltTy,
2335 isBigEndian);
2336 case Instruction::ZExt:
2337 if (!isMultipleOfTypeSize(
2338 Value: I->getOperand(i: 0)->getType()->getPrimitiveSizeInBits(),
2339 Ty: VecEltTy))
2340 return false;
2341 return collectInsertionElements(V: I->getOperand(i: 0), Shift, Elements, VecEltTy,
2342 isBigEndian);
2343 case Instruction::Or:
2344 return collectInsertionElements(V: I->getOperand(i: 0), Shift, Elements, VecEltTy,
2345 isBigEndian) &&
2346 collectInsertionElements(V: I->getOperand(i: 1), Shift, Elements, VecEltTy,
2347 isBigEndian);
2348 case Instruction::Shl: {
2349 // Must be shifting by a constant that is a multiple of the element size.
2350 ConstantInt *CI = dyn_cast<ConstantInt>(Val: I->getOperand(i: 1));
2351 if (!CI) return false;
2352 Shift += CI->getZExtValue();
2353 if (!isMultipleOfTypeSize(Value: Shift, Ty: VecEltTy)) return false;
2354 return collectInsertionElements(V: I->getOperand(i: 0), Shift, Elements, VecEltTy,
2355 isBigEndian);
2356 }
2357
2358 }
2359}
2360
2361
2362/// If the input is an 'or' instruction, we may be doing shifts and ors to
2363/// assemble the elements of the vector manually.
2364/// Try to rip the code out and replace it with insertelements. This is to
2365/// optimize code like this:
2366///
2367/// %tmp37 = bitcast float %inc to i32
2368/// %tmp38 = zext i32 %tmp37 to i64
2369/// %tmp31 = bitcast float %inc5 to i32
2370/// %tmp32 = zext i32 %tmp31 to i64
2371/// %tmp33 = shl i64 %tmp32, 32
2372/// %ins35 = or i64 %tmp33, %tmp38
2373/// %tmp43 = bitcast i64 %ins35 to <2 x float>
2374///
2375/// Into two insertelements that do "buildvector{%inc, %inc5}".
2376static Value *optimizeIntegerToVectorInsertions(BitCastInst &CI,
2377 InstCombinerImpl &IC) {
2378 auto *DestVecTy = cast<FixedVectorType>(Val: CI.getType());
2379 Value *IntInput = CI.getOperand(i_nocapture: 0);
2380
2381 // If the int input is just an undef value, do not try to optimize to vector
2382 // insertions, as it will prevent undef propagation.
2383 if (isa<UndefValue>(Val: IntInput))
2384 return nullptr;
2385
2386 SmallVector<Value*, 8> Elements(DestVecTy->getNumElements());
2387 if (!collectInsertionElements(V: IntInput, Shift: 0, Elements,
2388 VecEltTy: DestVecTy->getElementType(),
2389 isBigEndian: IC.getDataLayout().isBigEndian()))
2390 return nullptr;
2391
2392 // If we succeeded, we know that all of the elements are specified by Elements
2393 // or are zero if Elements has a null entry. Recast this as a set of
2394 // insertions.
2395 Value *Result = Constant::getNullValue(Ty: CI.getType());
2396 for (unsigned i = 0, e = Elements.size(); i != e; ++i) {
2397 if (!Elements[i]) continue; // Unset element.
2398
2399 Result = IC.Builder.CreateInsertElement(Vec: Result, NewElt: Elements[i],
2400 Idx: IC.Builder.getInt32(C: i));
2401 }
2402
2403 return Result;
2404}
2405
2406/// Canonicalize scalar bitcasts of extracted elements into a bitcast of the
2407/// vector followed by extract element. The backend tends to handle bitcasts of
2408/// vectors better than bitcasts of scalars because vector registers are
2409/// usually not type-specific like scalar integer or scalar floating-point.
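/// For example:
///   bitcast (extractelement <4 x float> %v, i32 1) to i32
///     --> extractelement (bitcast <4 x float> %v to <4 x i32>), i32 1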
2410static Instruction *canonicalizeBitCastExtElt(BitCastInst &BitCast,
2411 InstCombinerImpl &IC) {
2412 Value *VecOp, *Index;
2413 if (!match(V: BitCast.getOperand(i_nocapture: 0),
2414 P: m_OneUse(SubPattern: m_ExtractElt(Val: m_Value(V&: VecOp), Idx: m_Value(V&: Index)))))
2415 return nullptr;
2416
2417 // The bitcast must be to a vectorizable type, otherwise we can't make a new
2418 // type to extract from.
2419 Type *DestType = BitCast.getType();
2420 VectorType *VecType = cast<VectorType>(Val: VecOp->getType());
2421 if (VectorType::isValidElementType(ElemTy: DestType)) {
2422 auto *NewVecType = VectorType::get(ElementType: DestType, Other: VecType);
2423 auto *NewBC = IC.Builder.CreateBitCast(V: VecOp, DestTy: NewVecType, Name: "bc");
2424 return ExtractElementInst::Create(Vec: NewBC, Idx: Index);
2425 }
2426
2427 // Only handle the case where DestType is a vector, to avoid the inverse
  // transform in visitBitCast.
2428 // bitcast (extractelement <1 x elt>, dest) -> bitcast(<1 x elt>, dest)
2429 auto *FixedVType = dyn_cast<FixedVectorType>(Val: VecType);
2430 if (DestType->isVectorTy() && FixedVType && FixedVType->getNumElements() == 1)
2431 return CastInst::Create(Instruction::BitCast, S: VecOp, Ty: DestType);
2432
2433 return nullptr;
2434}
2435
2436/// Change the type of a bitwise logic operation if we can eliminate a bitcast.
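/// For example, with %a of type <4 x i16> and %b of type <2 x i32>:
///   bitcast (xor (bitcast %a to <2 x i32>), %b) to <4 x i16>
///     --> xor %a, (bitcast %b to <4 x i16>)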
2437static Instruction *foldBitCastBitwiseLogic(BitCastInst &BitCast,
2438 InstCombiner::BuilderTy &Builder) {
2439 Type *DestTy = BitCast.getType();
2440 BinaryOperator *BO;
2441
2442 if (!match(V: BitCast.getOperand(i_nocapture: 0), P: m_OneUse(SubPattern: m_BinOp(I&: BO))) ||
2443 !BO->isBitwiseLogicOp())
2444 return nullptr;
2445
2446 // FIXME: This transform is restricted to vector types to avoid backend
2447 // problems caused by creating potentially illegal operations. If a fix-up is
2448 // added to handle that situation, we can remove this check.
2449 if (!DestTy->isVectorTy() || !BO->getType()->isVectorTy())
2450 return nullptr;
2451
2452 if (DestTy->isFPOrFPVectorTy()) {
2453 Value *X, *Y;
2454 // bitcast(logic(bitcast(X), bitcast(Y))) -> bitcast'(logic(bitcast'(X), Y))
2455 if (match(V: BO->getOperand(i_nocapture: 0), P: m_OneUse(SubPattern: m_BitCast(Op: m_Value(V&: X)))) &&
2456 match(V: BO->getOperand(i_nocapture: 1), P: m_OneUse(SubPattern: m_BitCast(Op: m_Value(V&: Y))))) {
2457 if (X->getType()->isFPOrFPVectorTy() &&
2458 Y->getType()->isIntOrIntVectorTy()) {
2459 Value *CastedOp =
2460 Builder.CreateBitCast(V: BO->getOperand(i_nocapture: 0), DestTy: Y->getType());
2461 Value *NewBO = Builder.CreateBinOp(Opc: BO->getOpcode(), LHS: CastedOp, RHS: Y);
2462 return CastInst::CreateBitOrPointerCast(S: NewBO, Ty: DestTy);
2463 }
2464 if (X->getType()->isIntOrIntVectorTy() &&
2465 Y->getType()->isFPOrFPVectorTy()) {
2466 Value *CastedOp =
2467 Builder.CreateBitCast(V: BO->getOperand(i_nocapture: 1), DestTy: X->getType());
2468 Value *NewBO = Builder.CreateBinOp(Opc: BO->getOpcode(), LHS: CastedOp, RHS: X);
2469 return CastInst::CreateBitOrPointerCast(S: NewBO, Ty: DestTy);
2470 }
2471 }
2472 return nullptr;
2473 }
2474
2475 if (!DestTy->isIntOrIntVectorTy())
2476 return nullptr;
2477
2478 Value *X;
2479 if (match(V: BO->getOperand(i_nocapture: 0), P: m_OneUse(SubPattern: m_BitCast(Op: m_Value(V&: X)))) &&
2480 X->getType() == DestTy && !isa<Constant>(Val: X)) {
2481 // bitcast(logic(bitcast(X), Y)) --> logic'(X, bitcast(Y))
2482 Value *CastedOp1 = Builder.CreateBitCast(V: BO->getOperand(i_nocapture: 1), DestTy);
2483 return BinaryOperator::Create(Op: BO->getOpcode(), S1: X, S2: CastedOp1);
2484 }
2485
2486 if (match(V: BO->getOperand(i_nocapture: 1), P: m_OneUse(SubPattern: m_BitCast(Op: m_Value(V&: X)))) &&
2487 X->getType() == DestTy && !isa<Constant>(Val: X)) {
2488 // bitcast(logic(Y, bitcast(X))) --> logic'(bitcast(Y), X)
2489 Value *CastedOp0 = Builder.CreateBitCast(V: BO->getOperand(i_nocapture: 0), DestTy);
2490 return BinaryOperator::Create(Op: BO->getOpcode(), S1: CastedOp0, S2: X);
2491 }
2492
2493 // Canonicalize vector bitcasts to come before vector bitwise logic with a
2494 // constant. This eases recognition of special constants for later ops.
2495 // Example:
2496 // icmp u/s (a ^ signmask), (b ^ signmask) --> icmp s/u a, b
2497 Constant *C;
2498 if (match(V: BO->getOperand(i_nocapture: 1), P: m_Constant(C))) {
2499 // bitcast (logic X, C) --> logic (bitcast X, C')
2500 Value *CastedOp0 = Builder.CreateBitCast(V: BO->getOperand(i_nocapture: 0), DestTy);
2501 Value *CastedC = Builder.CreateBitCast(V: C, DestTy);
2502 return BinaryOperator::Create(Op: BO->getOpcode(), S1: CastedOp0, S2: CastedC);
2503 }
2504
2505 return nullptr;
2506}
2507
2508/// Change the type of a select if we can eliminate a bitcast.
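/// For example, with %x of type <4 x i16> and %y of type <2 x i32>:
///   bitcast (select %c, (bitcast %x to <2 x i32>), %y) to <4 x i16>
///     --> select %c, %x, (bitcast %y to <4 x i16>)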
2509static Instruction *foldBitCastSelect(BitCastInst &BitCast,
2510 InstCombiner::BuilderTy &Builder) {
2511 Value *Cond, *TVal, *FVal;
2512 if (!match(V: BitCast.getOperand(i_nocapture: 0),
2513 P: m_OneUse(SubPattern: m_Select(C: m_Value(V&: Cond), L: m_Value(V&: TVal), R: m_Value(V&: FVal)))))
2514 return nullptr;
2515
2516 // A vector select must maintain the same number of elements in its operands.
2517 Type *CondTy = Cond->getType();
2518 Type *DestTy = BitCast.getType();
2519 if (auto *CondVTy = dyn_cast<VectorType>(Val: CondTy))
2520 if (!DestTy->isVectorTy() ||
2521 CondVTy->getElementCount() !=
2522 cast<VectorType>(Val: DestTy)->getElementCount())
2523 return nullptr;
2524
2525 // FIXME: This transform is restricted from changing the select between
2526 // scalars and vectors to avoid backend problems caused by creating
2527 // potentially illegal operations. If a fix-up is added to handle that
2528 // situation, we can remove this check.
2529 if (DestTy->isVectorTy() != TVal->getType()->isVectorTy())
2530 return nullptr;
2531
2532 auto *Sel = cast<Instruction>(Val: BitCast.getOperand(i_nocapture: 0));
2533 Value *X;
2534 if (match(V: TVal, P: m_OneUse(SubPattern: m_BitCast(Op: m_Value(V&: X)))) && X->getType() == DestTy &&
2535 !isa<Constant>(Val: X)) {
2536 // bitcast(select(Cond, bitcast(X), Y)) --> select'(Cond, X, bitcast(Y))
2537 Value *CastedVal = Builder.CreateBitCast(V: FVal, DestTy);
2538 return SelectInst::Create(C: Cond, S1: X, S2: CastedVal, NameStr: "", InsertBefore: nullptr, MDFrom: Sel);
2539 }
2540
2541 if (match(V: FVal, P: m_OneUse(SubPattern: m_BitCast(Op: m_Value(V&: X)))) && X->getType() == DestTy &&
2542 !isa<Constant>(Val: X)) {
2543 // bitcast(select(Cond, Y, bitcast(X))) --> select'(Cond, bitcast(Y), X)
2544 Value *CastedVal = Builder.CreateBitCast(V: TVal, DestTy);
2545 return SelectInst::Create(C: Cond, S1: CastedVal, S2: X, NameStr: "", InsertBefore: nullptr, MDFrom: Sel);
2546 }
2547
2548 return nullptr;
2549}
2550
2551/// Check if all users of CI are StoreInsts.
2552static bool hasStoreUsersOnly(CastInst &CI) {
2553 for (User *U : CI.users()) {
2554 if (!isa<StoreInst>(Val: U))
2555 return false;
2556 }
2557 return true;
2558}
2559
2560/// This function handles the following case
2561///
2562/// A -> B cast
2563/// PHI
2564/// B -> A cast
2565///
2566/// All the related PHI nodes can be replaced by new PHI nodes with type A.
2567/// The uses of \p CI can be changed to the new PHI node corresponding to \p PN.
2568Instruction *InstCombinerImpl::optimizeBitCastFromPhi(CastInst &CI,
2569 PHINode *PN) {
2570 // BitCast used by Store can be handled in InstCombineLoadStoreAlloca.cpp.
2571 if (hasStoreUsersOnly(CI))
2572 return nullptr;
2573
2574 Value *Src = CI.getOperand(i_nocapture: 0);
2575 Type *SrcTy = Src->getType(); // Type B
2576 Type *DestTy = CI.getType(); // Type A
2577
2578 SmallVector<PHINode *, 4> PhiWorklist;
2579 SmallSetVector<PHINode *, 4> OldPhiNodes;
2580
2581 // Find all of the A->B casts and PHI nodes.
2582 // We need to inspect all related PHI nodes, but PHIs can be cyclic, so
2583 // OldPhiNodes is used to track all known PHI nodes, before adding a new
2584 // PHI to PhiWorklist, it is checked against and added to OldPhiNodes first.
2585 PhiWorklist.push_back(Elt: PN);
2586 OldPhiNodes.insert(X: PN);
2587 while (!PhiWorklist.empty()) {
2588 auto *OldPN = PhiWorklist.pop_back_val();
2589 for (Value *IncValue : OldPN->incoming_values()) {
2590 if (isa<Constant>(Val: IncValue))
2591 continue;
2592
2593 if (auto *LI = dyn_cast<LoadInst>(Val: IncValue)) {
2594 // If there is a sequence of one or more load instructions, where each
2595 // loaded value is used as the address of a later load, a bitcast is
2596 // necessary to change the value type; don't optimize it. For
2597 // simplicity we give up if the load address comes from another load.
2598 Value *Addr = LI->getOperand(i_nocapture: 0);
2599 if (Addr == &CI || isa<LoadInst>(Val: Addr))
2600 return nullptr;
2601 // Don't transform "load <256 x i32>, <256 x i32>*" to
2602 // "load x86_amx, x86_amx*", because x86_amx* is invalid.
2603 // TODO: Remove this check when bitcast between vector and x86_amx
2604 // is replaced with a specific intrinsic.
2605 if (DestTy->isX86_AMXTy())
2606 return nullptr;
2607 if (LI->hasOneUse() && LI->isSimple())
2608 continue;
2609        // If a LoadInst has more than one use, changing the type of the loaded
2610 // value may create another bitcast.
2611 return nullptr;
2612 }
2613
2614 if (auto *PNode = dyn_cast<PHINode>(Val: IncValue)) {
2615 if (OldPhiNodes.insert(X: PNode))
2616 PhiWorklist.push_back(Elt: PNode);
2617 continue;
2618 }
2619
2620 auto *BCI = dyn_cast<BitCastInst>(Val: IncValue);
2621 // We can't handle other instructions.
2622 if (!BCI)
2623 return nullptr;
2624
2625      // Verify it's an A->B cast.
2626 Type *TyA = BCI->getOperand(i_nocapture: 0)->getType();
2627 Type *TyB = BCI->getType();
2628 if (TyA != DestTy || TyB != SrcTy)
2629 return nullptr;
2630 }
2631 }
2632
2633 // Check that each user of each old PHI node is something that we can
2634 // rewrite, so that all of the old PHI nodes can be cleaned up afterwards.
2635 for (auto *OldPN : OldPhiNodes) {
2636 for (User *V : OldPN->users()) {
2637 if (auto *SI = dyn_cast<StoreInst>(Val: V)) {
2638 if (!SI->isSimple() || SI->getOperand(i_nocapture: 0) != OldPN)
2639 return nullptr;
2640 } else if (auto *BCI = dyn_cast<BitCastInst>(Val: V)) {
2641 // Verify it's a B->A cast.
2642 Type *TyB = BCI->getOperand(i_nocapture: 0)->getType();
2643 Type *TyA = BCI->getType();
2644 if (TyA != DestTy || TyB != SrcTy)
2645 return nullptr;
2646 } else if (auto *PHI = dyn_cast<PHINode>(Val: V)) {
2647        // If the user is another old PHI node, then even if we don't
2648 // rewrite it, the PHI web we're considering won't have any users
2649 // outside itself, so it'll be dead.
2650 if (!OldPhiNodes.contains(key: PHI))
2651 return nullptr;
2652 } else {
2653 return nullptr;
2654 }
2655 }
2656 }
2657
2658  // For each old PHI node, create a corresponding new PHI node with type A.
2659 SmallDenseMap<PHINode *, PHINode *> NewPNodes;
2660 for (auto *OldPN : OldPhiNodes) {
2661 Builder.SetInsertPoint(OldPN);
2662 PHINode *NewPN = Builder.CreatePHI(Ty: DestTy, NumReservedValues: OldPN->getNumOperands());
2663 NewPNodes[OldPN] = NewPN;
2664 }
2665
2666 // Fill in the operands of new PHI nodes.
2667 for (auto *OldPN : OldPhiNodes) {
2668 PHINode *NewPN = NewPNodes[OldPN];
2669 for (unsigned j = 0, e = OldPN->getNumOperands(); j != e; ++j) {
2670 Value *V = OldPN->getOperand(i_nocapture: j);
2671 Value *NewV = nullptr;
2672 if (auto *C = dyn_cast<Constant>(Val: V)) {
2673 NewV = ConstantExpr::getBitCast(C, Ty: DestTy);
2674 } else if (auto *LI = dyn_cast<LoadInst>(Val: V)) {
2675 // Explicitly perform load combine to make sure no opposing transform
2676 // can remove the bitcast in the meantime and trigger an infinite loop.
2677 Builder.SetInsertPoint(LI);
2678 NewV = combineLoadToNewType(LI&: *LI, NewTy: DestTy);
2679 // Remove the old load and its use in the old phi, which itself becomes
2680 // dead once the whole transform finishes.
2681 replaceInstUsesWith(I&: *LI, V: PoisonValue::get(T: LI->getType()));
2682 eraseInstFromFunction(I&: *LI);
2683 } else if (auto *BCI = dyn_cast<BitCastInst>(Val: V)) {
2684 NewV = BCI->getOperand(i_nocapture: 0);
2685 } else if (auto *PrevPN = dyn_cast<PHINode>(Val: V)) {
2686 NewV = NewPNodes[PrevPN];
2687 }
2688 assert(NewV);
2689 NewPN->addIncoming(V: NewV, BB: OldPN->getIncomingBlock(i: j));
2690 }
2691 }
2692
2693  // Traverse all accumulated PHI nodes and process their users,
2694  // which are Stores and BitCasts. Without this processing
2695 // NewPHI nodes could be replicated and could lead to extra
2696 // moves generated after DeSSA.
2697 // If there is a store with type B, change it to type A.
2698
2699
2700 // Replace users of BitCast B->A with NewPHI. These will help
2701  // later to get rid of the PHI web formed by the old PHI nodes.
2702 Instruction *RetVal = nullptr;
2703 for (auto *OldPN : OldPhiNodes) {
2704 PHINode *NewPN = NewPNodes[OldPN];
2705 for (User *V : make_early_inc_range(Range: OldPN->users())) {
2706 if (auto *SI = dyn_cast<StoreInst>(Val: V)) {
2707 assert(SI->isSimple() && SI->getOperand(0) == OldPN);
2708 Builder.SetInsertPoint(SI);
2709 auto *NewBC =
2710 cast<BitCastInst>(Val: Builder.CreateBitCast(V: NewPN, DestTy: SrcTy));
2711 SI->setOperand(i_nocapture: 0, Val_nocapture: NewBC);
2712 Worklist.push(I: SI);
2713 assert(hasStoreUsersOnly(*NewBC));
2714      } else if (auto *BCI = dyn_cast<BitCastInst>(Val: V)) {
2716 Type *TyB = BCI->getOperand(i_nocapture: 0)->getType();
2717 Type *TyA = BCI->getType();
2718 assert(TyA == DestTy && TyB == SrcTy);
2719 (void) TyA;
2720 (void) TyB;
2721 Instruction *I = replaceInstUsesWith(I&: *BCI, V: NewPN);
2722 if (BCI == &CI)
2723 RetVal = I;
2724 } else if (auto *PHI = dyn_cast<PHINode>(Val: V)) {
2725 assert(OldPhiNodes.contains(PHI));
2726 (void) PHI;
2727 } else {
2728 llvm_unreachable("all uses should be handled");
2729 }
2730 }
2731 }
2732
2733 return RetVal;
2734}
2735
2736/// Fold (bitcast (or (and (bitcast X to int), signmask), nneg Y) to fp) to
2737/// copysign((bitcast Y to fp), X)
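///
/// For illustration only, a hypothetical IR sketch (assuming f32, whose sign
/// mask is 0x80000000, and with %yi known to be non-negative):
///
///   %xi = bitcast float %x to i32
///   %s  = and i32 %xi, -2147483648        ; isolate the sign bit of %x
///   %o  = or i32 %s, %yi
///   %r  = bitcast i32 %o to float
/// -->
///   %yf = bitcast i32 %yi to float
///   %r  = call float @llvm.copysign.f32(float %yf, float %x)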
2738static Value *foldCopySignIdioms(BitCastInst &CI,
2739 InstCombiner::BuilderTy &Builder,
2740 const SimplifyQuery &SQ) {
2741 Value *X, *Y;
2742 Type *FTy = CI.getType();
2743 if (!FTy->isFPOrFPVectorTy())
2744 return nullptr;
2745 if (!match(V: &CI, P: m_ElementWiseBitCast(Op: m_c_Or(
2746 L: m_And(L: m_ElementWiseBitCast(Op: m_Value(V&: X)), R: m_SignMask()),
2747 R: m_Value(V&: Y)))))
2748 return nullptr;
2749 if (X->getType() != FTy)
2750 return nullptr;
2751 if (!isKnownNonNegative(V: Y, SQ))
2752 return nullptr;
2753
2754 return Builder.CreateCopySign(LHS: Builder.CreateBitCast(V: Y, DestTy: FTy), RHS: X);
2755}
2756
2757Instruction *InstCombinerImpl::visitBitCast(BitCastInst &CI) {
2758  // If the operands are integer-typed, then apply the integer transforms;
2759  // otherwise just apply the common ones.
2760 Value *Src = CI.getOperand(i_nocapture: 0);
2761 Type *SrcTy = Src->getType();
2762 Type *DestTy = CI.getType();
2763
2764 // Get rid of casts from one type to the same type. These are useless and can
2765 // be replaced by the operand.
2766  if (DestTy == SrcTy)
2767 return replaceInstUsesWith(I&: CI, V: Src);
2768
2769 if (isa<FixedVectorType>(Val: DestTy)) {
2770 if (isa<IntegerType>(Val: SrcTy)) {
2771      // If this is a cast from an integer to a vector, check to see if the
2772      // input is a trunc or zext of a bitcast from a vector. If so, we can
2773      // replace all the casts with a shuffle and (potentially) a bitcast.
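      //
      // For example (hypothetical, assuming a little-endian layout):
      //
      //   %int = bitcast <4 x i32> %v to i128
      //   %tr  = trunc i128 %int to i64
      //   %res = bitcast i64 %tr to <2 x i32>
      //
      // can roughly become a shufflevector that extracts the low two elements
      // of %v (plus a bitcast if the element types differ).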
2774 if (isa<TruncInst>(Val: Src) || isa<ZExtInst>(Val: Src)) {
2775 CastInst *SrcCast = cast<CastInst>(Val: Src);
2776 if (BitCastInst *BCIn = dyn_cast<BitCastInst>(Val: SrcCast->getOperand(i_nocapture: 0)))
2777 if (isa<VectorType>(Val: BCIn->getOperand(i_nocapture: 0)->getType()))
2778 if (Instruction *I = optimizeVectorResizeWithIntegerBitCasts(
2779 InVal: BCIn->getOperand(i_nocapture: 0), DestTy: cast<VectorType>(Val: DestTy), IC&: *this))
2780 return I;
2781 }
2782
2783 // If the input is an 'or' instruction, we may be doing shifts and ors to
2784 // assemble the elements of the vector manually. Try to rip the code out
2785 // and replace it with insertelements.
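      //
      // For example (hypothetical, assuming a little-endian layout):
      //
      //   %lo = zext i32 %a to i64
      //   %hz = zext i32 %b to i64
      //   %hi = shl i64 %hz, 32
      //   %or = or i64 %lo, %hi
      //   %v  = bitcast i64 %or to <2 x i32>
      //
      // can roughly become two insertelements of %a and %b into a <2 x i32>.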
2786 if (Value *V = optimizeIntegerToVectorInsertions(CI, IC&: *this))
2787 return replaceInstUsesWith(I&: CI, V);
2788 }
2789 }
2790
2791 if (FixedVectorType *SrcVTy = dyn_cast<FixedVectorType>(Val: SrcTy)) {
2792 if (SrcVTy->getNumElements() == 1) {
2793 // If our destination is not a vector, then make this a straight
2794 // scalar-scalar cast.
2795 if (!DestTy->isVectorTy()) {
2796 Value *Elem =
2797 Builder.CreateExtractElement(Vec: Src,
2798 Idx: Constant::getNullValue(Ty: Type::getInt32Ty(C&: CI.getContext())));
2799 return CastInst::Create(Instruction::BitCast, S: Elem, Ty: DestTy);
2800 }
2801
2802 // Otherwise, see if our source is an insert. If so, then use the scalar
2803 // component directly:
2804 // bitcast (inselt <1 x elt> V, X, 0) to <n x m> --> bitcast X to <n x m>
2805 if (auto *InsElt = dyn_cast<InsertElementInst>(Val: Src))
2806 return new BitCastInst(InsElt->getOperand(i_nocapture: 1), DestTy);
2807 }
2808
2809 // Convert an artificial vector insert into more analyzable bitwise logic.
2810 unsigned BitWidth = DestTy->getScalarSizeInBits();
2811 Value *X, *Y;
2812 uint64_t IndexC;
2813 if (match(V: Src, P: m_OneUse(SubPattern: m_InsertElt(Val: m_OneUse(SubPattern: m_BitCast(Op: m_Value(V&: X))),
2814 Elt: m_Value(V&: Y), Idx: m_ConstantInt(V&: IndexC)))) &&
2815 DestTy->isIntegerTy() && X->getType() == DestTy &&
2816 Y->getType()->isIntegerTy() && isDesirableIntType(BitWidth)) {
2817 // Adjust for big endian - the LSBs are at the high index.
2818 if (DL.isBigEndian())
2819 IndexC = SrcVTy->getNumElements() - 1 - IndexC;
2820
2821 // We only handle (endian-normalized) insert to index 0. Any other insert
2822 // would require a left-shift, so that is an extra instruction.
2823 if (IndexC == 0) {
2824 // bitcast (inselt (bitcast X), Y, 0) --> or (and X, MaskC), (zext Y)
2825 unsigned EltWidth = Y->getType()->getScalarSizeInBits();
2826 APInt MaskC = APInt::getHighBitsSet(numBits: BitWidth, hiBitsSet: BitWidth - EltWidth);
2827 Value *AndX = Builder.CreateAnd(LHS: X, RHS: MaskC);
2828 Value *ZextY = Builder.CreateZExt(V: Y, DestTy);
2829 return BinaryOperator::CreateOr(V1: AndX, V2: ZextY);
2830 }
2831 }
2832 }
2833
2834 if (auto *Shuf = dyn_cast<ShuffleVectorInst>(Val: Src)) {
2835 // Okay, we have (bitcast (shuffle ..)). Check to see if this is
2836 // a bitcast to a vector with the same # elts.
2837 Value *ShufOp0 = Shuf->getOperand(i_nocapture: 0);
2838 Value *ShufOp1 = Shuf->getOperand(i_nocapture: 1);
2839 auto ShufElts = cast<VectorType>(Val: Shuf->getType())->getElementCount();
2840 auto SrcVecElts = cast<VectorType>(Val: ShufOp0->getType())->getElementCount();
2841 if (Shuf->hasOneUse() && DestTy->isVectorTy() &&
2842 cast<VectorType>(Val: DestTy)->getElementCount() == ShufElts &&
2843 ShufElts == SrcVecElts) {
2844 BitCastInst *Tmp;
2845 // If either of the operands is a cast from CI.getType(), then
2846 // evaluating the shuffle in the casted destination's type will allow
2847 // us to eliminate at least one cast.
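        //
        // For example (hypothetical), if %x already has the destination type:
        //
        //   %a = bitcast <4 x float> %x to <4 x i32>
        //   %s = shufflevector <4 x i32> %a, <4 x i32> %b,
        //                      <4 x i32> <i32 0, i32 5, i32 2, i32 7>
        //   %r = bitcast <4 x i32> %s to <4 x float>
        //
        // can roughly become a shuffle of %x and (bitcast %b to <4 x float>),
        // removing at least one bitcast.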
2848 if (((Tmp = dyn_cast<BitCastInst>(Val: ShufOp0)) &&
2849 Tmp->getOperand(i_nocapture: 0)->getType() == DestTy) ||
2850 ((Tmp = dyn_cast<BitCastInst>(Val: ShufOp1)) &&
2851 Tmp->getOperand(i_nocapture: 0)->getType() == DestTy)) {
2852 Value *LHS = Builder.CreateBitCast(V: ShufOp0, DestTy);
2853 Value *RHS = Builder.CreateBitCast(V: ShufOp1, DestTy);
2854        // Return a new shuffle vector. Use the same element IDs, as we
2855        // know the vector types have the same number of elements.
2856 return new ShuffleVectorInst(LHS, RHS, Shuf->getShuffleMask());
2857 }
2858 }
2859
2860    // A byte/bit-reversing shuffle that is bitcast to a scalar is better
2861    // recognized as a byte/bit swap:
2862 // bitcast <N x i8> (shuf X, undef, <N, N-1,...0>) -> bswap (bitcast X)
2863 // bitcast <N x i1> (shuf X, undef, <N, N-1,...0>) -> bitreverse (bitcast X)
2864 if (DestTy->isIntegerTy() && ShufElts.getKnownMinValue() % 2 == 0 &&
2865 Shuf->hasOneUse() && Shuf->isReverse()) {
2866 unsigned IntrinsicNum = 0;
2867 if (DL.isLegalInteger(Width: DestTy->getScalarSizeInBits()) &&
2868 SrcTy->getScalarSizeInBits() == 8) {
2869 IntrinsicNum = Intrinsic::bswap;
2870 } else if (SrcTy->getScalarSizeInBits() == 1) {
2871 IntrinsicNum = Intrinsic::bitreverse;
2872 }
2873 if (IntrinsicNum != 0) {
2874 assert(ShufOp0->getType() == SrcTy && "Unexpected shuffle mask");
2875 assert(match(ShufOp1, m_Undef()) && "Unexpected shuffle op");
2876 Function *BswapOrBitreverse = Intrinsic::getOrInsertDeclaration(
2877 M: CI.getModule(), id: IntrinsicNum, Tys: DestTy);
2878 Value *ScalarX = Builder.CreateBitCast(V: ShufOp0, DestTy);
2879 return CallInst::Create(Func: BswapOrBitreverse, Args: {ScalarX});
2880 }
2881 }
2882 }
2883
2884  // Handle the A->B->A cast where there is an intervening PHI node.
2885 if (PHINode *PN = dyn_cast<PHINode>(Val: Src))
2886 if (Instruction *I = optimizeBitCastFromPhi(CI, PN))
2887 return I;
2888
2889 if (Instruction *I = canonicalizeBitCastExtElt(BitCast&: CI, IC&: *this))
2890 return I;
2891
2892 if (Instruction *I = foldBitCastBitwiseLogic(BitCast&: CI, Builder))
2893 return I;
2894
2895 if (Instruction *I = foldBitCastSelect(BitCast&: CI, Builder))
2896 return I;
2897
2898 if (Value *V = foldCopySignIdioms(CI, Builder, SQ: SQ.getWithInstruction(I: &CI)))
2899 return replaceInstUsesWith(I&: CI, V);
2900
2901 return commonCastTransforms(CI);
2902}
2903
2904Instruction *InstCombinerImpl::visitAddrSpaceCast(AddrSpaceCastInst &CI) {
2905 return commonCastTransforms(CI);
2906}
2907