InstCombineCasts.cpp source code [llvm_projects/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp]

1	//===- InstCombineCasts.cpp -----------------------------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This file implements the visit functions for cast operations.
10	//
11	//===----------------------------------------------------------------------===//
12
13	#include "InstCombineInternal.h"
14	#include "llvm/ADT/SetVector.h"
15	#include "llvm/Analysis/ConstantFolding.h"
16	#include "llvm/IR/DataLayout.h"
17	#include "llvm/IR/DebugInfo.h"
18	#include "llvm/IR/PatternMatch.h"
19	#include "llvm/Support/KnownBits.h"
20	#include "llvm/Transforms/InstCombine/InstCombiner.h"
21	#include <optional>
22
23	using namespace llvm;
24	using namespace PatternMatch;
25
26	#define DEBUG_TYPE "instcombine"
27
28	/// Given an expression that CanEvaluateTruncated or CanEvaluateSExtd returns
29	/// true for, actually insert the code to evaluate the expression.
30	Value InstCombinerImpl::EvaluateInDifferentType(Value V, Type *Ty,
31	bool isSigned) {
32	if (Constant *C = dyn_cast<Constant>(Val: V))
33	return ConstantFoldIntegerCast(C, DestTy: Ty, IsSigned: isSigned, DL);
34
35	// Otherwise, it must be an instruction.
36	Instruction *I = cast<Instruction>(Val: V);
37	Instruction Res = nullptr*;
38	unsigned Opc = I->getOpcode();
39	switch (Opc) {
40	case Instruction::Add:
41	case Instruction::Sub:
42	case Instruction::Mul:
43	case Instruction::And:
44	case Instruction::Or:
45	case Instruction::Xor:
46	case Instruction::AShr:
47	case Instruction::LShr:
48	case Instruction::Shl:
49	case Instruction::UDiv:
50	case Instruction::URem: {
51	Value *LHS = EvaluateInDifferentType(V: I->getOperand(i: `0`), Ty, isSigned);
52	Value *RHS = EvaluateInDifferentType(V: I->getOperand(i: `1`), Ty, isSigned);
53	Res = BinaryOperator::Create(Op: (Instruction::BinaryOps)Opc, S1: LHS, S2: RHS);
54	if (Opc == Instruction::LShr \|\| Opc == Instruction::AShr)
55	Res->setIsExact(I->isExact());
56	break;
57	}
58	case Instruction::Trunc:
59	case Instruction::ZExt:
60	case Instruction::SExt:
61	// If the source type of the cast is the type we're trying for then we can
62	// just return the source. There's no need to insert it because it is not
63	// new.
64	if (I->getOperand(i: `0`)->getType() == Ty)
65	return I->getOperand(i: `0`);
66
67	// Otherwise, must be the same type of cast, so just reinsert a new one.
68	// This also handles the case of zext(trunc(x)) -> zext(x).
69	Res = CastInst::CreateIntegerCast(S: I->getOperand(i: `0`), Ty,
70	isSigned: Opc == Instruction::SExt);
71	break;
72	case Instruction::Select: {
73	Value *True = EvaluateInDifferentType(V: I->getOperand(i: `1`), Ty, isSigned);
74	Value *False = EvaluateInDifferentType(V: I->getOperand(i: `2`), Ty, isSigned);
75	Res = SelectInst::Create(C: I->getOperand(i: `0`), S1: True, S2: False);
76	break;
77	}
78	case Instruction::PHI: {
79	PHINode *OPN = cast<PHINode>(Val: I);
80	PHINode *NPN = PHINode::Create(Ty, NumReservedValues: OPN->getNumIncomingValues());
81	for (unsigned i = `0`, e = OPN->getNumIncomingValues(); i != e; ++i) {
82	Value *V =
83	EvaluateInDifferentType(V: OPN->getIncomingValue(i), Ty, isSigned);
84	NPN->addIncoming(V, BB: OPN->getIncomingBlock(i));
85	}
86	Res = NPN;
87	break;
88	}
89	case Instruction::FPToUI:
90	case Instruction::FPToSI:
91	Res = CastInst::Create(
92	static_cast<Instruction::CastOps>(Opc), S: I->getOperand(i: `0`), Ty);
93	break;
94	case Instruction::Call:
95	if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: I)) {
96	switch (II->getIntrinsicID()) {
97	default:
98	llvm_unreachable("Unsupported call!");
99	case Intrinsic::vscale: {
100	Function *Fn = Intrinsic::getOrInsertDeclaration(
101	M: I->getModule(), id: Intrinsic::vscale, Tys: {Ty});
102	Res = CallInst::Create(Ty: Fn->getFunctionType(), F: Fn);
103	break;
104	}
105	}
106	}
107	break;
108	case Instruction::ShuffleVector: {
109	auto *ScalarTy = cast<VectorType>(Val: Ty)->getElementType();
110	auto *VTy = cast<VectorType>(Val: I->getOperand(i: `0`)->getType());
111	auto *FixedTy = VectorType::get(ElementType: ScalarTy, EC: VTy->getElementCount());
112	Value *Op0 = EvaluateInDifferentType(V: I->getOperand(i: `0`), Ty: FixedTy, isSigned);
113	Value *Op1 = EvaluateInDifferentType(V: I->getOperand(i: `1`), Ty: FixedTy, isSigned);
114	Res = new ShuffleVectorInst (Op0, Op1,
115	cast<ShuffleVectorInst>(Val: I)->getShuffleMask());
116	break;
117	}
118	default:
119	// TODO: Can handle more cases here.
120	llvm_unreachable("Unreachable!");
121	}
122
123	Res->takeName(V: I);
124	return InsertNewInstWith(New: Res, Old: I->getIterator());
125	}
126
127	Instruction::CastOps
128	InstCombinerImpl::isEliminableCastPair(const CastInst *CI1,
129	const CastInst *CI2) {
130	Type *SrcTy = CI1->getSrcTy();
131	Type *MidTy = CI1->getDestTy();
132	Type *DstTy = CI2->getDestTy();
133
134	Instruction::CastOps firstOp = CI1->getOpcode();
135	Instruction::CastOps secondOp = CI2->getOpcode();
136	Type *SrcIntPtrTy =
137	SrcTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(SrcTy) : nullptr;
138	Type *MidIntPtrTy =
139	MidTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(MidTy) : nullptr;
140	Type *DstIntPtrTy =
141	DstTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(DstTy) : nullptr;
142	unsigned Res = CastInst::isEliminableCastPair(firstOpcode: firstOp, secondOpcode: secondOp, SrcTy, MidTy,
143	DstTy, SrcIntPtrTy, MidIntPtrTy,
144	DstIntPtrTy);
145
146	// We don't want to form an inttoptr or ptrtoint that converts to an integer
147	// type that differs from the pointer size.
148	if ((Res == Instruction::IntToPtr && SrcTy != DstIntPtrTy) \|\|
149	(Res == Instruction::PtrToInt && DstTy != SrcIntPtrTy))
150	Res = `0`;
151
152	return Instruction::CastOps(Res);
153	}
154
155	/// Implement the transforms common to all CastInst visitors.
156	Instruction *InstCombinerImpl::commonCastTransforms(CastInst &CI) {
157	Value *Src = CI.getOperand(i_nocapture: `0`);
158	Type *Ty = CI.getType();
159
160	if (auto *SrcC = dyn_cast<Constant>(Val: Src))
161	if (Constant *Res = ConstantFoldCastOperand(Opcode: CI.getOpcode(), C: SrcC, DestTy: Ty, DL))
162	return replaceInstUsesWith(I&: CI, V: Res);
163
164	// Try to eliminate a cast of a cast.
165	if (auto CSrc = dyn_cast<CastInst>(Val: Src)) { // A->B->C cast*
166	if (Instruction::CastOps NewOpc = isEliminableCastPair(CI1: CSrc, CI2: &CI)) {
167	// The first cast (CSrc) is eliminable so we need to fix up or replace
168	// the second cast (CI). CSrc will then have a good chance of being dead.
169	auto *Res = CastInst::Create(NewOpc, S: CSrc->getOperand(i_nocapture: `0`), Ty);
170	// Point debug users of the dying cast to the new one.
171	if (CSrc->hasOneUse())
172	replaceAllDbgUsesWith(From&: CSrc, To&: Res, DomPoint&: CI, DT);
173	return Res;
174	}
175	}
176
177	if (auto *Sel = dyn_cast<SelectInst>(Val: Src)) {
178	// We are casting a select. Try to fold the cast into the select if the
179	// select does not have a compare instruction with matching operand types
180	// or the select is likely better done in a narrow type.
181	// Creating a select with operands that are different sizes than its
182	// condition may inhibit other folds and lead to worse codegen.
183	auto *Cmp = dyn_cast<CmpInst>(Val: Sel->getCondition());
184	if (!Cmp \|\| Cmp->getOperand(i_nocapture: `0`)->getType() != Sel->getType() \|\|
185	(CI.getOpcode() == Instruction::Trunc &&
186	shouldChangeType(From: CI.getSrcTy(), To: CI.getType()))) {
187
188	// If it's a bitcast involving vectors, make sure it has the same number
189	// of elements on both sides.
190	if (CI.getOpcode() != Instruction::BitCast \|\|
191	match(V: &CI, P: m_ElementWiseBitCast(Op: m_Value()))) {
192	if (Instruction *NV = FoldOpIntoSelect(Op&: CI, SI: Sel)) {
193	replaceAllDbgUsesWith(From&: Sel, To&: NV, DomPoint&: CI, DT);
194	return NV;
195	}
196	}
197	}
198	}
199
200	// If we are casting a PHI, then fold the cast into the PHI.
201	if (auto *PN = dyn_cast<PHINode>(Val: Src)) {
202	// Don't do this if it would create a PHI node with an illegal type from a
203	// legal type.
204	if (!Src->getType()->isIntegerTy() \|\| !CI.getType()->isIntegerTy() \|\|
205	shouldChangeType(From: CI.getSrcTy(), To: CI.getType()))
206	if (Instruction *NV = foldOpIntoPhi(I&: CI, PN))
207	return NV;
208	}
209
210	// Canonicalize a unary shuffle after the cast if neither operation changes
211	// the size or element size of the input vector.
212	// TODO: We could allow size-changing ops if that doesn't harm codegen.
213	// cast (shuffle X, Mask) --> shuffle (cast X), Mask
214	Value *X;
215	ArrayRef<int> Mask;
216	if (match(V: Src, P: m_OneUse(SubPattern: m_Shuffle(v1: m_Value(V&: X), v2: m_Undef(), mask: m_Mask (Mask))))) {
217	// TODO: Allow scalable vectors?
218	auto *SrcTy = dyn_cast<FixedVectorType>(Val: X->getType());
219	auto *DestTy = dyn_cast<FixedVectorType>(Val: Ty);
220	if (SrcTy && DestTy &&
221	SrcTy->getNumElements() == DestTy->getNumElements() &&
222	SrcTy->getPrimitiveSizeInBits() == DestTy->getPrimitiveSizeInBits()) {
223	Value *CastX = Builder.CreateCast(Op: CI.getOpcode(), V: X, DestTy);
224	return new ShuffleVectorInst (CastX, Mask);
225	}
226	}
227
228	return nullptr;
229	}
230
231	/// Constants and extensions/truncates from the destination type are always
232	/// free to be evaluated in that type. This is a helper for canEvaluate.*
233	static bool canAlwaysEvaluateInType(Value V, Type Ty) {
234	if (isa<Constant>(Val: V))
235	return match(V, P: m_ImmConstant());
236
237	Value *X;
238	if ((match(V, P: m_ZExtOrSExt(Op: m_Value(V&: X))) \|\| match(V, P: m_Trunc(Op: m_Value(V&: X)))) &&
239	X->getType() == Ty)
240	return true;
241
242	return false;
243	}
244
245	/// Filter out values that we can not evaluate in the destination type for free.
246	/// This is a helper for canEvaluate.*
247	static bool canNotEvaluateInType(Value V, Type Ty) {
248	if (!isa<Instruction>(Val: V))
249	return true;
250	// We don't extend or shrink something that has multiple uses -- doing so
251	// would require duplicating the instruction which isn't profitable.
252	if (!V->hasOneUse())
253	return true;
254
255	return false;
256	}
257
258	/// Return true if we can evaluate the specified expression tree as type Ty
259	/// instead of its larger type, and arrive with the same value.
260	/// This is used by code that tries to eliminate truncates.
261	///
262	/// Ty will always be a type smaller than V. We should return true if trunc(V)
263	/// can be computed by computing V in the smaller type. If V is an instruction,
264	/// then trunc(inst(x,y)) can be computed as inst(trunc(x),trunc(y)), which only
265	/// makes sense if x and y can be efficiently truncated.
266	///
267	/// This function works on both vectors and scalars.
268	///
269	static bool canEvaluateTruncated(Value V, Type Ty, InstCombinerImpl &IC,
270	Instruction *CxtI) {
271	if (canAlwaysEvaluateInType(V, Ty))
272	return true;
273	if (canNotEvaluateInType(V, Ty))
274	return false;
275
276	auto *I = cast<Instruction>(Val: V);
277	Type *OrigTy = V->getType();
278	switch (I->getOpcode()) {
279	case Instruction::Add:
280	case Instruction::Sub:
281	case Instruction::Mul:
282	case Instruction::And:
283	case Instruction::Or:
284	case Instruction::Xor:
285	// These operators can all arbitrarily be extended or truncated.
286	return canEvaluateTruncated(V: I->getOperand(i: `0`), Ty, IC, CxtI) &&
287	canEvaluateTruncated(V: I->getOperand(i: `1`), Ty, IC, CxtI);
288
289	case Instruction::UDiv:
290	case Instruction::URem: {
291	// UDiv and URem can be truncated if all the truncated bits are zero.
292	uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
293	uint32_t BitWidth = Ty->getScalarSizeInBits();
294	assert(BitWidth < OrigBitWidth && "Unexpected bitwidths!");
295	APInt Mask = APInt::getBitsSetFrom(numBits: OrigBitWidth, loBit: BitWidth);
296	// Do not preserve the original context instruction. Simplifying div/rem
297	// based on later context may introduce a trap.
298	if (IC.MaskedValueIsZero(V: I->getOperand(i: `0`), Mask, CxtI: I) &&
299	IC.MaskedValueIsZero(V: I->getOperand(i: `1`), Mask, CxtI: I)) {
300	return canEvaluateTruncated(V: I->getOperand(i: `0`), Ty, IC, CxtI: I) &&
301	canEvaluateTruncated(V: I->getOperand(i: `1`), Ty, IC, CxtI: I);
302	}
303	break;
304	}
305	case Instruction::Shl: {
306	// If we are truncating the result of this SHL, and if it's a shift of an
307	// inrange amount, we can always perform a SHL in a smaller type.
308	uint32_t BitWidth = Ty->getScalarSizeInBits();
309	KnownBits AmtKnownBits =
310	llvm::computeKnownBits(V: I->getOperand(i: `1`), DL: IC.getDataLayout());
311	if (AmtKnownBits.getMaxValue().ult(RHS: BitWidth))
312	return canEvaluateTruncated(V: I->getOperand(i: `0`), Ty, IC, CxtI) &&
313	canEvaluateTruncated(V: I->getOperand(i: `1`), Ty, IC, CxtI);
314	break;
315	}
316	case Instruction::LShr: {
317	// If this is a truncate of a logical shr, we can truncate it to a smaller
318	// lshr iff we know that the bits we would otherwise be shifting in are
319	// already zeros.
320	// TODO: It is enough to check that the bits we would be shifting in are
321	// zero - use AmtKnownBits.getMaxValue().
322	uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
323	uint32_t BitWidth = Ty->getScalarSizeInBits();
324	KnownBits AmtKnownBits = IC.computeKnownBits(V: I->getOperand(i: `1`), CxtI);
325	APInt MaxShiftAmt = AmtKnownBits.getMaxValue();
326	APInt ShiftedBits = APInt::getBitsSetFrom(numBits: OrigBitWidth, loBit: BitWidth);
327	if (MaxShiftAmt.ult(RHS: BitWidth)) {
328	// If the only user is a trunc then we can narrow the shift if any new
329	// MSBs are not going to be used.
330	if (auto *Trunc = dyn_cast<TruncInst>(Val: V->user_back())) {
331	auto DemandedBits = Trunc->getType()->getScalarSizeInBits();
332	if ((MaxShiftAmt + DemandedBits).ule(RHS: BitWidth))
333	return canEvaluateTruncated(V: I->getOperand(i: `0`), Ty, IC, CxtI) &&
334	canEvaluateTruncated(V: I->getOperand(i: `1`), Ty, IC, CxtI);
335	}
336	if (IC.MaskedValueIsZero(V: I->getOperand(i: `0`), Mask: ShiftedBits, CxtI))
337	return canEvaluateTruncated(V: I->getOperand(i: `0`), Ty, IC, CxtI) &&
338	canEvaluateTruncated(V: I->getOperand(i: `1`), Ty, IC, CxtI);
339	}
340	break;
341	}
342	case Instruction::AShr: {
343	// If this is a truncate of an arithmetic shr, we can truncate it to a
344	// smaller ashr iff we know that all the bits from the sign bit of the
345	// original type and the sign bit of the truncate type are similar.
346	// TODO: It is enough to check that the bits we would be shifting in are
347	// similar to sign bit of the truncate type.
348	uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
349	uint32_t BitWidth = Ty->getScalarSizeInBits();
350	KnownBits AmtKnownBits =
351	llvm::computeKnownBits(V: I->getOperand(i: `1`), DL: IC.getDataLayout());
352	unsigned ShiftedBits = OrigBitWidth - BitWidth;
353	if (AmtKnownBits.getMaxValue().ult(RHS: BitWidth) &&
354	ShiftedBits < IC.ComputeNumSignBits(Op: I->getOperand(i: `0`), CxtI))
355	return canEvaluateTruncated(V: I->getOperand(i: `0`), Ty, IC, CxtI) &&
356	canEvaluateTruncated(V: I->getOperand(i: `1`), Ty, IC, CxtI);
357	break;
358	}
359	case Instruction::Trunc:
360	// trunc(trunc(x)) -> trunc(x)
361	return true;
362	case Instruction::ZExt:
363	case Instruction::SExt:
364	// trunc(ext(x)) -> ext(x) if the source type is smaller than the new dest
365	// trunc(ext(x)) -> trunc(x) if the source type is larger than the new dest
366	return true;
367	case Instruction::Select: {
368	SelectInst *SI = cast<SelectInst>(Val: I);
369	return canEvaluateTruncated(V: SI->getTrueValue(), Ty, IC, CxtI) &&
370	canEvaluateTruncated(V: SI->getFalseValue(), Ty, IC, CxtI);
371	}
372	case Instruction::PHI: {
373	// We can change a phi if we can change all operands. Note that we never
374	// get into trouble with cyclic PHIs here because we only consider
375	// instructions with a single use.
376	PHINode *PN = cast<PHINode>(Val: I);
377	for (Value *IncValue : PN->incoming_values())
378	if (!canEvaluateTruncated(V: IncValue, Ty, IC, CxtI))
379	return false;
380	return true;
381	}
382	case Instruction::FPToUI:
383	case Instruction::FPToSI: {
384	// If the integer type can hold the max FP value, it is safe to cast
385	// directly to that type. Otherwise, we may create poison via overflow
386	// that did not exist in the original code.
387	Type *InputTy = I->getOperand(i: `0`)->getType()->getScalarType();
388	const fltSemantics &Semantics = InputTy->getFltSemantics();
389	uint32_t MinBitWidth =
390	APFloatBase::semanticsIntSizeInBits(Semantics,
391	I->getOpcode() == Instruction::FPToSI);
392	return Ty->getScalarSizeInBits() >= MinBitWidth;
393	}
394	case Instruction::ShuffleVector:
395	return canEvaluateTruncated(V: I->getOperand(i: `0`), Ty, IC, CxtI) &&
396	canEvaluateTruncated(V: I->getOperand(i: `1`), Ty, IC, CxtI);
397	default:
398	// TODO: Can handle more cases here.
399	break;
400	}
401
402	return false;
403	}
404
405	/// Given a vector that is bitcast to an integer, optionally logically
406	/// right-shifted, and truncated, convert it to an extractelement.
407	/// Example (big endian):
408	/// trunc (lshr (bitcast <4 x i32> %X to i128), 32) to i32
409	/// --->
410	/// extractelement <4 x i32> %X, 1
411	static Instruction *foldVecTruncToExtElt(TruncInst &Trunc,
412	InstCombinerImpl &IC) {
413	Value *TruncOp = Trunc.getOperand(i_nocapture: `0`);
414	Type *DestType = Trunc.getType();
415	if (!TruncOp->hasOneUse() \|\| !isa<IntegerType>(Val: DestType))
416	return nullptr;
417
418	Value VecInput = nullptr*;
419	ConstantInt ShiftVal = nullptr*;
420	if (!match(V: TruncOp, P: m_CombineOr(L: m_BitCast(Op: m_Value(V&: VecInput)),
421	R: m_LShr(L: m_BitCast(Op: m_Value(V&: VecInput)),
422	R: m_ConstantInt(CI&: ShiftVal)))) \|\|
423	!isa<VectorType>(Val: VecInput->getType()))
424	return nullptr;
425
426	VectorType *VecType = cast<VectorType>(Val: VecInput->getType());
427	unsigned VecWidth = VecType->getPrimitiveSizeInBits();
428	unsigned DestWidth = DestType->getPrimitiveSizeInBits();
429	unsigned ShiftAmount = ShiftVal ? ShiftVal->getZExtValue() : `0`;
430
431	if ((VecWidth % DestWidth != `0`) \|\| (ShiftAmount % DestWidth != `0`))
432	return nullptr;
433
434	// If the element type of the vector doesn't match the result type,
435	// bitcast it to a vector type that we can extract from.
436	unsigned NumVecElts = VecWidth / DestWidth;
437	if (VecType->getElementType() != DestType) {
438	VecType = FixedVectorType::get(ElementType: DestType, NumElts: NumVecElts);
439	VecInput = IC.Builder.CreateBitCast(V: VecInput, DestTy: VecType, Name: "bc");
440	}
441
442	unsigned Elt = ShiftAmount / DestWidth;
443	if (IC.getDataLayout().isBigEndian())
444	Elt = NumVecElts - `1` - Elt;
445
446	return ExtractElementInst::Create(Vec: VecInput, Idx: IC.Builder.getInt32(C: Elt));
447	}
448
449	/// Whenever an element is extracted from a vector, optionally shifted down, and
450	/// then truncated, canonicalize by converting it to a bitcast followed by an
451	/// extractelement.
452	///
453	/// Examples (little endian):
454	/// trunc (extractelement <4 x i64> %X, 0) to i32
455	/// --->
456	/// extractelement <8 x i32> (bitcast <4 x i64> %X to <8 x i32>), i32 0
457	///
458	/// trunc (lshr (extractelement <4 x i32> %X, 0), 8) to i8
459	/// --->
460	/// extractelement <16 x i8> (bitcast <4 x i32> %X to <16 x i8>), i32 1
461	static Instruction *foldVecExtTruncToExtElt(TruncInst &Trunc,
462	InstCombinerImpl &IC) {
463	Value *Src = Trunc.getOperand(i_nocapture: `0`);
464	Type *SrcType = Src->getType();
465	Type *DstType = Trunc.getType();
466
467	// Only attempt this if we have simple aliasing of the vector elements.
468	// A badly fit destination size would result in an invalid cast.
469	unsigned SrcBits = SrcType->getScalarSizeInBits();
470	unsigned DstBits = DstType->getScalarSizeInBits();
471	unsigned TruncRatio = SrcBits / DstBits;
472	if ((SrcBits % DstBits) != `0`)
473	return nullptr;
474
475	Value *VecOp;
476	ConstantInt *Cst;
477	const APInt ShiftAmount = nullptr*;
478	if (!match(V: Src, P: m_OneUse(SubPattern: m_ExtractElt(Val: m_Value(V&: VecOp), Idx: m_ConstantInt(CI&: Cst)))) &&
479	!match(V: Src,
480	P: m_OneUse(SubPattern: m_LShr(L: m_ExtractElt(Val: m_Value(V&: VecOp), Idx: m_ConstantInt(CI&: Cst)),
481	R: m_APInt(Res&: ShiftAmount)))))
482	return nullptr;
483
484	auto *VecOpTy = cast<VectorType>(Val: VecOp->getType());
485	auto VecElts = VecOpTy->getElementCount();
486
487	uint64_t BitCastNumElts = VecElts.getKnownMinValue() * TruncRatio;
488	uint64_t VecOpIdx = Cst->getZExtValue();
489	uint64_t NewIdx = IC.getDataLayout().isBigEndian()
490	? (VecOpIdx + `1`) * TruncRatio - `1`
491	: VecOpIdx * TruncRatio;
492
493	// Adjust index by the whole number of truncated elements.
494	if (ShiftAmount) {
495	// Check shift amount is in range and shifts a whole number of truncated
496	// elements.
497	if (ShiftAmount->uge(RHS: SrcBits) \|\| ShiftAmount->urem(RHS: DstBits) != `0`)
498	return nullptr;
499
500	uint64_t IdxOfs = ShiftAmount->udiv(RHS: DstBits).getZExtValue();
501	NewIdx = IC.getDataLayout().isBigEndian() ? (NewIdx - IdxOfs)
502	: (NewIdx + IdxOfs);
503	}
504
505	assert(BitCastNumElts <= std::numeric_limits<uint32_t>::max() &&
506	NewIdx <= std::numeric_limits<uint32_t>::max() && "overflow 32-bits");
507
508	auto *BitCastTo =
509	VectorType::get(ElementType: DstType, NumElements: BitCastNumElts, Scalable: VecElts.isScalable());
510	Value *BitCast = IC.Builder.CreateBitCast(V: VecOp, DestTy: BitCastTo);
511	return ExtractElementInst::Create(Vec: BitCast, Idx: IC.Builder.getInt32(C: NewIdx));
512	}
513
514	/// Funnel/Rotate left/right may occur in a wider type than necessary because of
515	/// type promotion rules. Try to narrow the inputs and convert to funnel shift.
516	Instruction *InstCombinerImpl::narrowFunnelShift(TruncInst &Trunc) {
517	assert((isa<VectorType>(Trunc.getSrcTy()) \|\|
518	shouldChangeType(Trunc.getSrcTy(), Trunc.getType())) &&
519	"Don't narrow to an illegal scalar type");
520
521	// Bail out on strange types. It is possible to handle some of these patterns
522	// even with non-power-of-2 sizes, but it is not a likely scenario.
523	Type *DestTy = Trunc.getType();
524	unsigned NarrowWidth = DestTy->getScalarSizeInBits();
525	unsigned WideWidth = Trunc.getSrcTy()->getScalarSizeInBits();
526	if (!isPowerOf2_32(Value: NarrowWidth))
527	return nullptr;
528
529	// First, find an or'd pair of opposite shifts:
530	// trunc (or (lshr ShVal0, ShAmt0), (shl ShVal1, ShAmt1))
531	BinaryOperator Or0, Or1;
532	if (!match(V: Trunc.getOperand(i_nocapture: `0`), P: m_OneUse(SubPattern: m_Or(L: m_BinOp(I&: Or0), R: m_BinOp(I&: Or1)))))
533	return nullptr;
534
535	Value ShVal0, ShVal1, ShAmt0, ShAmt1;
536	if (!match(V: Or0, P: m_OneUse(SubPattern: m_LogicalShift(L: m_Value(V&: ShVal0), R: m_Value(V&: ShAmt0)))) \|\|
537	!match(V: Or1, P: m_OneUse(SubPattern: m_LogicalShift(L: m_Value(V&: ShVal1), R: m_Value(V&: ShAmt1)))) \|\|
538	Or0->getOpcode() == Or1->getOpcode())
539	return nullptr;
540
541	// Canonicalize to or(shl(ShVal0, ShAmt0), lshr(ShVal1, ShAmt1)).
542	if (Or0->getOpcode() == BinaryOperator::LShr) {
543	std::swap(a&: Or0, b&: Or1);
544	std::swap(a&: ShVal0, b&: ShVal1);
545	std::swap(a&: ShAmt0, b&: ShAmt1);
546	}
547	assert(Or0->getOpcode() == BinaryOperator::Shl &&
548	Or1->getOpcode() == BinaryOperator::LShr &&
549	"Illegal or(shift,shift) pair");
550
551	// Match the shift amount operands for a funnel/rotate pattern. This always
552	// matches a subtraction on the R operand.
553	auto matchShiftAmount = [&](Value L, Value R, unsigned Width) -> Value * {
554	// The shift amounts may add up to the narrow bit width:
555	// (shl ShVal0, L) \| (lshr ShVal1, Width - L)
556	// If this is a funnel shift (different operands are shifted), then the
557	// shift amount can not over-shift (create poison) in the narrow type.
558	unsigned MaxShiftAmountWidth = Log2_32(Value: NarrowWidth);
559	APInt HiBitMask = ~APInt::getLowBitsSet(numBits: WideWidth, loBitsSet: MaxShiftAmountWidth);
560	if (ShVal0 == ShVal1 \|\| MaskedValueIsZero(V: L, Mask: HiBitMask))
561	if (match(V: R, P: m_OneUse(SubPattern: m_Sub(L: m_SpecificInt(V: Width), R: m_Specific(V: L)))))
562	return L;
563
564	// The following patterns currently only work for rotation patterns.
565	// TODO: Add more general funnel-shift compatible patterns.
566	if (ShVal0 != ShVal1)
567	return nullptr;
568
569	// The shift amount may be masked with negation:
570	// (shl ShVal0, (X & (Width - 1))) \| (lshr ShVal1, ((-X) & (Width - 1)))
571	Value *X;
572	unsigned Mask = Width - `1`;
573	if (match(V: L, P: m_And(L: m_Value(V&: X), R: m_SpecificInt(V: Mask))) &&
574	match(V: R, P: m_And(L: m_Neg(V: m_Specific(V: X)), R: m_SpecificInt(V: Mask))))
575	return X;
576
577	// Same as above, but the shift amount may be extended after masking:
578	if (match(V: L, P: m_ZExt(Op: m_And(L: m_Value(V&: X), R: m_SpecificInt(V: Mask)))) &&
579	match(V: R, P: m_ZExt(Op: m_And(L: m_Neg(V: m_Specific(V: X)), R: m_SpecificInt(V: Mask)))))
580	return X;
581
582	return nullptr;
583	};
584
585	Value *ShAmt = matchShiftAmount (ShAmt0, ShAmt1, NarrowWidth);
586	bool IsFshl = true; // Sub on LSHR.
587	if (!ShAmt) {
588	ShAmt = matchShiftAmount (ShAmt1, ShAmt0, NarrowWidth);
589	IsFshl = false; // Sub on SHL.
590	}
591	if (!ShAmt)
592	return nullptr;
593
594	// The right-shifted value must have high zeros in the wide type (for example
595	// from 'zext', 'and' or 'shift'). High bits of the left-shifted value are
596	// truncated, so those do not matter.
597	APInt HiBitMask = APInt::getHighBitsSet(numBits: WideWidth, hiBitsSet: WideWidth - NarrowWidth);
598	if (!MaskedValueIsZero(V: ShVal1, Mask: HiBitMask, CxtI: &Trunc))
599	return nullptr;
600
601	// Adjust the width of ShAmt for narrowed funnel shift operation:
602	// - Zero-extend if ShAmt is narrower than the destination type.
603	// - Truncate if ShAmt is wider, discarding non-significant high-order bits.
604	// This prepares ShAmt for llvm.fshl.i8(trunc(ShVal), trunc(ShVal),
605	// zext/trunc(ShAmt)).
606	Value *NarrowShAmt = Builder.CreateZExtOrTrunc(V: ShAmt, DestTy);
607
608	Value X, Y;
609	X = Y = Builder.CreateTrunc(V: ShVal0, DestTy);
610	if (ShVal0 != ShVal1)
611	Y = Builder.CreateTrunc(V: ShVal1, DestTy);
612	Intrinsic::ID IID = IsFshl ? Intrinsic::fshl : Intrinsic::fshr;
613	Function *F =
614	Intrinsic::getOrInsertDeclaration(M: Trunc.getModule(), id: IID, Tys: DestTy);
615	return CallInst::Create(Func: F, Args: {X, Y, NarrowShAmt});
616	}
617
618	/// Try to narrow the width of math or bitwise logic instructions by pulling a
619	/// truncate ahead of binary operators.
620	Instruction *InstCombinerImpl::narrowBinOp(TruncInst &Trunc) {
621	Type *SrcTy = Trunc.getSrcTy();
622	Type *DestTy = Trunc.getType();
623	unsigned SrcWidth = SrcTy->getScalarSizeInBits();
624	unsigned DestWidth = DestTy->getScalarSizeInBits();
625
626	if (!isa<VectorType>(Val: SrcTy) && !shouldChangeType(From: SrcTy, To: DestTy))
627	return nullptr;
628
629	BinaryOperator *BinOp;
630	if (!match(V: Trunc.getOperand(i_nocapture: `0`), P: m_OneUse(SubPattern: m_BinOp(I&: BinOp))))
631	return nullptr;
632
633	Value *BinOp0 = BinOp->getOperand(i_nocapture: `0`);
634	Value *BinOp1 = BinOp->getOperand(i_nocapture: `1`);
635	switch (BinOp->getOpcode()) {
636	case Instruction::And:
637	case Instruction::Or:
638	case Instruction::Xor:
639	case Instruction::Add:
640	case Instruction::Sub:
641	case Instruction::Mul: {
642	Constant *C;
643	if (match(V: BinOp0, P: m_Constant(C))) {
644	// trunc (binop C, X) --> binop (trunc C', X)
645	Constant *NarrowC = ConstantExpr::getTrunc(C, Ty: DestTy);
646	Value *TruncX = Builder.CreateTrunc(V: BinOp1, DestTy);
647	return BinaryOperator::Create(Op: BinOp->getOpcode(), S1: NarrowC, S2: TruncX);
648	}
649	if (match(V: BinOp1, P: m_Constant(C))) {
650	// trunc (binop X, C) --> binop (trunc X, C')
651	Constant *NarrowC = ConstantExpr::getTrunc(C, Ty: DestTy);
652	Value *TruncX = Builder.CreateTrunc(V: BinOp0, DestTy);
653	return BinaryOperator::Create(Op: BinOp->getOpcode(), S1: TruncX, S2: NarrowC);
654	}
655	Value *X;
656	if (match(V: BinOp0, P: m_ZExtOrSExt(Op: m_Value(V&: X))) && X->getType() == DestTy) {
657	// trunc (binop (ext X), Y) --> binop X, (trunc Y)
658	Value *NarrowOp1 = Builder.CreateTrunc(V: BinOp1, DestTy);
659	return BinaryOperator::Create(Op: BinOp->getOpcode(), S1: X, S2: NarrowOp1);
660	}
661	if (match(V: BinOp1, P: m_ZExtOrSExt(Op: m_Value(V&: X))) && X->getType() == DestTy) {
662	// trunc (binop Y, (ext X)) --> binop (trunc Y), X
663	Value *NarrowOp0 = Builder.CreateTrunc(V: BinOp0, DestTy);
664	return BinaryOperator::Create(Op: BinOp->getOpcode(), S1: NarrowOp0, S2: X);
665	}
666	break;
667	}
668	case Instruction::LShr:
669	case Instruction::AShr: {
670	// trunc (shr (trunc A), C) --> trunc(shr A, C)
671	Value *A;
672	Constant *C;
673	if (match(V: BinOp0, P: m_Trunc(Op: m_Value(V&: A))) && match(V: BinOp1, P: m_Constant(C))) {
674	unsigned MaxShiftAmt = SrcWidth - DestWidth;
675	// If the shift is small enough, all zero/sign bits created by the shift
676	// are removed by the trunc.
677	if (match(V: C, P: m_SpecificInt_ICMP(Predicate: ICmpInst::ICMP_ULE,
678	Threshold: APInt (SrcWidth, MaxShiftAmt)))) {
679	auto *OldShift = cast<Instruction>(Val: Trunc.getOperand(i_nocapture: `0`));
680	bool IsExact = OldShift->isExact();
681	if (Constant *ShAmt = ConstantFoldIntegerCast(C, DestTy: A->getType(),
682	/IsSigned/ true, DL)) {
683	ShAmt = Constant::mergeUndefsWith(C: ShAmt, Other: C);
684	Value *Shift =
685	OldShift->getOpcode() == Instruction::AShr
686	? Builder.CreateAShr(LHS: A, RHS: ShAmt, Name: OldShift->getName(), isExact: IsExact)
687	: Builder.CreateLShr(LHS: A, RHS: ShAmt, Name: OldShift->getName(), isExact: IsExact);
688	return CastInst::CreateTruncOrBitCast(S: Shift, Ty: DestTy);
689	}
690	}
691	}
692	break;
693	}
694	default: break;
695	}
696
697	if (Instruction *NarrowOr = narrowFunnelShift(Trunc))
698	return NarrowOr;
699
700	return nullptr;
701	}
702
703	/// Try to narrow the width of a splat shuffle. This could be generalized to any
704	/// shuffle with a constant operand, but we limit the transform to avoid
705	/// creating a shuffle type that targets may not be able to lower effectively.
706	static Instruction *shrinkSplatShuffle(TruncInst &Trunc,
707	InstCombiner::BuilderTy &Builder) {
708	auto *Shuf = dyn_cast<ShuffleVectorInst>(Val: Trunc.getOperand(i_nocapture: `0`));
709	if (Shuf && Shuf->hasOneUse() && match(V: Shuf->getOperand(i_nocapture: `1`), P: m_Undef()) &&
710	all_equal(Range: Shuf->getShuffleMask()) &&
711	Shuf->getType() == Shuf->getOperand(i_nocapture: `0`)->getType()) {
712	// trunc (shuf X, Undef, SplatMask) --> shuf (trunc X), Poison, SplatMask
713	// trunc (shuf X, Poison, SplatMask) --> shuf (trunc X), Poison, SplatMask
714	Value *NarrowOp = Builder.CreateTrunc(V: Shuf->getOperand(i_nocapture: `0`), DestTy: Trunc.getType());
715	return new ShuffleVectorInst (NarrowOp, Shuf->getShuffleMask());
716	}
717
718	return nullptr;
719	}
720
721	/// Try to narrow the width of an insert element. This could be generalized for
722	/// any vector constant, but we limit the transform to insertion into undef to
723	/// avoid potential backend problems from unsupported insertion widths. This
724	/// could also be extended to handle the case of inserting a scalar constant
725	/// into a vector variable.
726	static Instruction *shrinkInsertElt(CastInst &Trunc,
727	InstCombiner::BuilderTy &Builder) {
728	Instruction::CastOps Opcode = Trunc.getOpcode();
729	assert((Opcode == Instruction::Trunc \|\| Opcode == Instruction::FPTrunc) &&
730	"Unexpected instruction for shrinking");
731
732	auto *InsElt = dyn_cast<InsertElementInst>(Val: Trunc.getOperand(i_nocapture: `0`));
733	if (!InsElt \|\| !InsElt->hasOneUse())
734	return nullptr;
735
736	Type *DestTy = Trunc.getType();
737	Type *DestScalarTy = DestTy->getScalarType();
738	Value *VecOp = InsElt->getOperand(i_nocapture: `0`);
739	Value *ScalarOp = InsElt->getOperand(i_nocapture: `1`);
740	Value *Index = InsElt->getOperand(i_nocapture: `2`);
741
742	if (match(V: VecOp, P: m_Undef())) {
743	// trunc (inselt undef, X, Index) --> inselt undef, (trunc X), Index
744	// fptrunc (inselt undef, X, Index) --> inselt undef, (fptrunc X), Index
745	UndefValue *NarrowUndef = UndefValue::get(T: DestTy);
746	Value *NarrowOp = Builder.CreateCast(Op: Opcode, V: ScalarOp, DestTy: DestScalarTy);
747	return InsertElementInst::Create(Vec: NarrowUndef, NewElt: NarrowOp, Idx: Index);
748	}
749
750	return nullptr;
751	}
752
753	Instruction *InstCombinerImpl::visitTrunc(TruncInst &Trunc) {
754	if (Instruction *Result = commonCastTransforms(CI&: Trunc))
755	return Result;
756
757	Value *Src = Trunc.getOperand(i_nocapture: `0`);
758	Type DestTy = Trunc.getType(), SrcTy = Src->getType();
759	unsigned DestWidth = DestTy->getScalarSizeInBits();
760	unsigned SrcWidth = SrcTy->getScalarSizeInBits();
761
762	// Attempt to truncate the entire input expression tree to the destination
763	// type. Only do this if the dest type is a simple type, don't convert the
764	// expression tree to something weird like i93 unless the source is also
765	// strange.
766	if ((DestTy->isVectorTy() \|\| shouldChangeType(From: SrcTy, To: DestTy)) &&
767	canEvaluateTruncated(V: Src, Ty: DestTy, IC&: *this, CxtI: &Trunc)) {
768
769	// If this cast is a truncate, evaluting in a different type always
770	// eliminates the cast, so it is always a win.
771	LLVM_DEBUG(
772	dbgs() << "ICE: EvaluateInDifferentType converting expression type"
773	" to avoid cast: "
774	<< Trunc << `'\n'`);
775	Value Res = EvaluateInDifferentType(V: Src, Ty: DestTy, isSigned: false*);
776	assert(Res->getType() == DestTy);
777	return replaceInstUsesWith(I&: Trunc, V: Res);
778	}
779
780	// For integer types, check if we can shorten the entire input expression to
781	// DestWidth 2, which won't allow removing the truncate, but reducing the*
782	// width may enable further optimizations, e.g. allowing for larger
783	// vectorization factors.
784	if (auto *DestITy = dyn_cast<IntegerType>(Val: DestTy)) {
785	if (DestWidth * `2` < SrcWidth) {
786	auto *NewDestTy = DestITy->getExtendedType();
787	if (shouldChangeType(From: SrcTy, To: NewDestTy) &&
788	canEvaluateTruncated(V: Src, Ty: NewDestTy, IC&: *this, CxtI: &Trunc)) {
789	LLVM_DEBUG(
790	dbgs() << "ICE: EvaluateInDifferentType converting expression type"
791	" to reduce the width of operand of"
792	<< Trunc << `'\n'`);
793	Value Res = EvaluateInDifferentType(V: Src, Ty: NewDestTy, isSigned: false*);
794	return new TruncInst (Res, DestTy);
795	}
796	}
797	}
798
799	// See if we can simplify any instructions used by the input whose sole
800	// purpose is to compute bits we don't care about.
801	if (SimplifyDemandedInstructionBits(Inst&: Trunc))
802	return &Trunc;
803
804	if (DestWidth == `1`) {
805	Value *Zero = Constant::getNullValue(Ty: SrcTy);
806
807	Value *X;
808	const APInt *C1;
809	Constant *C2;
810	if (match(V: Src, P: m_OneUse(SubPattern: m_Shr(L: m_Shl(L: m_Power2(V&: C1), R: m_Value(V&: X)),
811	R: m_ImmConstant(C&: C2))))) {
812	// trunc ((C1 << X) >> C2) to i1 --> X == (C2-cttz(C1)), where C1 is pow2
813	Constant *Log2C1 = ConstantInt::get(Ty: SrcTy, V: C1->exactLogBase2());
814	Constant *CmpC = ConstantExpr::getSub(C1: C2, C2: Log2C1);
815	return new ICmpInst (ICmpInst::ICMP_EQ, X, CmpC);
816	}
817
818	if (match(V: Src, P: m_Shr(L: m_Value(V&: X), R: m_SpecificInt(V: SrcWidth - `1`)))) {
819	// trunc (ashr X, BW-1) to i1 --> icmp slt X, 0
820	// trunc (lshr X, BW-1) to i1 --> icmp slt X, 0
821	return new ICmpInst (ICmpInst::ICMP_SLT, X, Zero);
822	}
823
824	Constant *C;
825	if (match(V: Src, P: m_OneUse(SubPattern: m_LShr(L: m_Value(V&: X), R: m_ImmConstant(C))))) {
826	// trunc (lshr X, C) to i1 --> icmp ne (and X, C'), 0
827	Constant *One = ConstantInt::get(Ty: SrcTy, V: APInt (SrcWidth, `1`));
828	Value *MaskC = Builder.CreateShl(LHS: One, RHS: C);
829	Value *And = Builder.CreateAnd(LHS: X, RHS: MaskC);
830	return new ICmpInst (ICmpInst::ICMP_NE, And, Zero);
831	}
832	if (match(V: Src, P: m_OneUse(SubPattern: m_c_Or(L: m_LShr(L: m_Value(V&: X), R: m_ImmConstant(C)),
833	R: m_Deferred(V: X))))) {
834	// trunc (or (lshr X, C), X) to i1 --> icmp ne (and X, C'), 0
835	Constant *One = ConstantInt::get(Ty: SrcTy, V: APInt (SrcWidth, `1`));
836	Value *MaskC = Builder.CreateShl(LHS: One, RHS: C);
837	Value *And = Builder.CreateAnd(LHS: X, RHS: Builder.CreateOr(LHS: MaskC, RHS: One));
838	return new ICmpInst (ICmpInst::ICMP_NE, And, Zero);
839	}
840
841	{
842	const APInt *C;
843	if (match(V: Src, P: m_Shl(L: m_APInt(Res&: C), R: m_Value(V&: X))) && (*C)[`0`] == `1`) {
844	// trunc (C << X) to i1 --> X == 0, where C is odd
845	return new ICmpInst (ICmpInst::Predicate::ICMP_EQ, X, Zero);
846	}
847	}
848
849	if (Trunc.hasNoUnsignedWrap() \|\| Trunc.hasNoSignedWrap()) {
850	Value X, Y;
851	if (match(V: Src, P: m_Xor(L: m_Value(V&: X), R: m_Value(V&: Y))))
852	return new ICmpInst (ICmpInst::ICMP_NE, X, Y);
853	}
854	}
855
856	Value A, B;
857	Constant *C;
858	if (match(V: Src, P: m_LShr(L: m_SExt(Op: m_Value(V&: A)), R: m_Constant(C)))) {
859	unsigned AWidth = A->getType()->getScalarSizeInBits();
860	unsigned MaxShiftAmt = SrcWidth - std::max(a: DestWidth, b: AWidth);
861	auto *OldSh = cast<Instruction>(Val: Src);
862	bool IsExact = OldSh->isExact();
863
864	// If the shift is small enough, all zero bits created by the shift are
865	// removed by the trunc.
866	if (match(V: C, P: m_SpecificInt_ICMP(Predicate: ICmpInst::ICMP_ULE,
867	Threshold: APInt (SrcWidth, MaxShiftAmt)))) {
868	auto GetNewShAmt = [&](unsigned Width) {
869	Constant MaxAmt = ConstantInt::get(Ty: SrcTy, V: Width - `1`, IsSigned: false*);
870	Constant *Cmp =
871	ConstantFoldCompareInstOperands(Predicate: ICmpInst::ICMP_ULT, LHS: C, RHS: MaxAmt, DL);
872	Constant *ShAmt = ConstantFoldSelectInstruction(Cond: Cmp, V1: C, V2: MaxAmt);
873	return ConstantFoldCastOperand(Opcode: Instruction::Trunc, C: ShAmt, DestTy: A->getType(),
874	DL);
875	};
876
877	// trunc (lshr (sext A), C) --> ashr A, C
878	if (A->getType() == DestTy) {
879	Constant *ShAmt = GetNewShAmt (DestWidth);
880	ShAmt = Constant::mergeUndefsWith(C: ShAmt, Other: C);
881	return IsExact ? BinaryOperator::CreateExactAShr(V1: A, V2: ShAmt)
882	: BinaryOperator::CreateAShr(V1: A, V2: ShAmt);
883	}
884	// The types are mismatched, so create a cast after shifting:
885	// trunc (lshr (sext A), C) --> sext/trunc (ashr A, C)
886	if (Src->hasOneUse()) {
887	Constant *ShAmt = GetNewShAmt (AWidth);
888	Value *Shift = Builder.CreateAShr(LHS: A, RHS: ShAmt, Name: "", isExact: IsExact);
889	return CastInst::CreateIntegerCast(S: Shift, Ty: DestTy, isSigned: true);
890	}
891	}
892	// TODO: Mask high bits with 'and'.
893	}
894
895	if (Instruction *I = narrowBinOp(Trunc))
896	return I;
897
898	if (Instruction *I = shrinkSplatShuffle(Trunc, Builder))
899	return I;
900
901	if (Instruction *I = shrinkInsertElt(Trunc, Builder))
902	return I;
903
904	if (Src->hasOneUse() &&
905	(isa<VectorType>(Val: SrcTy) \|\| shouldChangeType(From: SrcTy, To: DestTy))) {
906	// Transform "trunc (shl X, cst)" -> "shl (trunc X), cst" so long as the
907	// dest type is native and cst < dest size.
908	if (match(V: Src, P: m_Shl(L: m_Value(V&: A), R: m_Constant(C))) &&
909	!match(V: A, P: m_Shr(L: m_Value(), R: m_Constant()))) {
910	// Skip shifts of shift by constants. It undoes a combine in
911	// FoldShiftByConstant and is the extend in reg pattern.
912	APInt Threshold = APInt (C->getType()->getScalarSizeInBits(), DestWidth);
913	if (match(V: C, P: m_SpecificInt_ICMP(Predicate: ICmpInst::ICMP_ULT, Threshold))) {
914	Value *NewTrunc = Builder.CreateTrunc(V: A, DestTy, Name: A->getName() + ".tr");
915	return BinaryOperator::Create(Op: Instruction::Shl, S1: NewTrunc,
916	S2: ConstantExpr::getTrunc(C, Ty: DestTy));
917	}
918	}
919	}
920
921	if (Instruction I = foldVecTruncToExtElt(Trunc, IC&: this))
922	return I;
923
924	if (Instruction I = foldVecExtTruncToExtElt(Trunc, IC&: this))
925	return I;
926
927	// trunc (ctlz_i32(zext(A), B) --> add(ctlz_i16(A, B), C)
928	if (match(V: Src, P: m_OneUse(SubPattern: m_Intrinsic<Intrinsic::ctlz>(Op0: m_ZExt(Op: m_Value(V&: A)),
929	Op1: m_Value(V&: B))))) {
930	unsigned AWidth = A->getType()->getScalarSizeInBits();
931	if (AWidth == DestWidth && AWidth > Log2_32(Value: SrcWidth)) {
932	Value *WidthDiff = ConstantInt::get(Ty: A->getType(), V: SrcWidth - AWidth);
933	Value *NarrowCtlz =
934	Builder.CreateIntrinsic(ID: Intrinsic::ctlz, Types: {Trunc.getType()}, Args: {A, B});
935	return BinaryOperator::CreateAdd(V1: NarrowCtlz, V2: WidthDiff);
936	}
937	}
938
939	if (match(V: Src, P: m_VScale())) {
940	if (Trunc.getFunction() &&
941	Trunc.getFunction()->hasFnAttribute(Kind: Attribute::VScaleRange)) {
942	Attribute Attr =
943	Trunc.getFunction()->getFnAttribute(Kind: Attribute::VScaleRange);
944	if (std::optional<unsigned> MaxVScale = Attr.getVScaleRangeMax())
945	if (Log2_32(Value: *MaxVScale) < DestWidth)
946	return replaceInstUsesWith(I&: Trunc, V: Builder.CreateVScale(Ty: DestTy));
947	}
948	}
949
950	if (DestWidth == `1` &&
951	(Trunc.hasNoUnsignedWrap() \|\| Trunc.hasNoSignedWrap()) &&
952	isKnownNonZero(V: Src, Q: SQ.getWithInstruction(I: &Trunc)))
953	return replaceInstUsesWith(I&: Trunc, V: ConstantInt::getTrue(Ty: DestTy));
954
955	bool Changed = false;
956	if (!Trunc.hasNoSignedWrap() &&
957	ComputeMaxSignificantBits(Op: Src, CxtI: &Trunc) <= DestWidth) {
958	Trunc.setHasNoSignedWrap(true);
959	Changed = true;
960	}
961	if (!Trunc.hasNoUnsignedWrap() &&
962	MaskedValueIsZero(V: Src, Mask: APInt::getBitsSetFrom(numBits: SrcWidth, loBit: DestWidth),
963	CxtI: &Trunc)) {
964	Trunc.setHasNoUnsignedWrap(true);
965	Changed = true;
966	}
967
968	return Changed ? &Trunc : nullptr;
969	}
970
971	Instruction InstCombinerImpl::transformZExtICmp(ICmpInst Cmp,
972	ZExtInst &Zext) {
973	// If we are just checking for a icmp eq of a single bit and zext'ing it
974	// to an integer, then shift the bit to the appropriate place and then
975	// cast to integer to avoid the comparison.
976
977	// FIXME: This set of transforms does not check for extra uses and/or creates
978	// an extra instruction (an optional final cast is not included
979	// in the transform comments). We may also want to favor icmp over
980	// shifts in cases of equal instructions because icmp has better
981	// analysis in general (invert the transform).
982
983	const APInt *Op1CV;
984	if (match(V: Cmp->getOperand(i_nocapture: `1`), P: m_APInt(Res&: Op1CV))) {
985
986	// zext (x <s 0) to i32 --> x>>u31 true if signbit set.
987	if (Cmp->getPredicate() == ICmpInst::ICMP_SLT && Op1CV->isZero()) {
988	Value *In = Cmp->getOperand(i_nocapture: `0`);
989	Value *Sh = ConstantInt::get(Ty: In->getType(),
990	V: In->getType()->getScalarSizeInBits() - `1`);
991	In = Builder.CreateLShr(LHS: In, RHS: Sh, Name: In->getName() + ".lobit");
992	if (In->getType() != Zext.getType())
993	In = Builder.CreateIntCast(V: In, DestTy: Zext.getType(), isSigned: false /ZExt/);
994
995	return replaceInstUsesWith(I&: Zext, V: In);
996	}
997
998	// zext (X == 0) to i32 --> X^1 iff X has only the low bit set.
999	// zext (X == 0) to i32 --> (X>>1)^1 iff X has only the 2nd bit set.
1000	// zext (X != 0) to i32 --> X iff X has only the low bit set.
1001	// zext (X != 0) to i32 --> X>>1 iff X has only the 2nd bit set.
1002
1003	if (Op1CV->isZero() && Cmp->isEquality()) {
1004	// Exactly 1 possible 1? But not the high-bit because that is
1005	// canonicalized to this form.
1006	KnownBits Known = computeKnownBits(V: Cmp->getOperand(i_nocapture: `0`), CxtI: &Zext);
1007	APInt KnownZeroMask(~Known.Zero);
1008	uint32_t ShAmt = KnownZeroMask.logBase2();
1009	bool IsExpectShAmt = KnownZeroMask.isPowerOf2() &&
1010	(Zext.getType()->getScalarSizeInBits() != ShAmt + `1`);
1011	if (IsExpectShAmt &&
1012	(Cmp->getOperand(i_nocapture: `0`)->getType() == Zext.getType() \|\|
1013	Cmp->getPredicate() == ICmpInst::ICMP_NE \|\| ShAmt == `0`)) {
1014	Value *In = Cmp->getOperand(i_nocapture: `0`);
1015	if (ShAmt) {
1016	// Perform a logical shr by shiftamt.
1017	// Insert the shift to put the result in the low bit.
1018	In = Builder.CreateLShr(LHS: In, RHS: ConstantInt::get(Ty: In->getType(), V: ShAmt),
1019	Name: In->getName() + ".lobit");
1020	}
1021
1022	// Toggle the low bit for "X == 0".
1023	if (Cmp->getPredicate() == ICmpInst::ICMP_EQ)
1024	In = Builder.CreateXor(LHS: In, RHS: ConstantInt::get(Ty: In->getType(), V: `1`));
1025
1026	if (Zext.getType() == In->getType())
1027	return replaceInstUsesWith(I&: Zext, V: In);
1028
1029	Value IntCast = Builder.CreateIntCast(V: In, DestTy: Zext.getType(), isSigned: false*);
1030	return replaceInstUsesWith(I&: Zext, V: IntCast);
1031	}
1032	}
1033	}
1034
1035	if (Cmp->isEquality()) {
1036	// Test if a bit is clear/set using a shifted-one mask:
1037	// zext (icmp eq (and X, (1 << ShAmt)), 0) --> and (lshr (not X), ShAmt), 1
1038	// zext (icmp ne (and X, (1 << ShAmt)), 0) --> and (lshr X, ShAmt), 1
1039	Value X, ShAmt;
1040	if (Cmp->hasOneUse() && match(V: Cmp->getOperand(i_nocapture: `1`), P: m_ZeroInt()) &&
1041	match(V: Cmp->getOperand(i_nocapture: `0`),
1042	P: m_OneUse(SubPattern: m_c_And(L: m_Shl(L: m_One(), R: m_Value(V&: ShAmt)), R: m_Value(V&: X))))) {
1043	auto *And = cast<BinaryOperator>(Val: Cmp->getOperand(i_nocapture: `0`));
1044	Value *Shift = And->getOperand(i_nocapture: X == And->getOperand(i_nocapture: `0`) ? `1` : `0`);
1045	if (Zext.getType() == And->getType() \|\|
1046	Cmp->getPredicate() != ICmpInst::ICMP_EQ \|\| Shift->hasOneUse()) {
1047	if (Cmp->getPredicate() == ICmpInst::ICMP_EQ)
1048	X = Builder.CreateNot(V: X);
1049	Value *Lshr = Builder.CreateLShr(LHS: X, RHS: ShAmt);
1050	Value *And1 =
1051	Builder.CreateAnd(LHS: Lshr, RHS: ConstantInt::get(Ty: X->getType(), V: `1`));
1052	return replaceInstUsesWith(
1053	I&: Zext, V: Builder.CreateZExtOrTrunc(V: And1, DestTy: Zext.getType()));
1054	}
1055	}
1056	}
1057
1058	return nullptr;
1059	}
1060
1061	/// Determine if the specified value can be computed in the specified wider type
1062	/// and produce the same low bits. If not, return false.
1063	///
1064	/// If this function returns true, it can also return a non-zero number of bits
1065	/// (in BitsToClear) which indicates that the value it computes is correct for
1066	/// the zero extend, but that the additional BitsToClear bits need to be zero'd
1067	/// out. For example, to promote something like:
1068	///
1069	/// %B = trunc i64 %A to i32
1070	/// %C = lshr i32 %B, 8
1071	/// %E = zext i32 %C to i64
1072	///
1073	/// CanEvaluateZExtd for the 'lshr' will return true, and BitsToClear will be
1074	/// set to 8 to indicate that the promoted value needs to have bits 24-31
1075	/// cleared in addition to bits 32-63. Since an 'and' will be generated to
1076	/// clear the top bits anyway, doing this has no extra cost.
1077	///
1078	/// This function works on both vectors and scalars.
1079	static bool canEvaluateZExtd(Value V, Type Ty, unsigned &BitsToClear,
1080	InstCombinerImpl &IC, Instruction *CxtI) {
1081	BitsToClear = `0`;
1082	if (canAlwaysEvaluateInType(V, Ty))
1083	return true;
1084	if (canNotEvaluateInType(V, Ty))
1085	return false;
1086
1087	auto *I = cast<Instruction>(Val: V);
1088	unsigned Tmp;
1089	switch (I->getOpcode()) {
1090	case Instruction::ZExt: // zext(zext(x)) -> zext(x).
1091	case Instruction::SExt: // zext(sext(x)) -> sext(x).
1092	case Instruction::Trunc: // zext(trunc(x)) -> trunc(x) or zext(x)
1093	return true;
1094	case Instruction::And:
1095	case Instruction::Or:
1096	case Instruction::Xor:
1097	case Instruction::Add:
1098	case Instruction::Sub:
1099	case Instruction::Mul:
1100	if (!canEvaluateZExtd(V: I->getOperand(i: `0`), Ty, BitsToClear, IC, CxtI) \|\|
1101	!canEvaluateZExtd(V: I->getOperand(i: `1`), Ty, BitsToClear&: Tmp, IC, CxtI))
1102	return false;
1103	// These can all be promoted if neither operand has 'bits to clear'.
1104	if (BitsToClear == `0` && Tmp == `0`)
1105	return true;
1106
1107	// If the operation is an AND/OR/XOR and the bits to clear are zero in the
1108	// other side, BitsToClear is ok.
1109	if (Tmp == `0` && I->isBitwiseLogicOp()) {
1110	// We use MaskedValueIsZero here for generality, but the case we care
1111	// about the most is constant RHS.
1112	unsigned VSize = V->getType()->getScalarSizeInBits();
1113	if (IC.MaskedValueIsZero(V: I->getOperand(i: `1`),
1114	Mask: APInt::getHighBitsSet(numBits: VSize, hiBitsSet: BitsToClear),
1115	CxtI)) {
1116	// If this is an And instruction and all of the BitsToClear are
1117	// known to be zero we can reset BitsToClear.
1118	if (I->getOpcode() == Instruction::And)
1119	BitsToClear = `0`;
1120	return true;
1121	}
1122	}
1123
1124	// Otherwise, we don't know how to analyze this BitsToClear case yet.
1125	return false;
1126
1127	case Instruction::Shl: {
1128	// We can promote shl(x, cst) if we can promote x. Since shl overwrites the
1129	// upper bits we can reduce BitsToClear by the shift amount.
1130	const APInt *Amt;
1131	if (match(V: I->getOperand(i: `1`), P: m_APInt(Res&: Amt))) {
1132	if (!canEvaluateZExtd(V: I->getOperand(i: `0`), Ty, BitsToClear, IC, CxtI))
1133	return false;
1134	uint64_t ShiftAmt = Amt->getZExtValue();
1135	BitsToClear = ShiftAmt < BitsToClear ? BitsToClear - ShiftAmt : `0`;
1136	return true;
1137	}
1138	return false;
1139	}
1140	case Instruction::LShr: {
1141	// We can promote lshr(x, cst) if we can promote x. This requires the
1142	// ultimate 'and' to clear out the high zero bits we're clearing out though.
1143	const APInt *Amt;
1144	if (match(V: I->getOperand(i: `1`), P: m_APInt(Res&: Amt))) {
1145	if (!canEvaluateZExtd(V: I->getOperand(i: `0`), Ty, BitsToClear, IC, CxtI))
1146	return false;
1147	BitsToClear += Amt->getZExtValue();
1148	if (BitsToClear > V->getType()->getScalarSizeInBits())
1149	BitsToClear = V->getType()->getScalarSizeInBits();
1150	return true;
1151	}
1152	// Cannot promote variable LSHR.
1153	return false;
1154	}
1155	case Instruction::Select:
1156	if (!canEvaluateZExtd(V: I->getOperand(i: `1`), Ty, BitsToClear&: Tmp, IC, CxtI) \|\|
1157	!canEvaluateZExtd(V: I->getOperand(i: `2`), Ty, BitsToClear, IC, CxtI) \|\|
1158	// TODO: If important, we could handle the case when the BitsToClear are
1159	// known zero in the disagreeing side.
1160	Tmp != BitsToClear)
1161	return false;
1162	return true;
1163
1164	case Instruction::PHI: {
1165	// We can change a phi if we can change all operands. Note that we never
1166	// get into trouble with cyclic PHIs here because we only consider
1167	// instructions with a single use.
1168	PHINode *PN = cast<PHINode>(Val: I);
1169	if (!canEvaluateZExtd(V: PN->getIncomingValue(i: `0`), Ty, BitsToClear, IC, CxtI))
1170	return false;
1171	for (unsigned i = `1`, e = PN->getNumIncomingValues(); i != e; ++i)
1172	if (!canEvaluateZExtd(V: PN->getIncomingValue(i), Ty, BitsToClear&: Tmp, IC, CxtI) \|\|
1173	// TODO: If important, we could handle the case when the BitsToClear
1174	// are known zero in the disagreeing input.
1175	Tmp != BitsToClear)
1176	return false;
1177	return true;
1178	}
1179	case Instruction::Call:
1180	// llvm.vscale() can always be executed in larger type, because the
1181	// value is automatically zero-extended.
1182	if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: I))
1183	if (II->getIntrinsicID() == Intrinsic::vscale)
1184	return true;
1185	return false;
1186	default:
1187	// TODO: Can handle more cases here.
1188	return false;
1189	}
1190	}
1191
1192	Instruction *InstCombinerImpl::visitZExt(ZExtInst &Zext) {
1193	// If this zero extend is only used by a truncate, let the truncate be
1194	// eliminated before we try to optimize this zext.
1195	if (Zext.hasOneUse() && isa<TruncInst>(Val: Zext.user_back()) &&
1196	!isa<Constant>(Val: Zext.getOperand(i_nocapture: `0`)))
1197	return nullptr;
1198
1199	// If one of the common conversion will work, do it.
1200	if (Instruction *Result = commonCastTransforms(CI&: Zext))
1201	return Result;
1202
1203	Value *Src = Zext.getOperand(i_nocapture: `0`);
1204	Type SrcTy = Src->getType(), DestTy = Zext.getType();
1205
1206	// zext nneg bool x -> 0
1207	if (SrcTy->isIntOrIntVectorTy(BitWidth: `1`) && Zext.hasNonNeg())
1208	return replaceInstUsesWith(I&: Zext, V: Constant::getNullValue(Ty: Zext.getType()));
1209
1210	// Try to extend the entire expression tree to the wide destination type.
1211	unsigned BitsToClear;
1212	if (shouldChangeType(From: SrcTy, To: DestTy) &&
1213	canEvaluateZExtd(V: Src, Ty: DestTy, BitsToClear, IC&: *this, CxtI: &Zext)) {
1214	assert(BitsToClear <= SrcTy->getScalarSizeInBits() &&
1215	"Can't clear more bits than in SrcTy");
1216
1217	// Okay, we can transform this! Insert the new expression now.
1218	LLVM_DEBUG(
1219	dbgs() << "ICE: EvaluateInDifferentType converting expression type"
1220	" to avoid zero extend: "
1221	<< Zext << `'\n'`);
1222	Value Res = EvaluateInDifferentType(V: Src, Ty: DestTy, isSigned: false*);
1223	assert(Res->getType() == DestTy);
1224
1225	// Preserve debug values referring to Src if the zext is its last use.
1226	if (auto *SrcOp = dyn_cast<Instruction>(Val: Src))
1227	if (SrcOp->hasOneUse())
1228	replaceAllDbgUsesWith(From&: SrcOp, To&: Res, DomPoint&: Zext, DT);
1229
1230	uint32_t SrcBitsKept = SrcTy->getScalarSizeInBits() - BitsToClear;
1231	uint32_t DestBitSize = DestTy->getScalarSizeInBits();
1232
1233	// If the high bits are already filled with zeros, just replace this
1234	// cast with the result.
1235	if (MaskedValueIsZero(
1236	V: Res, Mask: APInt::getHighBitsSet(numBits: DestBitSize, hiBitsSet: DestBitSize - SrcBitsKept),
1237	CxtI: &Zext))
1238	return replaceInstUsesWith(I&: Zext, V: Res);
1239
1240	// We need to emit an AND to clear the high bits.
1241	Constant *C = ConstantInt::get(Ty: Res->getType(),
1242	V: APInt::getLowBitsSet(numBits: DestBitSize, loBitsSet: SrcBitsKept));
1243	return BinaryOperator::CreateAnd(V1: Res, V2: C);
1244	}
1245
1246	// If this is a TRUNC followed by a ZEXT then we are dealing with integral
1247	// types and if the sizes are just right we can convert this into a logical
1248	// 'and' which will be much cheaper than the pair of casts.
1249	if (auto CSrc = dyn_cast<TruncInst>(Val: Src)) { // A->B->C cast*
1250	// TODO: Subsume this into EvaluateInDifferentType.
1251
1252	// Get the sizes of the types involved. We know that the intermediate type
1253	// will be smaller than A or C, but don't know the relation between A and C.
1254	Value *A = CSrc->getOperand(i_nocapture: `0`);
1255	unsigned SrcSize = A->getType()->getScalarSizeInBits();
1256	unsigned MidSize = CSrc->getType()->getScalarSizeInBits();
1257	unsigned DstSize = DestTy->getScalarSizeInBits();
1258	// If we're actually extending zero bits, then if
1259	// SrcSize < DstSize: zext(a & mask)
1260	// SrcSize == DstSize: a & mask
1261	// SrcSize > DstSize: trunc(a) & mask
1262	if (SrcSize < DstSize) {
1263	APInt AndValue(APInt::getLowBitsSet(numBits: SrcSize, loBitsSet: MidSize));
1264	Constant *AndConst = ConstantInt::get(Ty: A->getType(), V: AndValue);
1265	Value *And = Builder.CreateAnd(LHS: A, RHS: AndConst, Name: CSrc->getName() + ".mask");
1266	return new ZExtInst (And, DestTy);
1267	}
1268
1269	if (SrcSize == DstSize) {
1270	APInt AndValue(APInt::getLowBitsSet(numBits: SrcSize, loBitsSet: MidSize));
1271	return BinaryOperator::CreateAnd(V1: A, V2: ConstantInt::get(Ty: A->getType(),
1272	V: AndValue));
1273	}
1274	if (SrcSize > DstSize) {
1275	Value *Trunc = Builder.CreateTrunc(V: A, DestTy);
1276	APInt AndValue(APInt::getLowBitsSet(numBits: DstSize, loBitsSet: MidSize));
1277	return BinaryOperator::CreateAnd(V1: Trunc,
1278	V2: ConstantInt::get(Ty: Trunc->getType(),
1279	V: AndValue));
1280	}
1281	}
1282
1283	if (auto *Cmp = dyn_cast<ICmpInst>(Val: Src))
1284	return transformZExtICmp(Cmp, Zext);
1285
1286	// zext(trunc(X) & C) -> (X & zext(C)).
1287	Constant *C;
1288	Value *X;
1289	if (match(V: Src, P: m_OneUse(SubPattern: m_And(L: m_Trunc(Op: m_Value(V&: X)), R: m_Constant(C)))) &&
1290	X->getType() == DestTy)
1291	return BinaryOperator::CreateAnd(V1: X, V2: Builder.CreateZExt(V: C, DestTy));
1292
1293	// zext((trunc(X) & C) ^ C) -> ((X & zext(C)) ^ zext(C)).
1294	Value *And;
1295	if (match(V: Src, P: m_OneUse(SubPattern: m_Xor(L: m_Value(V&: And), R: m_Constant(C)))) &&
1296	match(V: And, P: m_OneUse(SubPattern: m_And(L: m_Trunc(Op: m_Value(V&: X)), R: m_Specific(V: C)))) &&
1297	X->getType() == DestTy) {
1298	Value *ZC = Builder.CreateZExt(V: C, DestTy);
1299	return BinaryOperator::CreateXor(V1: Builder.CreateAnd(LHS: X, RHS: ZC), V2: ZC);
1300	}
1301
1302	// If we are truncating, masking, and then zexting back to the original type,
1303	// that's just a mask. This is not handled by canEvaluateZextd if the
1304	// intermediate values have extra uses. This could be generalized further for
1305	// a non-constant mask operand.
1306	// zext (and (trunc X), C) --> and X, (zext C)
1307	if (match(V: Src, P: m_And(L: m_Trunc(Op: m_Value(V&: X)), R: m_Constant(C))) &&
1308	X->getType() == DestTy) {
1309	Value *ZextC = Builder.CreateZExt(V: C, DestTy);
1310	return BinaryOperator::CreateAnd(V1: X, V2: ZextC);
1311	}
1312
1313	if (match(V: Src, P: m_VScale())) {
1314	if (Zext.getFunction() &&
1315	Zext.getFunction()->hasFnAttribute(Kind: Attribute::VScaleRange)) {
1316	Attribute Attr =
1317	Zext.getFunction()->getFnAttribute(Kind: Attribute::VScaleRange);
1318	if (std::optional<unsigned> MaxVScale = Attr.getVScaleRangeMax()) {
1319	unsigned TypeWidth = Src->getType()->getScalarSizeInBits();
1320	if (Log2_32(Value: *MaxVScale) < TypeWidth)
1321	return replaceInstUsesWith(I&: Zext, V: Builder.CreateVScale(Ty: DestTy));
1322	}
1323	}
1324	}
1325
1326	if (!Zext.hasNonNeg()) {
1327	// If this zero extend is only used by a shift, add nneg flag.
1328	if (Zext.hasOneUse() &&
1329	SrcTy->getScalarSizeInBits() >
1330	Log2_64_Ceil(Value: DestTy->getScalarSizeInBits()) &&
1331	match(V: Zext.user_back(), P: m_Shift(L: m_Value(), R: m_Specific(V: &Zext)))) {
1332	Zext.setNonNeg();
1333	return &Zext;
1334	}
1335
1336	if (isKnownNonNegative(V: Src, SQ: SQ.getWithInstruction(I: &Zext))) {
1337	Zext.setNonNeg();
1338	return &Zext;
1339	}
1340	}
1341
1342	return nullptr;
1343	}
1344
1345	/// Transform (sext icmp) to bitwise / integer operations to eliminate the icmp.
1346	Instruction InstCombinerImpl::transformSExtICmp(ICmpInst Cmp,
1347	SExtInst &Sext) {
1348	Value Op0 = Cmp->getOperand(i_nocapture: `0`), Op1 = Cmp->getOperand(i_nocapture: `1`);
1349	ICmpInst::Predicate Pred = Cmp->getPredicate();
1350
1351	// Don't bother if Op1 isn't of vector or integer type.
1352	if (!Op1->getType()->isIntOrIntVectorTy())
1353	return nullptr;
1354
1355	if (Pred == ICmpInst::ICMP_SLT && match(V: Op1, P: m_ZeroInt())) {
1356	// sext (x <s 0) --> ashr x, 31 (all ones if negative)
1357	Value *Sh = ConstantInt::get(Ty: Op0->getType(),
1358	V: Op0->getType()->getScalarSizeInBits() - `1`);
1359	Value *In = Builder.CreateAShr(LHS: Op0, RHS: Sh, Name: Op0->getName() + ".lobit");
1360	if (In->getType() != Sext.getType())
1361	In = Builder.CreateIntCast(V: In, DestTy: Sext.getType(), isSigned: true /SExt/);
1362
1363	return replaceInstUsesWith(I&: Sext, V: In);
1364	}
1365
1366	if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Val: Op1)) {
1367	// If we know that only one bit of the LHS of the icmp can be set and we
1368	// have an equality comparison with zero or a power of 2, we can transform
1369	// the icmp and sext into bitwise/integer operations.
1370	if (Cmp->hasOneUse() &&
1371	Cmp->isEquality() && (Op1C->isZero() \|\| Op1C->getValue().isPowerOf2())){
1372	KnownBits Known = computeKnownBits(V: Op0, CxtI: &Sext);
1373
1374	APInt KnownZeroMask(~Known.Zero);
1375	if (KnownZeroMask.isPowerOf2()) {
1376	Value *In = Cmp->getOperand(i_nocapture: `0`);
1377
1378	// If the icmp tests for a known zero bit we can constant fold it.
1379	if (!Op1C->isZero() && Op1C->getValue() != KnownZeroMask) {
1380	Value *V = Pred == ICmpInst::ICMP_NE ?
1381	ConstantInt::getAllOnesValue(Ty: Sext.getType()) :
1382	ConstantInt::getNullValue(Ty: Sext.getType());
1383	return replaceInstUsesWith(I&: Sext, V);
1384	}
1385
1386	if (!Op1C->isZero() == (Pred == ICmpInst::ICMP_NE)) {
1387	// sext ((x & 2^n) == 0) -> (x >> n) - 1
1388	// sext ((x & 2^n) != 2^n) -> (x >> n) - 1
1389	unsigned ShiftAmt = KnownZeroMask.countr_zero();
1390	// Perform a right shift to place the desired bit in the LSB.
1391	if (ShiftAmt)
1392	In = Builder.CreateLShr(LHS: In,
1393	RHS: ConstantInt::get(Ty: In->getType(), V: ShiftAmt));
1394
1395	// At this point "In" is either 1 or 0. Subtract 1 to turn
1396	// {1, 0} -> {0, -1}.
1397	In = Builder.CreateAdd(LHS: In,
1398	RHS: ConstantInt::getAllOnesValue(Ty: In->getType()),
1399	Name: "sext");
1400	} else {
1401	// sext ((x & 2^n) != 0) -> (x << bitwidth-n) a>> bitwidth-1
1402	// sext ((x & 2^n) == 2^n) -> (x << bitwidth-n) a>> bitwidth-1
1403	unsigned ShiftAmt = KnownZeroMask.countl_zero();
1404	// Perform a left shift to place the desired bit in the MSB.
1405	if (ShiftAmt)
1406	In = Builder.CreateShl(LHS: In,
1407	RHS: ConstantInt::get(Ty: In->getType(), V: ShiftAmt));
1408
1409	// Distribute the bit over the whole bit width.
1410	In = Builder.CreateAShr(LHS: In, RHS: ConstantInt::get(Ty: In->getType(),
1411	V: KnownZeroMask.getBitWidth() - `1`), Name: "sext");
1412	}
1413
1414	if (Sext.getType() == In->getType())
1415	return replaceInstUsesWith(I&: Sext, V: In);
1416	return CastInst::CreateIntegerCast(S: In, Ty: Sext.getType(), isSigned: true/SExt/);
1417	}
1418	}
1419	}
1420
1421	return nullptr;
1422	}
1423
1424	/// Return true if we can take the specified value and return it as type Ty
1425	/// without inserting any new casts and without changing the value of the common
1426	/// low bits. This is used by code that tries to promote integer operations to
1427	/// a wider types will allow us to eliminate the extension.
1428	///
1429	/// This function works on both vectors and scalars.
1430	///
1431	static bool canEvaluateSExtd(Value V, Type Ty) {
1432	assert(V->getType()->getScalarSizeInBits() < Ty->getScalarSizeInBits() &&
1433	"Can't sign extend type to a smaller type");
1434	if (canAlwaysEvaluateInType(V, Ty))
1435	return true;
1436	if (canNotEvaluateInType(V, Ty))
1437	return false;
1438
1439	auto *I = cast<Instruction>(Val: V);
1440	switch (I->getOpcode()) {
1441	case Instruction::SExt: // sext(sext(x)) -> sext(x)
1442	case Instruction::ZExt: // sext(zext(x)) -> zext(x)
1443	case Instruction::Trunc: // sext(trunc(x)) -> trunc(x) or sext(x)
1444	return true;
1445	case Instruction::And:
1446	case Instruction::Or:
1447	case Instruction::Xor:
1448	case Instruction::Add:
1449	case Instruction::Sub:
1450	case Instruction::Mul:
1451	// These operators can all arbitrarily be extended if their inputs can.
1452	return canEvaluateSExtd(V: I->getOperand(i: `0`), Ty) &&
1453	canEvaluateSExtd(V: I->getOperand(i: `1`), Ty);
1454
1455	//case Instruction::Shl: TODO
1456	//case Instruction::LShr: TODO
1457
1458	case Instruction::Select:
1459	return canEvaluateSExtd(V: I->getOperand(i: `1`), Ty) &&
1460	canEvaluateSExtd(V: I->getOperand(i: `2`), Ty);
1461
1462	case Instruction::PHI: {
1463	// We can change a phi if we can change all operands. Note that we never
1464	// get into trouble with cyclic PHIs here because we only consider
1465	// instructions with a single use.
1466	PHINode *PN = cast<PHINode>(Val: I);
1467	for (Value *IncValue : PN->incoming_values())
1468	if (!canEvaluateSExtd(V: IncValue, Ty)) return false;
1469	return true;
1470	}
1471	default:
1472	// TODO: Can handle more cases here.
1473	break;
1474	}
1475
1476	return false;
1477	}
1478
1479	Instruction *InstCombinerImpl::visitSExt(SExtInst &Sext) {
1480	// If this sign extend is only used by a truncate, let the truncate be
1481	// eliminated before we try to optimize this sext.
1482	if (Sext.hasOneUse() && isa<TruncInst>(Val: Sext.user_back()))
1483	return nullptr;
1484
1485	if (Instruction *I = commonCastTransforms(CI&: Sext))
1486	return I;
1487
1488	Value *Src = Sext.getOperand(i_nocapture: `0`);
1489	Type SrcTy = Src->getType(), DestTy = Sext.getType();
1490	unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
1491	unsigned DestBitSize = DestTy->getScalarSizeInBits();
1492
1493	// If the value being extended is zero or positive, use a zext instead.
1494	if (isKnownNonNegative(V: Src, SQ: SQ.getWithInstruction(I: &Sext))) {
1495	auto CI = CastInst::Create(Instruction::ZExt, S: Src, Ty: DestTy);
1496	CI->setNonNeg(true);
1497	return CI;
1498	}
1499
1500	// Try to extend the entire expression tree to the wide destination type.
1501	if (shouldChangeType(From: SrcTy, To: DestTy) && canEvaluateSExtd(V: Src, Ty: DestTy)) {
1502	// Okay, we can transform this! Insert the new expression now.
1503	LLVM_DEBUG(
1504	dbgs() << "ICE: EvaluateInDifferentType converting expression type"
1505	" to avoid sign extend: "
1506	<< Sext << `'\n'`);
1507	Value Res = EvaluateInDifferentType(V: Src, Ty: DestTy, isSigned: true*);
1508	assert(Res->getType() == DestTy);
1509
1510	// If the high bits are already filled with sign bit, just replace this
1511	// cast with the result.
1512	if (ComputeNumSignBits(Op: Res, CxtI: &Sext) > DestBitSize - SrcBitSize)
1513	return replaceInstUsesWith(I&: Sext, V: Res);
1514
1515	// We need to emit a shl + ashr to do the sign extend.
1516	Value *ShAmt = ConstantInt::get(Ty: DestTy, V: DestBitSize-SrcBitSize);
1517	return BinaryOperator::CreateAShr(V1: Builder.CreateShl(LHS: Res, RHS: ShAmt, Name: "sext"),
1518	V2: ShAmt);
1519	}
1520
1521	Value *X;
1522	if (match(V: Src, P: m_Trunc(Op: m_Value(V&: X)))) {
1523	// If the input has more sign bits than bits truncated, then convert
1524	// directly to final type.
1525	unsigned XBitSize = X->getType()->getScalarSizeInBits();
1526	if (ComputeNumSignBits(Op: X, CxtI: &Sext) > XBitSize - SrcBitSize)
1527	return CastInst::CreateIntegerCast(S: X, Ty: DestTy, / isSigned / true);
1528
1529	// If input is a trunc from the destination type, then convert into shifts.
1530	if (Src->hasOneUse() && X->getType() == DestTy) {
1531	// sext (trunc X) --> ashr (shl X, C), C
1532	Constant *ShAmt = ConstantInt::get(Ty: DestTy, V: DestBitSize - SrcBitSize);
1533	return BinaryOperator::CreateAShr(V1: Builder.CreateShl(LHS: X, RHS: ShAmt), V2: ShAmt);
1534	}
1535
1536	// If we are replacing shifted-in high zero bits with sign bits, convert
1537	// the logic shift to arithmetic shift and eliminate the cast to
1538	// intermediate type:
1539	// sext (trunc (lshr Y, C)) --> sext/trunc (ashr Y, C)
1540	Value *Y;
1541	if (Src->hasOneUse() &&
1542	match(V: X, P: m_LShr(L: m_Value(V&: Y),
1543	R: m_SpecificIntAllowPoison(V: XBitSize - SrcBitSize)))) {
1544	Value *Ashr = Builder.CreateAShr(LHS: Y, RHS: XBitSize - SrcBitSize);
1545	return CastInst::CreateIntegerCast(S: Ashr, Ty: DestTy, / isSigned / true);
1546	}
1547	}
1548
1549	if (auto *Cmp = dyn_cast<ICmpInst>(Val: Src))
1550	return transformSExtICmp(Cmp, Sext);
1551
1552	// If the input is a shl/ashr pair of a same constant, then this is a sign
1553	// extension from a smaller value. If we could trust arbitrary bitwidth
1554	// integers, we could turn this into a truncate to the smaller bit and then
1555	// use a sext for the whole extension. Since we don't, look deeper and check
1556	// for a truncate. If the source and dest are the same type, eliminate the
1557	// trunc and extend and just do shifts. For example, turn:
1558	// %a = trunc i32 %i to i8
1559	// %b = shl i8 %a, C
1560	// %c = ashr i8 %b, C
1561	// %d = sext i8 %c to i32
1562	// into:
1563	// %a = shl i32 %i, 32-(8-C)
1564	// %d = ashr i32 %a, 32-(8-C)
1565	Value A = nullptr*;
1566	// TODO: Eventually this could be subsumed by EvaluateInDifferentType.
1567	Constant BA = nullptr, CA = nullptr;
1568	if (match(V: Src, P: m_AShr(L: m_Shl(L: m_Trunc(Op: m_Value(V&: A)), R: m_Constant(C&: BA)),
1569	R: m_ImmConstant(C&: CA))) &&
1570	BA->isElementWiseEqual(Y: CA) && A->getType() == DestTy) {
1571	Constant *WideCurrShAmt =
1572	ConstantFoldCastOperand(Opcode: Instruction::SExt, C: CA, DestTy, DL);
1573	assert(WideCurrShAmt && "Constant folding of ImmConstant cannot fail");
1574	Constant *NumLowbitsLeft = ConstantExpr::getSub(
1575	C1: ConstantInt::get(Ty: DestTy, V: SrcTy->getScalarSizeInBits()), C2: WideCurrShAmt);
1576	Constant *NewShAmt = ConstantExpr::getSub(
1577	C1: ConstantInt::get(Ty: DestTy, V: DestTy->getScalarSizeInBits()),
1578	C2: NumLowbitsLeft);
1579	NewShAmt =
1580	Constant::mergeUndefsWith(C: Constant::mergeUndefsWith(C: NewShAmt, Other: BA), Other: CA);
1581	A = Builder.CreateShl(LHS: A, RHS: NewShAmt, Name: Sext.getName());
1582	return BinaryOperator::CreateAShr(V1: A, V2: NewShAmt);
1583	}
1584
1585	// Splatting a bit of constant-index across a value:
1586	// sext (ashr (trunc iN X to iM), M-1) to iN --> ashr (shl X, N-M), N-1
1587	// If the dest type is different, use a cast (adjust use check).
1588	if (match(V: Src, P: m_OneUse(SubPattern: m_AShr(L: m_Trunc(Op: m_Value(V&: X)),
1589	R: m_SpecificInt(V: SrcBitSize - `1`))))) {
1590	Type *XTy = X->getType();
1591	unsigned XBitSize = XTy->getScalarSizeInBits();
1592	Constant *ShlAmtC = ConstantInt::get(Ty: XTy, V: XBitSize - SrcBitSize);
1593	Constant *AshrAmtC = ConstantInt::get(Ty: XTy, V: XBitSize - `1`);
1594	if (XTy == DestTy)
1595	return BinaryOperator::CreateAShr(V1: Builder.CreateShl(LHS: X, RHS: ShlAmtC),
1596	V2: AshrAmtC);
1597	if (cast<BinaryOperator>(Val: Src)->getOperand(i_nocapture: `0`)->hasOneUse()) {
1598	Value *Ashr = Builder.CreateAShr(LHS: Builder.CreateShl(LHS: X, RHS: ShlAmtC), RHS: AshrAmtC);
1599	return CastInst::CreateIntegerCast(S: Ashr, Ty: DestTy, / isSigned / true);
1600	}
1601	}
1602
1603	if (match(V: Src, P: m_VScale())) {
1604	if (Sext.getFunction() &&
1605	Sext.getFunction()->hasFnAttribute(Kind: Attribute::VScaleRange)) {
1606	Attribute Attr =
1607	Sext.getFunction()->getFnAttribute(Kind: Attribute::VScaleRange);
1608	if (std::optional<unsigned> MaxVScale = Attr.getVScaleRangeMax())
1609	if (Log2_32(Value: *MaxVScale) < (SrcBitSize - `1`))
1610	return replaceInstUsesWith(I&: Sext, V: Builder.CreateVScale(Ty: DestTy));
1611	}
1612	}
1613
1614	return nullptr;
1615	}
1616
1617	/// Return a Constant for the specified floating-point constant if it fits*
1618	/// in the specified FP type without changing its value.
1619	static bool fitsInFPType(ConstantFP CFP, const* fltSemantics &Sem) {
1620	bool losesInfo;
1621	APFloat F = CFP->getValueAPF();
1622	(void)F.convert(ToSemantics: Sem, RM: APFloat::rmNearestTiesToEven, losesInfo: &losesInfo);
1623	return !losesInfo;
1624	}
1625
1626	static Type shrinkFPConstant(ConstantFP CFP, bool PreferBFloat) {
1627	if (CFP->getType() == Type::getPPC_FP128Ty(C&: CFP->getContext()))
1628	return nullptr; // No constant folding of this.
1629	// See if the value can be truncated to bfloat and then reextended.
1630	if (PreferBFloat && fitsInFPType(CFP, Sem: APFloat::BFloat()))
1631	return Type::getBFloatTy(C&: CFP->getContext());
1632	// See if the value can be truncated to half and then reextended.
1633	if (!PreferBFloat && fitsInFPType(CFP, Sem: APFloat::IEEEhalf()))
1634	return Type::getHalfTy(C&: CFP->getContext());
1635	// See if the value can be truncated to float and then reextended.
1636	if (fitsInFPType(CFP, Sem: APFloat::IEEEsingle()))
1637	return Type::getFloatTy(C&: CFP->getContext());
1638	if (CFP->getType()->isDoubleTy())
1639	return nullptr; // Won't shrink.
1640	if (fitsInFPType(CFP, Sem: APFloat::IEEEdouble()))
1641	return Type::getDoubleTy(C&: CFP->getContext());
1642	// Don't try to shrink to various long double types.
1643	return nullptr;
1644	}
1645
1646	// Determine if this is a vector of ConstantFPs and if so, return the minimal
1647	// type we can safely truncate all elements to.
1648	static Type shrinkFPConstantVector(Value V, bool PreferBFloat) {
1649	auto *CV = dyn_cast<Constant>(Val: V);
1650	auto *CVVTy = dyn_cast<FixedVectorType>(Val: V->getType());
1651	if (!CV \|\| !CVVTy)
1652	return nullptr;
1653
1654	Type MinType = nullptr*;
1655
1656	unsigned NumElts = CVVTy->getNumElements();
1657
1658	// For fixed-width vectors we find the minimal type by looking
1659	// through the constant values of the vector.
1660	for (unsigned i = `0`; i != NumElts; ++i) {
1661	if (isa<UndefValue>(Val: CV->getAggregateElement(Elt: i)))
1662	continue;
1663
1664	auto *CFP = dyn_cast_or_null<ConstantFP>(Val: CV->getAggregateElement(Elt: i));
1665	if (!CFP)
1666	return nullptr;
1667
1668	Type *T = shrinkFPConstant(CFP, PreferBFloat);
1669	if (!T)
1670	return nullptr;
1671
1672	// If we haven't found a type yet or this type has a larger mantissa than
1673	// our previous type, this is our new minimal type.
1674	if (!MinType \|\| T->getFPMantissaWidth() > MinType->getFPMantissaWidth())
1675	MinType = T;
1676	}
1677
1678	// Make a vector type from the minimal type.
1679	return MinType ? FixedVectorType::get(ElementType: MinType, NumElts) : nullptr;
1680	}
1681
1682	/// Find the minimum FP type we can safely truncate to.
1683	static Type getMinimumFPType(Value V, bool PreferBFloat) {
1684	if (auto *FPExt = dyn_cast<FPExtInst>(Val: V))
1685	return FPExt->getOperand(i_nocapture: `0`)->getType();
1686
1687	// If this value is a constant, return the constant in the smallest FP type
1688	// that can accurately represent it. This allows us to turn
1689	// (float)((double)X+2.0) into x+2.0f.
1690	if (auto *CFP = dyn_cast<ConstantFP>(Val: V))
1691	if (Type *T = shrinkFPConstant(CFP, PreferBFloat))
1692	return T;
1693
1694	// Try to shrink scalable and fixed splat vectors.
1695	if (auto *FPC = dyn_cast<Constant>(Val: V))
1696	if (isa<VectorType>(Val: V->getType()))
1697	if (auto *Splat = dyn_cast_or_null<ConstantFP>(Val: FPC->getSplatValue()))
1698	if (Type *T = shrinkFPConstant(CFP: Splat, PreferBFloat))
1699	return T;
1700
1701	// Try to shrink a vector of FP constants. This returns nullptr on scalable
1702	// vectors
1703	if (Type *T = shrinkFPConstantVector(V, PreferBFloat))
1704	return T;
1705
1706	return V->getType();
1707	}
1708
1709	/// Return true if the cast from integer to FP can be proven to be exact for all
1710	/// possible inputs (the conversion does not lose any precision).
1711	static bool isKnownExactCastIntToFP(CastInst &I, InstCombinerImpl &IC) {
1712	CastInst::CastOps Opcode = I.getOpcode();
1713	assert((Opcode == CastInst::SIToFP \|\| Opcode == CastInst::UIToFP) &&
1714	"Unexpected cast");
1715	Value *Src = I.getOperand(i_nocapture: `0`);
1716	Type *SrcTy = Src->getType();
1717	Type *FPTy = I.getType();
1718	bool IsSigned = Opcode == Instruction::SIToFP;
1719	int SrcSize = (int)SrcTy->getScalarSizeInBits() - IsSigned;
1720
1721	// Easy case - if the source integer type has less bits than the FP mantissa,
1722	// then the cast must be exact.
1723	int DestNumSigBits = FPTy->getFPMantissaWidth();
1724	if (SrcSize <= DestNumSigBits)
1725	return true;
1726
1727	// Cast from FP to integer and back to FP is independent of the intermediate
1728	// integer width because of poison on overflow.
1729	Value *F;
1730	if (match(V: Src, P: m_FPToSI(Op: m_Value(V&: F))) \|\| match(V: Src, P: m_FPToUI(Op: m_Value(V&: F)))) {
1731	// If this is uitofp (fptosi F), the source needs an extra bit to avoid
1732	// potential rounding of negative FP input values.
1733	int SrcNumSigBits = F->getType()->getFPMantissaWidth();
1734	if (!IsSigned && match(V: Src, P: m_FPToSI(Op: m_Value())))
1735	SrcNumSigBits++;
1736
1737	// [su]itofp (fpto[su]i F) --> exact if the source type has less or equal
1738	// significant bits than the destination (and make sure neither type is
1739	// weird -- ppc_fp128).
1740	if (SrcNumSigBits > `0` && DestNumSigBits > `0` &&
1741	SrcNumSigBits <= DestNumSigBits)
1742	return true;
1743	}
1744
1745	// TODO:
1746	// Try harder to find if the source integer type has less significant bits.
1747	// For example, compute number of sign bits.
1748	KnownBits SrcKnown = IC.computeKnownBits(V: Src, CxtI: &I);
1749	int SigBits = (int)SrcTy->getScalarSizeInBits() -
1750	SrcKnown.countMinLeadingZeros() -
1751	SrcKnown.countMinTrailingZeros();
1752	if (SigBits <= DestNumSigBits)
1753	return true;
1754
1755	return false;
1756	}
1757
1758	Instruction *InstCombinerImpl::visitFPTrunc(FPTruncInst &FPT) {
1759	if (Instruction *I = commonCastTransforms(CI&: FPT))
1760	return I;
1761
1762	// If we have fptrunc(OpI (fpextend x), (fpextend y)), we would like to
1763	// simplify this expression to avoid one or more of the trunc/extend
1764	// operations if we can do so without changing the numerical results.
1765	//
1766	// The exact manner in which the widths of the operands interact to limit
1767	// what we can and cannot do safely varies from operation to operation, and
1768	// is explained below in the various case statements.
1769	Type *Ty = FPT.getType();
1770	auto *BO = dyn_cast<BinaryOperator>(Val: FPT.getOperand(i_nocapture: `0`));
1771	if (BO && BO->hasOneUse()) {
1772	Type *LHSMinType =
1773	getMinimumFPType(V: BO->getOperand(i_nocapture: `0`), /PreferBFloat=/Ty->isBFloatTy());
1774	Type *RHSMinType =
1775	getMinimumFPType(V: BO->getOperand(i_nocapture: `1`), /PreferBFloat=/Ty->isBFloatTy());
1776	unsigned OpWidth = BO->getType()->getFPMantissaWidth();
1777	unsigned LHSWidth = LHSMinType->getFPMantissaWidth();
1778	unsigned RHSWidth = RHSMinType->getFPMantissaWidth();
1779	unsigned SrcWidth = std::max(a: LHSWidth, b: RHSWidth);
1780	unsigned DstWidth = Ty->getFPMantissaWidth();
1781	switch (BO->getOpcode()) {
1782	default: break;
1783	case Instruction::FAdd:
1784	case Instruction::FSub:
1785	// For addition and subtraction, the infinitely precise result can
1786	// essentially be arbitrarily wide; proving that double rounding
1787	// will not occur because the result of OpI is exact (as we will for
1788	// FMul, for example) is hopeless. However, we can* nonetheless*
1789	// frequently know that double rounding cannot occur (or that it is
1790	// innocuous) by taking advantage of the specific structure of
1791	// infinitely-precise results that admit double rounding.
1792	//
1793	// Specifically, if OpWidth >= 2DstWdith+1 and DstWidth is sufficient*
1794	// to represent both sources, we can guarantee that the double
1795	// rounding is innocuous (See p50 of Figueroa's 2000 PhD thesis,
1796	// "A Rigorous Framework for Fully Supporting the IEEE Standard ..."
1797	// for proof of this fact).
1798	//
1799	// Note: Figueroa does not consider the case where DstFormat !=
1800	// SrcFormat. It's possible (likely even!) that this analysis
1801	// could be tightened for those cases, but they are rare (the main
1802	// case of interest here is (float)((double)float + float)).
1803	if (OpWidth >= `2`*DstWidth+`1` && DstWidth >= SrcWidth) {
1804	Value *LHS = Builder.CreateFPTrunc(V: BO->getOperand(i_nocapture: `0`), DestTy: Ty);
1805	Value *RHS = Builder.CreateFPTrunc(V: BO->getOperand(i_nocapture: `1`), DestTy: Ty);
1806	Instruction *RI = BinaryOperator::Create(Op: BO->getOpcode(), S1: LHS, S2: RHS);
1807	RI->copyFastMathFlags(I: BO);
1808	return RI;
1809	}
1810	break;
1811	case Instruction::FMul:
1812	// For multiplication, the infinitely precise result has at most
1813	// LHSWidth + RHSWidth significant bits; if OpWidth is sufficient
1814	// that such a value can be exactly represented, then no double
1815	// rounding can possibly occur; we can safely perform the operation
1816	// in the destination format if it can represent both sources.
1817	if (OpWidth >= LHSWidth + RHSWidth && DstWidth >= SrcWidth) {
1818	Value *LHS = Builder.CreateFPTrunc(V: BO->getOperand(i_nocapture: `0`), DestTy: Ty);
1819	Value *RHS = Builder.CreateFPTrunc(V: BO->getOperand(i_nocapture: `1`), DestTy: Ty);
1820	return BinaryOperator::CreateFMulFMF(V1: LHS, V2: RHS, FMFSource: BO);
1821	}
1822	break;
1823	case Instruction::FDiv:
1824	// For division, we use again use the bound from Figueroa's
1825	// dissertation. I am entirely certain that this bound can be
1826	// tightened in the unbalanced operand case by an analysis based on
1827	// the diophantine rational approximation bound, but the well-known
1828	// condition used here is a good conservative first pass.
1829	// TODO: Tighten bound via rigorous analysis of the unbalanced case.
1830	if (OpWidth >= `2`*DstWidth && DstWidth >= SrcWidth) {
1831	Value *LHS = Builder.CreateFPTrunc(V: BO->getOperand(i_nocapture: `0`), DestTy: Ty);
1832	Value *RHS = Builder.CreateFPTrunc(V: BO->getOperand(i_nocapture: `1`), DestTy: Ty);
1833	return BinaryOperator::CreateFDivFMF(V1: LHS, V2: RHS, FMFSource: BO);
1834	}
1835	break;
1836	case Instruction::FRem: {
1837	// Remainder is straightforward. Remainder is always exact, so the
1838	// type of OpI doesn't enter into things at all. We simply evaluate
1839	// in whichever source type is larger, then convert to the
1840	// destination type.
1841	if (SrcWidth == OpWidth)
1842	break;
1843	Value LHS, RHS;
1844	if (LHSWidth == SrcWidth) {
1845	LHS = Builder.CreateFPTrunc(V: BO->getOperand(i_nocapture: `0`), DestTy: LHSMinType);
1846	RHS = Builder.CreateFPTrunc(V: BO->getOperand(i_nocapture: `1`), DestTy: LHSMinType);
1847	} else {
1848	LHS = Builder.CreateFPTrunc(V: BO->getOperand(i_nocapture: `0`), DestTy: RHSMinType);
1849	RHS = Builder.CreateFPTrunc(V: BO->getOperand(i_nocapture: `1`), DestTy: RHSMinType);
1850	}
1851
1852	Value *ExactResult = Builder.CreateFRemFMF(L: LHS, R: RHS, FMFSource: BO);
1853	return CastInst::CreateFPCast(S: ExactResult, Ty);
1854	}
1855	}
1856	}
1857
1858	// (fptrunc (fneg x)) -> (fneg (fptrunc x))
1859	Value *X;
1860	Instruction *Op = dyn_cast<Instruction>(Val: FPT.getOperand(i_nocapture: `0`));
1861	if (Op && Op->hasOneUse()) {
1862	FastMathFlags FMF = FPT.getFastMathFlags();
1863	if (auto *FPMO = dyn_cast<FPMathOperator>(Val: Op))
1864	FMF &= FPMO->getFastMathFlags();
1865
1866	if (match(V: Op, P: m_FNeg(X: m_Value(V&: X)))) {
1867	Value *InnerTrunc = Builder.CreateFPTruncFMF(V: X, DestTy: Ty, FMFSource: FMF);
1868	Value *Neg = Builder.CreateFNegFMF(V: InnerTrunc, FMFSource: FMF);
1869	return replaceInstUsesWith(I&: FPT, V: Neg);
1870	}
1871
1872	// If we are truncating a select that has an extended operand, we can
1873	// narrow the other operand and do the select as a narrow op.
1874	Value Cond, X, *Y;
1875	if (match(V: Op, P: m_Select(C: m_Value(V&: Cond), L: m_FPExt(Op: m_Value(V&: X)), R: m_Value(V&: Y))) &&
1876	X->getType() == Ty) {
1877	// fptrunc (select Cond, (fpext X), Y --> select Cond, X, (fptrunc Y)
1878	Value *NarrowY = Builder.CreateFPTruncFMF(V: Y, DestTy: Ty, FMFSource: FMF);
1879	Value *Sel =
1880	Builder.CreateSelectFMF(C: Cond, True: X, False: NarrowY, FMFSource: FMF, Name: "narrow.sel", MDFrom: Op);
1881	return replaceInstUsesWith(I&: FPT, V: Sel);
1882	}
1883	if (match(V: Op, P: m_Select(C: m_Value(V&: Cond), L: m_Value(V&: Y), R: m_FPExt(Op: m_Value(V&: X)))) &&
1884	X->getType() == Ty) {
1885	// fptrunc (select Cond, Y, (fpext X) --> select Cond, (fptrunc Y), X
1886	Value *NarrowY = Builder.CreateFPTruncFMF(V: Y, DestTy: Ty, FMFSource: FMF);
1887	Value *Sel =
1888	Builder.CreateSelectFMF(C: Cond, True: NarrowY, False: X, FMFSource: FMF, Name: "narrow.sel", MDFrom: Op);
1889	return replaceInstUsesWith(I&: FPT, V: Sel);
1890	}
1891	}
1892
1893	if (auto *II = dyn_cast<IntrinsicInst>(Val: FPT.getOperand(i_nocapture: `0`))) {
1894	switch (II->getIntrinsicID()) {
1895	default: break;
1896	case Intrinsic::ceil:
1897	case Intrinsic::fabs:
1898	case Intrinsic::floor:
1899	case Intrinsic::nearbyint:
1900	case Intrinsic::rint:
1901	case Intrinsic::round:
1902	case Intrinsic::roundeven:
1903	case Intrinsic::trunc: {
1904	Value *Src = II->getArgOperand(i: `0`);
1905	if (!Src->hasOneUse())
1906	break;
1907
1908	// Except for fabs, this transformation requires the input of the unary FP
1909	// operation to be itself an fpext from the type to which we're
1910	// truncating.
1911	if (II->getIntrinsicID() != Intrinsic::fabs) {
1912	FPExtInst *FPExtSrc = dyn_cast<FPExtInst>(Val: Src);
1913	if (!FPExtSrc \|\| FPExtSrc->getSrcTy() != Ty)
1914	break;
1915	}
1916
1917	// Do unary FP operation on smaller type.
1918	// (fptrunc (fabs x)) -> (fabs (fptrunc x))
1919	Value *InnerTrunc = Builder.CreateFPTrunc(V: Src, DestTy: Ty);
1920	Function *Overload = Intrinsic::getOrInsertDeclaration(
1921	M: FPT.getModule(), id: II->getIntrinsicID(), Tys: Ty);
1922	SmallVector<OperandBundleDef, `1`> OpBundles;
1923	II->getOperandBundlesAsDefs(Defs&: OpBundles);
1924	CallInst *NewCI =
1925	CallInst::Create(Func: Overload, Args: {InnerTrunc}, Bundles: OpBundles, NameStr: II->getName());
1926	// A normal value may be converted to an infinity. It means that we cannot
1927	// propagate ninf from the intrinsic. So we propagate FMF from fptrunc.
1928	NewCI->copyFastMathFlags(I: &FPT);
1929	return NewCI;
1930	}
1931	}
1932	}
1933
1934	if (Instruction *I = shrinkInsertElt(Trunc&: FPT, Builder))
1935	return I;
1936
1937	Value *Src = FPT.getOperand(i_nocapture: `0`);
1938	if (isa<SIToFPInst>(Val: Src) \|\| isa<UIToFPInst>(Val: Src)) {
1939	auto *FPCast = cast<CastInst>(Val: Src);
1940	if (isKnownExactCastIntToFP(I&: FPCast, IC&: this))
1941	return CastInst::Create(FPCast->getOpcode(), S: FPCast->getOperand(i_nocapture: `0`), Ty);
1942	}
1943
1944	return nullptr;
1945	}
1946
1947	Instruction *InstCombinerImpl::visitFPExt(CastInst &FPExt) {
1948	// If the source operand is a cast from integer to FP and known exact, then
1949	// cast the integer operand directly to the destination type.
1950	Type *Ty = FPExt.getType();
1951	Value *Src = FPExt.getOperand(i_nocapture: `0`);
1952	if (isa<SIToFPInst>(Val: Src) \|\| isa<UIToFPInst>(Val: Src)) {
1953	auto *FPCast = cast<CastInst>(Val: Src);
1954	if (isKnownExactCastIntToFP(I&: FPCast, IC&: this))
1955	return CastInst::Create(FPCast->getOpcode(), S: FPCast->getOperand(i_nocapture: `0`), Ty);
1956	}
1957
1958	return commonCastTransforms(CI&: FPExt);
1959	}
1960
1961	/// fpto{s/u}i({u/s}itofp(X)) --> X or zext(X) or sext(X) or trunc(X)
1962	/// This is safe if the intermediate type has enough bits in its mantissa to
1963	/// accurately represent all values of X. For example, this won't work with
1964	/// i64 -> float -> i64.
1965	Instruction *InstCombinerImpl::foldItoFPtoI(CastInst &FI) {
1966	if (!isa<UIToFPInst>(Val: FI.getOperand(i_nocapture: `0`)) && !isa<SIToFPInst>(Val: FI.getOperand(i_nocapture: `0`)))
1967	return nullptr;
1968
1969	auto *OpI = cast<CastInst>(Val: FI.getOperand(i_nocapture: `0`));
1970	Value *X = OpI->getOperand(i_nocapture: `0`);
1971	Type *XType = X->getType();
1972	Type *DestType = FI.getType();
1973	bool IsOutputSigned = isa<FPToSIInst>(Val: FI);
1974
1975	// Since we can assume the conversion won't overflow, our decision as to
1976	// whether the input will fit in the float should depend on the minimum
1977	// of the input range and output range.
1978
1979	// This means this is also safe for a signed input and unsigned output, since
1980	// a negative input would lead to undefined behavior.
1981	if (!isKnownExactCastIntToFP(I&: OpI, IC&: this)) {
1982	// The first cast may not round exactly based on the source integer width
1983	// and FP width, but the overflow UB rules can still allow this to fold.
1984	// If the destination type is narrow, that means the intermediate FP value
1985	// must be large enough to hold the source value exactly.
1986	// For example, (uint8_t)((float)(uint32_t 16777217) is undefined behavior.
1987	int OutputSize = (int)DestType->getScalarSizeInBits();
1988	if (OutputSize > OpI->getType()->getFPMantissaWidth())
1989	return nullptr;
1990	}
1991
1992	if (DestType->getScalarSizeInBits() > XType->getScalarSizeInBits()) {
1993	bool IsInputSigned = isa<SIToFPInst>(Val: OpI);
1994	if (IsInputSigned && IsOutputSigned)
1995	return new SExtInst (X, DestType);
1996	return new ZExtInst (X, DestType);
1997	}
1998	if (DestType->getScalarSizeInBits() < XType->getScalarSizeInBits())
1999	return new TruncInst (X, DestType);
2000
2001	assert(XType == DestType && "Unexpected types for int to FP to int casts");
2002	return replaceInstUsesWith(I&: FI, V: X);
2003	}
2004
2005	static Instruction *foldFPtoI(Instruction &FI, InstCombiner &IC) {
2006	// fpto{u/s}i non-norm --> 0
2007	FPClassTest Mask =
2008	FI.getOpcode() == Instruction::FPToUI ? fcPosNormal : fcNormal;
2009	KnownFPClass FPClass = computeKnownFPClass(
2010	V: FI.getOperand(i: `0`), InterestedClasses: Mask, SQ: IC.getSimplifyQuery().getWithInstruction(I: &FI));
2011	if (FPClass.isKnownNever(Mask))
2012	return IC.replaceInstUsesWith(I&: FI, V: ConstantInt::getNullValue(Ty: FI.getType()));
2013
2014	return nullptr;
2015	}
2016
2017	Instruction *InstCombinerImpl::visitFPToUI(FPToUIInst &FI) {
2018	if (Instruction *I = foldItoFPtoI(FI))
2019	return I;
2020
2021	if (Instruction I = foldFPtoI(FI, IC&: this))
2022	return I;
2023
2024	return commonCastTransforms(CI&: FI);
2025	}
2026
2027	Instruction *InstCombinerImpl::visitFPToSI(FPToSIInst &FI) {
2028	if (Instruction *I = foldItoFPtoI(FI))
2029	return I;
2030
2031	if (Instruction I = foldFPtoI(FI, IC&: this))
2032	return I;
2033
2034	return commonCastTransforms(CI&: FI);
2035	}
2036
2037	Instruction *InstCombinerImpl::visitUIToFP(CastInst &CI) {
2038	if (Instruction *R = commonCastTransforms(CI))
2039	return R;
2040	if (!CI.hasNonNeg() && isKnownNonNegative(V: CI.getOperand(i_nocapture: `0`), SQ)) {
2041	CI.setNonNeg();
2042	return &CI;
2043	}
2044	return nullptr;
2045	}
2046
2047	Instruction *InstCombinerImpl::visitSIToFP(CastInst &CI) {
2048	if (Instruction *R = commonCastTransforms(CI))
2049	return R;
2050	if (isKnownNonNegative(V: CI.getOperand(i_nocapture: `0`), SQ)) {
2051	auto *UI =
2052	CastInst::Create(Instruction::UIToFP, S: CI.getOperand(i_nocapture: `0`), Ty: CI.getType());
2053	UI->setNonNeg(true);
2054	return UI;
2055	}
2056	return nullptr;
2057	}
2058
2059	Instruction *InstCombinerImpl::visitIntToPtr(IntToPtrInst &CI) {
2060	// If the source integer type is not the intptr_t type for this target, do a
2061	// trunc or zext to the intptr_t type, then inttoptr of it. This allows the
2062	// cast to be exposed to other transforms.
2063	unsigned AS = CI.getAddressSpace();
2064	if (CI.getOperand(i_nocapture: `0`)->getType()->getScalarSizeInBits() !=
2065	DL.getPointerSizeInBits(AS)) {
2066	Type *Ty = CI.getOperand(i_nocapture: `0`)->getType()->getWithNewType(
2067	EltTy: DL.getIntPtrType(C&: CI.getContext(), AddressSpace: AS));
2068	Value *P = Builder.CreateZExtOrTrunc(V: CI.getOperand(i_nocapture: `0`), DestTy: Ty);
2069	return new IntToPtrInst (P, CI.getType());
2070	}
2071
2072	if (Instruction *I = commonCastTransforms(CI))
2073	return I;
2074
2075	return nullptr;
2076	}
2077
2078	Value InstCombinerImpl::foldPtrToIntOfGEP(Type IntTy, Value *Ptr) {
2079	// Look through chain of one-use GEPs.
2080	Type *PtrTy = Ptr->getType();
2081	SmallVector<GEPOperator *> GEPs;
2082	while (true) {
2083	auto *GEP = dyn_cast<GEPOperator>(Val: Ptr);
2084	if (!GEP \|\| !GEP->hasOneUse())
2085	break;
2086	GEPs.push_back(Elt: GEP);
2087	Ptr = GEP->getPointerOperand();
2088	}
2089
2090	// Don't handle case where GEP converts from pointer to vector.
2091	if (GEPs.empty() \|\| PtrTy != Ptr->getType())
2092	return nullptr;
2093
2094	// Check whether we know the integer value of the base pointer.
2095	Value *Res;
2096	Type *IdxTy = DL.getIndexType(PtrTy);
2097	if (match(V: Ptr, P: m_OneUse(SubPattern: m_IntToPtr(Op: m_Value(V&: Res)))) &&
2098	Res->getType() == IntTy && IntTy == IdxTy) {
2099	// pass
2100	} else if (isa<ConstantPointerNull>(Val: Ptr)) {
2101	Res = Constant::getNullValue(Ty: IdxTy);
2102	} else {
2103	return nullptr;
2104	}
2105
2106	// Perform the entire operation on integers instead.
2107	for (GEPOperator *GEP : reverse(C&: GEPs)) {
2108	Value *Offset = EmitGEPOffset(GEP);
2109	Res = Builder.CreateAdd(LHS: Res, RHS: Offset, Name: "", HasNUW: GEP->hasNoUnsignedWrap());
2110	}
2111	return Builder.CreateZExtOrTrunc(V: Res, DestTy: IntTy);
2112	}
2113
2114	Instruction *InstCombinerImpl::visitPtrToInt(PtrToIntInst &CI) {
2115	// If the destination integer type is not the intptr_t type for this target,
2116	// do a ptrtoint to intptr_t then do a trunc or zext. This allows the cast
2117	// to be exposed to other transforms.
2118	Value *SrcOp = CI.getPointerOperand();
2119	Type *SrcTy = SrcOp->getType();
2120	Type *Ty = CI.getType();
2121	unsigned AS = CI.getPointerAddressSpace();
2122	unsigned TySize = Ty->getScalarSizeInBits();
2123	unsigned PtrSize = DL.getPointerSizeInBits(AS);
2124	if (TySize != PtrSize) {
2125	Type *IntPtrTy =
2126	SrcTy->getWithNewType(EltTy: DL.getIntPtrType(C&: CI.getContext(), AddressSpace: AS));
2127	Value *P = Builder.CreatePtrToInt(V: SrcOp, DestTy: IntPtrTy);
2128	return CastInst::CreateIntegerCast(S: P, Ty, /isSigned=/false);
2129	}
2130
2131	// (ptrtoint (ptrmask P, M))
2132	// -> (and (ptrtoint P), M)
2133	// This is generally beneficial as `and` is better supported than `ptrmask`.
2134	Value Ptr, Mask;
2135	if (match(V: SrcOp, P: m_OneUse(SubPattern: m_Intrinsic<Intrinsic::ptrmask>(Op0: m_Value(V&: Ptr),
2136	Op1: m_Value(V&: Mask)))) &&
2137	Mask->getType() == Ty)
2138	return BinaryOperator::CreateAnd(V1: Builder.CreatePtrToInt(V: Ptr, DestTy: Ty), V2: Mask);
2139
2140	if (Value *V = foldPtrToIntOfGEP(IntTy: Ty, Ptr: SrcOp))
2141	return replaceInstUsesWith(I&: CI, V);
2142
2143	Value Vec, Scalar, *Index;
2144	if (match(V: SrcOp, P: m_OneUse(SubPattern: m_InsertElt(Val: m_IntToPtr(Op: m_Value(V&: Vec)),
2145	Elt: m_Value(V&: Scalar), Idx: m_Value(V&: Index)))) &&
2146	Vec->getType() == Ty) {
2147	assert(Vec->getType()->getScalarSizeInBits() == PtrSize && "Wrong type");
2148	// Convert the scalar to int followed by insert to eliminate one cast:
2149	// p2i (ins (i2p Vec), Scalar, Index --> ins Vec, (p2i Scalar), Index
2150	Value *NewCast = Builder.CreatePtrToInt(V: Scalar, DestTy: Ty->getScalarType());
2151	return InsertElementInst::Create(Vec, NewElt: NewCast, Idx: Index);
2152	}
2153
2154	return commonCastTransforms(CI);
2155	}
2156
2157	/// This input value (which is known to have vector type) is being zero extended
2158	/// or truncated to the specified vector type. Since the zext/trunc is done
2159	/// using an integer type, we have a (bitcast(cast(bitcast))) pattern,
2160	/// endianness will impact which end of the vector that is extended or
2161	/// truncated.
2162	///
2163	/// A vector is always stored with index 0 at the lowest address, which
2164	/// corresponds to the most significant bits for a big endian stored integer and
2165	/// the least significant bits for little endian. A trunc/zext of an integer
2166	/// impacts the big end of the integer. Thus, we need to add/remove elements at
2167	/// the front of the vector for big endian targets, and the back of the vector
2168	/// for little endian targets.
2169	///
2170	/// Try to replace it with a shuffle (and vector/vector bitcast) if possible.
2171	///
2172	/// The source and destination vector types may have different element types.
2173	static Instruction *
2174	optimizeVectorResizeWithIntegerBitCasts(Value InVal, VectorType DestTy,
2175	InstCombinerImpl &IC) {
2176	// We can only do this optimization if the output is a multiple of the input
2177	// element size, or the input is a multiple of the output element size.
2178	// Convert the input type to have the same element type as the output.
2179	VectorType *SrcTy = cast<VectorType>(Val: InVal->getType());
2180
2181	if (SrcTy->getElementType() != DestTy->getElementType()) {
2182	// The input types don't need to be identical, but for now they must be the
2183	// same size. There is no specific reason we couldn't handle things like
2184	// <4 x i16> -> <4 x i32> by bitcasting to <2 x i32> but haven't gotten
2185	// there yet.
2186	if (SrcTy->getElementType()->getPrimitiveSizeInBits() !=
2187	DestTy->getElementType()->getPrimitiveSizeInBits())
2188	return nullptr;
2189
2190	SrcTy =
2191	FixedVectorType::get(ElementType: DestTy->getElementType(),
2192	NumElts: cast<FixedVectorType>(Val: SrcTy)->getNumElements());
2193	InVal = IC.Builder.CreateBitCast(V: InVal, DestTy: SrcTy);
2194	}
2195
2196	bool IsBigEndian = IC.getDataLayout().isBigEndian();
2197	unsigned SrcElts = cast<FixedVectorType>(Val: SrcTy)->getNumElements();
2198	unsigned DestElts = cast<FixedVectorType>(Val: DestTy)->getNumElements();
2199
2200	assert(SrcElts != DestElts && "Element counts should be different.");
2201
2202	// Now that the element types match, get the shuffle mask and RHS of the
2203	// shuffle to use, which depends on whether we're increasing or decreasing the
2204	// size of the input.
2205	auto ShuffleMaskStorage = llvm::to_vector<`16`>(Range: llvm::seq<int>(Begin: `0`, End: SrcElts));
2206	ArrayRef<int> ShuffleMask;
2207	Value *V2;
2208
2209	if (SrcElts > DestElts) {
2210	// If we're shrinking the number of elements (rewriting an integer
2211	// truncate), just shuffle in the elements corresponding to the least
2212	// significant bits from the input and use poison as the second shuffle
2213	// input.
2214	V2 = PoisonValue::get(T: SrcTy);
2215	// Make sure the shuffle mask selects the "least significant bits" by
2216	// keeping elements from back of the src vector for big endian, and from the
2217	// front for little endian.
2218	ShuffleMask = ShuffleMaskStorage;
2219	if (IsBigEndian)
2220	ShuffleMask = ShuffleMask.take_back(N: DestElts);
2221	else
2222	ShuffleMask = ShuffleMask.take_front(N: DestElts);
2223	} else {
2224	// If we're increasing the number of elements (rewriting an integer zext),
2225	// shuffle in all of the elements from InVal. Fill the rest of the result
2226	// elements with zeros from a constant zero.
2227	V2 = Constant::getNullValue(Ty: SrcTy);
2228	// Use first elt from V2 when indicating zero in the shuffle mask.
2229	uint32_t NullElt = SrcElts;
2230	// Extend with null values in the "most significant bits" by adding elements
2231	// in front of the src vector for big endian, and at the back for little
2232	// endian.
2233	unsigned DeltaElts = DestElts - SrcElts;
2234	if (IsBigEndian)
2235	ShuffleMaskStorage.insert(I: ShuffleMaskStorage.begin(), NumToInsert: DeltaElts, Elt: NullElt);
2236	else
2237	ShuffleMaskStorage.append(NumInputs: DeltaElts, Elt: NullElt);
2238	ShuffleMask = ShuffleMaskStorage;
2239	}
2240
2241	return new ShuffleVectorInst (InVal, V2, ShuffleMask);
2242	}
2243
2244	static bool isMultipleOfTypeSize(unsigned Value, Type *Ty) {
2245	return Value % Ty->getPrimitiveSizeInBits() == `0`;
2246	}
2247
2248	static unsigned getTypeSizeIndex(unsigned Value, Type *Ty) {
2249	return Value / Ty->getPrimitiveSizeInBits();
2250	}
2251
2252	/// V is a value which is inserted into a vector of VecEltTy.
2253	/// Look through the value to see if we can decompose it into
2254	/// insertions into the vector. See the example in the comment for
2255	/// OptimizeIntegerToVectorInsertions for the pattern this handles.
2256	/// The type of V is always a non-zero multiple of VecEltTy's size.
2257	/// Shift is the number of bits between the lsb of V and the lsb of
2258	/// the vector.
2259	///
2260	/// This returns false if the pattern can't be matched or true if it can,
2261	/// filling in Elements with the elements found here.
2262	static bool collectInsertionElements(Value V, unsigned* Shift,
2263	SmallVectorImpl<Value *> &Elements,
2264	Type VecEltTy, bool* isBigEndian) {
2265	assert(isMultipleOfTypeSize(Shift, VecEltTy) &&
2266	"Shift should be a multiple of the element type size");
2267
2268	// Undef values never contribute useful bits to the result.
2269	if (isa<UndefValue>(Val: V)) return true;
2270
2271	// If we got down to a value of the right type, we win, try inserting into the
2272	// right element.
2273	if (V->getType() == VecEltTy) {
2274	// Inserting null doesn't actually insert any elements.
2275	if (Constant *C = dyn_cast<Constant>(Val: V))
2276	if (C->isNullValue())
2277	return true;
2278
2279	unsigned ElementIndex = getTypeSizeIndex(Value: Shift, Ty: VecEltTy);
2280	if (isBigEndian)
2281	ElementIndex = Elements.size() - ElementIndex - `1`;
2282
2283	// Fail if multiple elements are inserted into this slot.
2284	if (Elements [ElementIndex])
2285	return false;
2286
2287	Elements [ElementIndex] = V;
2288	return true;
2289	}
2290
2291	if (Constant *C = dyn_cast<Constant>(Val: V)) {
2292	// Figure out the # elements this provides, and bitcast it or slice it up
2293	// as required.
2294	unsigned NumElts = getTypeSizeIndex(Value: C->getType()->getPrimitiveSizeInBits(),
2295	Ty: VecEltTy);
2296	// If the constant is the size of a vector element, we just need to bitcast
2297	// it to the right type so it gets properly inserted.
2298	if (NumElts == `1`)
2299	return collectInsertionElements(V: ConstantExpr::getBitCast(C, Ty: VecEltTy),
2300	Shift, Elements, VecEltTy, isBigEndian);
2301
2302	// Okay, this is a constant that covers multiple elements. Slice it up into
2303	// pieces and insert each element-sized piece into the vector.
2304	if (!isa<IntegerType>(Val: C->getType()))
2305	C = ConstantExpr::getBitCast(C, Ty: IntegerType::get(C&: V->getContext(),
2306	NumBits: C->getType()->getPrimitiveSizeInBits()));
2307	unsigned ElementSize = VecEltTy->getPrimitiveSizeInBits();
2308	Type *ElementIntTy = IntegerType::get(C&: C->getContext(), NumBits: ElementSize);
2309
2310	for (unsigned i = `0`; i != NumElts; ++i) {
2311	unsigned ShiftI = i * ElementSize;
2312	Constant *Piece = ConstantFoldBinaryInstruction(
2313	Opcode: Instruction::LShr, V1: C, V2: ConstantInt::get(Ty: C->getType(), V: ShiftI));
2314	if (!Piece)
2315	return false;
2316
2317	Piece = ConstantExpr::getTrunc(C: Piece, Ty: ElementIntTy);
2318	if (!collectInsertionElements(V: Piece, Shift: ShiftI + Shift, Elements, VecEltTy,
2319	isBigEndian))
2320	return false;
2321	}
2322	return true;
2323	}
2324
2325	if (!V->hasOneUse()) return false;
2326
2327	Instruction *I = dyn_cast<Instruction>(Val: V);
2328	if (!I) return false;
2329	switch (I->getOpcode()) {
2330	default: return false; // Unhandled case.
2331	case Instruction::BitCast:
2332	if (I->getOperand(i: `0`)->getType()->isVectorTy())
2333	return false;
2334	return collectInsertionElements(V: I->getOperand(i: `0`), Shift, Elements, VecEltTy,
2335	isBigEndian);
2336	case Instruction::ZExt:
2337	if (!isMultipleOfTypeSize(
2338	Value: I->getOperand(i: `0`)->getType()->getPrimitiveSizeInBits(),
2339	Ty: VecEltTy))
2340	return false;
2341	return collectInsertionElements(V: I->getOperand(i: `0`), Shift, Elements, VecEltTy,
2342	isBigEndian);
2343	case Instruction::Or:
2344	return collectInsertionElements(V: I->getOperand(i: `0`), Shift, Elements, VecEltTy,
2345	isBigEndian) &&
2346	collectInsertionElements(V: I->getOperand(i: `1`), Shift, Elements, VecEltTy,
2347	isBigEndian);
2348	case Instruction::Shl: {
2349	// Must be shifting by a constant that is a multiple of the element size.
2350	ConstantInt *CI = dyn_cast<ConstantInt>(Val: I->getOperand(i: `1`));
2351	if (!CI) return false;
2352	Shift += CI->getZExtValue();
2353	if (!isMultipleOfTypeSize(Value: Shift, Ty: VecEltTy)) return false;
2354	return collectInsertionElements(V: I->getOperand(i: `0`), Shift, Elements, VecEltTy,
2355	isBigEndian);
2356	}
2357
2358	}
2359	}
2360
2361
2362	/// If the input is an 'or' instruction, we may be doing shifts and ors to
2363	/// assemble the elements of the vector manually.
2364	/// Try to rip the code out and replace it with insertelements. This is to
2365	/// optimize code like this:
2366	///
2367	/// %tmp37 = bitcast float %inc to i32
2368	/// %tmp38 = zext i32 %tmp37 to i64
2369	/// %tmp31 = bitcast float %inc5 to i32
2370	/// %tmp32 = zext i32 %tmp31 to i64
2371	/// %tmp33 = shl i64 %tmp32, 32
2372	/// %ins35 = or i64 %tmp33, %tmp38
2373	/// %tmp43 = bitcast i64 %ins35 to <2 x float>
2374	///
2375	/// Into two insertelements that do "buildvector{%inc, %inc5}".
2376	static Value *optimizeIntegerToVectorInsertions(BitCastInst &CI,
2377	InstCombinerImpl &IC) {
2378	auto *DestVecTy = cast<FixedVectorType>(Val: CI.getType());
2379	Value *IntInput = CI.getOperand(i_nocapture: `0`);
2380
2381	// if the int input is just an undef value do not try to optimize to vector
2382	// insertions as it will prevent undef propagation
2383	if (isa<UndefValue>(Val: IntInput))
2384	return nullptr;
2385
2386	SmallVector<Value*, `8`> Elements(DestVecTy->getNumElements());
2387	if (!collectInsertionElements(V: IntInput, Shift: `0`, Elements,
2388	VecEltTy: DestVecTy->getElementType(),
2389	isBigEndian: IC.getDataLayout().isBigEndian()))
2390	return nullptr;
2391
2392	// If we succeeded, we know that all of the element are specified by Elements
2393	// or are zero if Elements has a null entry. Recast this as a set of
2394	// insertions.
2395	Value *Result = Constant::getNullValue(Ty: CI.getType());
2396	for (unsigned i = `0`, e = Elements.size(); i != e; ++i) {
2397	if (!Elements [i]) continue; // Unset element.
2398
2399	Result = IC.Builder.CreateInsertElement(Vec: Result, NewElt: Elements [i],
2400	Idx: IC.Builder.getInt32(C: i));
2401	}
2402
2403	return Result;
2404	}
2405
2406	/// Canonicalize scalar bitcasts of extracted elements into a bitcast of the
2407	/// vector followed by extract element. The backend tends to handle bitcasts of
2408	/// vectors better than bitcasts of scalars because vector registers are
2409	/// usually not type-specific like scalar integer or scalar floating-point.
2410	static Instruction *canonicalizeBitCastExtElt(BitCastInst &BitCast,
2411	InstCombinerImpl &IC) {
2412	Value VecOp, Index;
2413	if (!match(V: BitCast.getOperand(i_nocapture: `0`),
2414	P: m_OneUse(SubPattern: m_ExtractElt(Val: m_Value(V&: VecOp), Idx: m_Value(V&: Index)))))
2415	return nullptr;
2416
2417	// The bitcast must be to a vectorizable type, otherwise we can't make a new
2418	// type to extract from.
2419	Type *DestType = BitCast.getType();
2420	VectorType *VecType = cast<VectorType>(Val: VecOp->getType());
2421	if (VectorType::isValidElementType(ElemTy: DestType)) {
2422	auto *NewVecType = VectorType::get(ElementType: DestType, Other: VecType);
2423	auto *NewBC = IC.Builder.CreateBitCast(V: VecOp, DestTy: NewVecType, Name: "bc");
2424	return ExtractElementInst::Create(Vec: NewBC, Idx: Index);
2425	}
2426
2427	// Only solve DestType is vector to avoid inverse transform in visitBitCast.
2428	// bitcast (extractelement <1 x elt>, dest) -> bitcast(<1 x elt>, dest)
2429	auto *FixedVType = dyn_cast<FixedVectorType>(Val: VecType);
2430	if (DestType->isVectorTy() && FixedVType && FixedVType->getNumElements() == `1`)
2431	return CastInst::Create(Instruction::BitCast, S: VecOp, Ty: DestType);
2432
2433	return nullptr;
2434	}
2435
2436	/// Change the type of a bitwise logic operation if we can eliminate a bitcast.
2437	static Instruction *foldBitCastBitwiseLogic(BitCastInst &BitCast,
2438	InstCombiner::BuilderTy &Builder) {
2439	Type *DestTy = BitCast.getType();
2440	BinaryOperator *BO;
2441
2442	if (!match(V: BitCast.getOperand(i_nocapture: `0`), P: m_OneUse(SubPattern: m_BinOp(I&: BO))) \|\|
2443	!BO->isBitwiseLogicOp())
2444	return nullptr;
2445
2446	// FIXME: This transform is restricted to vector types to avoid backend
2447	// problems caused by creating potentially illegal operations. If a fix-up is
2448	// added to handle that situation, we can remove this check.
2449	if (!DestTy->isVectorTy() \|\| !BO->getType()->isVectorTy())
2450	return nullptr;
2451
2452	if (DestTy->isFPOrFPVectorTy()) {
2453	Value X, Y;
2454	// bitcast(logic(bitcast(X), bitcast(Y))) -> bitcast'(logic(bitcast'(X), Y))
2455	if (match(V: BO->getOperand(i_nocapture: `0`), P: m_OneUse(SubPattern: m_BitCast(Op: m_Value(V&: X)))) &&
2456	match(V: BO->getOperand(i_nocapture: `1`), P: m_OneUse(SubPattern: m_BitCast(Op: m_Value(V&: Y))))) {
2457	if (X->getType()->isFPOrFPVectorTy() &&
2458	Y->getType()->isIntOrIntVectorTy()) {
2459	Value *CastedOp =
2460	Builder.CreateBitCast(V: BO->getOperand(i_nocapture: `0`), DestTy: Y->getType());
2461	Value *NewBO = Builder.CreateBinOp(Opc: BO->getOpcode(), LHS: CastedOp, RHS: Y);
2462	return CastInst::CreateBitOrPointerCast(S: NewBO, Ty: DestTy);
2463	}
2464	if (X->getType()->isIntOrIntVectorTy() &&
2465	Y->getType()->isFPOrFPVectorTy()) {
2466	Value *CastedOp =
2467	Builder.CreateBitCast(V: BO->getOperand(i_nocapture: `1`), DestTy: X->getType());
2468	Value *NewBO = Builder.CreateBinOp(Opc: BO->getOpcode(), LHS: CastedOp, RHS: X);
2469	return CastInst::CreateBitOrPointerCast(S: NewBO, Ty: DestTy);
2470	}
2471	}
2472	return nullptr;
2473	}
2474
2475	if (!DestTy->isIntOrIntVectorTy())
2476	return nullptr;
2477
2478	Value *X;
2479	if (match(V: BO->getOperand(i_nocapture: `0`), P: m_OneUse(SubPattern: m_BitCast(Op: m_Value(V&: X)))) &&
2480	X->getType() == DestTy && !isa<Constant>(Val: X)) {
2481	// bitcast(logic(bitcast(X), Y)) --> logic'(X, bitcast(Y))
2482	Value *CastedOp1 = Builder.CreateBitCast(V: BO->getOperand(i_nocapture: `1`), DestTy);
2483	return BinaryOperator::Create(Op: BO->getOpcode(), S1: X, S2: CastedOp1);
2484	}
2485
2486	if (match(V: BO->getOperand(i_nocapture: `1`), P: m_OneUse(SubPattern: m_BitCast(Op: m_Value(V&: X)))) &&
2487	X->getType() == DestTy && !isa<Constant>(Val: X)) {
2488	// bitcast(logic(Y, bitcast(X))) --> logic'(bitcast(Y), X)
2489	Value *CastedOp0 = Builder.CreateBitCast(V: BO->getOperand(i_nocapture: `0`), DestTy);
2490	return BinaryOperator::Create(Op: BO->getOpcode(), S1: CastedOp0, S2: X);
2491	}
2492
2493	// Canonicalize vector bitcasts to come before vector bitwise logic with a
2494	// constant. This eases recognition of special constants for later ops.
2495	// Example:
2496	// icmp u/s (a ^ signmask), (b ^ signmask) --> icmp s/u a, b
2497	Constant *C;
2498	if (match(V: BO->getOperand(i_nocapture: `1`), P: m_Constant(C))) {
2499	// bitcast (logic X, C) --> logic (bitcast X, C')
2500	Value *CastedOp0 = Builder.CreateBitCast(V: BO->getOperand(i_nocapture: `0`), DestTy);
2501	Value *CastedC = Builder.CreateBitCast(V: C, DestTy);
2502	return BinaryOperator::Create(Op: BO->getOpcode(), S1: CastedOp0, S2: CastedC);
2503	}
2504
2505	return nullptr;
2506	}
2507
2508	/// Change the type of a select if we can eliminate a bitcast.
2509	static Instruction *foldBitCastSelect(BitCastInst &BitCast,
2510	InstCombiner::BuilderTy &Builder) {
2511	Value Cond, TVal, *FVal;
2512	if (!match(V: BitCast.getOperand(i_nocapture: `0`),
2513	P: m_OneUse(SubPattern: m_Select(C: m_Value(V&: Cond), L: m_Value(V&: TVal), R: m_Value(V&: FVal)))))
2514	return nullptr;
2515
2516	// A vector select must maintain the same number of elements in its operands.
2517	Type *CondTy = Cond->getType();
2518	Type *DestTy = BitCast.getType();
2519	if (auto *CondVTy = dyn_cast<VectorType>(Val: CondTy))
2520	if (!DestTy->isVectorTy() \|\|
2521	CondVTy->getElementCount() !=
2522	cast<VectorType>(Val: DestTy)->getElementCount())
2523	return nullptr;
2524
2525	// FIXME: This transform is restricted from changing the select between
2526	// scalars and vectors to avoid backend problems caused by creating
2527	// potentially illegal operations. If a fix-up is added to handle that
2528	// situation, we can remove this check.
2529	if (DestTy->isVectorTy() != TVal->getType()->isVectorTy())
2530	return nullptr;
2531
2532	auto *Sel = cast<Instruction>(Val: BitCast.getOperand(i_nocapture: `0`));
2533	Value *X;
2534	if (match(V: TVal, P: m_OneUse(SubPattern: m_BitCast(Op: m_Value(V&: X)))) && X->getType() == DestTy &&
2535	!isa<Constant>(Val: X)) {
2536	// bitcast(select(Cond, bitcast(X), Y)) --> select'(Cond, X, bitcast(Y))
2537	Value *CastedVal = Builder.CreateBitCast(V: FVal, DestTy);
2538	return SelectInst::Create(C: Cond, S1: X, S2: CastedVal, NameStr: "", InsertBefore: nullptr, MDFrom: Sel);
2539	}
2540
2541	if (match(V: FVal, P: m_OneUse(SubPattern: m_BitCast(Op: m_Value(V&: X)))) && X->getType() == DestTy &&
2542	!isa<Constant>(Val: X)) {
2543	// bitcast(select(Cond, Y, bitcast(X))) --> select'(Cond, bitcast(Y), X)
2544	Value *CastedVal = Builder.CreateBitCast(V: TVal, DestTy);
2545	return SelectInst::Create(C: Cond, S1: CastedVal, S2: X, NameStr: "", InsertBefore: nullptr, MDFrom: Sel);
2546	}
2547
2548	return nullptr;
2549	}
2550
2551	/// Check if all users of CI are StoreInsts.
2552	static bool hasStoreUsersOnly(CastInst &CI) {
2553	for (User *U : CI.users()) {
2554	if (!isa<StoreInst>(Val: U))
2555	return false;
2556	}
2557	return true;
2558	}
2559
2560	/// This function handles following case
2561	///
2562	/// A -> B cast
2563	/// PHI
2564	/// B -> A cast
2565	///
2566	/// All the related PHI nodes can be replaced by new PHI nodes with type A.
2567	/// The uses of \p CI can be changed to the new PHI node corresponding to \p PN.
2568	Instruction *InstCombinerImpl::optimizeBitCastFromPhi(CastInst &CI,
2569	PHINode *PN) {
2570	// BitCast used by Store can be handled in InstCombineLoadStoreAlloca.cpp.
2571	if (hasStoreUsersOnly(CI))
2572	return nullptr;
2573
2574	Value *Src = CI.getOperand(i_nocapture: `0`);
2575	Type SrcTy = Src->getType(); // Type B*
2576	Type DestTy = CI.getType(); // Type A*
2577
2578	SmallVector<PHINode *, `4`> PhiWorklist;
2579	SmallSetVector<PHINode *, `4`> OldPhiNodes;
2580
2581	// Find all of the A->B casts and PHI nodes.
2582	// We need to inspect all related PHI nodes, but PHIs can be cyclic, so
2583	// OldPhiNodes is used to track all known PHI nodes, before adding a new
2584	// PHI to PhiWorklist, it is checked against and added to OldPhiNodes first.
2585	PhiWorklist.push_back(Elt: PN);
2586	OldPhiNodes.insert(X: PN);
2587	while (!PhiWorklist.empty()) {
2588	auto *OldPN = PhiWorklist.pop_back_val();
2589	for (Value *IncValue : OldPN->incoming_values()) {
2590	if (isa<Constant>(Val: IncValue))
2591	continue;
2592
2593	if (auto *LI = dyn_cast<LoadInst>(Val: IncValue)) {
2594	// If there is a sequence of one or more load instructions, each loaded
2595	// value is used as address of later load instruction, bitcast is
2596	// necessary to change the value type, don't optimize it. For
2597	// simplicity we give up if the load address comes from another load.
2598	Value *Addr = LI->getOperand(i_nocapture: `0`);
2599	if (Addr == &CI \|\| isa<LoadInst>(Val: Addr))
2600	return nullptr;
2601	// Don't tranform "load <256 x i32>, <256 x i32>" to*
2602	// "load x86_amx, x86_amx", because x86_amx* is invalid.*
2603	// TODO: Remove this check when bitcast between vector and x86_amx
2604	// is replaced with a specific intrinsic.
2605	if (DestTy->isX86_AMXTy())
2606	return nullptr;
2607	if (LI->hasOneUse() && LI->isSimple())
2608	continue;
2609	// If a LoadInst has more than one use, changing the type of loaded
2610	// value may create another bitcast.
2611	return nullptr;
2612	}
2613
2614	if (auto *PNode = dyn_cast<PHINode>(Val: IncValue)) {
2615	if (OldPhiNodes.insert(X: PNode))
2616	PhiWorklist.push_back(Elt: PNode);
2617	continue;
2618	}
2619
2620	auto *BCI = dyn_cast<BitCastInst>(Val: IncValue);
2621	// We can't handle other instructions.
2622	if (!BCI)
2623	return nullptr;
2624
2625	// Verify it's a A->B cast.
2626	Type *TyA = BCI->getOperand(i_nocapture: `0`)->getType();
2627	Type *TyB = BCI->getType();
2628	if (TyA != DestTy \|\| TyB != SrcTy)
2629	return nullptr;
2630	}
2631	}
2632
2633	// Check that each user of each old PHI node is something that we can
2634	// rewrite, so that all of the old PHI nodes can be cleaned up afterwards.
2635	for (auto *OldPN : OldPhiNodes) {
2636	for (User *V : OldPN->users()) {
2637	if (auto *SI = dyn_cast<StoreInst>(Val: V)) {
2638	if (!SI->isSimple() \|\| SI->getOperand(i_nocapture: `0`) != OldPN)
2639	return nullptr;
2640	} else if (auto *BCI = dyn_cast<BitCastInst>(Val: V)) {
2641	// Verify it's a B->A cast.
2642	Type *TyB = BCI->getOperand(i_nocapture: `0`)->getType();
2643	Type *TyA = BCI->getType();
2644	if (TyA != DestTy \|\| TyB != SrcTy)
2645	return nullptr;
2646	} else if (auto *PHI = dyn_cast<PHINode>(Val: V)) {
2647	// As long as the user is another old PHI node, then even if we don't
2648	// rewrite it, the PHI web we're considering won't have any users
2649	// outside itself, so it'll be dead.
2650	if (!OldPhiNodes.contains(key: PHI))
2651	return nullptr;
2652	} else {
2653	return nullptr;
2654	}
2655	}
2656	}
2657
2658	// For each old PHI node, create a corresponding new PHI node with a type A.
2659	SmallDenseMap<PHINode , PHINode > NewPNodes;
2660	for (auto *OldPN : OldPhiNodes) {
2661	Builder.SetInsertPoint(OldPN);
2662	PHINode *NewPN = Builder.CreatePHI(Ty: DestTy, NumReservedValues: OldPN->getNumOperands());
2663	NewPNodes [OldPN] = NewPN;
2664	}
2665
2666	// Fill in the operands of new PHI nodes.
2667	for (auto *OldPN : OldPhiNodes) {
2668	PHINode *NewPN = NewPNodes [OldPN];
2669	for (unsigned j = `0`, e = OldPN->getNumOperands(); j != e; ++j) {
2670	Value *V = OldPN->getOperand(i_nocapture: j);
2671	Value NewV = nullptr*;
2672	if (auto *C = dyn_cast<Constant>(Val: V)) {
2673	NewV = ConstantExpr::getBitCast(C, Ty: DestTy);
2674	} else if (auto *LI = dyn_cast<LoadInst>(Val: V)) {
2675	// Explicitly perform load combine to make sure no opposing transform
2676	// can remove the bitcast in the meantime and trigger an infinite loop.
2677	Builder.SetInsertPoint(LI);
2678	NewV = combineLoadToNewType(LI&: *LI, NewTy: DestTy);
2679	// Remove the old load and its use in the old phi, which itself becomes
2680	// dead once the whole transform finishes.
2681	replaceInstUsesWith(I&: *LI, V: PoisonValue::get(T: LI->getType()));
2682	eraseInstFromFunction(I&: *LI);
2683	} else if (auto *BCI = dyn_cast<BitCastInst>(Val: V)) {
2684	NewV = BCI->getOperand(i_nocapture: `0`);
2685	} else if (auto *PrevPN = dyn_cast<PHINode>(Val: V)) {
2686	NewV = NewPNodes [PrevPN];
2687	}
2688	assert(NewV);
2689	NewPN->addIncoming(V: NewV, BB: OldPN->getIncomingBlock(i: j));
2690	}
2691	}
2692
2693	// Traverse all accumulated PHI nodes and process its users,
2694	// which are Stores and BitcCasts. Without this processing
2695	// NewPHI nodes could be replicated and could lead to extra
2696	// moves generated after DeSSA.
2697	// If there is a store with type B, change it to type A.
2698
2699
2700	// Replace users of BitCast B->A with NewPHI. These will help
2701	// later to get rid off a closure formed by OldPHI nodes.
2702	Instruction RetVal = nullptr*;
2703	for (auto *OldPN : OldPhiNodes) {
2704	PHINode *NewPN = NewPNodes [OldPN];
2705	for (User *V : make_early_inc_range(Range: OldPN->users())) {
2706	if (auto *SI = dyn_cast<StoreInst>(Val: V)) {
2707	assert(SI->isSimple() && SI->getOperand(`0`) == OldPN);
2708	Builder.SetInsertPoint(SI);
2709	auto *NewBC =
2710	cast<BitCastInst>(Val: Builder.CreateBitCast(V: NewPN, DestTy: SrcTy));
2711	SI->setOperand(i_nocapture: `0`, Val_nocapture: NewBC);
2712	Worklist.push(I: SI);
2713	assert(hasStoreUsersOnly(*NewBC));
2714	}
2715	else if (auto *BCI = dyn_cast<BitCastInst>(Val: V)) {
2716	Type *TyB = BCI->getOperand(i_nocapture: `0`)->getType();
2717	Type *TyA = BCI->getType();
2718	assert(TyA == DestTy && TyB == SrcTy);
2719	(void) TyA;
2720	(void) TyB;
2721	Instruction I = replaceInstUsesWith(I&: BCI, V: NewPN);
2722	if (BCI == &CI)
2723	RetVal = I;
2724	} else if (auto *PHI = dyn_cast<PHINode>(Val: V)) {
2725	assert(OldPhiNodes.contains(PHI));
2726	(void) PHI;
2727	} else {
2728	llvm_unreachable("all uses should be handled");
2729	}
2730	}
2731	}
2732
2733	return RetVal;
2734	}
2735
2736	/// Fold (bitcast (or (and (bitcast X to int), signmask), nneg Y) to fp) to
2737	/// copysign((bitcast Y to fp), X)
2738	static Value *foldCopySignIdioms(BitCastInst &CI,
2739	InstCombiner::BuilderTy &Builder,
2740	const SimplifyQuery &SQ) {
2741	Value X, Y;
2742	Type *FTy = CI.getType();
2743	if (!FTy->isFPOrFPVectorTy())
2744	return nullptr;
2745	if (!match(V: &CI, P: m_ElementWiseBitCast(Op: m_c_Or(
2746	L: m_And(L: m_ElementWiseBitCast(Op: m_Value(V&: X)), R: m_SignMask()),
2747	R: m_Value(V&: Y)))))
2748	return nullptr;
2749	if (X->getType() != FTy)
2750	return nullptr;
2751	if (!isKnownNonNegative(V: Y, SQ))
2752	return nullptr;
2753
2754	return Builder.CreateCopySign(LHS: Builder.CreateBitCast(V: Y, DestTy: FTy), RHS: X);
2755	}
2756
2757	Instruction *InstCombinerImpl::visitBitCast(BitCastInst &CI) {
2758	// If the operands are integer typed then apply the integer transforms,
2759	// otherwise just apply the common ones.
2760	Value *Src = CI.getOperand(i_nocapture: `0`);
2761	Type *SrcTy = Src->getType();
2762	Type *DestTy = CI.getType();
2763
2764	// Get rid of casts from one type to the same type. These are useless and can
2765	// be replaced by the operand.
2766	if (DestTy == Src->getType())
2767	return replaceInstUsesWith(I&: CI, V: Src);
2768
2769	if (isa<FixedVectorType>(Val: DestTy)) {
2770	if (isa<IntegerType>(Val: SrcTy)) {
2771	// If this is a cast from an integer to vector, check to see if the input
2772	// is a trunc or zext of a bitcast from vector. If so, we can replace all
2773	// the casts with a shuffle and (potentially) a bitcast.
2774	if (isa<TruncInst>(Val: Src) \|\| isa<ZExtInst>(Val: Src)) {
2775	CastInst *SrcCast = cast<CastInst>(Val: Src);
2776	if (BitCastInst *BCIn = dyn_cast<BitCastInst>(Val: SrcCast->getOperand(i_nocapture: `0`)))
2777	if (isa<VectorType>(Val: BCIn->getOperand(i_nocapture: `0`)->getType()))
2778	if (Instruction *I = optimizeVectorResizeWithIntegerBitCasts(
2779	InVal: BCIn->getOperand(i_nocapture: `0`), DestTy: cast<VectorType>(Val: DestTy), IC&: *this))
2780	return I;
2781	}
2782
2783	// If the input is an 'or' instruction, we may be doing shifts and ors to
2784	// assemble the elements of the vector manually. Try to rip the code out
2785	// and replace it with insertelements.
2786	if (Value V = optimizeIntegerToVectorInsertions(CI, IC&: this))
2787	return replaceInstUsesWith(I&: CI, V);
2788	}
2789	}
2790
2791	if (FixedVectorType *SrcVTy = dyn_cast<FixedVectorType>(Val: SrcTy)) {
2792	if (SrcVTy->getNumElements() == `1`) {
2793	// If our destination is not a vector, then make this a straight
2794	// scalar-scalar cast.
2795	if (!DestTy->isVectorTy()) {
2796	Value *Elem =
2797	Builder.CreateExtractElement(Vec: Src,
2798	Idx: Constant::getNullValue(Ty: Type::getInt32Ty(C&: CI.getContext())));
2799	return CastInst::Create(Instruction::BitCast, S: Elem, Ty: DestTy);
2800	}
2801
2802	// Otherwise, see if our source is an insert. If so, then use the scalar
2803	// component directly:
2804	// bitcast (inselt <1 x elt> V, X, 0) to <n x m> --> bitcast X to <n x m>
2805	if (auto *InsElt = dyn_cast<InsertElementInst>(Val: Src))
2806	return new BitCastInst (InsElt->getOperand(i_nocapture: `1`), DestTy);
2807	}
2808
2809	// Convert an artificial vector insert into more analyzable bitwise logic.
2810	unsigned BitWidth = DestTy->getScalarSizeInBits();
2811	Value X, Y;
2812	uint64_t IndexC;
2813	if (match(V: Src, P: m_OneUse(SubPattern: m_InsertElt(Val: m_OneUse(SubPattern: m_BitCast(Op: m_Value(V&: X))),
2814	Elt: m_Value(V&: Y), Idx: m_ConstantInt(V&: IndexC)))) &&
2815	DestTy->isIntegerTy() && X->getType() == DestTy &&
2816	Y->getType()->isIntegerTy() && isDesirableIntType(BitWidth)) {
2817	// Adjust for big endian - the LSBs are at the high index.
2818	if (DL.isBigEndian())
2819	IndexC = SrcVTy->getNumElements() - `1` - IndexC;
2820
2821	// We only handle (endian-normalized) insert to index 0. Any other insert
2822	// would require a left-shift, so that is an extra instruction.
2823	if (IndexC == `0`) {
2824	// bitcast (inselt (bitcast X), Y, 0) --> or (and X, MaskC), (zext Y)
2825	unsigned EltWidth = Y->getType()->getScalarSizeInBits();
2826	APInt MaskC = APInt::getHighBitsSet(numBits: BitWidth, hiBitsSet: BitWidth - EltWidth);
2827	Value *AndX = Builder.CreateAnd(LHS: X, RHS: MaskC);
2828	Value *ZextY = Builder.CreateZExt(V: Y, DestTy);
2829	return BinaryOperator::CreateOr(V1: AndX, V2: ZextY);
2830	}
2831	}
2832	}
2833
2834	if (auto *Shuf = dyn_cast<ShuffleVectorInst>(Val: Src)) {
2835	// Okay, we have (bitcast (shuffle ..)). Check to see if this is
2836	// a bitcast to a vector with the same # elts.
2837	Value *ShufOp0 = Shuf->getOperand(i_nocapture: `0`);
2838	Value *ShufOp1 = Shuf->getOperand(i_nocapture: `1`);
2839	auto ShufElts = cast<VectorType>(Val: Shuf->getType())->getElementCount();
2840	auto SrcVecElts = cast<VectorType>(Val: ShufOp0->getType())->getElementCount();
2841	if (Shuf->hasOneUse() && DestTy->isVectorTy() &&
2842	cast<VectorType>(Val: DestTy)->getElementCount() == ShufElts &&
2843	ShufElts == SrcVecElts) {
2844	BitCastInst *Tmp;
2845	// If either of the operands is a cast from CI.getType(), then
2846	// evaluating the shuffle in the casted destination's type will allow
2847	// us to eliminate at least one cast.
2848	if (((Tmp = dyn_cast<BitCastInst>(Val: ShufOp0)) &&
2849	Tmp->getOperand(i_nocapture: `0`)->getType() == DestTy) \|\|
2850	((Tmp = dyn_cast<BitCastInst>(Val: ShufOp1)) &&
2851	Tmp->getOperand(i_nocapture: `0`)->getType() == DestTy)) {
2852	Value *LHS = Builder.CreateBitCast(V: ShufOp0, DestTy);
2853	Value *RHS = Builder.CreateBitCast(V: ShufOp1, DestTy);
2854	// Return a new shuffle vector. Use the same element ID's, as we
2855	// know the vector types match #elts.
2856	return new ShuffleVectorInst (LHS, RHS, Shuf->getShuffleMask());
2857	}
2858	}
2859
2860	// A bitcasted-to-scalar and byte/bit reversing shuffle is better recognized
2861	// as a byte/bit swap:
2862	// bitcast <N x i8> (shuf X, undef, <N, N-1,...0>) -> bswap (bitcast X)
2863	// bitcast <N x i1> (shuf X, undef, <N, N-1,...0>) -> bitreverse (bitcast X)
2864	if (DestTy->isIntegerTy() && ShufElts.getKnownMinValue() % `2` == `0` &&
2865	Shuf->hasOneUse() && Shuf->isReverse()) {
2866	unsigned IntrinsicNum = `0`;
2867	if (DL.isLegalInteger(Width: DestTy->getScalarSizeInBits()) &&
2868	SrcTy->getScalarSizeInBits() == `8`) {
2869	IntrinsicNum = Intrinsic::bswap;
2870	} else if (SrcTy->getScalarSizeInBits() == `1`) {
2871	IntrinsicNum = Intrinsic::bitreverse;
2872	}
2873	if (IntrinsicNum != `0`) {
2874	assert(ShufOp0->getType() == SrcTy && "Unexpected shuffle mask");
2875	assert(match(ShufOp1, m_Undef()) && "Unexpected shuffle op");
2876	Function *BswapOrBitreverse = Intrinsic::getOrInsertDeclaration(
2877	M: CI.getModule(), id: IntrinsicNum, Tys: DestTy);
2878	Value *ScalarX = Builder.CreateBitCast(V: ShufOp0, DestTy);
2879	return CallInst::Create(Func: BswapOrBitreverse, Args: {ScalarX});
2880	}
2881	}
2882	}
2883
2884	// Handle the A->B->A cast, and there is an intervening PHI node.
2885	if (PHINode *PN = dyn_cast<PHINode>(Val: Src))
2886	if (Instruction *I = optimizeBitCastFromPhi(CI, PN))
2887	return I;
2888
2889	if (Instruction I = canonicalizeBitCastExtElt(BitCast&: CI, IC&: this))
2890	return I;
2891
2892	if (Instruction *I = foldBitCastBitwiseLogic(BitCast&: CI, Builder))
2893	return I;
2894
2895	if (Instruction *I = foldBitCastSelect(BitCast&: CI, Builder))
2896	return I;
2897
2898	if (Value *V = foldCopySignIdioms(CI, Builder, SQ: SQ.getWithInstruction(I: &CI)))
2899	return replaceInstUsesWith(I&: CI, V);
2900
2901	return commonCastTransforms(CI);
2902	}
2903
2904	Instruction *InstCombinerImpl::visitAddrSpaceCast(AddrSpaceCastInst &CI) {
2905	return commonCastTransforms(CI);
2906	}
2907

Browse the source code of llvm_projects/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp