InstCombineCasts.cpp source code [llvm_projects/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp]

1	//===- InstCombineCasts.cpp -----------------------------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This file implements the visit functions for cast operations.
10	//
11	//===----------------------------------------------------------------------===//
12
13	#include "InstCombineInternal.h"
14	#include "llvm/ADT/APInt.h"
15	#include "llvm/ADT/DenseMap.h"
16	#include "llvm/ADT/STLExtras.h"
17	#include "llvm/ADT/STLFunctionalExtras.h"
18	#include "llvm/ADT/SetVector.h"
19	#include "llvm/ADT/SmallVector.h"
20	#include "llvm/Analysis/ConstantFolding.h"
21	#include "llvm/IR/DataLayout.h"
22	#include "llvm/IR/DebugInfo.h"
23	#include "llvm/IR/Instruction.h"
24	#include "llvm/IR/PatternMatch.h"
25	#include "llvm/IR/Type.h"
26	#include "llvm/IR/Value.h"
27	#include "llvm/Support/KnownBits.h"
28	#include "llvm/Transforms/InstCombine/InstCombiner.h"
29	#include <iterator>
30	#include <optional>
31
32	using namespace llvm;
33	using namespace PatternMatch;
34
35	#define DEBUG_TYPE "instcombine"
36
37	using EvaluatedMap = SmallDenseMap<Value , Value , `8`>;
38
39	static Value EvaluateInDifferentTypeImpl(Value V, Type Ty, bool* isSigned,
40	InstCombinerImpl &IC,
41	EvaluatedMap &Processed) {
42	// Since we cover transformation of instructions with multiple users, we might
43	// come to the same node via multiple paths. We should not create a
44	// replacement for every single one of them though.
45	if (Value *Result = Processed.lookup(Val: V))
46	return Result;
47
48	if (Constant *C = dyn_cast<Constant>(Val: V))
49	return ConstantFoldIntegerCast(C, DestTy: Ty, IsSigned: isSigned, DL: IC.getDataLayout());
50
51	// Otherwise, it must be an instruction.
52	Instruction *I = cast<Instruction>(Val: V);
53	Instruction Res = nullptr*;
54	unsigned Opc = I->getOpcode();
55	switch (Opc) {
56	case Instruction::Add:
57	case Instruction::Sub:
58	case Instruction::Mul:
59	case Instruction::And:
60	case Instruction::Or:
61	case Instruction::Xor:
62	case Instruction::AShr:
63	case Instruction::LShr:
64	case Instruction::Shl:
65	case Instruction::UDiv:
66	case Instruction::URem: {
67	Value *LHS = EvaluateInDifferentTypeImpl(V: I->getOperand(i: `0`), Ty, isSigned, IC,
68	Processed);
69	Value *RHS = EvaluateInDifferentTypeImpl(V: I->getOperand(i: `1`), Ty, isSigned, IC,
70	Processed);
71	Res = BinaryOperator::Create(Op: (Instruction::BinaryOps)Opc, S1: LHS, S2: RHS);
72	if (Opc == Instruction::LShr \|\| Opc == Instruction::AShr)
73	Res->setIsExact(I->isExact());
74	break;
75	}
76	case Instruction::Trunc:
77	case Instruction::ZExt:
78	case Instruction::SExt:
79	// If the source type of the cast is the type we're trying for then we can
80	// just return the source. There's no need to insert it because it is not
81	// new.
82	if (I->getOperand(i: `0`)->getType() == Ty)
83	return I->getOperand(i: `0`);
84
85	// Otherwise, must be the same type of cast, so just reinsert a new one.
86	// This also handles the case of zext(trunc(x)) -> zext(x).
87	Res = CastInst::CreateIntegerCast(S: I->getOperand(i: `0`), Ty,
88	isSigned: Opc == Instruction::SExt);
89	if (auto *Trunc = dyn_cast<TruncInst>(Val: I)) {
90	if (auto *NewTrunc = dyn_cast<TruncInst>(Val: Res)) {
91	if (Trunc->getType()->getScalarSizeInBits() <=
92	Ty->getScalarSizeInBits()) {
93	NewTrunc->setHasNoSignedWrap(Trunc->hasNoSignedWrap());
94	NewTrunc->setHasNoUnsignedWrap(Trunc->hasNoUnsignedWrap());
95	}
96	} else if (auto *NewZExt = dyn_cast<ZExtInst>(Val: Res)) {
97	if (Trunc->hasNoUnsignedWrap())
98	NewZExt->setNonNeg();
99	}
100	}
101	break;
102	case Instruction::Select: {
103	Value *True = EvaluateInDifferentTypeImpl(V: I->getOperand(i: `1`), Ty, isSigned,
104	IC, Processed);
105	Value *False = EvaluateInDifferentTypeImpl(V: I->getOperand(i: `2`), Ty, isSigned,
106	IC, Processed);
107	Res = SelectInst::Create(C: I->getOperand(i: `0`), S1: True, S2: False);
108	break;
109	}
110	case Instruction::PHI: {
111	PHINode *OPN = cast<PHINode>(Val: I);
112	PHINode *NPN = PHINode::Create(Ty, NumReservedValues: OPN->getNumIncomingValues());
113	for (unsigned i = `0`, e = OPN->getNumIncomingValues(); i != e; ++i) {
114	Value *V = EvaluateInDifferentTypeImpl(V: OPN->getIncomingValue(i), Ty,
115	isSigned, IC, Processed);
116	NPN->addIncoming(V, BB: OPN->getIncomingBlock(i));
117	}
118	Res = NPN;
119	break;
120	}
121	case Instruction::FPToUI:
122	case Instruction::FPToSI:
123	Res = CastInst::Create(static_cast<Instruction::CastOps>(Opc),
124	S: I->getOperand(i: `0`), Ty);
125	break;
126	case Instruction::Call:
127	if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: I)) {
128	switch (II->getIntrinsicID()) {
129	default:
130	llvm_unreachable("Unsupported call!");
131	case Intrinsic::vscale: {
132	Function *Fn = Intrinsic::getOrInsertDeclaration(
133	M: I->getModule(), id: Intrinsic::vscale, OverloadTys: {Ty});
134	Res = CallInst::Create(Ty: Fn->getFunctionType(), F: Fn);
135	break;
136	}
137	case Intrinsic::umin:
138	case Intrinsic::umax:
139	case Intrinsic::smin:
140	case Intrinsic::smax: {
141	Value *Op0 = EvaluateInDifferentTypeImpl(V: II->getArgOperand(i: `0`), Ty,
142	isSigned, IC, Processed);
143	Value *Op1 = EvaluateInDifferentTypeImpl(V: II->getArgOperand(i: `1`), Ty,
144	isSigned, IC, Processed);
145	Function *Fn = Intrinsic::getOrInsertDeclaration(
146	M: I->getModule(), id: II->getIntrinsicID(), OverloadTys: {Ty});
147	Res = CallInst::Create(Ty: Fn->getFunctionType(), Func: Fn, Args: {Op0, Op1});
148	break;
149	}
150	case Intrinsic::abs: {
151	Value *Arg = EvaluateInDifferentTypeImpl(V: II->getArgOperand(i: `0`), Ty,
152	isSigned, IC, Processed);
153	Function *Fn = Intrinsic::getOrInsertDeclaration(
154	M: I->getModule(), id: II->getIntrinsicID(), OverloadTys: {Ty});
155	Res = CallInst::Create(Ty: Fn->getFunctionType(), Func: Fn,
156	Args: {Arg, ConstantInt::getFalse(Context&: I->getContext())});
157	break;
158	}
159	}
160	}
161	break;
162	case Instruction::ShuffleVector: {
163	auto *ScalarTy = cast<VectorType>(Val: Ty)->getElementType();
164	auto *VTy = cast<VectorType>(Val: I->getOperand(i: `0`)->getType());
165	auto *FixedTy = VectorType::get(ElementType: ScalarTy, EC: VTy->getElementCount());
166	Value *Op0 = EvaluateInDifferentTypeImpl(V: I->getOperand(i: `0`), Ty: FixedTy,
167	isSigned, IC, Processed);
168	Value *Op1 = EvaluateInDifferentTypeImpl(V: I->getOperand(i: `1`), Ty: FixedTy,
169	isSigned, IC, Processed);
170	Res = new ShuffleVectorInst (Op0, Op1,
171	cast<ShuffleVectorInst>(Val: I)->getShuffleMask());
172	break;
173	}
174	default:
175	// TODO: Can handle more cases here.
176	llvm_unreachable("Unreachable!");
177	}
178
179	Res->takeName(V: I);
180	Value *Result = IC.InsertNewInstWith(New: Res, Old: I->getIterator());
181	// There is no need in keeping track of the old value/new value relationship
182	// when we have only one user, we came have here from that user and no-one
183	// else cares.
184	if (!V->hasOneUse())
185	Processed [V] = Result;
186
187	return Result;
188	}
189
190	/// Given an expression that CanEvaluateTruncated or CanEvaluateSExtd returns
191	/// true for, actually insert the code to evaluate the expression.
192	Value InstCombinerImpl::EvaluateInDifferentType(Value V, Type *Ty,
193	bool isSigned) {
194	EvaluatedMap Processed;
195	return EvaluateInDifferentTypeImpl(V, Ty, isSigned, IC&: *this, Processed);
196	}
197
198	Instruction::CastOps
199	InstCombinerImpl::isEliminableCastPair(const CastInst *CI1,
200	const CastInst *CI2) {
201	Type *SrcTy = CI1->getSrcTy();
202	Type *MidTy = CI1->getDestTy();
203	Type *DstTy = CI2->getDestTy();
204
205	Instruction::CastOps firstOp = CI1->getOpcode();
206	Instruction::CastOps secondOp = CI2->getOpcode();
207	Type *SrcIntPtrTy =
208	SrcTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(SrcTy) : nullptr;
209	Type *DstIntPtrTy =
210	DstTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(DstTy) : nullptr;
211	unsigned Res = CastInst::isEliminableCastPair(firstOpcode: firstOp, secondOpcode: secondOp, SrcTy, MidTy,
212	DstTy, DL: &DL);
213
214	// We don't want to form an inttoptr or ptrtoint that converts to an integer
215	// type that differs from the pointer size.
216	if ((Res == Instruction::IntToPtr && SrcTy != DstIntPtrTy) \|\|
217	(Res == Instruction::PtrToInt && DstTy != SrcIntPtrTy))
218	Res = `0`;
219
220	return Instruction::CastOps(Res);
221	}
222
223	/// Implement the transforms common to all CastInst visitors.
224	Instruction *InstCombinerImpl::commonCastTransforms(CastInst &CI) {
225	Value *Src = CI.getOperand(i_nocapture: `0`);
226	Type *Ty = CI.getType();
227
228	if (Value *Res =
229	simplifyCastInst(CastOpc: CI.getOpcode(), Op: Src, Ty, Q: SQ.getWithInstruction(I: &CI)))
230	return replaceInstUsesWith(I&: CI, V: Res);
231
232	// Try to eliminate a cast of a cast.
233	if (auto CSrc = dyn_cast<CastInst>(Val: Src)) { // A->B->C cast*
234	if (Instruction::CastOps NewOpc = isEliminableCastPair(CI1: CSrc, CI2: &CI)) {
235	// The first cast (CSrc) is eliminable so we need to fix up or replace
236	// the second cast (CI). CSrc will then have a good chance of being dead.
237	auto *Res = CastInst::Create(NewOpc, S: CSrc->getOperand(i_nocapture: `0`), Ty);
238	// Point debug users of the dying cast to the new one.
239	if (CSrc->hasOneUse())
240	replaceAllDbgUsesWith(From&: CSrc, To&: Res, DomPoint&: CI, DT);
241	return Res;
242	}
243	}
244
245	if (auto *Sel = dyn_cast<SelectInst>(Val: Src)) {
246	// We are casting a select. Try to fold the cast into the select if the
247	// select does not have a compare instruction with matching operand types
248	// or the select is likely better done in a narrow type.
249	// Creating a select with operands that are different sizes than its
250	// condition may inhibit other folds and lead to worse codegen.
251	auto *Cmp = dyn_cast<CmpInst>(Val: Sel->getCondition());
252	if (!Cmp \|\| Cmp->getOperand(i_nocapture: `0`)->getType() != Sel->getType() \|\|
253	(CI.getOpcode() == Instruction::Trunc &&
254	shouldChangeType(From: CI.getSrcTy(), To: CI.getType()))) {
255
256	// If it's a bitcast involving vectors, make sure it has the same number
257	// of elements on both sides.
258	if (CI.getOpcode() != Instruction::BitCast \|\|
259	match(V: &CI, P: m_ElementWiseBitCast(Op: m_Value()))) {
260	if (Instruction *NV = FoldOpIntoSelect(Op&: CI, SI: Sel)) {
261	replaceAllDbgUsesWith(From&: Sel, To&: NV, DomPoint&: CI, DT);
262	return NV;
263	}
264	}
265	}
266	}
267
268	// If we are casting a PHI, then fold the cast into the PHI.
269	if (auto *PN = dyn_cast<PHINode>(Val: Src)) {
270	// Don't do this if it would create a PHI node with an illegal type from a
271	// legal type.
272	if (!Src->getType()->isIntegerTy() \|\| !CI.getType()->isIntegerTy() \|\|
273	shouldChangeType(From: CI.getSrcTy(), To: CI.getType()))
274	if (Instruction *NV = foldOpIntoPhi(I&: CI, PN))
275	return NV;
276	}
277
278	// Canonicalize a unary shuffle after the cast if neither operation changes
279	// the size or element size of the input vector.
280	// TODO: We could allow size-changing ops if that doesn't harm codegen.
281	// cast (shuffle X, Mask) --> shuffle (cast X), Mask
282	Value *X;
283	ArrayRef<int> Mask;
284	if (match(V: Src, P: m_OneUse(SubPattern: m_Shuffle(v1: m_Value(V&: X), v2: m_Poison(), mask: m_Mask (Mask))))) {
285	// TODO: Allow scalable vectors?
286	auto *SrcTy = dyn_cast<FixedVectorType>(Val: X->getType());
287	auto *DestTy = dyn_cast<FixedVectorType>(Val: Ty);
288	if (SrcTy && DestTy &&
289	SrcTy->getNumElements() == DestTy->getNumElements() &&
290	SrcTy->getPrimitiveSizeInBits() == DestTy->getPrimitiveSizeInBits()) {
291	Value *CastX = Builder.CreateCast(Op: CI.getOpcode(), V: X, DestTy);
292	return new ShuffleVectorInst (CastX, Mask);
293	}
294	}
295
296	return nullptr;
297	}
298
299	namespace {
300
301	/// Helper class for evaluating whether a value can be computed in a different
302	/// type without changing its value. Used by cast simplification transforms.
303	class TypeEvaluationHelper {
304	public:
305	/// Return true if we can evaluate the specified expression tree as type Ty
306	/// instead of its larger type, and arrive with the same value.
307	/// This is used by code that tries to eliminate truncates.
308	[[nodiscard]] static bool canEvaluateTruncated(Value V, Type Ty,
309	InstCombinerImpl &IC,
310	Instruction *CxtI);
311
312	/// Determine if the specified value can be computed in the specified wider
313	/// type and produce the same low bits. If not, return false.
314	[[nodiscard]] static bool canEvaluateZExtd(Value V, Type Ty,
315	unsigned &BitsToClear,
316	InstCombinerImpl &IC,
317	Instruction *CxtI);
318
319	/// Return true if we can take the specified value and return it as type Ty
320	/// without inserting any new casts and without changing the value of the
321	/// common low bits.
322	[[nodiscard]] static bool canEvaluateSExtd(Value V, Type Ty);
323
324	private:
325	/// Constants and extensions/truncates from the destination type are always
326	/// free to be evaluated in that type.
327	[[nodiscard]] static bool canAlwaysEvaluateInType(Value V, Type Ty);
328
329	/// Check if we traversed all the users of the multi-use values we've seen.
330	[[nodiscard]] bool allPendingVisited() const {
331	return llvm::all_of(Range: Pending,
332	P: [this](Value V) { return* Visited.contains(Val: V); });
333	}
334
335	/// A generic wrapper for canEvaluate recursions to inject visitation*
336	/// tracking and enforce correct multi-use value evaluations.
337	[[nodiscard]] bool
338	canEvaluate(Value V, Type Ty,
339	llvm::function_ref<bool(Value , Type Type)> Pred) {
340	if (canAlwaysEvaluateInType(V, Ty))
341	return true;
342
343	auto *I = dyn_cast<Instruction>(Val: V);
344
345	if (I == nullptr)
346	return false;
347
348	// We insert false by default to return false when we encounter user loops.
349	const auto [It, Inserted] = Visited.insert(KV: {V, false});
350
351	// There are three possible cases for us having information on this value
352	// in the Visited map:
353	// 1. We properly checked it and concluded that we can evaluate it (true)
354	// 2. We properly checked it and concluded that we can't (false)
355	// 3. We started to check it, but during the recursive traversal we came
356	// back to it.
357	//
358	// For cases 1 and 2, we can safely return the stored result. For case 3, we
359	// can potentially have a situation where we can evaluate recursive user
360	// chains, but that can be quite tricky to do properly and isntead, we
361	// return false.
362	//
363	// In any case, we should return whatever was there in the map to begin
364	// with.
365	if (!Inserted)
366	return It ->getSecond();
367
368	// We can easily make a decision about single-user values whether they can
369	// be evaluated in a different type or not, we came from that user. This is
370	// not as simple for multi-user values.
371	//
372	// In general, we have the following case (inverted control-flow, users are
373	// at the top):
374	//
375	// Cast %A
376	// ____\|
377	// /
378	// %A = Use %B, %C
379	// ________\| \|
380	// / \|
381	// %B = Use %D \|
382	// ________\| \|
383	// / \|
384	// %D = Use %C \|
385	// ________\|___\|
386	// /
387	// %C = ...
388	//
389	// In this case, when we check %A, %B and %D, we are confident that we can
390	// make the decision here and now, since we came from their only users.
391	//
392	// For %C, it is harder. We come there twice, and when we come the first
393	// time, it's hard to tell if we will visit the second user (technically
394	// it's not hard, but we might need a lot of repetitive checks with non-zero
395	// cost).
396	//
397	// In the case above, we are allowed to evaluate %C in different type
398	// because all of it users were part of the traversal.
399	//
400	// In the following case, however, we can't make this conclusion:
401	//
402	// Cast %A
403	// ____\|
404	// /
405	// %A = Use %B, %C
406	// ________\| \|
407	// / \|
408	// %B = Use %D \|
409	// ________\| \|
410	// / \|
411	// %D = Use %C \|
412	// \| \|
413	// foo(%C) \| \| <- never traversing foo(%C)
414	// ________\|___\|
415	// /
416	// %C = ...
417	//
418	// In this case, we still can evaluate %C in a different type, but we'd need
419	// to create a copy of the original %C to be used in foo(%C). Such
420	// duplication might be not profitable.
421	//
422	// For this reason, we collect all users of the mult-user values and mark
423	// them as "pending" and defer this decision to the very end. When we are
424	// done and and ready to have a positive verdict, we should double-check all
425	// of the pending users and ensure that we visited them. allPendingVisited
426	// predicate checks exactly that.
427	if (!I->hasOneUse()) {
428	for (Use &U : I->uses()) {
429	// For most instructions, evaluating them in a different type will
430	// change the type of all operands. This is not the case for select
431	// conditions. Make sure we don't retain an extra use via the select
432	// condition.
433	if (isa<SelectInst>(Val: U.getUser()) && U.getOperandNo() == `0`)
434	return false;
435
436	Pending.push_back(Elt: U.getUser());
437	}
438	}
439
440	const bool Result = Pred (V, Ty);
441	// We have to set result this way and not via It because Pred is recursive
442	// and it is very likely that we grew Visited and invalidated It.
443	Visited [V] = Result;
444	return Result;
445	}
446
447	/// Filter out values that we can not evaluate in the destination type for
448	/// free.
449	[[nodiscard]] bool canNotEvaluateInType(Value V, Type Ty);
450
451	[[nodiscard]] bool canEvaluateTruncatedImpl(Value V, Type Ty,
452	InstCombinerImpl &IC,
453	Instruction *CxtI);
454	[[nodiscard]] bool canEvaluateTruncatedPred(Value V, Type Ty,
455	InstCombinerImpl &IC,
456	Instruction *CxtI);
457	[[nodiscard]] bool canEvaluateZExtdImpl(Value V, Type Ty,
458	unsigned &BitsToClear,
459	InstCombinerImpl &IC,
460	Instruction *CxtI);
461	[[nodiscard]] bool canEvaluateSExtdImpl(Value V, Type Ty);
462	[[nodiscard]] bool canEvaluateSExtdPred(Value V, Type Ty);
463
464	/// A bookkeeping map to memorize an already made decision for a traversed
465	/// value.
466	SmallDenseMap<Value , bool*, `8`> Visited;
467
468	/// A list of pending values to check in the end.
469	SmallVector<Value *, `8`> Pending;
470	};
471
472	} // anonymous namespace
473
474	/// Constants and extensions/truncates from the destination type are always
475	/// free to be evaluated in that type. This is a helper for canEvaluate.*
476	bool TypeEvaluationHelper::canAlwaysEvaluateInType(Value V, Type Ty) {
477	if (isa<Constant>(Val: V))
478	return match(V, P: m_ImmConstant());
479
480	Value *X;
481	if (match(V, P: m_ZExtOrSExt(Op: m_SpecificType(RefTy: Ty, V&: X))) \|\|
482	match(V, P: m_Trunc(Op: m_SpecificType(RefTy: Ty, V&: X))))
483	return true;
484
485	return false;
486	}
487
488	/// Filter out values that we can not evaluate in the destination type for free.
489	/// This is a helper for canEvaluate.*
490	bool TypeEvaluationHelper::canNotEvaluateInType(Value V, Type Ty) {
491	if (!isa<Instruction>(Val: V))
492	return true;
493	// We don't extend or shrink something that has multiple uses -- doing so
494	// would require duplicating the instruction which isn't profitable.
495	if (!V->hasOneUse())
496	return true;
497
498	return false;
499	}
500
501	/// Return true if we can evaluate the specified expression tree as type Ty
502	/// instead of its larger type, and arrive with the same value.
503	/// This is used by code that tries to eliminate truncates.
504	///
505	/// Ty will always be a type smaller than V. We should return true if trunc(V)
506	/// can be computed by computing V in the smaller type. If V is an instruction,
507	/// then trunc(inst(x,y)) can be computed as inst(trunc(x),trunc(y)), which only
508	/// makes sense if x and y can be efficiently truncated.
509	///
510	/// This function works on both vectors and scalars.
511	///
512	bool TypeEvaluationHelper::canEvaluateTruncated(Value V, Type Ty,
513	InstCombinerImpl &IC,
514	Instruction *CxtI) {
515	TypeEvaluationHelper TYH;
516	return TYH.canEvaluateTruncatedImpl(V, Ty, IC, CxtI) &&
517	// We need to check whether we visited all users of multi-user values,
518	// and we have to do it at the very end, outside of the recursion.
519	TYH.allPendingVisited();
520	}
521
522	bool TypeEvaluationHelper::canEvaluateTruncatedImpl(Value V, Type Ty,
523	InstCombinerImpl &IC,
524	Instruction *CxtI) {
525	return canEvaluate(V, Ty, Pred: [this, &IC, CxtI](Value V, Type Ty) {
526	return canEvaluateTruncatedPred(V, Ty, IC, CxtI);
527	});
528	}
529
530	bool TypeEvaluationHelper::canEvaluateTruncatedPred(Value V, Type Ty,
531	InstCombinerImpl &IC,
532	Instruction *CxtI) {
533	auto *I = cast<Instruction>(Val: V);
534	Type *OrigTy = V->getType();
535	switch (I->getOpcode()) {
536	case Instruction::Add:
537	case Instruction::Sub:
538	case Instruction::Mul:
539	case Instruction::And:
540	case Instruction::Or:
541	case Instruction::Xor:
542	// These operators can all arbitrarily be extended or truncated.
543	return canEvaluateTruncatedImpl(V: I->getOperand(i: `0`), Ty, IC, CxtI) &&
544	canEvaluateTruncatedImpl(V: I->getOperand(i: `1`), Ty, IC, CxtI);
545
546	case Instruction::UDiv:
547	case Instruction::URem: {
548	// UDiv and URem can be truncated if all the truncated bits are zero.
549	uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
550	uint32_t BitWidth = Ty->getScalarSizeInBits();
551	assert(BitWidth < OrigBitWidth && "Unexpected bitwidths!");
552	APInt Mask = APInt::getBitsSetFrom(numBits: OrigBitWidth, loBit: BitWidth);
553	// Do not preserve the original context instruction. Simplifying div/rem
554	// based on later context may introduce a trap.
555	if (IC.MaskedValueIsZero(V: I->getOperand(i: `0`), Mask, CxtI: I) &&
556	IC.MaskedValueIsZero(V: I->getOperand(i: `1`), Mask, CxtI: I)) {
557	return canEvaluateTruncatedImpl(V: I->getOperand(i: `0`), Ty, IC, CxtI) &&
558	canEvaluateTruncatedImpl(V: I->getOperand(i: `1`), Ty, IC, CxtI);
559	}
560	break;
561	}
562	case Instruction::Shl: {
563	// If we are truncating the result of this SHL, and if it's a shift of an
564	// inrange amount, we can always perform a SHL in a smaller type.
565	uint32_t BitWidth = Ty->getScalarSizeInBits();
566	KnownBits AmtKnownBits =
567	llvm::computeKnownBits(V: I->getOperand(i: `1`), DL: IC.getDataLayout());
568	if (AmtKnownBits.getMaxValue().ult(RHS: BitWidth))
569	return canEvaluateTruncatedImpl(V: I->getOperand(i: `0`), Ty, IC, CxtI) &&
570	canEvaluateTruncatedImpl(V: I->getOperand(i: `1`), Ty, IC, CxtI);
571	break;
572	}
573	case Instruction::LShr: {
574	// If this is a truncate of a logical shr, we can truncate it to a smaller
575	// lshr iff we know that the bits we would otherwise be shifting in are
576	// already zeros.
577	// TODO: It is enough to check that the bits we would be shifting in are
578	// zero - use AmtKnownBits.getMaxValue().
579	uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
580	uint32_t BitWidth = Ty->getScalarSizeInBits();
581	KnownBits AmtKnownBits = IC.computeKnownBits(V: I->getOperand(i: `1`), CxtI);
582	APInt MaxShiftAmt = AmtKnownBits.getMaxValue();
583	APInt ShiftedBits = APInt::getBitsSetFrom(numBits: OrigBitWidth, loBit: BitWidth);
584	if (MaxShiftAmt.ult(RHS: BitWidth)) {
585	// If the only user is a trunc then we can narrow the shift if any new
586	// MSBs are not going to be used.
587	if (auto *Trunc = dyn_cast<TruncInst>(Val: V->user_back())) {
588	auto DemandedBits = Trunc->getType()->getScalarSizeInBits();
589	if ((MaxShiftAmt + DemandedBits).ule(RHS: BitWidth))
590	return canEvaluateTruncatedImpl(V: I->getOperand(i: `0`), Ty, IC, CxtI) &&
591	canEvaluateTruncatedImpl(V: I->getOperand(i: `1`), Ty, IC, CxtI);
592	}
593	if (IC.MaskedValueIsZero(V: I->getOperand(i: `0`), Mask: ShiftedBits, CxtI))
594	return canEvaluateTruncatedImpl(V: I->getOperand(i: `0`), Ty, IC, CxtI) &&
595	canEvaluateTruncatedImpl(V: I->getOperand(i: `1`), Ty, IC, CxtI);
596	}
597	break;
598	}
599	case Instruction::AShr: {
600	// If this is a truncate of an arithmetic shr, we can truncate it to a
601	// smaller ashr iff we know that all the bits from the sign bit of the
602	// original type and the sign bit of the truncate type are similar.
603	// TODO: It is enough to check that the bits we would be shifting in are
604	// similar to sign bit of the truncate type.
605	uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
606	uint32_t BitWidth = Ty->getScalarSizeInBits();
607	KnownBits AmtKnownBits =
608	llvm::computeKnownBits(V: I->getOperand(i: `1`), DL: IC.getDataLayout());
609	unsigned ShiftedBits = OrigBitWidth - BitWidth;
610	if (AmtKnownBits.getMaxValue().ult(RHS: BitWidth) &&
611	ShiftedBits < IC.ComputeNumSignBits(Op: I->getOperand(i: `0`), CxtI))
612	return canEvaluateTruncatedImpl(V: I->getOperand(i: `0`), Ty, IC, CxtI) &&
613	canEvaluateTruncatedImpl(V: I->getOperand(i: `1`), Ty, IC, CxtI);
614	break;
615	}
616	case Instruction::Trunc:
617	// trunc(trunc(x)) -> trunc(x)
618	return true;
619	case Instruction::ZExt:
620	case Instruction::SExt:
621	// trunc(ext(x)) -> ext(x) if the source type is smaller than the new dest
622	// trunc(ext(x)) -> trunc(x) if the source type is larger than the new dest
623	return true;
624	case Instruction::Select: {
625	SelectInst *SI = cast<SelectInst>(Val: I);
626	return canEvaluateTruncatedImpl(V: SI->getTrueValue(), Ty, IC, CxtI) &&
627	canEvaluateTruncatedImpl(V: SI->getFalseValue(), Ty, IC, CxtI);
628	}
629	case Instruction::PHI: {
630	// We can change a phi if we can change all operands. Note that we never
631	// get into trouble with cyclic PHIs here because canEvaluate handles use
632	// chain loops.
633	PHINode *PN = cast<PHINode>(Val: I);
634	return llvm::all_of(
635	Range: PN->incoming_values(), P: [this, Ty, &IC, CxtI](Value *IncValue) {
636	return canEvaluateTruncatedImpl(V: IncValue, Ty, IC, CxtI);
637	});
638	}
639	case Instruction::FPToUI:
640	case Instruction::FPToSI: {
641	// If the integer type can hold the max FP value, it is safe to cast
642	// directly to that type. Otherwise, we may create poison via overflow
643	// that did not exist in the original code.
644	Type *InputTy = I->getOperand(i: `0`)->getType()->getScalarType();
645	const fltSemantics &Semantics = InputTy->getFltSemantics();
646	uint32_t MinBitWidth = APFloatBase::semanticsIntSizeInBits(
647	Semantics, I->getOpcode() == Instruction::FPToSI);
648	return Ty->getScalarSizeInBits() >= MinBitWidth;
649	}
650	case Instruction::ShuffleVector:
651	return canEvaluateTruncatedImpl(V: I->getOperand(i: `0`), Ty, IC, CxtI) &&
652	canEvaluateTruncatedImpl(V: I->getOperand(i: `1`), Ty, IC, CxtI);
653
654	case Instruction::Call: {
655	Value *AbsOp;
656	if (match(V: I, P: m_Intrinsic<Intrinsic::abs>(Ops: m_Value(V&: AbsOp), Ops: m_Value()))) {
657	if (IC.ComputeMaxSignificantBits(Op: AbsOp, CxtI) > Ty->getScalarSizeInBits())
658	return false;
659	return canEvaluateTruncatedImpl(V: AbsOp, Ty, IC, CxtI);
660	}
661	auto *MM = dyn_cast<MinMaxIntrinsic>(Val: I);
662	if (!MM)
663	return false;
664	// The min/max can be performed in the narrow type when each operand has
665	// zero high bits (for umin/umax) or enough sign bits (for smin/smax).
666	Value *Op0 = MM->getLHS();
667	Value *Op1 = MM->getRHS();
668	uint32_t BitWidth = Ty->getScalarSizeInBits();
669	if (MM->isSigned()) {
670	if (IC.ComputeMaxSignificantBits(Op: Op0, CxtI) > BitWidth \|\|
671	IC.ComputeMaxSignificantBits(Op: Op1, CxtI) > BitWidth)
672	break;
673	} else {
674	APInt Mask =
675	APInt::getBitsSetFrom(numBits: OrigTy->getScalarSizeInBits(), loBit: BitWidth);
676	if (!IC.MaskedValueIsZero(V: Op0, Mask, CxtI) \|\|
677	!IC.MaskedValueIsZero(V: Op1, Mask, CxtI))
678	break;
679	}
680	return canEvaluateTruncatedImpl(V: Op0, Ty, IC, CxtI) &&
681	canEvaluateTruncatedImpl(V: Op1, Ty, IC, CxtI);
682	}
683	default:
684	// TODO: Can handle more cases here.
685	break;
686	}
687
688	return false;
689	}
690
691	/// Given a vector that is bitcast to an integer, optionally logically
692	/// right-shifted, and truncated, convert it to an extractelement.
693	/// Example (big endian):
694	/// trunc (lshr (bitcast <4 x i32> %X to i128), 32) to i32
695	/// --->
696	/// extractelement <4 x i32> %X, 1
697	static Instruction *foldVecTruncToExtElt(TruncInst &Trunc,
698	InstCombinerImpl &IC) {
699	Value *TruncOp = Trunc.getOperand(i_nocapture: `0`);
700	Type *DestType = Trunc.getType();
701	if (!TruncOp->hasOneUse() \|\| !isa<IntegerType>(Val: DestType))
702	return nullptr;
703
704	Value VecInput = nullptr*;
705	ConstantInt ShiftVal = nullptr*;
706	if (!match(V: TruncOp, P: m_CombineOr(Ps: m_BitCast(Op: m_Value(V&: VecInput)),
707	Ps: m_LShr(L: m_BitCast(Op: m_Value(V&: VecInput)),
708	R: m_ConstantInt(CI&: ShiftVal)))) \|\|
709	!isa<VectorType>(Val: VecInput->getType()))
710	return nullptr;
711
712	VectorType *VecType = cast<VectorType>(Val: VecInput->getType());
713	unsigned VecWidth = VecType->getPrimitiveSizeInBits();
714	unsigned DestWidth = DestType->getPrimitiveSizeInBits();
715	unsigned ShiftAmount = ShiftVal ? ShiftVal->getZExtValue() : `0`;
716
717	if ((VecWidth % DestWidth != `0`) \|\| (ShiftAmount % DestWidth != `0`))
718	return nullptr;
719
720	// If the element type of the vector doesn't match the result type,
721	// bitcast it to a vector type that we can extract from.
722	unsigned NumVecElts = VecWidth / DestWidth;
723	if (VecType->getElementType() != DestType) {
724	VecType = FixedVectorType::get(ElementType: DestType, NumElts: NumVecElts);
725	VecInput = IC.Builder.CreateBitCast(V: VecInput, DestTy: VecType, Name: "bc");
726	}
727
728	unsigned Elt = ShiftAmount / DestWidth;
729	if (IC.getDataLayout().isBigEndian())
730	Elt = NumVecElts - `1` - Elt;
731
732	return ExtractElementInst::Create(Vec: VecInput, Idx: IC.Builder.getInt32(C: Elt));
733	}
734
735	/// Whenever an element is extracted from a vector, optionally shifted down, and
736	/// then truncated, canonicalize by converting it to a bitcast followed by an
737	/// extractelement.
738	///
739	/// Examples (little endian):
740	/// trunc (extractelement <4 x i64> %X, 0) to i32
741	/// --->
742	/// extractelement <8 x i32> (bitcast <4 x i64> %X to <8 x i32>), i32 0
743	///
744	/// trunc (lshr (extractelement <4 x i32> %X, 0), 8) to i8
745	/// --->
746	/// extractelement <16 x i8> (bitcast <4 x i32> %X to <16 x i8>), i32 1
747	static Instruction *foldVecExtTruncToExtElt(TruncInst &Trunc,
748	InstCombinerImpl &IC) {
749	Value *Src = Trunc.getOperand(i_nocapture: `0`);
750	Type *SrcType = Src->getType();
751	Type *DstType = Trunc.getType();
752
753	// Only attempt this if we have simple aliasing of the vector elements.
754	// A badly fit destination size would result in an invalid cast.
755	unsigned SrcBits = SrcType->getScalarSizeInBits();
756	unsigned DstBits = DstType->getScalarSizeInBits();
757	unsigned TruncRatio = SrcBits / DstBits;
758	if ((SrcBits % DstBits) != `0`)
759	return nullptr;
760
761	Value *VecOp;
762	ConstantInt *Cst;
763	const APInt ShiftAmount = nullptr*;
764	if (!match(V: Src, P: m_OneUse(SubPattern: m_ExtractElt(Val: m_Value(V&: VecOp), Idx: m_ConstantInt(CI&: Cst)))) &&
765	!match(V: Src,
766	P: m_OneUse(SubPattern: m_LShr(L: m_ExtractElt(Val: m_Value(V&: VecOp), Idx: m_ConstantInt(CI&: Cst)),
767	R: m_APInt(Res&: ShiftAmount)))))
768	return nullptr;
769
770	auto *VecOpTy = cast<VectorType>(Val: VecOp->getType());
771	auto VecElts = VecOpTy->getElementCount();
772
773	uint64_t BitCastNumElts = VecElts.getKnownMinValue() * TruncRatio;
774	// Make sure we don't overflow in the calculation of the new index.
775	// (VecOpIdx + 1) TruncRatio should not overflow.*
776	if (Cst->uge(Num: std::numeric_limits<uint64_t>::max() / TruncRatio))
777	return nullptr;
778	uint64_t VecOpIdx = Cst->getZExtValue();
779	uint64_t NewIdx = IC.getDataLayout().isBigEndian()
780	? (VecOpIdx + `1`) * TruncRatio - `1`
781	: VecOpIdx * TruncRatio;
782
783	// Adjust index by the whole number of truncated elements.
784	if (ShiftAmount) {
785	// Check shift amount is in range and shifts a whole number of truncated
786	// elements.
787	if (ShiftAmount->uge(RHS: SrcBits) \|\| ShiftAmount->urem(RHS: DstBits) != `0`)
788	return nullptr;
789
790	uint64_t IdxOfs = ShiftAmount->udiv(RHS: DstBits).getZExtValue();
791	// IdxOfs is guaranteed to be less than TruncRatio, so we won't overflow in
792	// the adjustment.
793	assert(IdxOfs < TruncRatio &&
794	"IdxOfs is expected to be less than TruncRatio.");
795	NewIdx = IC.getDataLayout().isBigEndian() ? (NewIdx - IdxOfs)
796	: (NewIdx + IdxOfs);
797	}
798
799	assert(BitCastNumElts <= std::numeric_limits<uint32_t>::max() &&
800	"overflow 32-bits");
801
802	auto *BitCastTo =
803	VectorType::get(ElementType: DstType, NumElements: BitCastNumElts, Scalable: VecElts.isScalable());
804	Value *BitCast = IC.Builder.CreateBitCast(V: VecOp, DestTy: BitCastTo);
805	return ExtractElementInst::Create(Vec: BitCast, Idx: IC.Builder.getInt64(C: NewIdx));
806	}
807
808	/// Funnel/Rotate left/right may occur in a wider type than necessary because of
809	/// type promotion rules. Try to narrow the inputs and convert to funnel shift.
810	Instruction *InstCombinerImpl::narrowFunnelShift(TruncInst &Trunc) {
811	assert((isa<VectorType>(Trunc.getSrcTy()) \|\|
812	shouldChangeType(Trunc.getSrcTy(), Trunc.getType())) &&
813	"Don't narrow to an illegal scalar type");
814
815	// Bail out on strange types. It is possible to handle some of these patterns
816	// even with non-power-of-2 sizes, but it is not a likely scenario.
817	Type *DestTy = Trunc.getType();
818	unsigned NarrowWidth = DestTy->getScalarSizeInBits();
819	unsigned WideWidth = Trunc.getSrcTy()->getScalarSizeInBits();
820	if (!isPowerOf2_32(Value: NarrowWidth))
821	return nullptr;
822
823	// First, find an or'd pair of opposite shifts:
824	// trunc (or (lshr ShVal0, ShAmt0), (shl ShVal1, ShAmt1))
825	BinaryOperator Or0, Or1;
826	if (!match(V: Trunc.getOperand(i_nocapture: `0`), P: m_OneUse(SubPattern: m_Or(L: m_BinOp(I&: Or0), R: m_BinOp(I&: Or1)))))
827	return nullptr;
828
829	Value ShVal0, ShVal1, ShAmt0, ShAmt1;
830	if (!match(V: Or0, P: m_OneUse(SubPattern: m_LogicalShift(L: m_Value(V&: ShVal0), R: m_Value(V&: ShAmt0)))) \|\|
831	!match(V: Or1, P: m_OneUse(SubPattern: m_LogicalShift(L: m_Value(V&: ShVal1), R: m_Value(V&: ShAmt1)))) \|\|
832	Or0->getOpcode() == Or1->getOpcode())
833	return nullptr;
834
835	// Canonicalize to or(shl(ShVal0, ShAmt0), lshr(ShVal1, ShAmt1)).
836	if (Or0->getOpcode() == BinaryOperator::LShr) {
837	std::swap(a&: Or0, b&: Or1);
838	std::swap(a&: ShVal0, b&: ShVal1);
839	std::swap(a&: ShAmt0, b&: ShAmt1);
840	}
841	assert(Or0->getOpcode() == BinaryOperator::Shl &&
842	Or1->getOpcode() == BinaryOperator::LShr &&
843	"Illegal or(shift,shift) pair");
844
845	// Match the shift amount operands for a funnel/rotate pattern. This always
846	// matches a subtraction on the R operand.
847	auto matchShiftAmount = [&](Value L, Value R, unsigned Width) -> Value * {
848	// The shift amounts may add up to the narrow bit width:
849	// (shl ShVal0, L) \| (lshr ShVal1, Width - L)
850	// If this is a funnel shift (different operands are shifted), then the
851	// shift amount can not over-shift (create poison) in the narrow type.
852	unsigned MaxShiftAmountWidth = Log2_32(Value: NarrowWidth);
853	APInt HiBitMask = ~APInt::getLowBitsSet(numBits: WideWidth, loBitsSet: MaxShiftAmountWidth);
854	if (ShVal0 == ShVal1 \|\| MaskedValueIsZero(V: L, Mask: HiBitMask))
855	if (match(V: R, P: m_OneUse(SubPattern: m_Sub(L: m_SpecificInt(V: Width), R: m_Specific(V: L)))))
856	return L;
857
858	// The following patterns currently only work for rotation patterns.
859	// TODO: Add more general funnel-shift compatible patterns.
860	if (ShVal0 != ShVal1)
861	return nullptr;
862
863	// The shift amount may be masked with negation:
864	// (shl ShVal0, (X & (Width - 1))) \| (lshr ShVal1, ((-X) & (Width - 1)))
865	Value *X;
866	unsigned Mask = Width - `1`;
867	if (match(V: L, P: m_And(L: m_Value(V&: X), R: m_SpecificInt(V: Mask))) &&
868	match(V: R, P: m_And(L: m_Neg(V: m_Specific(V: X)), R: m_SpecificInt(V: Mask))))
869	return X;
870
871	// Same as above, but the shift amount may be extended after masking:
872	if (match(V: L, P: m_ZExt(Op: m_And(L: m_Value(V&: X), R: m_SpecificInt(V: Mask)))) &&
873	match(V: R, P: m_ZExt(Op: m_And(L: m_Neg(V: m_Specific(V: X)), R: m_SpecificInt(V: Mask)))))
874	return X;
875
876	return nullptr;
877	};
878
879	Value *ShAmt = matchShiftAmount (ShAmt0, ShAmt1, NarrowWidth);
880	bool IsFshl = true; // Sub on LSHR.
881	if (!ShAmt) {
882	ShAmt = matchShiftAmount (ShAmt1, ShAmt0, NarrowWidth);
883	IsFshl = false; // Sub on SHL.
884	}
885	if (!ShAmt)
886	return nullptr;
887
888	// The right-shifted value must have high zeros in the wide type (for example
889	// from 'zext', 'and' or 'shift'). High bits of the left-shifted value are
890	// truncated, so those do not matter.
891	APInt HiBitMask = APInt::getHighBitsSet(numBits: WideWidth, hiBitsSet: WideWidth - NarrowWidth);
892	if (!MaskedValueIsZero(V: ShVal1, Mask: HiBitMask, CxtI: &Trunc))
893	return nullptr;
894
895	// Adjust the width of ShAmt for narrowed funnel shift operation:
896	// - Zero-extend if ShAmt is narrower than the destination type.
897	// - Truncate if ShAmt is wider, discarding non-significant high-order bits.
898	// This prepares ShAmt for llvm.fshl.i8(trunc(ShVal), trunc(ShVal),
899	// zext/trunc(ShAmt)).
900	Value *NarrowShAmt = Builder.CreateZExtOrTrunc(V: ShAmt, DestTy);
901
902	Value X, Y;
903	X = Y = Builder.CreateTrunc(V: ShVal0, DestTy);
904	if (ShVal0 != ShVal1)
905	Y = Builder.CreateTrunc(V: ShVal1, DestTy);
906	Intrinsic::ID IID = IsFshl ? Intrinsic::fshl : Intrinsic::fshr;
907	Function *F =
908	Intrinsic::getOrInsertDeclaration(M: Trunc.getModule(), id: IID, OverloadTys: DestTy);
909	return CallInst::Create(Func: F, Args: {X, Y, NarrowShAmt});
910	}
911
912	/// Try to narrow the width of math or bitwise logic instructions by pulling a
913	/// truncate ahead of binary operators.
914	Instruction *InstCombinerImpl::narrowBinOp(TruncInst &Trunc) {
915	Type *SrcTy = Trunc.getSrcTy();
916	Type *DestTy = Trunc.getType();
917	unsigned SrcWidth = SrcTy->getScalarSizeInBits();
918	unsigned DestWidth = DestTy->getScalarSizeInBits();
919
920	if (!isa<VectorType>(Val: SrcTy) && !shouldChangeType(From: SrcTy, To: DestTy))
921	return nullptr;
922
923	BinaryOperator *BinOp;
924	if (!match(V: Trunc.getOperand(i_nocapture: `0`), P: m_OneUse(SubPattern: m_BinOp(I&: BinOp))))
925	return nullptr;
926
927	Value *BinOp0 = BinOp->getOperand(i_nocapture: `0`);
928	Value *BinOp1 = BinOp->getOperand(i_nocapture: `1`);
929	switch (BinOp->getOpcode()) {
930	case Instruction::And:
931	case Instruction::Or:
932	case Instruction::Xor:
933	case Instruction::Add:
934	case Instruction::Sub:
935	case Instruction::Mul: {
936	Constant *C;
937	if (match(V: BinOp0, P: m_Constant(C))) {
938	// trunc (binop C, X) --> binop (trunc C', X)
939	Constant *NarrowC = ConstantExpr::getTrunc(C, Ty: DestTy);
940	Value *TruncX = Builder.CreateTrunc(V: BinOp1, DestTy);
941	return BinaryOperator::Create(Op: BinOp->getOpcode(), S1: NarrowC, S2: TruncX);
942	}
943	if (match(V: BinOp1, P: m_Constant(C))) {
944	// trunc (binop X, C) --> binop (trunc X, C')
945	Constant *NarrowC = ConstantExpr::getTrunc(C, Ty: DestTy);
946	Value *TruncX = Builder.CreateTrunc(V: BinOp0, DestTy);
947	return BinaryOperator::Create(Op: BinOp->getOpcode(), S1: TruncX, S2: NarrowC);
948	}
949	Value *X;
950	if (match(V: BinOp0, P: m_ZExtOrSExt(Op: m_SpecificType(RefTy: DestTy, V&: X)))) {
951	// trunc (binop (ext X), Y) --> binop X, (trunc Y)
952	Value *NarrowOp1 = Builder.CreateTrunc(V: BinOp1, DestTy);
953	return BinaryOperator::Create(Op: BinOp->getOpcode(), S1: X, S2: NarrowOp1);
954	}
955	if (match(V: BinOp1, P: m_ZExtOrSExt(Op: m_SpecificType(RefTy: DestTy, V&: X)))) {
956	// trunc (binop Y, (ext X)) --> binop (trunc Y), X
957	Value *NarrowOp0 = Builder.CreateTrunc(V: BinOp0, DestTy);
958	return BinaryOperator::Create(Op: BinOp->getOpcode(), S1: NarrowOp0, S2: X);
959	}
960	break;
961	}
962	case Instruction::LShr:
963	case Instruction::AShr: {
964	// trunc (shr (trunc A), C) --> trunc(shr A, C)
965	Value *A;
966	Constant *C;
967	if (match(V: BinOp0, P: m_Trunc(Op: m_Value(V&: A))) && match(V: BinOp1, P: m_Constant(C))) {
968	unsigned MaxShiftAmt = SrcWidth - DestWidth;
969	// If the shift is small enough, all zero/sign bits created by the shift
970	// are removed by the trunc.
971	if (match(V: C, P: m_SpecificInt_ICMP(Predicate: ICmpInst::ICMP_ULE,
972	Threshold: APInt (SrcWidth, MaxShiftAmt)))) {
973	auto *OldShift = cast<Instruction>(Val: Trunc.getOperand(i_nocapture: `0`));
974	bool IsExact = OldShift->isExact();
975	if (Constant *ShAmt = ConstantFoldIntegerCast(C, DestTy: A->getType(),
976	/IsSigned/ true, DL)) {
977	ShAmt = Constant::mergeUndefsWith(C: ShAmt, Other: C);
978	Value *Shift =
979	OldShift->getOpcode() == Instruction::AShr
980	? Builder.CreateAShr(LHS: A, RHS: ShAmt, Name: OldShift->getName(), isExact: IsExact)
981	: Builder.CreateLShr(LHS: A, RHS: ShAmt, Name: OldShift->getName(), isExact: IsExact);
982	return CastInst::CreateTruncOrBitCast(S: Shift, Ty: DestTy);
983	}
984	}
985	}
986	break;
987	}
988	default: break;
989	}
990
991	if (Instruction *NarrowOr = narrowFunnelShift(Trunc))
992	return NarrowOr;
993
994	return nullptr;
995	}
996
997	/// Try to narrow the width of a splat shuffle. This could be generalized to any
998	/// shuffle with a constant operand, but we limit the transform to avoid
999	/// creating a shuffle type that targets may not be able to lower effectively.
1000	static Instruction *shrinkSplatShuffle(TruncInst &Trunc,
1001	InstCombiner::BuilderTy &Builder) {
1002	Value Shuf = Trunc.getOperand(i_nocapture: `0`), ShufVec;
1003	ArrayRef<int> SplatMask;
1004	if (match(V: Shuf, P: m_OneUse(SubPattern: m_Shuffle(v1: m_Value(V&: ShufVec), v2: m_Poison(),
1005	mask: m_Mask (SplatMask)))) &&
1006	match(Mask: SplatMask, P: m_SplatMask ()) &&
1007	ElementCount::isKnownGE(
1008	LHS: cast<VectorType>(Val: Shuf->getType())->getElementCount(),
1009	RHS: cast<VectorType>(Val: ShufVec->getType())->getElementCount())) {
1010	// trunc (shuf X, poison, SplatMask) --> shuf (trunc X), poison, SplatMask
1011	Type *NewTruncTy =
1012	ShufVec->getType()->getWithNewType(EltTy: Trunc.getType()->getScalarType());
1013	Value *NarrowOp = Builder.CreateTrunc(V: ShufVec, DestTy: NewTruncTy);
1014	return new ShuffleVectorInst (NarrowOp, SplatMask);
1015	}
1016
1017	return nullptr;
1018	}
1019
1020	/// Try to narrow the width of an insert element. This could be generalized for
1021	/// any vector constant, but we limit the transform to insertion into poison to
1022	/// avoid potential backend problems from unsupported insertion widths. This
1023	/// could also be extended to handle the case of inserting a scalar constant
1024	/// into a vector variable.
1025	static Instruction *shrinkInsertElt(CastInst &Trunc,
1026	InstCombiner::BuilderTy &Builder) {
1027	Instruction::CastOps Opcode = Trunc.getOpcode();
1028	assert((Opcode == Instruction::Trunc \|\| Opcode == Instruction::FPTrunc) &&
1029	"Unexpected instruction for shrinking");
1030
1031	Value Elt, Index;
1032	if (match(V: Trunc.getOperand(i_nocapture: `0`),
1033	P: m_OneUse(SubPattern: m_InsertElt(Val: m_Poison(), Elt: m_Value(V&: Elt), Idx: m_Value(V&: Index))))) {
1034	// trunc (inselt poison, X, Index) --> inselt poison, (trunc X), Index
1035	// fptrunc (inselt poison, X, Index) --> inselt poison, (fptrunc X), Index
1036	auto *NarrowPoison = PoisonValue::get(T: Trunc.getType());
1037	Value *NarrowOp =
1038	Builder.CreateCast(Op: Opcode, V: Elt, DestTy: Trunc.getType()->getScalarType());
1039	return InsertElementInst::Create(Vec: NarrowPoison, NewElt: NarrowOp, Idx: Index);
1040	}
1041
1042	return nullptr;
1043	}
1044
1045	Instruction *InstCombinerImpl::visitTrunc(TruncInst &Trunc) {
1046	if (Instruction *Result = commonCastTransforms(CI&: Trunc))
1047	return Result;
1048
1049	Value *Src = Trunc.getOperand(i_nocapture: `0`);
1050	Type DestTy = Trunc.getType(), SrcTy = Src->getType();
1051	unsigned DestWidth = DestTy->getScalarSizeInBits();
1052	unsigned SrcWidth = SrcTy->getScalarSizeInBits();
1053
1054	// Attempt to truncate the entire input expression tree to the destination
1055	// type. Only do this if the dest type is a simple type, don't convert the
1056	// expression tree to something weird like i93 unless the source is also
1057	// strange.
1058	if ((DestTy->isVectorTy() \|\| shouldChangeType(From: SrcTy, To: DestTy)) &&
1059	TypeEvaluationHelper::canEvaluateTruncated(V: Src, Ty: DestTy, IC&: *this, CxtI: &Trunc)) {
1060
1061	// If this cast is a truncate, evaluting in a different type always
1062	// eliminates the cast, so it is always a win.
1063	LLVM_DEBUG(
1064	dbgs() << "ICE: EvaluateInDifferentType converting expression type"
1065	" to avoid cast: "
1066	<< Trunc << `'\n'`);
1067	Value Res = EvaluateInDifferentType(V: Src, Ty: DestTy, isSigned: false*);
1068	assert(Res->getType() == DestTy);
1069	return replaceInstUsesWith(I&: Trunc, V: Res);
1070	}
1071
1072	// For integer types, check if we can shorten the entire input expression to
1073	// DestWidth 2, which won't allow removing the truncate, but reducing the*
1074	// width may enable further optimizations, e.g. allowing for larger
1075	// vectorization factors.
1076	if (auto *DestITy = dyn_cast<IntegerType>(Val: DestTy)) {
1077	if (DestWidth * `2` < SrcWidth) {
1078	auto *NewDestTy = DestITy->getExtendedType();
1079	if (shouldChangeType(From: SrcTy, To: NewDestTy) &&
1080	TypeEvaluationHelper::canEvaluateTruncated(V: Src, Ty: NewDestTy, IC&: *this,
1081	CxtI: &Trunc)) {
1082	LLVM_DEBUG(
1083	dbgs() << "ICE: EvaluateInDifferentType converting expression type"
1084	" to reduce the width of operand of"
1085	<< Trunc << `'\n'`);
1086	Value Res = EvaluateInDifferentType(V: Src, Ty: NewDestTy, isSigned: false*);
1087	return new TruncInst (Res, DestTy);
1088	}
1089	}
1090	}
1091	Value *X;
1092	if (DestWidth == `1` &&
1093	(Trunc.hasNoUnsignedWrap() \|\| Trunc.hasNoSignedWrap()) &&
1094	match(V: Src, P: m_Exact(SubPattern: m_Shr(L: m_Value(V&: X), R: m_Value()))))
1095	return new ICmpInst (ICmpInst::ICMP_NE, X, Constant::getNullValue(Ty: SrcTy));
1096
1097	// See if we can simplify any instructions used by the input whose sole
1098	// purpose is to compute bits we don't care about.
1099	if (SimplifyDemandedInstructionBits(Inst&: Trunc))
1100	return &Trunc;
1101
1102	if (DestWidth == `1`) {
1103	Value *Zero = Constant::getNullValue(Ty: SrcTy);
1104
1105	const APInt *C1;
1106	Constant *C2;
1107	if (match(V: Src, P: m_OneUse(SubPattern: m_Shr(L: m_Shl(L: m_Power2(V&: C1), R: m_Value(V&: X)),
1108	R: m_ImmConstant(C&: C2))))) {
1109	// trunc ((C1 << X) >> C2) to i1 --> X == (C2-cttz(C1)), where C1 is pow2
1110	Constant *Log2C1 = ConstantInt::get(Ty: SrcTy, V: C1->exactLogBase2());
1111	Constant *CmpC = ConstantExpr::getSub(C1: C2, C2: Log2C1);
1112	return new ICmpInst (ICmpInst::ICMP_EQ, X, CmpC);
1113	}
1114
1115	if (match(V: Src, P: m_Shr(L: m_Value(V&: X), R: m_SpecificInt(V: SrcWidth - `1`)))) {
1116	// trunc (ashr X, BW-1) to i1 --> icmp slt X, 0
1117	// trunc (lshr X, BW-1) to i1 --> icmp slt X, 0
1118	return new ICmpInst (ICmpInst::ICMP_SLT, X, Zero);
1119	}
1120
1121	Constant *C;
1122	if (match(V: Src, P: m_OneUse(SubPattern: m_LShr(L: m_Value(V&: X), R: m_ImmConstant(C))))) {
1123	// trunc (lshr X, C) to i1 --> icmp ne (and X, C'), 0
1124	Constant *One = ConstantInt::get(Ty: SrcTy, V: APInt (SrcWidth, `1`));
1125	Value *MaskC = Builder.CreateShl(LHS: One, RHS: C);
1126	Value *And = Builder.CreateAnd(LHS: X, RHS: MaskC);
1127	return new ICmpInst (ICmpInst::ICMP_NE, And, Zero);
1128	}
1129	if (match(V: Src, P: m_OneUse(SubPattern: m_c_Or(L: m_LShr(L: m_Value(V&: X), R: m_ImmConstant(C)),
1130	R: m_Deferred(V: X))))) {
1131	// trunc (or (lshr X, C), X) to i1 --> icmp ne (and X, C'), 0
1132	Constant *One = ConstantInt::get(Ty: SrcTy, V: APInt (SrcWidth, `1`));
1133	Value *MaskC = Builder.CreateShl(LHS: One, RHS: C);
1134	Value *And = Builder.CreateAnd(LHS: X, RHS: Builder.CreateOr(LHS: MaskC, RHS: One));
1135	return new ICmpInst (ICmpInst::ICMP_NE, And, Zero);
1136	}
1137
1138	{
1139	const APInt *C;
1140	if (match(V: Src, P: m_Shl(L: m_APInt(Res&: C), R: m_Value(V&: X))) && (*C)[`0`] == `1`) {
1141	// trunc (C << X) to i1 --> X == 0, where C is odd
1142	return new ICmpInst (ICmpInst::Predicate::ICMP_EQ, X, Zero);
1143	}
1144	}
1145
1146	if (Trunc.hasNoUnsignedWrap() \|\| Trunc.hasNoSignedWrap()) {
1147	Value X, Y;
1148	if (match(V: Src, P: m_Xor(L: m_Value(V&: X), R: m_Value(V&: Y))))
1149	return new ICmpInst (ICmpInst::ICMP_NE, X, Y);
1150	}
1151
1152	if (match(V: Src,
1153	P: m_OneUse(SubPattern: m_Intrinsic<Intrinsic::usub_sat>(Ops: m_One(), Ops: m_Value(V&: X)))))
1154	return new ICmpInst (ICmpInst::ICMP_EQ, X,
1155	ConstantInt::getNullValue(Ty: SrcTy));
1156	}
1157
1158	Value A, B;
1159	Constant *C;
1160
1161	// trunc(u/smin(zext(a) + zext(b), MAX)) --> uadd.sat(a, b)
1162	if (match(V: Src, P: m_OneUse(SubPattern: m_CombineOr(
1163	Ps: m_UMin(Op0: m_OneUse(SubPattern: m_Add(L: m_ZExt(Op: m_SpecificType(RefTy: DestTy, V&: A)),
1164	R: m_ZExt(Op: m_SpecificType(RefTy: DestTy, V&: B)))),
1165	Op1: m_SpecificInt(V: APInt::getMaxValue(numBits: DestWidth))),
1166	Ps: m_SMin(Op0: m_OneUse(SubPattern: m_Add(L: m_ZExt(Op: m_SpecificType(RefTy: DestTy, V&: A)),
1167	R: m_ZExt(Op: m_SpecificType(RefTy: DestTy, V&: B)))),
1168	Op1: m_SpecificInt(V: APInt::getMaxValue(numBits: DestWidth))))))) {
1169	return replaceInstUsesWith(
1170	I&: Trunc, V: Builder.CreateBinaryIntrinsic(ID: Intrinsic::uadd_sat, LHS: A, RHS: B));
1171	}
1172
1173	// trunc(smax(zext(a) - zext(b), 0)) --> usub.sat(a, b)
1174	if (match(V: Src,
1175	P: m_OneUse(SubPattern: m_SMax(Op0: m_OneUse(SubPattern: m_Sub(L: m_ZExt(Op: m_SpecificType(RefTy: DestTy, V&: A)),
1176	R: m_ZExt(Op: m_SpecificType(RefTy: DestTy, V&: B)))),
1177	Op1: m_Zero())))) {
1178	return replaceInstUsesWith(
1179	I&: Trunc, V: Builder.CreateBinaryIntrinsic(ID: Intrinsic::usub_sat, LHS: A, RHS: B));
1180	}
1181
1182	if (match(V: Src, P: m_LShr(L: m_SExt(Op: m_Value(V&: A)), R: m_Constant(C)))) {
1183	unsigned AWidth = A->getType()->getScalarSizeInBits();
1184	unsigned MaxShiftAmt = SrcWidth - std::max(a: DestWidth, b: AWidth);
1185	auto *OldSh = cast<Instruction>(Val: Src);
1186	bool IsExact = OldSh->isExact();
1187
1188	// If the shift is small enough, all zero bits created by the shift are
1189	// removed by the trunc.
1190	if (match(V: C, P: m_SpecificInt_ICMP(Predicate: ICmpInst::ICMP_ULE,
1191	Threshold: APInt (SrcWidth, MaxShiftAmt)))) {
1192	auto GetNewShAmt = [&](unsigned Width) {
1193	Constant MaxAmt = ConstantInt::get(Ty: SrcTy, V: Width - `1`, IsSigned: false*);
1194	Constant *Cmp =
1195	ConstantFoldCompareInstOperands(Predicate: ICmpInst::ICMP_ULT, LHS: C, RHS: MaxAmt, DL);
1196	Constant *ShAmt = ConstantFoldSelectInstruction(Cond: Cmp, V1: C, V2: MaxAmt);
1197	return ConstantFoldCastOperand(Opcode: Instruction::Trunc, C: ShAmt, DestTy: A->getType(),
1198	DL);
1199	};
1200
1201	// trunc (lshr (sext A), C) --> ashr A, C
1202	if (A->getType() == DestTy) {
1203	Constant *ShAmt = GetNewShAmt (DestWidth);
1204	ShAmt = Constant::mergeUndefsWith(C: ShAmt, Other: C);
1205	return IsExact ? BinaryOperator::CreateExactAShr(V1: A, V2: ShAmt)
1206	: BinaryOperator::CreateAShr(V1: A, V2: ShAmt);
1207	}
1208	// The types are mismatched, so create a cast after shifting:
1209	// trunc (lshr (sext A), C) --> sext/trunc (ashr A, C)
1210	if (Src->hasOneUse()) {
1211	Constant *ShAmt = GetNewShAmt (AWidth);
1212	Value *Shift = Builder.CreateAShr(LHS: A, RHS: ShAmt, Name: "", isExact: IsExact);
1213	return CastInst::CreateIntegerCast(S: Shift, Ty: DestTy, isSigned: true);
1214	}
1215	}
1216	// TODO: Mask high bits with 'and'.
1217	}
1218
1219	if (Instruction *I = narrowBinOp(Trunc))
1220	return I;
1221
1222	if (Instruction *I = shrinkSplatShuffle(Trunc, Builder))
1223	return I;
1224
1225	if (Instruction *I = shrinkInsertElt(Trunc, Builder))
1226	return I;
1227
1228	if (Src->hasOneUse() &&
1229	(isa<VectorType>(Val: SrcTy) \|\| shouldChangeType(From: SrcTy, To: DestTy))) {
1230	// Transform "trunc (shl X, cst)" -> "shl (trunc X), cst" so long as the
1231	// dest type is native and cst < dest size.
1232	if (match(V: Src, P: m_Shl(L: m_Value(V&: A), R: m_Constant(C))) &&
1233	!match(V: A, P: m_Shr(L: m_Value(), R: m_Constant()))) {
1234	// Skip shifts of shift by constants. It undoes a combine in
1235	// FoldShiftByConstant and is the extend in reg pattern.
1236	APInt Threshold = APInt (C->getType()->getScalarSizeInBits(), DestWidth);
1237	if (match(V: C, P: m_SpecificInt_ICMP(Predicate: ICmpInst::ICMP_ULT, Threshold))) {
1238	Value *NewTrunc = Builder.CreateTrunc(V: A, DestTy, Name: A->getName() + ".tr");
1239	return BinaryOperator::Create(Op: Instruction::Shl, S1: NewTrunc,
1240	S2: ConstantExpr::getTrunc(C, Ty: DestTy));
1241	}
1242	}
1243	}
1244
1245	// trunc (select(icmp_ult(A, DestTy_umax+1), A, sext(icmp_sgt(A, 0)))) -->
1246	// trunc (smin(smax(0, A), DestTy_umax))
1247	if (SrcTy->isIntegerTy() && isPowerOf2_64(Value: SrcTy->getPrimitiveSizeInBits()) &&
1248	isPowerOf2_64(Value: DestTy->getPrimitiveSizeInBits()) &&
1249	match(V: Src, P: m_OneUse(SubPattern: m_Select(
1250	C: m_OneUse(SubPattern: m_SpecificICmp(MatchPred: ICmpInst::ICMP_ULT, L: m_Value(V&: A),
1251	R: m_Constant(C))),
1252	L: m_Deferred(V: A),
1253	R: m_OneUse(SubPattern: m_SExt(Op: m_OneUse(SubPattern: m_SpecificICmp(
1254	MatchPred: ICmpInst::ICMP_SGT, L: m_Deferred(V: A), R: m_Zero())))))))) {
1255	APInt UpperBound = C->getUniqueInteger();
1256	APInt TruncatedMax = APInt::getAllOnes(numBits: DestTy->getIntegerBitWidth());
1257	TruncatedMax = TruncatedMax.zext(width: UpperBound.getBitWidth());
1258	if (!UpperBound.isZero() && UpperBound - `1` == TruncatedMax) {
1259	Value *SMax = Builder.CreateIntrinsic(ID: Intrinsic::smax, OverloadTypes: {SrcTy},
1260	Args: {ConstantInt::get(Ty: SrcTy, V: `0`), A});
1261	Value *SMin = Builder.CreateIntrinsic(
1262	ID: Intrinsic::smin, OverloadTypes: {SrcTy},
1263	Args: {SMax, ConstantInt::get(Ty: SrcTy, V: TruncatedMax)});
1264	return new TruncInst (SMin, DestTy);
1265	}
1266	}
1267
1268	if (Instruction I = foldVecTruncToExtElt(Trunc, IC&: this))
1269	return I;
1270
1271	if (Instruction I = foldVecExtTruncToExtElt(Trunc, IC&: this))
1272	return I;
1273
1274	// trunc (ctlz_i32(zext(A), B) --> add(ctlz_i16(A, B), C)
1275	if (match(V: Src, P: m_OneUse(SubPattern: m_Ctlz(Op0: m_ZExt(Op: m_Value(V&: A)), Op1: m_Value(V&: B))))) {
1276	unsigned AWidth = A->getType()->getScalarSizeInBits();
1277	if (AWidth == DestWidth && AWidth > Log2_32(Value: SrcWidth)) {
1278	Value *WidthDiff = ConstantInt::get(Ty: A->getType(), V: SrcWidth - AWidth);
1279	Value *NarrowCtlz =
1280	Builder.CreateIntrinsic(ID: Intrinsic::ctlz, OverloadTypes: {Trunc.getType()}, Args: {A, B});
1281	return BinaryOperator::CreateAdd(V1: NarrowCtlz, V2: WidthDiff);
1282	}
1283	}
1284
1285	if (match(V: Src, P: m_VScale())) {
1286	if (Trunc.getFunction() &&
1287	Trunc.getFunction()->hasFnAttribute(Kind: Attribute::VScaleRange)) {
1288	Attribute Attr =
1289	Trunc.getFunction()->getFnAttribute(Kind: Attribute::VScaleRange);
1290	if (std::optional<unsigned> MaxVScale = Attr.getVScaleRangeMax())
1291	if (Log2_32(Value: *MaxVScale) < DestWidth)
1292	return replaceInstUsesWith(I&: Trunc, V: Builder.CreateVScale(Ty: DestTy));
1293	}
1294	}
1295
1296	// trunc(scmp(x, y)) -> scmp(x, y) with a narrower result type.
1297	// trunc(ucmp(x, y)) -> ucmp(x, y) with a narrower result type.
1298	// scmp/ucmp produce only -1, 0, or 1, so any result type with at least 2
1299	// bits can represent every possible value and the truncation is lossless.
1300	if (DestWidth >= `2`)
1301	if (auto *CI = dyn_cast<CmpIntrinsic>(Val: Src); CI && CI->hasOneUse())
1302	return replaceInstUsesWith(
1303	I&: Trunc, V: Builder.CreateIntrinsic(RetTy: DestTy, ID: CI->getIntrinsicID(),
1304	Args: {CI->getLHS(), CI->getRHS()}));
1305
1306	if (DestWidth == `1` &&
1307	(Trunc.hasNoUnsignedWrap() \|\| Trunc.hasNoSignedWrap()) &&
1308	isKnownNonZero(V: Src, Q: SQ.getWithInstruction(I: &Trunc)))
1309	return replaceInstUsesWith(I&: Trunc, V: ConstantInt::getTrue(Ty: DestTy));
1310
1311	bool Changed = false;
1312	if (!Trunc.hasNoSignedWrap() &&
1313	ComputeMaxSignificantBits(Op: Src, CxtI: &Trunc) <= DestWidth) {
1314	Trunc.setHasNoSignedWrap(true);
1315	Changed = true;
1316	}
1317	if (!Trunc.hasNoUnsignedWrap() &&
1318	MaskedValueIsZero(V: Src, Mask: APInt::getBitsSetFrom(numBits: SrcWidth, loBit: DestWidth),
1319	CxtI: &Trunc)) {
1320	Trunc.setHasNoUnsignedWrap(true);
1321	Changed = true;
1322	}
1323
1324	const APInt *C1;
1325	Value *V1;
1326	// OP = { lshr, ashr }
1327	// trunc ( OP i8 C1, V1) to i1 -> icmp eq V1, log_2(C1) iff C1 is power of 2
1328	if (DestWidth == `1` && match(V: Src, P: m_Shr(L: m_Power2(V&: C1), R: m_Value(V&: V1)))) {
1329	Value *Right = ConstantInt::get(Ty: V1->getType(), V: C1->countr_zero());
1330	return new ICmpInst (ICmpInst::ICMP_EQ, V1, Right);
1331	}
1332
1333	// OP = { lshr, ashr }
1334	// trunc ( OP i8 C1, V1) to i1 -> icmp ult V1, log_2(C1 + 1) iff (C1 + 1) is
1335	// power of 2
1336	if (DestWidth == `1` && match(V: Src, P: m_Shr(L: m_LowBitMask(V&: C1), R: m_Value(V&: V1)))) {
1337	Value *Right = ConstantInt::get(Ty: V1->getType(), V: C1->countr_one());
1338	return new ICmpInst (ICmpInst::ICMP_ULT, V1, Right);
1339	}
1340
1341	// OP = { lshr, ashr }
1342	// trunc ( OP i8 C1, V1) to i1 -> icmp ugt V1, cttz(C1) - 1 iff (C1) is
1343	// negative power of 2
1344	if (DestWidth == `1` && match(V: Src, P: m_Shr(L: m_NegatedPower2(V&: C1), R: m_Value(V&: V1)))) {
1345	Value *Right = ConstantInt::get(Ty: V1->getType(), V: C1->countr_zero());
1346	return new ICmpInst (ICmpInst::ICMP_UGE, V1, Right);
1347	}
1348
1349	return Changed ? &Trunc : nullptr;
1350	}
1351
1352	Instruction InstCombinerImpl::transformZExtICmp(ICmpInst Cmp,
1353	ZExtInst &Zext) {
1354	// If we are just checking for a icmp eq of a single bit and zext'ing it
1355	// to an integer, then shift the bit to the appropriate place and then
1356	// cast to integer to avoid the comparison.
1357
1358	// FIXME: This set of transforms does not check for extra uses and/or creates
1359	// an extra instruction (an optional final cast is not included
1360	// in the transform comments). We may also want to favor icmp over
1361	// shifts in cases of equal instructions because icmp has better
1362	// analysis in general (invert the transform).
1363
1364	const APInt *Op1CV;
1365	if (match(V: Cmp->getOperand(i_nocapture: `1`), P: m_APInt(Res&: Op1CV))) {
1366
1367	// zext (x <s 0) to i32 --> x>>u31 true if signbit set.
1368	if (Cmp->getPredicate() == ICmpInst::ICMP_SLT && Op1CV->isZero()) {
1369	Value *In = Cmp->getOperand(i_nocapture: `0`);
1370	Value *Sh = ConstantInt::get(Ty: In->getType(),
1371	V: In->getType()->getScalarSizeInBits() - `1`);
1372	In = Builder.CreateLShr(LHS: In, RHS: Sh, Name: In->getName() + ".lobit");
1373	if (In->getType() != Zext.getType())
1374	In = Builder.CreateIntCast(V: In, DestTy: Zext.getType(), isSigned: false /ZExt/);
1375
1376	return replaceInstUsesWith(I&: Zext, V: In);
1377	}
1378
1379	// zext (X == 0) to i32 --> X^1 iff X has only the low bit set.
1380	// zext (X == 0) to i32 --> (X>>1)^1 iff X has only the 2nd bit set.
1381	// zext (X != 0) to i32 --> X iff X has only the low bit set.
1382	// zext (X != 0) to i32 --> X>>1 iff X has only the 2nd bit set.
1383
1384	if (Op1CV->isZero() && Cmp->isEquality()) {
1385	// Exactly 1 possible 1? But not the high-bit because that is
1386	// canonicalized to this form.
1387	KnownBits Known = computeKnownBits(V: Cmp->getOperand(i_nocapture: `0`), CxtI: &Zext);
1388	APInt KnownZeroMask(~Known.Zero);
1389	uint32_t ShAmt = KnownZeroMask.logBase2();
1390	bool IsExpectShAmt = KnownZeroMask.isPowerOf2() &&
1391	(Zext.getType()->getScalarSizeInBits() != ShAmt + `1`);
1392	if (IsExpectShAmt &&
1393	(Cmp->getOperand(i_nocapture: `0`)->getType() == Zext.getType() \|\|
1394	Cmp->getPredicate() == ICmpInst::ICMP_NE \|\| ShAmt == `0`)) {
1395	Value *In = Cmp->getOperand(i_nocapture: `0`);
1396	if (ShAmt) {
1397	// Perform a logical shr by shiftamt.
1398	// Insert the shift to put the result in the low bit.
1399	In = Builder.CreateLShr(LHS: In, RHS: ConstantInt::get(Ty: In->getType(), V: ShAmt),
1400	Name: In->getName() + ".lobit");
1401	}
1402
1403	// Toggle the low bit for "X == 0".
1404	if (Cmp->getPredicate() == ICmpInst::ICMP_EQ)
1405	In = Builder.CreateXor(LHS: In, RHS: ConstantInt::get(Ty: In->getType(), V: `1`));
1406
1407	if (Zext.getType() == In->getType())
1408	return replaceInstUsesWith(I&: Zext, V: In);
1409
1410	Value IntCast = Builder.CreateIntCast(V: In, DestTy: Zext.getType(), isSigned: false*);
1411	return replaceInstUsesWith(I&: Zext, V: IntCast);
1412	}
1413	}
1414	}
1415
1416	if (Cmp->isEquality()) {
1417	// Test if a bit is clear/set using a shifted-one mask:
1418	// zext (icmp eq (and X, (1 << ShAmt)), 0) --> and (lshr (not X), ShAmt), 1
1419	// zext (icmp ne (and X, (1 << ShAmt)), 0) --> and (lshr X, ShAmt), 1
1420	Value X, ShAmt;
1421	if (Cmp->hasOneUse() && match(V: Cmp->getOperand(i_nocapture: `1`), P: m_ZeroInt()) &&
1422	match(V: Cmp->getOperand(i_nocapture: `0`),
1423	P: m_OneUse(SubPattern: m_c_And(L: m_Shl(L: m_One(), R: m_Value(V&: ShAmt)), R: m_Value(V&: X))))) {
1424	auto *And = cast<BinaryOperator>(Val: Cmp->getOperand(i_nocapture: `0`));
1425	Value *Shift = And->getOperand(i_nocapture: X == And->getOperand(i_nocapture: `0`) ? `1` : `0`);
1426	if (Zext.getType() == And->getType() \|\|
1427	Cmp->getPredicate() != ICmpInst::ICMP_EQ \|\| Shift->hasOneUse()) {
1428	if (Cmp->getPredicate() == ICmpInst::ICMP_EQ)
1429	X = Builder.CreateNot(V: X);
1430	Value *Lshr = Builder.CreateLShr(LHS: X, RHS: ShAmt);
1431	Value *And1 =
1432	Builder.CreateAnd(LHS: Lshr, RHS: ConstantInt::get(Ty: X->getType(), V: `1`));
1433	return replaceInstUsesWith(
1434	I&: Zext, V: Builder.CreateZExtOrTrunc(V: And1, DestTy: Zext.getType()));
1435	}
1436	}
1437	}
1438
1439	return nullptr;
1440	}
1441
1442	/// Determine if the specified value can be computed in the specified wider type
1443	/// and produce the same low bits. If not, return false.
1444	///
1445	/// If this function returns true, it can also return a non-zero number of bits
1446	/// (in BitsToClear) which indicates that the value it computes is correct for
1447	/// the zero extend, but that the additional BitsToClear bits need to be zero'd
1448	/// out. For example, to promote something like:
1449	///
1450	/// %B = trunc i64 %A to i32
1451	/// %C = lshr i32 %B, 8
1452	/// %E = zext i32 %C to i64
1453	///
1454	/// CanEvaluateZExtd for the 'lshr' will return true, and BitsToClear will be
1455	/// set to 8 to indicate that the promoted value needs to have bits 24-31
1456	/// cleared in addition to bits 32-63. Since an 'and' will be generated to
1457	/// clear the top bits anyway, doing this has no extra cost.
1458	///
1459	/// This function works on both vectors and scalars.
1460	bool TypeEvaluationHelper::canEvaluateZExtd(Value V, Type Ty,
1461	unsigned &BitsToClear,
1462	InstCombinerImpl &IC,
1463	Instruction *CxtI) {
1464	TypeEvaluationHelper TYH;
1465	return TYH.canEvaluateZExtdImpl(V, Ty, BitsToClear, IC, CxtI);
1466	}
1467	bool TypeEvaluationHelper::canEvaluateZExtdImpl(Value V, Type Ty,
1468	unsigned &BitsToClear,
1469	InstCombinerImpl &IC,
1470	Instruction *CxtI) {
1471	BitsToClear = `0`;
1472	if (canAlwaysEvaluateInType(V, Ty))
1473	return true;
1474	// We stick to the one-user limit for the ZExt transform due to the fact
1475	// that this predicate returns two values: predicate result and BitsToClear.
1476	if (canNotEvaluateInType(V, Ty))
1477	return false;
1478
1479	auto *I = cast<Instruction>(Val: V);
1480	unsigned Tmp;
1481	switch (I->getOpcode()) {
1482	case Instruction::ZExt: // zext(zext(x)) -> zext(x).
1483	case Instruction::SExt: // zext(sext(x)) -> sext(x).
1484	case Instruction::Trunc: // zext(trunc(x)) -> trunc(x) or zext(x)
1485	return true;
1486	case Instruction::And:
1487	case Instruction::Or:
1488	case Instruction::Xor:
1489	case Instruction::Add:
1490	case Instruction::Sub:
1491	case Instruction::Mul:
1492	if (!canEvaluateZExtdImpl(V: I->getOperand(i: `0`), Ty, BitsToClear, IC, CxtI) \|\|
1493	!canEvaluateZExtdImpl(V: I->getOperand(i: `1`), Ty, BitsToClear&: Tmp, IC, CxtI))
1494	return false;
1495	// These can all be promoted if neither operand has 'bits to clear'.
1496	if (BitsToClear == `0` && Tmp == `0`)
1497	return true;
1498
1499	// If the operation is an AND/OR/XOR and the bits to clear are zero in the
1500	// other side, BitsToClear is ok.
1501	if (Tmp == `0` && I->isBitwiseLogicOp()) {
1502	// We use MaskedValueIsZero here for generality, but the case we care
1503	// about the most is constant RHS.
1504	unsigned VSize = V->getType()->getScalarSizeInBits();
1505	if (IC.MaskedValueIsZero(V: I->getOperand(i: `1`),
1506	Mask: APInt::getHighBitsSet(numBits: VSize, hiBitsSet: BitsToClear),
1507	CxtI)) {
1508	// If this is an And instruction and all of the BitsToClear are
1509	// known to be zero we can reset BitsToClear.
1510	if (I->getOpcode() == Instruction::And)
1511	BitsToClear = `0`;
1512	return true;
1513	}
1514	}
1515
1516	// Otherwise, we don't know how to analyze this BitsToClear case yet.
1517	return false;
1518
1519	case Instruction::Shl: {
1520	// We can promote shl(x, cst) if we can promote x. Since shl overwrites the
1521	// upper bits we can reduce BitsToClear by the shift amount.
1522	uint64_t ShiftAmt;
1523	if (match(V: I->getOperand(i: `1`), P: m_ConstantInt(V&: ShiftAmt))) {
1524	if (!canEvaluateZExtdImpl(V: I->getOperand(i: `0`), Ty, BitsToClear, IC, CxtI))
1525	return false;
1526	BitsToClear = ShiftAmt < BitsToClear ? BitsToClear - ShiftAmt : `0`;
1527	return true;
1528	}
1529	return false;
1530	}
1531	case Instruction::LShr: {
1532	// We can promote lshr(x, cst) if we can promote x. This requires the
1533	// ultimate 'and' to clear out the high zero bits we're clearing out though.
1534	uint64_t ShiftAmt;
1535	if (match(V: I->getOperand(i: `1`), P: m_ConstantInt(V&: ShiftAmt))) {
1536	if (!canEvaluateZExtdImpl(V: I->getOperand(i: `0`), Ty, BitsToClear, IC, CxtI))
1537	return false;
1538	BitsToClear += ShiftAmt;
1539	if (BitsToClear > V->getType()->getScalarSizeInBits())
1540	BitsToClear = V->getType()->getScalarSizeInBits();
1541	return true;
1542	}
1543	// Cannot promote variable LSHR.
1544	return false;
1545	}
1546	case Instruction::Select:
1547	if (!canEvaluateZExtdImpl(V: I->getOperand(i: `1`), Ty, BitsToClear&: Tmp, IC, CxtI) \|\|
1548	!canEvaluateZExtdImpl(V: I->getOperand(i: `2`), Ty, BitsToClear, IC, CxtI) \|\|
1549	// TODO: If important, we could handle the case when the BitsToClear are
1550	// known zero in the disagreeing side.
1551	Tmp != BitsToClear)
1552	return false;
1553	return true;
1554
1555	case Instruction::PHI: {
1556	// We can change a phi if we can change all operands. Note that we never
1557	// get into trouble with cyclic PHIs here because we only consider
1558	// instructions with a single use.
1559	PHINode *PN = cast<PHINode>(Val: I);
1560	if (!canEvaluateZExtdImpl(V: PN->getIncomingValue(i: `0`), Ty, BitsToClear, IC,
1561	CxtI))
1562	return false;
1563	for (unsigned i = `1`, e = PN->getNumIncomingValues(); i != e; ++i)
1564	if (!canEvaluateZExtdImpl(V: PN->getIncomingValue(i), Ty, BitsToClear&: Tmp, IC, CxtI) \|\|
1565	// TODO: If important, we could handle the case when the BitsToClear
1566	// are known zero in the disagreeing input.
1567	Tmp != BitsToClear)
1568	return false;
1569	return true;
1570	}
1571	case Instruction::Call:
1572	// llvm.vscale() can always be executed in larger type, because the
1573	// value is automatically zero-extended.
1574	if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: I))
1575	if (II->getIntrinsicID() == Intrinsic::vscale)
1576	return true;
1577	return false;
1578	default:
1579	// TODO: Can handle more cases here.
1580	return false;
1581	}
1582	}
1583
1584	Instruction *InstCombinerImpl::visitZExt(ZExtInst &Zext) {
1585	// If this zero extend is only used by a truncate, let the truncate be
1586	// eliminated before we try to optimize this zext.
1587	if (Zext.hasOneUse() && isa<TruncInst>(Val: Zext.user_back()) &&
1588	!isa<Constant>(Val: Zext.getOperand(i_nocapture: `0`)))
1589	return nullptr;
1590
1591	// If one of the common conversion will work, do it.
1592	if (Instruction *Result = commonCastTransforms(CI&: Zext))
1593	return Result;
1594
1595	if (auto *NewI = foldExtractionOfVectorDeinterleave(RootZExt&: Zext))
1596	return NewI;
1597
1598	Value *Src = Zext.getOperand(i_nocapture: `0`);
1599	Type SrcTy = Src->getType(), DestTy = Zext.getType();
1600
1601	// zext nneg bool x -> 0
1602	if (SrcTy->isIntOrIntVectorTy(BitWidth: `1`) && Zext.hasNonNeg())
1603	return replaceInstUsesWith(I&: Zext, V: Constant::getNullValue(Ty: Zext.getType()));
1604
1605	// Try to extend the entire expression tree to the wide destination type.
1606	unsigned BitsToClear;
1607	if (shouldChangeType(From: SrcTy, To: DestTy) &&
1608	TypeEvaluationHelper::canEvaluateZExtd(V: Src, Ty: DestTy, BitsToClear, IC&: *this,
1609	CxtI: &Zext)) {
1610	assert(BitsToClear <= SrcTy->getScalarSizeInBits() &&
1611	"Can't clear more bits than in SrcTy");
1612
1613	// Okay, we can transform this! Insert the new expression now.
1614	LLVM_DEBUG(
1615	dbgs() << "ICE: EvaluateInDifferentType converting expression type"
1616	" to avoid zero extend: "
1617	<< Zext << `'\n'`);
1618	Value Res = EvaluateInDifferentType(V: Src, Ty: DestTy, isSigned: false*);
1619	assert(Res->getType() == DestTy);
1620
1621	// Preserve debug values referring to Src if the zext is its last use.
1622	if (auto *SrcOp = dyn_cast<Instruction>(Val: Src))
1623	if (SrcOp->hasOneUse())
1624	replaceAllDbgUsesWith(From&: SrcOp, To&: Res, DomPoint&: Zext, DT);
1625
1626	uint32_t SrcBitsKept = SrcTy->getScalarSizeInBits() - BitsToClear;
1627	uint32_t DestBitSize = DestTy->getScalarSizeInBits();
1628
1629	// If the high bits are already filled with zeros, just replace this
1630	// cast with the result.
1631	if (MaskedValueIsZero(
1632	V: Res, Mask: APInt::getHighBitsSet(numBits: DestBitSize, hiBitsSet: DestBitSize - SrcBitsKept),
1633	CxtI: &Zext))
1634	return replaceInstUsesWith(I&: Zext, V: Res);
1635
1636	// We need to emit an AND to clear the high bits.
1637	Constant *C = ConstantInt::get(Ty: Res->getType(),
1638	V: APInt::getLowBitsSet(numBits: DestBitSize, loBitsSet: SrcBitsKept));
1639	return BinaryOperator::CreateAnd(V1: Res, V2: C);
1640	}
1641
1642	// If this is a TRUNC followed by a ZEXT then we are dealing with integral
1643	// types and if the sizes are just right we can convert this into a logical
1644	// 'and' which will be much cheaper than the pair of casts.
1645	if (auto CSrc = dyn_cast<TruncInst>(Val: Src)) { // A->B->C cast*
1646	// TODO: Subsume this into EvaluateInDifferentType.
1647
1648	// Get the sizes of the types involved. We know that the intermediate type
1649	// will be smaller than A or C, but don't know the relation between A and C.
1650	Value *A = CSrc->getOperand(i_nocapture: `0`);
1651	unsigned SrcSize = A->getType()->getScalarSizeInBits();
1652	unsigned MidSize = CSrc->getType()->getScalarSizeInBits();
1653	unsigned DstSize = DestTy->getScalarSizeInBits();
1654	// If we're actually extending zero bits, then if
1655	// SrcSize < DstSize: zext(a & mask)
1656	// SrcSize == DstSize: a & mask
1657	// SrcSize > DstSize: trunc(a) & mask
1658	if (SrcSize < DstSize) {
1659	APInt AndValue(APInt::getLowBitsSet(numBits: SrcSize, loBitsSet: MidSize));
1660	Constant *AndConst = ConstantInt::get(Ty: A->getType(), V: AndValue);
1661	Value *And = Builder.CreateAnd(LHS: A, RHS: AndConst, Name: CSrc->getName() + ".mask");
1662	return new ZExtInst (And, DestTy);
1663	}
1664
1665	if (SrcSize == DstSize) {
1666	APInt AndValue(APInt::getLowBitsSet(numBits: SrcSize, loBitsSet: MidSize));
1667	return BinaryOperator::CreateAnd(V1: A, V2: ConstantInt::get(Ty: A->getType(),
1668	V: AndValue));
1669	}
1670	if (SrcSize > DstSize) {
1671	Value *Trunc = Builder.CreateTrunc(V: A, DestTy);
1672	APInt AndValue(APInt::getLowBitsSet(numBits: DstSize, loBitsSet: MidSize));
1673	return BinaryOperator::CreateAnd(V1: Trunc,
1674	V2: ConstantInt::get(Ty: Trunc->getType(),
1675	V: AndValue));
1676	}
1677	}
1678
1679	if (auto *Cmp = dyn_cast<ICmpInst>(Val: Src))
1680	return transformZExtICmp(Cmp, Zext);
1681
1682	Constant *C;
1683	Value *X;
1684	// zext((trunc(X) & C) ^ C) -> ((X & zext(C)) ^ zext(C)).
1685	Value *And;
1686	if (match(V: Src, P: m_OneUse(SubPattern: m_Xor(L: m_Value(V&: And), R: m_Constant(C)))) &&
1687	match(V: And, P: m_OneUse(SubPattern: m_And(L: m_Trunc(Op: m_SpecificType(RefTy: DestTy, V&: X)),
1688	R: m_Specific(V: C))))) {
1689	Value *ZC = Builder.CreateZExt(V: C, DestTy);
1690	return BinaryOperator::CreateXor(V1: Builder.CreateAnd(LHS: X, RHS: ZC), V2: ZC);
1691	}
1692
1693	// zext(sub(0, trunc(X))) -> and(sub(0, X), mask)
1694	if (match(V: Src, P: m_Sub(L: m_Zero(), R: m_Trunc(Op: m_SpecificType(RefTy: DestTy, V&: X))))) {
1695	APInt Mask = APInt::getLowBitsSet(numBits: DestTy->getScalarSizeInBits(),
1696	loBitsSet: SrcTy->getScalarSizeInBits());
1697	Value *Neg = Builder.CreateSub(LHS: ConstantInt::get(Ty: DestTy, V: `0`), RHS: X);
1698	return BinaryOperator::CreateAnd(V1: Neg, V2: ConstantInt::get(Ty: DestTy, V: Mask));
1699	}
1700
1701	// If we are truncating, masking, and then zexting back to the original type,
1702	// that's just a mask. This is not handled by canEvaluateZextd if the
1703	// intermediate values have extra uses. This could be generalized further for
1704	// a non-constant mask operand.
1705	// zext (and (trunc X), C) --> and X, (zext C)
1706	if (match(V: Src, P: m_And(L: m_Trunc(Op: m_SpecificType(RefTy: DestTy, V&: X)), R: m_Constant(C)))) {
1707	Value *ZextC = Builder.CreateZExt(V: C, DestTy);
1708	return BinaryOperator::CreateAnd(V1: X, V2: ZextC);
1709	}
1710
1711	Value *Y;
1712	if (match(V: Src, P: m_OneUse(SubPattern: m_c_BitwiseLogic(
1713	L: m_NUWTrunc(Op: m_SpecificType(RefTy: DestTy, V&: X)), R: m_Value(V&: Y))))) {
1714	Value *ZextY = Builder.CreateZExt(V: Y, DestTy);
1715	return BinaryOperator::Create(Op: cast<BinaryOperator>(Val: Src)->getOpcode(), S1: X,
1716	S2: ZextY);
1717	}
1718
1719	if (match(V: Src, P: m_VScale())) {
1720	if (Zext.getFunction() &&
1721	Zext.getFunction()->hasFnAttribute(Kind: Attribute::VScaleRange)) {
1722	Attribute Attr =
1723	Zext.getFunction()->getFnAttribute(Kind: Attribute::VScaleRange);
1724	if (std::optional<unsigned> MaxVScale = Attr.getVScaleRangeMax()) {
1725	unsigned TypeWidth = Src->getType()->getScalarSizeInBits();
1726	if (Log2_32(Value: *MaxVScale) < TypeWidth)
1727	return replaceInstUsesWith(I&: Zext, V: Builder.CreateVScale(Ty: DestTy));
1728	}
1729	}
1730	}
1731
1732	if (!Zext.hasNonNeg()) {
1733	// If this zero extend is only used by a shift, add nneg flag.
1734	if (Zext.hasOneUse() &&
1735	SrcTy->getScalarSizeInBits() >
1736	Log2_64_Ceil(Value: DestTy->getScalarSizeInBits()) &&
1737	match(V: Zext.user_back(), P: m_Shift(L: m_Value(), R: m_Specific(V: &Zext)))) {
1738	Zext.setNonNeg();
1739	return &Zext;
1740	}
1741
1742	if (isKnownNonNegative(V: Src, SQ: SQ.getWithInstruction(I: &Zext))) {
1743	Zext.setNonNeg();
1744	return &Zext;
1745	}
1746	}
1747
1748	return nullptr;
1749	}
1750
1751	/// Transform (sext icmp) to bitwise / integer operations to eliminate the icmp.
1752	Instruction InstCombinerImpl::transformSExtICmp(ICmpInst Cmp,
1753	SExtInst &Sext) {
1754	Value Op0 = Cmp->getOperand(i_nocapture: `0`), Op1 = Cmp->getOperand(i_nocapture: `1`);
1755	ICmpInst::Predicate Pred = Cmp->getPredicate();
1756
1757	// Don't bother if Op1 isn't of vector or integer type.
1758	if (!Op1->getType()->isIntOrIntVectorTy())
1759	return nullptr;
1760
1761	if (Pred == ICmpInst::ICMP_SLT && match(V: Op1, P: m_ZeroInt())) {
1762	// sext (x <s 0) --> ashr x, 31 (all ones if negative)
1763	Value *Sh = ConstantInt::get(Ty: Op0->getType(),
1764	V: Op0->getType()->getScalarSizeInBits() - `1`);
1765	Value *In = Builder.CreateAShr(LHS: Op0, RHS: Sh, Name: Op0->getName() + ".lobit");
1766	if (In->getType() != Sext.getType())
1767	In = Builder.CreateIntCast(V: In, DestTy: Sext.getType(), isSigned: true /SExt/);
1768
1769	return replaceInstUsesWith(I&: Sext, V: In);
1770	}
1771
1772	if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Val: Op1)) {
1773	// If we know that only one bit of the LHS of the icmp can be set and we
1774	// have an equality comparison with zero or a power of 2, we can transform
1775	// the icmp and sext into bitwise/integer operations.
1776	if (Cmp->hasOneUse() &&
1777	Cmp->isEquality() && (Op1C->isZero() \|\| Op1C->getValue().isPowerOf2())){
1778	KnownBits Known = computeKnownBits(V: Op0, CxtI: &Sext);
1779
1780	APInt KnownZeroMask(~Known.Zero);
1781	if (KnownZeroMask.isPowerOf2()) {
1782	Value *In = Cmp->getOperand(i_nocapture: `0`);
1783
1784	// If the icmp tests for a known zero bit we can constant fold it.
1785	if (!Op1C->isZero() && Op1C->getValue() != KnownZeroMask) {
1786	Value *V = Pred == ICmpInst::ICMP_NE ?
1787	ConstantInt::getAllOnesValue(Ty: Sext.getType()) :
1788	ConstantInt::getNullValue(Ty: Sext.getType());
1789	return replaceInstUsesWith(I&: Sext, V);
1790	}
1791
1792	if (!Op1C->isZero() == (Pred == ICmpInst::ICMP_NE)) {
1793	// sext ((x & 2^n) == 0) -> (x >> n) - 1
1794	// sext ((x & 2^n) != 2^n) -> (x >> n) - 1
1795	unsigned ShiftAmt = KnownZeroMask.countr_zero();
1796	// Perform a right shift to place the desired bit in the LSB.
1797	if (ShiftAmt)
1798	In = Builder.CreateLShr(LHS: In,
1799	RHS: ConstantInt::get(Ty: In->getType(), V: ShiftAmt));
1800
1801	// At this point "In" is either 1 or 0. Subtract 1 to turn
1802	// {1, 0} -> {0, -1}.
1803	In = Builder.CreateAdd(LHS: In,
1804	RHS: ConstantInt::getAllOnesValue(Ty: In->getType()),
1805	Name: "sext");
1806	} else {
1807	// sext ((x & 2^n) != 0) -> (x << bitwidth-n) a>> bitwidth-1
1808	// sext ((x & 2^n) == 2^n) -> (x << bitwidth-n) a>> bitwidth-1
1809	unsigned ShiftAmt = KnownZeroMask.countl_zero();
1810	// Perform a left shift to place the desired bit in the MSB.
1811	if (ShiftAmt)
1812	In = Builder.CreateShl(LHS: In,
1813	RHS: ConstantInt::get(Ty: In->getType(), V: ShiftAmt));
1814
1815	// Distribute the bit over the whole bit width.
1816	In = Builder.CreateAShr(LHS: In, RHS: ConstantInt::get(Ty: In->getType(),
1817	V: KnownZeroMask.getBitWidth() - `1`), Name: "sext");
1818	}
1819
1820	if (Sext.getType() == In->getType())
1821	return replaceInstUsesWith(I&: Sext, V: In);
1822	return CastInst::CreateIntegerCast(S: In, Ty: Sext.getType(), isSigned: true/SExt/);
1823	}
1824	}
1825	}
1826
1827	return nullptr;
1828	}
1829
1830	/// Return true if we can take the specified value and return it as type Ty
1831	/// without inserting any new casts and without changing the value of the common
1832	/// low bits. This is used by code that tries to promote integer operations to
1833	/// a wider types will allow us to eliminate the extension.
1834	///
1835	/// This function works on both vectors and scalars.
1836	///
1837	bool TypeEvaluationHelper::canEvaluateSExtd(Value V, Type Ty) {
1838	TypeEvaluationHelper TYH;
1839	return TYH.canEvaluateSExtdImpl(V, Ty) && TYH.allPendingVisited();
1840	}
1841
1842	bool TypeEvaluationHelper::canEvaluateSExtdImpl(Value V, Type Ty) {
1843	return canEvaluate(V, Ty, Pred: [this](Value V, Type Ty) {
1844	return canEvaluateSExtdPred(V, Ty);
1845	});
1846	}
1847
1848	bool TypeEvaluationHelper::canEvaluateSExtdPred(Value V, Type Ty) {
1849	assert(V->getType()->getScalarSizeInBits() < Ty->getScalarSizeInBits() &&
1850	"Can't sign extend type to a smaller type");
1851
1852	auto *I = cast<Instruction>(Val: V);
1853	switch (I->getOpcode()) {
1854	case Instruction::SExt: // sext(sext(x)) -> sext(x)
1855	case Instruction::ZExt: // sext(zext(x)) -> zext(x)
1856	case Instruction::Trunc: // sext(trunc(x)) -> trunc(x) or sext(x)
1857	return true;
1858	case Instruction::And:
1859	case Instruction::Or:
1860	case Instruction::Xor:
1861	case Instruction::Add:
1862	case Instruction::Sub:
1863	case Instruction::Mul:
1864	// These operators can all arbitrarily be extended if their inputs can.
1865	return canEvaluateSExtdImpl(V: I->getOperand(i: `0`), Ty) &&
1866	canEvaluateSExtdImpl(V: I->getOperand(i: `1`), Ty);
1867
1868	// case Instruction::Shl: TODO
1869	// case Instruction::LShr: TODO
1870
1871	case Instruction::Select:
1872	return canEvaluateSExtdImpl(V: I->getOperand(i: `1`), Ty) &&
1873	canEvaluateSExtdImpl(V: I->getOperand(i: `2`), Ty);
1874
1875	case Instruction::PHI: {
1876	// We can change a phi if we can change all operands. Note that we never
1877	// get into trouble with cyclic PHIs here because canEvaluate handles use
1878	// chain loops.
1879	PHINode *PN = cast<PHINode>(Val: I);
1880	for (Value *IncValue : PN->incoming_values())
1881	if (!canEvaluateSExtdImpl(V: IncValue, Ty))
1882	return false;
1883	return true;
1884	}
1885	default:
1886	// TODO: Can handle more cases here.
1887	break;
1888	}
1889
1890	return false;
1891	}
1892
1893	Instruction *InstCombinerImpl::visitSExt(SExtInst &Sext) {
1894	// If this sign extend is only used by a truncate, let the truncate be
1895	// eliminated before we try to optimize this sext.
1896	if (Sext.hasOneUse() && isa<TruncInst>(Val: Sext.user_back()))
1897	return nullptr;
1898
1899	if (Instruction *I = commonCastTransforms(CI&: Sext))
1900	return I;
1901
1902	Value *Src = Sext.getOperand(i_nocapture: `0`);
1903	Type SrcTy = Src->getType(), DestTy = Sext.getType();
1904	unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
1905	unsigned DestBitSize = DestTy->getScalarSizeInBits();
1906
1907	// If the value being extended is zero or positive, use a zext instead.
1908	if (isKnownNonNegative(V: Src, SQ: SQ.getWithInstruction(I: &Sext))) {
1909	auto CI = CastInst::Create(Instruction::ZExt, S: Src, Ty: DestTy);
1910	CI->setNonNeg(true);
1911	return CI;
1912	}
1913
1914	// Try to extend the entire expression tree to the wide destination type.
1915	bool ShouldExtendExpression = true;
1916	Value TruncSrc = nullptr*;
1917	// It is not desirable to extend expression in the trunc + sext pattern when
1918	// destination type is narrower than original (pre-trunc) type.
1919	if (match(V: Src, P: m_Trunc(Op: m_Value(V&: TruncSrc))))
1920	if (TruncSrc->getType()->getScalarSizeInBits() > DestBitSize)
1921	ShouldExtendExpression = false;
1922	if (ShouldExtendExpression && shouldChangeType(From: SrcTy, To: DestTy) &&
1923	TypeEvaluationHelper::canEvaluateSExtd(V: Src, Ty: DestTy)) {
1924	// Okay, we can transform this! Insert the new expression now.
1925	LLVM_DEBUG(
1926	dbgs() << "ICE: EvaluateInDifferentType converting expression type"
1927	" to avoid sign extend: "
1928	<< Sext << `'\n'`);
1929	Value Res = EvaluateInDifferentType(V: Src, Ty: DestTy, isSigned: true*);
1930	assert(Res->getType() == DestTy);
1931
1932	// If the high bits are already filled with sign bit, just replace this
1933	// cast with the result.
1934	if (ComputeNumSignBits(Op: Res, CxtI: &Sext) > DestBitSize - SrcBitSize)
1935	return replaceInstUsesWith(I&: Sext, V: Res);
1936
1937	// We need to emit a shl + ashr to do the sign extend.
1938	Value *ShAmt = ConstantInt::get(Ty: DestTy, V: DestBitSize - SrcBitSize);
1939	return BinaryOperator::CreateAShr(V1: Builder.CreateShl(LHS: Res, RHS: ShAmt, Name: "sext"),
1940	V2: ShAmt);
1941	}
1942
1943	Value *X = TruncSrc;
1944	if (X) {
1945	// If the input has more sign bits than bits truncated, then convert
1946	// directly to final type.
1947	unsigned XBitSize = X->getType()->getScalarSizeInBits();
1948	bool HasNSW = cast<TruncInst>(Val: Src)->hasNoSignedWrap();
1949	if (HasNSW \|\| (ComputeNumSignBits(Op: X, CxtI: &Sext) > XBitSize - SrcBitSize)) {
1950	auto Res = CastInst::CreateIntegerCast(S: X, Ty: DestTy, /* isSigned / true);
1951	if (auto *ResTrunc = dyn_cast<TruncInst>(Val: Res); ResTrunc && HasNSW)
1952	ResTrunc->setHasNoSignedWrap(true);
1953	return Res;
1954	}
1955
1956	// If input is a trunc from the destination type, then convert into shifts.
1957	if (Src->hasOneUse() && X->getType() == DestTy) {
1958	// sext (trunc X) --> ashr (shl X, C), C
1959	Constant *ShAmt = ConstantInt::get(Ty: DestTy, V: DestBitSize - SrcBitSize);
1960	return BinaryOperator::CreateAShr(V1: Builder.CreateShl(LHS: X, RHS: ShAmt), V2: ShAmt);
1961	}
1962
1963	// If we are replacing shifted-in high zero bits with sign bits, convert
1964	// the logic shift to arithmetic shift and eliminate the cast to
1965	// intermediate type:
1966	// sext (trunc (lshr Y, C)) --> sext/trunc (ashr Y, C)
1967	Value *Y;
1968	if (Src->hasOneUse() &&
1969	match(V: X, P: m_LShr(L: m_Value(V&: Y),
1970	R: m_SpecificIntAllowPoison(V: XBitSize - SrcBitSize)))) {
1971	Value *Ashr = Builder.CreateAShr(LHS: Y, RHS: XBitSize - SrcBitSize);
1972	return CastInst::CreateIntegerCast(S: Ashr, Ty: DestTy, / isSigned / true);
1973	}
1974	}
1975
1976	if (auto *Cmp = dyn_cast<ICmpInst>(Val: Src))
1977	return transformSExtICmp(Cmp, Sext);
1978
1979	// If the input is a shl/ashr pair of a same constant, then this is a sign
1980	// extension from a smaller value. If we could trust arbitrary bitwidth
1981	// integers, we could turn this into a truncate to the smaller bit and then
1982	// use a sext for the whole extension. Since we don't, look deeper and check
1983	// for a truncate. If the source and dest are the same type, eliminate the
1984	// trunc and extend and just do shifts. For example, turn:
1985	// %a = trunc i32 %i to i8
1986	// %b = shl i8 %a, C
1987	// %c = ashr i8 %b, C
1988	// %d = sext i8 %c to i32
1989	// into:
1990	// %a = shl i32 %i, 32-(8-C)
1991	// %d = ashr i32 %a, 32-(8-C)
1992	Value A = nullptr*;
1993	// TODO: Eventually this could be subsumed by EvaluateInDifferentType.
1994	Constant BA = nullptr, CA = nullptr;
1995	if (match(V: Src,
1996	P: m_AShr(L: m_Shl(L: m_Trunc(Op: m_SpecificType(RefTy: DestTy, V&: A)), R: m_Constant(C&: BA)),
1997	R: m_ImmConstant(C&: CA))) &&
1998	BA->isElementWiseEqual(Y: CA)) {
1999	Constant *WideCurrShAmt =
2000	ConstantFoldCastOperand(Opcode: Instruction::SExt, C: CA, DestTy, DL);
2001	assert(WideCurrShAmt && "Constant folding of ImmConstant cannot fail");
2002	Constant *NumLowbitsLeft = ConstantExpr::getSub(
2003	C1: ConstantInt::get(Ty: DestTy, V: SrcTy->getScalarSizeInBits()), C2: WideCurrShAmt);
2004	Constant *NewShAmt = ConstantExpr::getSub(
2005	C1: ConstantInt::get(Ty: DestTy, V: DestTy->getScalarSizeInBits()),
2006	C2: NumLowbitsLeft);
2007	NewShAmt =
2008	Constant::mergeUndefsWith(C: Constant::mergeUndefsWith(C: NewShAmt, Other: BA), Other: CA);
2009	A = Builder.CreateShl(LHS: A, RHS: NewShAmt, Name: Sext.getName());
2010	return BinaryOperator::CreateAShr(V1: A, V2: NewShAmt);
2011	}
2012
2013	// Splatting a bit of constant-index across a value:
2014	// sext (ashr (trunc iN X to iM), M-1) to iN --> ashr (shl X, N-M), N-1
2015	// If the dest type is different, use a cast (adjust use check).
2016	if (match(V: Src, P: m_OneUse(SubPattern: m_AShr(L: m_Trunc(Op: m_Value(V&: X)),
2017	R: m_SpecificInt(V: SrcBitSize - `1`))))) {
2018	Type *XTy = X->getType();
2019	unsigned XBitSize = XTy->getScalarSizeInBits();
2020	Constant *ShlAmtC = ConstantInt::get(Ty: XTy, V: XBitSize - SrcBitSize);
2021	Constant *AshrAmtC = ConstantInt::get(Ty: XTy, V: XBitSize - `1`);
2022	if (XTy == DestTy)
2023	return BinaryOperator::CreateAShr(V1: Builder.CreateShl(LHS: X, RHS: ShlAmtC),
2024	V2: AshrAmtC);
2025	if (cast<BinaryOperator>(Val: Src)->getOperand(i_nocapture: `0`)->hasOneUse()) {
2026	Value *Ashr = Builder.CreateAShr(LHS: Builder.CreateShl(LHS: X, RHS: ShlAmtC), RHS: AshrAmtC);
2027	return CastInst::CreateIntegerCast(S: Ashr, Ty: DestTy, / isSigned / true);
2028	}
2029	}
2030
2031	if (match(V: Src, P: m_VScale())) {
2032	if (Sext.getFunction() &&
2033	Sext.getFunction()->hasFnAttribute(Kind: Attribute::VScaleRange)) {
2034	Attribute Attr =
2035	Sext.getFunction()->getFnAttribute(Kind: Attribute::VScaleRange);
2036	if (std::optional<unsigned> MaxVScale = Attr.getVScaleRangeMax())
2037	if (Log2_32(Value: *MaxVScale) < (SrcBitSize - `1`))
2038	return replaceInstUsesWith(I&: Sext, V: Builder.CreateVScale(Ty: DestTy));
2039	}
2040	}
2041
2042	// sext(scmp(x, y)) -> scmp(x, y) with a wider result type.
2043	// sext(ucmp(x, y)) -> ucmp(x, y) with a wider result type.
2044	// scmp/ucmp return only -1, 0, or 1, which sign-extend correctly to any
2045	// wider integer type, so we can sink the extension into the intrinsic.
2046	if (auto *CI = dyn_cast<CmpIntrinsic>(Val: Src); CI && CI->hasOneUse())
2047	return replaceInstUsesWith(
2048	I&: Sext, V: Builder.CreateIntrinsic(RetTy: DestTy, ID: CI->getIntrinsicID(),
2049	Args: {CI->getLHS(), CI->getRHS()}));
2050
2051	Value *Y;
2052	if (match(V: Src, P: m_OneUse(SubPattern: m_c_BitwiseLogic(
2053	L: m_NSWTrunc(Op: m_SpecificType(RefTy: DestTy, V&: X)), R: m_Value(V&: Y))))) {
2054	Value *SextY = Builder.CreateSExt(V: Y, DestTy);
2055	return BinaryOperator::Create(Op: cast<BinaryOperator>(Val: Src)->getOpcode(), S1: X,
2056	S2: SextY);
2057	}
2058
2059	return nullptr;
2060	}
2061
2062	/// Return a Constant for the specified floating-point constant if it fits*
2063	/// in the specified FP type without changing its value.
2064	static bool fitsInFPType(APFloat F, const fltSemantics &Sem) {
2065	bool losesInfo;
2066	(void)F.convert(ToSemantics: Sem, RM: APFloat::rmNearestTiesToEven, losesInfo: &losesInfo);
2067	return !losesInfo;
2068	}
2069
2070	static Type shrinkFPConstant(LLVMContext &Ctx, const* APFloat &F,
2071	bool PreferBFloat) {
2072	// See if the value can be truncated to bfloat and then reextended.
2073	if (PreferBFloat && fitsInFPType(F, Sem: APFloat::BFloat()))
2074	return Type::getBFloatTy(C&: Ctx);
2075	// See if the value can be truncated to half and then reextended.
2076	if (!PreferBFloat && fitsInFPType(F, Sem: APFloat::IEEEhalf()))
2077	return Type::getHalfTy(C&: Ctx);
2078	// See if the value can be truncated to float and then reextended.
2079	if (fitsInFPType(F, Sem: APFloat::IEEEsingle()))
2080	return Type::getFloatTy(C&: Ctx);
2081	if (&F.getSemantics() == &APFloat::IEEEdouble())
2082	return nullptr; // Won't shrink.
2083	// See if the value can be truncated to double and then reextended.
2084	if (fitsInFPType(F, Sem: APFloat::IEEEdouble()))
2085	return Type::getDoubleTy(C&: Ctx);
2086	// Don't try to shrink to various long double types.
2087	return nullptr;
2088	}
2089
2090	static Type shrinkFPConstant(ConstantFP CFP, bool PreferBFloat) {
2091	Type *Ty = CFP->getType();
2092	if (Ty->getScalarType()->isPPC_FP128Ty())
2093	return nullptr; // No constant folding of this.
2094
2095	Type *ShrinkTy =
2096	shrinkFPConstant(Ctx&: CFP->getContext(), F: CFP->getValueAPF(), PreferBFloat);
2097	if (ShrinkTy)
2098	if (auto *VecTy = dyn_cast<VectorType>(Val: Ty))
2099	ShrinkTy = VectorType::get(ElementType: ShrinkTy, Other: VecTy);
2100
2101	return ShrinkTy;
2102	}
2103
2104	// Determine if this is a vector of ConstantFPs and if so, return the minimal
2105	// type we can safely truncate all elements to.
2106	static Type shrinkFPConstantVector(Value V, bool PreferBFloat) {
2107	auto *CV = dyn_cast<Constant>(Val: V);
2108	auto *CVVTy = dyn_cast<FixedVectorType>(Val: V->getType());
2109	if (!CV \|\| !CVVTy)
2110	return nullptr;
2111
2112	Type MinType = nullptr*;
2113
2114	unsigned NumElts = CVVTy->getNumElements();
2115
2116	// For fixed-width vectors we find the minimal type by looking
2117	// through the constant values of the vector.
2118	for (unsigned I = `0`; I != NumElts; ++I) {
2119	if (match(V: CV->getAggregateElement(Elt: I), P: m_Poison()))
2120	continue;
2121
2122	auto *CFP = dyn_cast_or_null<ConstantFP>(Val: CV->getAggregateElement(Elt: I));
2123	if (!CFP)
2124	return nullptr;
2125
2126	Type *T = shrinkFPConstant(CFP, PreferBFloat);
2127	if (!T)
2128	return nullptr;
2129
2130	// If we haven't found a type yet or this type has a larger mantissa than
2131	// our previous type, this is our new minimal type.
2132	if (!MinType \|\| T->getFPMantissaWidth() > MinType->getFPMantissaWidth())
2133	MinType = T;
2134	}
2135
2136	// Make a vector type from the minimal type.
2137	return MinType ? FixedVectorType::get(ElementType: MinType, NumElts) : nullptr;
2138	}
2139
2140	/// Find the minimum FP type we can safely truncate to.
2141	static Type getMinimumFPType(Value V, Type *PreferredTy, InstCombiner &IC) {
2142	if (auto *FPExt = dyn_cast<FPExtInst>(Val: V))
2143	return FPExt->getOperand(i_nocapture: `0`)->getType();
2144
2145	Value *Src;
2146	if (match(V, P: m_IToFP(Op: m_Value(V&: Src))) &&
2147	IC.canBeCastedExactlyIntToFP(V: Src, FPTy: PreferredTy, IsSigned: isa<SIToFPInst>(Val: V),
2148	CxtI: cast<Instruction>(Val: V)))
2149	return PreferredTy;
2150
2151	bool PreferBFloat = PreferredTy->getScalarType()->isBFloatTy();
2152	// If this value is a constant, return the constant in the smallest FP type
2153	// that can accurately represent it. This allows us to turn
2154	// (float)((double)X+2.0) into x+2.0f.
2155	if (auto *CFP = dyn_cast<ConstantFP>(Val: V))
2156	if (Type *T = shrinkFPConstant(CFP, PreferBFloat))
2157	return T;
2158
2159	// Try to shrink scalable and fixed splat vectors.
2160	if (auto *FPC = dyn_cast<Constant>(Val: V))
2161	if (auto *VTy = dyn_cast<VectorType>(Val: V->getType()))
2162	if (auto *Splat = dyn_cast_or_null<ConstantFP>(Val: FPC->getSplatValue()))
2163	if (Type *T = shrinkFPConstant(CFP: Splat, PreferBFloat))
2164	return VectorType::get(ElementType: T, Other: VTy);
2165
2166	// Try to shrink a vector of FP constants. This returns nullptr on scalable
2167	// vectors
2168	if (Type *T = shrinkFPConstantVector(V, PreferBFloat))
2169	return T;
2170
2171	return V->getType();
2172	}
2173
2174	bool InstCombiner::canBeCastedExactlyIntToFP(Value V, Type FPTy,
2175	bool IsSigned,
2176	const Instruction CxtI) const* {
2177	Type *SrcTy = V->getType();
2178	assert(SrcTy->isIntOrIntVectorTy() && "Expected an integer type");
2179	int SrcSize = (int)SrcTy->getScalarSizeInBits() - IsSigned;
2180	int DestNumSigBits = FPTy->getFPMantissaWidth();
2181
2182	// Easy case - if the source integer type has less bits than the FP mantissa,
2183	// then the cast must be exact.
2184	if (SrcSize <= DestNumSigBits)
2185	return true;
2186
2187	// Cast from FP to integer and back to FP is independent of the intermediate
2188	// integer width because of poison on overflow.
2189	Value *F;
2190	if (match(V, P: m_FPToI(Op: m_Value(V&: F)))) {
2191	// If this is uitofp (fptosi F), the source needs an extra bit to avoid
2192	// potential rounding of negative FP input values.
2193	int SrcNumSigBits = F->getType()->getFPMantissaWidth();
2194	if (!IsSigned && match(V, P: m_FPToSI(Op: m_Value())))
2195	SrcNumSigBits++;
2196
2197	// [su]itofp (fpto[su]i F) --> exact if the source type has less or equal
2198	// significant bits than the destination (and make sure neither type is
2199	// weird -- ppc_fp128).
2200	if (SrcNumSigBits > `0` && DestNumSigBits > `0` &&
2201	SrcNumSigBits <= DestNumSigBits)
2202	return true;
2203	}
2204
2205	// Try harder to find if the source integer type has less significant bits.
2206	// Compute number of sign bits or determine trailing zeros.
2207	KnownBits SrcKnown = computeKnownBits(V, CxtI);
2208	int SigBits = (int)SrcTy->getScalarSizeInBits() -
2209	SrcKnown.countMinLeadingZeros() -
2210	SrcKnown.countMinTrailingZeros();
2211	if (SigBits <= DestNumSigBits)
2212	return true;
2213
2214	// For sitofp, the sign maps to the FP sign bit, so only magnitude bits
2215	// (BitWidth - NumSignBits) consume mantissa.
2216	if (IsSigned) {
2217	SigBits = (int)SrcTy->getScalarSizeInBits() - ComputeNumSignBits(Op: V, CxtI);
2218	if (SigBits <= DestNumSigBits)
2219	return true;
2220	}
2221
2222	return false;
2223	}
2224
2225	bool InstCombiner::isKnownExactCastIntToFP(CastInst &I) const {
2226	CastInst::CastOps Opcode = I.getOpcode();
2227	assert((Opcode == CastInst::SIToFP \|\| Opcode == CastInst::UIToFP) &&
2228	"Unexpected cast");
2229	Value *Src = I.getOperand(i_nocapture: `0`);
2230	Type *FPTy = I.getType();
2231	return canBeCastedExactlyIntToFP(V: Src, FPTy, IsSigned: Opcode == CastInst::SIToFP, CxtI: &I);
2232	}
2233
2234	Instruction *InstCombinerImpl::visitFPTrunc(FPTruncInst &FPT) {
2235	if (Instruction *I = commonCastTransforms(CI&: FPT))
2236	return I;
2237
2238	// If we have fptrunc(OpI (fpextend x), (fpextend y)), we would like to
2239	// simplify this expression to avoid one or more of the trunc/extend
2240	// operations if we can do so without changing the numerical results.
2241	//
2242	// The exact manner in which the widths of the operands interact to limit
2243	// what we can and cannot do safely varies from operation to operation, and
2244	// is explained below in the various case statements.
2245	Type *Ty = FPT.getType();
2246	auto *BO = dyn_cast<BinaryOperator>(Val: FPT.getOperand(i_nocapture: `0`));
2247	if (BO && BO->hasOneUse()) {
2248	Type LHSMinType = getMinimumFPType(V: BO->getOperand(i_nocapture: `0`), PreferredTy: Ty, IC&: this);
2249	Type RHSMinType = getMinimumFPType(V: BO->getOperand(i_nocapture: `1`), PreferredTy: Ty, IC&: this);
2250	unsigned OpWidth = BO->getType()->getFPMantissaWidth();
2251	unsigned LHSWidth = LHSMinType->getFPMantissaWidth();
2252	unsigned RHSWidth = RHSMinType->getFPMantissaWidth();
2253	unsigned SrcWidth = std::max(a: LHSWidth, b: RHSWidth);
2254	unsigned DstWidth = Ty->getFPMantissaWidth();
2255
2256	// Narrowing recomputes the binop in a smaller type, which can overflow to
2257	// inf where the wide op was finite. Therefore we can only keep ninf if
2258	// both the binop and the fptrunc have that flag.
2259	FastMathFlags NarrowFMF = BO->getFastMathFlags();
2260	NarrowFMF.setNoInfs(NarrowFMF.noInfs() && FPT.hasNoInfs());
2261
2262	switch (BO->getOpcode()) {
2263	default: break;
2264	case Instruction::FAdd:
2265	case Instruction::FSub:
2266	// For addition and subtraction, the infinitely precise result can
2267	// essentially be arbitrarily wide; proving that double rounding
2268	// will not occur because the result of OpI is exact (as we will for
2269	// FMul, for example) is hopeless. However, we can* nonetheless*
2270	// frequently know that double rounding cannot occur (or that it is
2271	// innocuous) by taking advantage of the specific structure of
2272	// infinitely-precise results that admit double rounding.
2273	//
2274	// Specifically, if OpWidth >= 2DstWdith+1 and DstWidth is sufficient*
2275	// to represent both sources, we can guarantee that the double
2276	// rounding is innocuous (See p50 of Figueroa's 2000 PhD thesis,
2277	// "A Rigorous Framework for Fully Supporting the IEEE Standard ..."
2278	// for proof of this fact).
2279	//
2280	// Note: Figueroa does not consider the case where DstFormat !=
2281	// SrcFormat. It's possible (likely even!) that this analysis
2282	// could be tightened for those cases, but they are rare (the main
2283	// case of interest here is (float)((double)float + float)).
2284	if (OpWidth >= `2`*DstWidth+`1` && DstWidth >= SrcWidth) {
2285	Value *LHS = Builder.CreateFPTrunc(V: BO->getOperand(i_nocapture: `0`), DestTy: Ty);
2286	Value *RHS = Builder.CreateFPTrunc(V: BO->getOperand(i_nocapture: `1`), DestTy: Ty);
2287	Instruction *RI = BinaryOperator::Create(Op: BO->getOpcode(), S1: LHS, S2: RHS);
2288	RI->setFastMathFlags(NarrowFMF);
2289	return RI;
2290	}
2291	break;
2292	case Instruction::FMul:
2293	// For multiplication, the infinitely precise result has at most
2294	// LHSWidth + RHSWidth significant bits; if OpWidth is sufficient
2295	// that such a value can be exactly represented, then no double
2296	// rounding can possibly occur; we can safely perform the operation
2297	// in the destination format if it can represent both sources.
2298	if (OpWidth >= LHSWidth + RHSWidth && DstWidth >= SrcWidth) {
2299	Value *LHS = Builder.CreateFPTrunc(V: BO->getOperand(i_nocapture: `0`), DestTy: Ty);
2300	Value *RHS = Builder.CreateFPTrunc(V: BO->getOperand(i_nocapture: `1`), DestTy: Ty);
2301	return BinaryOperator::CreateFMulFMF(V1: LHS, V2: RHS, FMF: NarrowFMF);
2302	}
2303	break;
2304	case Instruction::FDiv:
2305	// For division, we use again use the bound from Figueroa's
2306	// dissertation. I am entirely certain that this bound can be
2307	// tightened in the unbalanced operand case by an analysis based on
2308	// the diophantine rational approximation bound, but the well-known
2309	// condition used here is a good conservative first pass.
2310	// TODO: Tighten bound via rigorous analysis of the unbalanced case.
2311	if (OpWidth >= `2`*DstWidth && DstWidth >= SrcWidth) {
2312	Value *LHS = Builder.CreateFPTrunc(V: BO->getOperand(i_nocapture: `0`), DestTy: Ty);
2313	Value *RHS = Builder.CreateFPTrunc(V: BO->getOperand(i_nocapture: `1`), DestTy: Ty);
2314	return BinaryOperator::CreateFDivFMF(V1: LHS, V2: RHS, FMF: NarrowFMF);
2315	}
2316	break;
2317	case Instruction::FRem: {
2318	// Remainder is straightforward. Remainder is always exact, so the
2319	// type of OpI doesn't enter into things at all. We simply evaluate
2320	// in whichever source type is larger, then convert to the
2321	// destination type.
2322	if (SrcWidth == OpWidth)
2323	break;
2324	Value LHS, RHS;
2325	if (LHSWidth == SrcWidth) {
2326	LHS = Builder.CreateFPTrunc(V: BO->getOperand(i_nocapture: `0`), DestTy: LHSMinType);
2327	RHS = Builder.CreateFPTrunc(V: BO->getOperand(i_nocapture: `1`), DestTy: LHSMinType);
2328	} else {
2329	LHS = Builder.CreateFPTrunc(V: BO->getOperand(i_nocapture: `0`), DestTy: RHSMinType);
2330	RHS = Builder.CreateFPTrunc(V: BO->getOperand(i_nocapture: `1`), DestTy: RHSMinType);
2331	}
2332
2333	Value *ExactResult = Builder.CreateFRemFMF(L: LHS, R: RHS, FMFSource: BO);
2334	return CastInst::CreateFPCast(S: ExactResult, Ty);
2335	}
2336	}
2337	}
2338
2339	// (fptrunc (fneg x)) -> (fneg (fptrunc x))
2340	Value *X;
2341	Instruction *Op = dyn_cast<Instruction>(Val: FPT.getOperand(i_nocapture: `0`));
2342	if (Op && Op->hasOneUse()) {
2343	FastMathFlags FMF = FPT.getFastMathFlags();
2344	if (auto *FPMO = dyn_cast<FPMathOperator>(Val: Op))
2345	FMF &= FPMO->getFastMathFlags();
2346
2347	if (match(V: Op, P: m_FNeg(X: m_Value(V&: X)))) {
2348	Value *InnerTrunc = Builder.CreateFPTruncFMF(V: X, DestTy: Ty, FMFSource: FMF);
2349	Value *Neg = Builder.CreateFNegFMF(V: InnerTrunc, FMFSource: FMF);
2350	return replaceInstUsesWith(I&: FPT, V: Neg);
2351	}
2352
2353	// If we are truncating a select that has an extended operand, we can
2354	// narrow the other operand and do the select as a narrow op.
2355	Value Cond, X, *Y;
2356	if (match(V: Op, P: m_Select(C: m_Value(V&: Cond), L: m_FPExt(Op: m_SpecificType(RefTy: Ty, V&: X)),
2357	R: m_Value(V&: Y)))) {
2358	// fptrunc (select Cond, (fpext X), Y --> select Cond, X, (fptrunc Y)
2359	Value *NarrowY = Builder.CreateFPTruncFMF(V: Y, DestTy: Ty, FMFSource: FMF);
2360	Value *Sel =
2361	Builder.CreateSelectFMF(C: Cond, True: X, False: NarrowY, FMFSource: FMF, Name: "narrow.sel", MDFrom: Op);
2362	return replaceInstUsesWith(I&: FPT, V: Sel);
2363	}
2364	if (match(V: Op, P: m_Select(C: m_Value(V&: Cond), L: m_Value(V&: Y),
2365	R: m_FPExt(Op: m_SpecificType(RefTy: Ty, V&: X))))) {
2366	// fptrunc (select Cond, Y, (fpext X) --> select Cond, (fptrunc Y), X
2367	Value *NarrowY = Builder.CreateFPTruncFMF(V: Y, DestTy: Ty, FMFSource: FMF);
2368	Value *Sel =
2369	Builder.CreateSelectFMF(C: Cond, True: NarrowY, False: X, FMFSource: FMF, Name: "narrow.sel", MDFrom: Op);
2370	return replaceInstUsesWith(I&: FPT, V: Sel);
2371	}
2372	}
2373
2374	if (auto *II = dyn_cast<IntrinsicInst>(Val: FPT.getOperand(i_nocapture: `0`))) {
2375	switch (II->getIntrinsicID()) {
2376	default: break;
2377	case Intrinsic::ceil:
2378	case Intrinsic::fabs:
2379	case Intrinsic::floor:
2380	case Intrinsic::nearbyint:
2381	case Intrinsic::rint:
2382	case Intrinsic::round:
2383	case Intrinsic::roundeven:
2384	case Intrinsic::trunc: {
2385	Value *Src = II->getArgOperand(i: `0`);
2386	if (!Src->hasOneUse())
2387	break;
2388
2389	// Except for fabs, this transformation requires the input of the unary FP
2390	// operation to be itself an fpext from the type to which we're
2391	// truncating.
2392	if (II->getIntrinsicID() != Intrinsic::fabs) {
2393	FPExtInst *FPExtSrc = dyn_cast<FPExtInst>(Val: Src);
2394	if (!FPExtSrc \|\| FPExtSrc->getSrcTy() != Ty)
2395	break;
2396	}
2397
2398	// Do unary FP operation on smaller type.
2399	// (fptrunc (fabs x)) -> (fabs (fptrunc x))
2400	Value *InnerTrunc = Builder.CreateFPTrunc(V: Src, DestTy: Ty);
2401	Function *Overload = Intrinsic::getOrInsertDeclaration(
2402	M: FPT.getModule(), id: II->getIntrinsicID(), OverloadTys: Ty);
2403	SmallVector<OperandBundleDef, `1`> OpBundles;
2404	II->getOperandBundlesAsDefs(Defs&: OpBundles);
2405	CallInst *NewCI =
2406	CallInst::Create(Func: Overload, Args: {InnerTrunc}, Bundles: OpBundles, NameStr: II->getName());
2407	// A normal value may be converted to an infinity. It means that we cannot
2408	// propagate ninf from the intrinsic. So we propagate FMF from fptrunc.
2409	NewCI->copyFastMathFlags(I: &FPT);
2410	return NewCI;
2411	}
2412	}
2413	}
2414
2415	if (Instruction *I = shrinkInsertElt(Trunc&: FPT, Builder))
2416	return I;
2417
2418	Value *Src = FPT.getOperand(i_nocapture: `0`);
2419	if (isa<SIToFPInst>(Val: Src) \|\| isa<UIToFPInst>(Val: Src)) {
2420	auto *FPCast = cast<CastInst>(Val: Src);
2421	if (isKnownExactCastIntToFP(I&: *FPCast))
2422	return CastInst::Create(FPCast->getOpcode(), S: FPCast->getOperand(i_nocapture: `0`), Ty);
2423	}
2424
2425	return nullptr;
2426	}
2427
2428	Instruction *InstCombinerImpl::visitFPExt(CastInst &FPExt) {
2429	// If the source operand is a cast from integer to FP and known exact, then
2430	// cast the integer operand directly to the destination type.
2431	Type *Ty = FPExt.getType();
2432	Value *Src = FPExt.getOperand(i_nocapture: `0`);
2433	if (isa<SIToFPInst>(Val: Src) \|\| isa<UIToFPInst>(Val: Src)) {
2434	auto *FPCast = cast<CastInst>(Val: Src);
2435	if (isKnownExactCastIntToFP(I&: *FPCast))
2436	return CastInst::Create(FPCast->getOpcode(), S: FPCast->getOperand(i_nocapture: `0`), Ty);
2437	}
2438
2439	return commonCastTransforms(CI&: FPExt);
2440	}
2441
2442	/// fpto{s/u}i[.sat]({u/s}itofp(X)) --> X or zext(X) or sext(X) or trunc(X)
2443	/// This is safe if the intermediate type has enough bits in its mantissa to
2444	/// accurately represent all values of X. For example, this won't work with
2445	/// i64 -> float -> i64.
2446	template <typename FPToIntTy>
2447	Instruction *InstCombinerImpl::foldItoFPtoI(FPToIntTy &FI) {
2448	constexpr bool IsSaturating = std::is_same_v<FPToIntTy, IntrinsicInst>;
2449
2450	if (!isa<UIToFPInst>(FI.getOperand(`0`)) && !isa<SIToFPInst>(FI.getOperand(`0`)))
2451	return nullptr;
2452
2453	auto *OpI = cast<CastInst>(FI.getOperand(`0`));
2454	Value *X = OpI->getOperand(`0`);
2455	Type *XType = X->getType();
2456	Type *DestType = FI.getType();
2457	bool IsInputSigned = isa<SIToFPInst>(OpI);
2458
2459	bool IsOutputSigned;
2460	if constexpr (IsSaturating)
2461	IsOutputSigned = FI.getIntrinsicID() == Intrinsic::fptosi_sat;
2462	else
2463	IsOutputSigned = isa<FPToSIInst>(FI);
2464
2465	// Since we can assume the conversion won't overflow, our decision as to
2466	// whether the input will fit in the float should depend on the minimum
2467	// of the input range and output range.
2468
2469	// This means this is also safe for a signed input and unsigned output, since
2470	// a negative input would lead to undefined behavior.
2471	if (!isKnownExactCastIntToFP(I&: *OpI)) {
2472	if constexpr (!IsSaturating) {
2473	// The first cast may not round exactly based on the source integer width
2474	// and FP width, but the overflow UB rules can still allow this to fold.
2475	// If the destination type is narrow, that means the intermediate FP value
2476	// must be large enough to hold the source value exactly.
2477	//
2478	// For example, (uint8_t)((float)(uint32_t 16777217) is UB.
2479	int OutputSize = (int)DestType->getScalarSizeInBits();
2480	if (OutputSize > OpI->getType()->getFPMantissaWidth())
2481	return nullptr;
2482	} else {
2483	// Sat intrinsics produce a defined saturated value on overflow, so
2484	// the UB-based shortcut is invalid. Require exactness.
2485	return nullptr;
2486	}
2487	}
2488
2489	unsigned SrcWidth = XType->getScalarSizeInBits();
2490	unsigned DestWidth = DestType->getScalarSizeInBits();
2491
2492	if constexpr (IsSaturating) {
2493	// TODO: cross-sign and narrowing cases could be handled with range
2494	// analysis to prove the source fits in the destination.
2495	if (IsInputSigned != IsOutputSigned \|\| DestWidth < SrcWidth)
2496	return nullptr;
2497	}
2498
2499	if (DestWidth > SrcWidth) {
2500	if (IsInputSigned && IsOutputSigned)
2501	return new SExtInst (X, DestType);
2502	return new ZExtInst (X, DestType);
2503	}
2504	if (DestWidth < SrcWidth)
2505	return new TruncInst (X, DestType);
2506
2507	assert(XType == DestType && "Unexpected types for int to FP to int casts");
2508	return replaceInstUsesWith(I&: FI, V: X);
2509	}
2510
2511	template Instruction *InstCombinerImpl::foldItoFPtoI<CastInst>(CastInst &);
2512	template Instruction *
2513	InstCombinerImpl::foldItoFPtoI<IntrinsicInst>(IntrinsicInst &);
2514
2515	static Instruction *foldFPtoI(Instruction &FI, InstCombiner &IC) {
2516	// fpto{u/s}i non-norm --> 0
2517	FPClassTest Mask =
2518	FI.getOpcode() == Instruction::FPToUI ? fcPosNormal : fcNormal;
2519	KnownFPClass FPClass = computeKnownFPClass(
2520	V: FI.getOperand(i: `0`), InterestedClasses: Mask, SQ: IC.getSimplifyQuery().getWithInstruction(I: &FI));
2521	if (FPClass.isKnownNever(Mask))
2522	return IC.replaceInstUsesWith(I&: FI, V: ConstantInt::getNullValue(Ty: FI.getType()));
2523
2524	// fpto{u/s}i (fdiv ({u/s}itofp X to F), C_fp) --> {u/s}div X, C
2525	//
2526	// F has precision p (significand bits incl. hidden bit); C_fp is the exact FP
2527	// value of the integer constant C. Given N = integer width, this is safe if:
2528	// Unsigned: C > 0 and N <= p.
2529	// Signed: C != 0 and N - 1 <= p, excluding (X == INT_MIN, C == -1) since
2530	// sdiv INT_MIN, -1 is UB while the FP path only yields poison.
2531	// fdiv X, -1 gets transformed to fneg in InstCombine regardless.
2532	//
2533	// The bounds make {u/s}itofp and C_fp exact (every \|int\| <= 2^p is exact),
2534	// and ensure the rounded quotient never crosses an integer boundary:
2535	// Rounding lemma: for 0 <= A <= 2^p, 1 <= B <= 2^p, q = floor(A/B),
2536	// trunc(R_p(A/B)) = q.
2537	// For r = A - qB > 0, m = q+1, half-gap H(m) <= q/2^p and
2538	// m - A/B = (B-r)/B >= 1/B > q/2^p >= H(m), so R_p(A/B) < m; q = 0 is
2539	// similar (H(1) = 2^(-p-1) < 2^-p <= 1/B).
2540	// Signed case: by symmetry R_p(-z) = -R_p(z), so fptosi yields sq = sdiv.*
2541	bool IsSigned = FI.getOpcode() == Instruction::FPToSI;
2542	Value *X;
2543	const APFloat *APF;
2544	if (IsSigned) {
2545	if (!match(V: FI.getOperand(i: `0`),
2546	P: m_OneUse(SubPattern: m_FDiv(L: m_SIToFP(Op: m_Value(V&: X)), R: m_APFloat(Res&: APF)))))
2547	return nullptr;
2548	} else {
2549	if (!match(V: FI.getOperand(i: `0`),
2550	P: m_OneUse(SubPattern: m_FDiv(L: m_UIToFP(Op: m_Value(V&: X)), R: m_APFloat(Res&: APF)))))
2551	return nullptr;
2552	}
2553	Type *IntTy = X->getType();
2554	if (FI.getType() != IntTy)
2555	return nullptr;
2556
2557	unsigned IntWidth = IntTy->getScalarSizeInBits();
2558	unsigned Precision = APFloat::semanticsPrecision(APF->getSemantics());
2559	if (Precision + IsSigned < IntWidth)
2560	return nullptr;
2561
2562	if (!APF->isInteger())
2563	return nullptr;
2564
2565	APSInt Divisor(IntWidth, !IsSigned);
2566	bool IsExact = false;
2567	APF->convertToInteger(Result&: Divisor, RM: APFloat::rmTowardZero, IsExact: &IsExact);
2568	if (!IsExact)
2569	return nullptr;
2570
2571	if (Divisor.isZero())
2572	return nullptr;
2573
2574	// sdiv INT_MIN, -1 is UB, not poison, so this isn't valid if X == INT_MIN.
2575	// fdiv X, -1 gets transformed to fneg anyways, so we do not handle C == -1.
2576	if (IsSigned && Divisor.isAllOnes())
2577	return nullptr;
2578
2579	Constant *C = ConstantInt::get(Ty: IntTy, V: Divisor);
2580	return IsSigned ? BinaryOperator::CreateSDiv(V1: X, V2: C)
2581	: BinaryOperator::CreateUDiv(V1: X, V2: C);
2582	}
2583
2584	Instruction *InstCombinerImpl::visitFPToUI(FPToUIInst &FI) {
2585	if (Instruction *I = foldItoFPtoI(FI))
2586	return I;
2587
2588	if (Instruction I = foldFPtoI(FI, IC&: this))
2589	return I;
2590
2591	return commonCastTransforms(CI&: FI);
2592	}
2593
2594	Instruction *InstCombinerImpl::visitFPToSI(FPToSIInst &FI) {
2595	if (Instruction *I = foldItoFPtoI(FI))
2596	return I;
2597
2598	if (Instruction I = foldFPtoI(FI, IC&: this))
2599	return I;
2600
2601	return commonCastTransforms(CI&: FI);
2602	}
2603
2604	Instruction *InstCombinerImpl::visitUIToFP(CastInst &CI) {
2605	if (Instruction *R = commonCastTransforms(CI))
2606	return R;
2607	if (!CI.hasNonNeg() && isKnownNonNegative(V: CI.getOperand(i_nocapture: `0`), SQ)) {
2608	CI.setNonNeg();
2609	return &CI;
2610	}
2611	return nullptr;
2612	}
2613
2614	Instruction *InstCombinerImpl::visitSIToFP(CastInst &CI) {
2615	if (Instruction *R = commonCastTransforms(CI))
2616	return R;
2617	if (isKnownNonNegative(V: CI.getOperand(i_nocapture: `0`), SQ)) {
2618	auto *UI =
2619	CastInst::Create(Instruction::UIToFP, S: CI.getOperand(i_nocapture: `0`), Ty: CI.getType());
2620	UI->setNonNeg(true);
2621	return UI;
2622	}
2623	return nullptr;
2624	}
2625
2626	Instruction *InstCombinerImpl::visitIntToPtr(IntToPtrInst &CI) {
2627	// If the source integer type is not the intptr_t type for this target, do a
2628	// trunc or zext to the intptr_t type, then inttoptr of it. This allows the
2629	// cast to be exposed to other transforms.
2630	unsigned AS = CI.getAddressSpace();
2631	if (CI.getOperand(i_nocapture: `0`)->getType()->getScalarSizeInBits() !=
2632	DL.getPointerSizeInBits(AS)) {
2633	Type *Ty = CI.getOperand(i_nocapture: `0`)->getType()->getWithNewType(
2634	EltTy: DL.getIntPtrType(C&: CI.getContext(), AddressSpace: AS));
2635	Value *P = Builder.CreateZExtOrTrunc(V: CI.getOperand(i_nocapture: `0`), DestTy: Ty);
2636	return new IntToPtrInst (P, CI.getType());
2637	}
2638
2639	// Replace (inttoptr (add (ptrtoint %Base), %Offset)) with
2640	// (getelementptr i8, %Base, %Offset) if the pointer is only used as integer
2641	// value.
2642	Value *Base;
2643	Value *Offset;
2644	auto UsesPointerAsInt = [](User *U) {
2645	if (isa<ICmpInst, PtrToIntInst>(Val: U))
2646	return true;
2647	if (auto *P = dyn_cast<PHINode>(Val: U))
2648	return P->hasOneUse() && isa<ICmpInst, PtrToIntInst>(Val: *P->user_begin());
2649	return false;
2650	};
2651	if (match(V: CI.getOperand(i_nocapture: `0`),
2652	P: m_OneUse(SubPattern: m_c_Add(L: m_PtrToIntSameSize(DL, Op: m_Value(V&: Base)),
2653	R: m_Value(V&: Offset)))) &&
2654	CI.getType()->getPointerAddressSpace() ==
2655	Base->getType()->getPointerAddressSpace() &&
2656	all_of(Range: CI.users(), P: UsesPointerAsInt)) {
2657	return GetElementPtrInst::Create(PointeeType: Builder.getInt8Ty(), Ptr: Base, IdxList: Offset);
2658	}
2659
2660	if (Instruction *I = commonCastTransforms(CI))
2661	return I;
2662
2663	return nullptr;
2664	}
2665
2666	Value InstCombinerImpl::foldPtrToIntOrAddrOfGEP(Type IntTy, Value *Ptr) {
2667	// Look through chain of one-use GEPs.
2668	Type *PtrTy = Ptr->getType();
2669	SmallVector<GEPOperator *> GEPs;
2670	while (true) {
2671	auto *GEP = dyn_cast<GEPOperator>(Val: Ptr);
2672	if (!GEP \|\| !GEP->hasOneUse())
2673	break;
2674	GEPs.push_back(Elt: GEP);
2675	Ptr = GEP->getPointerOperand();
2676	}
2677
2678	// Don't handle case where GEP converts from pointer to vector.
2679	if (GEPs.empty() \|\| PtrTy != Ptr->getType())
2680	return nullptr;
2681
2682	// Check whether we know the integer value of the base pointer.
2683	Value *Res;
2684	Type *IdxTy = DL.getIndexType(PtrTy);
2685	if (match(V: Ptr, P: m_OneUse(SubPattern: m_IntToPtr(Op: m_Value(V&: Res)))) &&
2686	Res->getType() == IntTy && IntTy == IdxTy) {
2687	// pass
2688	} else if (isa<ConstantPointerNull>(Val: Ptr)) {
2689	Res = Constant::getNullValue(Ty: IdxTy);
2690	} else {
2691	return nullptr;
2692	}
2693
2694	// Perform the entire operation on integers instead.
2695	for (GEPOperator *GEP : reverse(C&: GEPs)) {
2696	Value *Offset = EmitGEPOffset(GEP);
2697	Res = Builder.CreateAdd(LHS: Res, RHS: Offset, Name: "", HasNUW: GEP->hasNoUnsignedWrap());
2698	}
2699	return Builder.CreateZExtOrTrunc(V: Res, DestTy: IntTy);
2700	}
2701
2702	Instruction *InstCombinerImpl::visitPtrToInt(PtrToIntInst &CI) {
2703	// If the destination integer type is not the intptr_t type for this target,
2704	// do a ptrtoint to intptr_t then do a trunc or zext. This allows the cast
2705	// to be exposed to other transforms.
2706	Value *SrcOp = CI.getPointerOperand();
2707	Type *SrcTy = SrcOp->getType();
2708	Type *Ty = CI.getType();
2709	unsigned AS = CI.getPointerAddressSpace();
2710	unsigned TySize = Ty->getScalarSizeInBits();
2711	unsigned PtrSize = DL.getPointerSizeInBits(AS);
2712	if (TySize != PtrSize) {
2713	Type *IntPtrTy =
2714	SrcTy->getWithNewType(EltTy: DL.getIntPtrType(C&: CI.getContext(), AddressSpace: AS));
2715	Value *P = Builder.CreatePtrToInt(V: SrcOp, DestTy: IntPtrTy);
2716	return CastInst::CreateIntegerCast(S: P, Ty, /isSigned=/false);
2717	}
2718
2719	// (ptrtoint (ptrmask P, M))
2720	// -> (and (ptrtoint P), M)
2721	// This is generally beneficial as `and` is better supported than `ptrmask`.
2722	Value Ptr, Mask;
2723	if (match(V: SrcOp, P: m_OneUse(SubPattern: m_Intrinsic<Intrinsic::ptrmask>(
2724	Ops: m_Value(V&: Ptr), Ops: m_SpecificType(RefTy: Ty, V&: Mask)))))
2725	return BinaryOperator::CreateAnd(V1: Builder.CreatePtrToInt(V: Ptr, DestTy: Ty), V2: Mask);
2726
2727	if (Value *V = foldPtrToIntOrAddrOfGEP(IntTy: Ty, Ptr: SrcOp))
2728	return replaceInstUsesWith(I&: CI, V);
2729
2730	Value Vec, Scalar, *Index;
2731	if (match(V: SrcOp, P: m_OneUse(SubPattern: m_InsertElt(Val: m_IntToPtr(Op: m_SpecificType(RefTy: Ty, V&: Vec)),
2732	Elt: m_Value(V&: Scalar), Idx: m_Value(V&: Index))))) {
2733	assert(Vec->getType()->getScalarSizeInBits() == PtrSize && "Wrong type");
2734	// Convert the scalar to int followed by insert to eliminate one cast:
2735	// p2i (ins (i2p Vec), Scalar, Index --> ins Vec, (p2i Scalar), Index
2736	Value *NewCast = Builder.CreatePtrToInt(V: Scalar, DestTy: Ty->getScalarType());
2737	return InsertElementInst::Create(Vec, NewElt: NewCast, Idx: Index);
2738	}
2739
2740	return commonCastTransforms(CI);
2741	}
2742
2743	Instruction *InstCombinerImpl::visitPtrToAddr(PtrToAddrInst &CI) {
2744	Value *SrcOp = CI.getPointerOperand();
2745	Type *Ty = CI.getType();
2746
2747	// (ptrtoaddr (ptrmask P, M))
2748	// -> (and (ptrtoaddr P), M)
2749	// This is generally beneficial as `and` is better supported than `ptrmask`.
2750	Value Ptr, Mask;
2751	if (match(V: SrcOp, P: m_OneUse(SubPattern: m_Intrinsic<Intrinsic::ptrmask>(
2752	Ops: m_Value(V&: Ptr), Ops: m_SpecificType(RefTy: Ty, V&: Mask)))))
2753	return BinaryOperator::CreateAnd(V1: Builder.CreatePtrToAddr(V: Ptr), V2: Mask);
2754
2755	if (Value *V = foldPtrToIntOrAddrOfGEP(IntTy: Ty, Ptr: SrcOp))
2756	return replaceInstUsesWith(I&: CI, V);
2757
2758	// FIXME: Implement variants of ptrtoint folds.
2759	return commonCastTransforms(CI);
2760	}
2761
2762	/// This input value (which is known to have vector type) is being zero extended
2763	/// or truncated to the specified vector type. Since the zext/trunc is done
2764	/// using an integer type, we have a (bitcast(cast(bitcast))) pattern,
2765	/// endianness will impact which end of the vector that is extended or
2766	/// truncated.
2767	///
2768	/// A vector is always stored with index 0 at the lowest address, which
2769	/// corresponds to the most significant bits for a big endian stored integer and
2770	/// the least significant bits for little endian. A trunc/zext of an integer
2771	/// impacts the big end of the integer. Thus, we need to add/remove elements at
2772	/// the front of the vector for big endian targets, and the back of the vector
2773	/// for little endian targets.
2774	///
2775	/// Try to replace it with a shuffle (and vector/vector bitcast) if possible.
2776	///
2777	/// The source and destination vector types may have different element types.
2778	static Instruction *
2779	optimizeVectorResizeWithIntegerBitCasts(Value InVal, VectorType DestTy,
2780	InstCombinerImpl &IC) {
2781	// We can only do this optimization if the output is a multiple of the input
2782	// element size, or the input is a multiple of the output element size.
2783	// Convert the input type to have the same element type as the output.
2784	VectorType *SrcTy = cast<VectorType>(Val: InVal->getType());
2785
2786	if (SrcTy->getElementType() != DestTy->getElementType()) {
2787	// The input types don't need to be identical, but for now they must be the
2788	// same size. There is no specific reason we couldn't handle things like
2789	// <4 x i16> -> <4 x i32> by bitcasting to <2 x i32> but haven't gotten
2790	// there yet.
2791	if (SrcTy->getElementType()->getPrimitiveSizeInBits() !=
2792	DestTy->getElementType()->getPrimitiveSizeInBits())
2793	return nullptr;
2794
2795	SrcTy =
2796	FixedVectorType::get(ElementType: DestTy->getElementType(),
2797	NumElts: cast<FixedVectorType>(Val: SrcTy)->getNumElements());
2798	InVal = IC.Builder.CreateBitCast(V: InVal, DestTy: SrcTy);
2799	}
2800
2801	bool IsBigEndian = IC.getDataLayout().isBigEndian();
2802	unsigned SrcElts = cast<FixedVectorType>(Val: SrcTy)->getNumElements();
2803	unsigned DestElts = cast<FixedVectorType>(Val: DestTy)->getNumElements();
2804
2805	assert(SrcElts != DestElts && "Element counts should be different.");
2806
2807	// Now that the element types match, get the shuffle mask and RHS of the
2808	// shuffle to use, which depends on whether we're increasing or decreasing the
2809	// size of the input.
2810	auto ShuffleMaskStorage = llvm::to_vector<`16`>(Range: llvm::seq<int>(Begin: `0`, End: SrcElts));
2811	ArrayRef<int> ShuffleMask;
2812	Value *V2;
2813
2814	if (SrcElts > DestElts) {
2815	// If we're shrinking the number of elements (rewriting an integer
2816	// truncate), just shuffle in the elements corresponding to the least
2817	// significant bits from the input and use poison as the second shuffle
2818	// input.
2819	V2 = PoisonValue::get(T: SrcTy);
2820	// Make sure the shuffle mask selects the "least significant bits" by
2821	// keeping elements from back of the src vector for big endian, and from the
2822	// front for little endian.
2823	ShuffleMask = ShuffleMaskStorage;
2824	if (IsBigEndian)
2825	ShuffleMask = ShuffleMask.take_back(N: DestElts);
2826	else
2827	ShuffleMask = ShuffleMask.take_front(N: DestElts);
2828	} else {
2829	// If we're increasing the number of elements (rewriting an integer zext),
2830	// shuffle in all of the elements from InVal. Fill the rest of the result
2831	// elements with zeros from a constant zero.
2832	V2 = Constant::getNullValue(Ty: SrcTy);
2833	// Use first elt from V2 when indicating zero in the shuffle mask.
2834	uint32_t NullElt = SrcElts;
2835	// Extend with null values in the "most significant bits" by adding elements
2836	// in front of the src vector for big endian, and at the back for little
2837	// endian.
2838	unsigned DeltaElts = DestElts - SrcElts;
2839	if (IsBigEndian)
2840	ShuffleMaskStorage.insert(I: ShuffleMaskStorage.begin(), NumToInsert: DeltaElts, Elt: NullElt);
2841	else
2842	ShuffleMaskStorage.append(NumInputs: DeltaElts, Elt: NullElt);
2843	ShuffleMask = ShuffleMaskStorage;
2844	}
2845
2846	return new ShuffleVectorInst (InVal, V2, ShuffleMask);
2847	}
2848
2849	static bool isMultipleOfTypeSize(unsigned Value, Type *Ty) {
2850	return Value % Ty->getPrimitiveSizeInBits() == `0`;
2851	}
2852
2853	static unsigned getTypeSizeIndex(unsigned Value, Type *Ty) {
2854	return Value / Ty->getPrimitiveSizeInBits();
2855	}
2856
2857	/// V is a value which is inserted into a vector of VecEltTy.
2858	/// Look through the value to see if we can decompose it into
2859	/// insertions into the vector. See the example in the comment for
2860	/// OptimizeIntegerToVectorInsertions for the pattern this handles.
2861	/// The type of V is always a non-zero multiple of VecEltTy's size.
2862	/// Shift is the number of bits between the lsb of V and the lsb of
2863	/// the vector.
2864	///
2865	/// This returns false if the pattern can't be matched or true if it can,
2866	/// filling in Elements with the elements found here.
2867	static bool collectInsertionElements(Value V, unsigned* Shift,
2868	SmallVectorImpl<Value *> &Elements,
2869	Type VecEltTy, bool* isBigEndian) {
2870	assert(isMultipleOfTypeSize(Shift, VecEltTy) &&
2871	"Shift should be a multiple of the element type size");
2872
2873	// Poison values never contribute useful bits to the result.
2874	if (match(V, P: m_Poison()))
2875	return true;
2876
2877	// If we got down to a value of the right type, we win, try inserting into the
2878	// right element.
2879	if (V->getType() == VecEltTy) {
2880	// Inserting null doesn't actually insert any elements.
2881	if (Constant *C = dyn_cast<Constant>(Val: V))
2882	if (C->isNullValue())
2883	return true;
2884
2885	unsigned ElementIndex = getTypeSizeIndex(Value: Shift, Ty: VecEltTy);
2886	if (isBigEndian)
2887	ElementIndex = Elements.size() - ElementIndex - `1`;
2888
2889	// Fail if multiple elements are inserted into this slot.
2890	if (Elements [ElementIndex])
2891	return false;
2892
2893	Elements [ElementIndex] = V;
2894	return true;
2895	}
2896
2897	if (Constant *C = dyn_cast<Constant>(Val: V)) {
2898	// Figure out the # elements this provides, and bitcast it or slice it up
2899	// as required.
2900	unsigned NumElts = getTypeSizeIndex(Value: C->getType()->getPrimitiveSizeInBits(),
2901	Ty: VecEltTy);
2902	// If the constant is the size of a vector element, we just need to bitcast
2903	// it to the right type so it gets properly inserted.
2904	if (NumElts == `1`)
2905	return collectInsertionElements(V: ConstantExpr::getBitCast(C, Ty: VecEltTy),
2906	Shift, Elements, VecEltTy, isBigEndian);
2907
2908	// Okay, this is a constant that covers multiple elements. Slice it up into
2909	// pieces and insert each element-sized piece into the vector.
2910	if (!isa<IntegerType>(Val: C->getType()))
2911	C = ConstantExpr::getBitCast(C, Ty: IntegerType::get(C&: V->getContext(),
2912	NumBits: C->getType()->getPrimitiveSizeInBits()));
2913	unsigned ElementSize = VecEltTy->getPrimitiveSizeInBits();
2914	Type *ElementIntTy = IntegerType::get(C&: C->getContext(), NumBits: ElementSize);
2915
2916	for (unsigned i = `0`; i != NumElts; ++i) {
2917	unsigned ShiftI = i * ElementSize;
2918	Constant *Piece = ConstantFoldBinaryInstruction(
2919	Opcode: Instruction::LShr, V1: C, V2: ConstantInt::get(Ty: C->getType(), V: ShiftI));
2920	if (!Piece)
2921	return false;
2922
2923	Piece = ConstantExpr::getTrunc(C: Piece, Ty: ElementIntTy);
2924	if (!collectInsertionElements(V: Piece, Shift: ShiftI + Shift, Elements, VecEltTy,
2925	isBigEndian))
2926	return false;
2927	}
2928	return true;
2929	}
2930
2931	if (!V->hasOneUse()) return false;
2932
2933	Instruction *I = dyn_cast<Instruction>(Val: V);
2934	if (!I) return false;
2935	switch (I->getOpcode()) {
2936	default: return false; // Unhandled case.
2937	case Instruction::BitCast:
2938	if (I->getOperand(i: `0`)->getType()->isVectorTy())
2939	return false;
2940	return collectInsertionElements(V: I->getOperand(i: `0`), Shift, Elements, VecEltTy,
2941	isBigEndian);
2942	case Instruction::ZExt:
2943	if (!isMultipleOfTypeSize(
2944	Value: I->getOperand(i: `0`)->getType()->getPrimitiveSizeInBits(),
2945	Ty: VecEltTy))
2946	return false;
2947	return collectInsertionElements(V: I->getOperand(i: `0`), Shift, Elements, VecEltTy,
2948	isBigEndian);
2949	case Instruction::Or:
2950	return collectInsertionElements(V: I->getOperand(i: `0`), Shift, Elements, VecEltTy,
2951	isBigEndian) &&
2952	collectInsertionElements(V: I->getOperand(i: `1`), Shift, Elements, VecEltTy,
2953	isBigEndian);
2954	case Instruction::Shl: {
2955	// Must be shifting by a constant that is a multiple of the element size.
2956	ConstantInt *CI = dyn_cast<ConstantInt>(Val: I->getOperand(i: `1`));
2957	if (!CI) return false;
2958	Shift += CI->getZExtValue();
2959	if (!isMultipleOfTypeSize(Value: Shift, Ty: VecEltTy)) return false;
2960	return collectInsertionElements(V: I->getOperand(i: `0`), Shift, Elements, VecEltTy,
2961	isBigEndian);
2962	}
2963
2964	}
2965	}
2966
2967
2968	/// If the input is an 'or' instruction, we may be doing shifts and ors to
2969	/// assemble the elements of the vector manually.
2970	/// Try to rip the code out and replace it with insertelements. This is to
2971	/// optimize code like this:
2972	///
2973	/// %tmp37 = bitcast float %inc to i32
2974	/// %tmp38 = zext i32 %tmp37 to i64
2975	/// %tmp31 = bitcast float %inc5 to i32
2976	/// %tmp32 = zext i32 %tmp31 to i64
2977	/// %tmp33 = shl i64 %tmp32, 32
2978	/// %ins35 = or i64 %tmp33, %tmp38
2979	/// %tmp43 = bitcast i64 %ins35 to <2 x float>
2980	///
2981	/// Into two insertelements that do "buildvector{%inc, %inc5}".
2982	static Value *optimizeIntegerToVectorInsertions(BitCastInst &CI,
2983	InstCombinerImpl &IC) {
2984	auto *DestVecTy = cast<FixedVectorType>(Val: CI.getType());
2985	Value *IntInput = CI.getOperand(i_nocapture: `0`);
2986
2987	// if the int input is just an undef value do not try to optimize to vector
2988	// insertions as it will prevent undef propagation
2989	if (isa<UndefValue>(Val: IntInput))
2990	return nullptr;
2991
2992	SmallVector<Value*, `8`> Elements(DestVecTy->getNumElements());
2993	if (!collectInsertionElements(V: IntInput, Shift: `0`, Elements,
2994	VecEltTy: DestVecTy->getElementType(),
2995	isBigEndian: IC.getDataLayout().isBigEndian()))
2996	return nullptr;
2997
2998	// If we succeeded, we know that all of the element are specified by Elements
2999	// or are zero if Elements has a null entry. Recast this as a set of
3000	// insertions.
3001	Value *Result = Constant::getNullValue(Ty: CI.getType());
3002	for (unsigned i = `0`, e = Elements.size(); i != e; ++i) {
3003	if (!Elements [i]) continue; // Unset element.
3004
3005	Result = IC.Builder.CreateInsertElement(Vec: Result, NewElt: Elements [i], Idx: i);
3006	}
3007
3008	return Result;
3009	}
3010
3011	/// Canonicalize scalar bitcasts of extracted elements into a bitcast of the
3012	/// vector followed by extract element. The backend tends to handle bitcasts of
3013	/// vectors better than bitcasts of scalars because vector registers are
3014	/// usually not type-specific like scalar integer or scalar floating-point.
3015	static Instruction *canonicalizeBitCastExtElt(BitCastInst &BitCast,
3016	InstCombinerImpl &IC) {
3017	Value VecOp, Index;
3018	if (!match(V: BitCast.getOperand(i_nocapture: `0`),
3019	P: m_OneUse(SubPattern: m_ExtractElt(Val: m_Value(V&: VecOp), Idx: m_Value(V&: Index)))))
3020	return nullptr;
3021
3022	// The bitcast must be to a vectorizable type, otherwise we can't make a new
3023	// type to extract from.
3024	Type *DestType = BitCast.getType();
3025	VectorType *VecType = cast<VectorType>(Val: VecOp->getType());
3026	if (VectorType::isValidElementType(ElemTy: DestType)) {
3027	auto *NewVecType = VectorType::get(ElementType: DestType, Other: VecType);
3028	auto *NewBC = IC.Builder.CreateBitCast(V: VecOp, DestTy: NewVecType, Name: "bc");
3029	return ExtractElementInst::Create(Vec: NewBC, Idx: Index);
3030	}
3031
3032	// Only solve DestType is vector to avoid inverse transform in visitBitCast.
3033	// bitcast (extractelement <1 x elt>, dest) -> bitcast(<1 x elt>, dest)
3034	auto *FixedVType = dyn_cast<FixedVectorType>(Val: VecType);
3035	if (DestType->isVectorTy() && FixedVType && FixedVType->getNumElements() == `1`)
3036	return CastInst::Create(Instruction::BitCast, S: VecOp, Ty: DestType);
3037
3038	return nullptr;
3039	}
3040
3041	/// Change the type of a bitwise logic operation if we can eliminate a bitcast.
3042	static Instruction *foldBitCastBitwiseLogic(BitCastInst &BitCast,
3043	InstCombiner::BuilderTy &Builder) {
3044	Type *DestTy = BitCast.getType();
3045	BinaryOperator *BO;
3046
3047	if (!match(V: BitCast.getOperand(i_nocapture: `0`), P: m_OneUse(SubPattern: m_BinOp(I&: BO))) \|\|
3048	!BO->isBitwiseLogicOp())
3049	return nullptr;
3050
3051	// FIXME: This transform is restricted to vector types to avoid backend
3052	// problems caused by creating potentially illegal operations. If a fix-up is
3053	// added to handle that situation, we can remove this check.
3054	if (!DestTy->isVectorTy() \|\| !BO->getType()->isVectorTy())
3055	return nullptr;
3056
3057	if (DestTy->isFPOrFPVectorTy()) {
3058	Value X, Y;
3059	// bitcast(logic(bitcast(X), bitcast(Y))) -> bitcast'(logic(bitcast'(X), Y))
3060	if (match(V: BO->getOperand(i_nocapture: `0`), P: m_OneUse(SubPattern: m_BitCast(Op: m_Value(V&: X)))) &&
3061	match(V: BO->getOperand(i_nocapture: `1`), P: m_OneUse(SubPattern: m_BitCast(Op: m_Value(V&: Y))))) {
3062	if (X->getType()->isFPOrFPVectorTy() &&
3063	Y->getType()->isIntOrIntVectorTy()) {
3064	Value *CastedOp =
3065	Builder.CreateBitCast(V: BO->getOperand(i_nocapture: `0`), DestTy: Y->getType());
3066	Value *NewBO = Builder.CreateBinOp(Opc: BO->getOpcode(), LHS: CastedOp, RHS: Y);
3067	return CastInst::CreateBitOrPointerCast(S: NewBO, Ty: DestTy);
3068	}
3069	if (X->getType()->isIntOrIntVectorTy() &&
3070	Y->getType()->isFPOrFPVectorTy()) {
3071	Value *CastedOp =
3072	Builder.CreateBitCast(V: BO->getOperand(i_nocapture: `1`), DestTy: X->getType());
3073	Value *NewBO = Builder.CreateBinOp(Opc: BO->getOpcode(), LHS: CastedOp, RHS: X);
3074	return CastInst::CreateBitOrPointerCast(S: NewBO, Ty: DestTy);
3075	}
3076	}
3077	return nullptr;
3078	}
3079
3080	if (!DestTy->isIntOrIntVectorTy())
3081	return nullptr;
3082
3083	Value *X;
3084	if (match(V: BO->getOperand(i_nocapture: `0`),
3085	P: m_OneUse(SubPattern: m_BitCast(Op: m_SpecificType(RefTy: DestTy, V&: X)))) &&
3086	!isa<Constant>(Val: X)) {
3087	// bitcast(logic(bitcast(X), Y)) --> logic'(X, bitcast(Y))
3088	Value *CastedOp1 = Builder.CreateBitCast(V: BO->getOperand(i_nocapture: `1`), DestTy);
3089	return BinaryOperator::Create(Op: BO->getOpcode(), S1: X, S2: CastedOp1);
3090	}
3091
3092	if (match(V: BO->getOperand(i_nocapture: `1`),
3093	P: m_OneUse(SubPattern: m_BitCast(Op: m_SpecificType(RefTy: DestTy, V&: X)))) &&
3094	!isa<Constant>(Val: X)) {
3095	// bitcast(logic(Y, bitcast(X))) --> logic'(bitcast(Y), X)
3096	Value *CastedOp0 = Builder.CreateBitCast(V: BO->getOperand(i_nocapture: `0`), DestTy);
3097	return BinaryOperator::Create(Op: BO->getOpcode(), S1: CastedOp0, S2: X);
3098	}
3099
3100	// Canonicalize vector bitcasts to come before vector bitwise logic with a
3101	// constant. This eases recognition of special constants for later ops.
3102	// Example:
3103	// icmp u/s (a ^ signmask), (b ^ signmask) --> icmp s/u a, b
3104	Constant *C;
3105	if (match(V: BO->getOperand(i_nocapture: `1`), P: m_Constant(C))) {
3106	// bitcast (logic X, C) --> logic (bitcast X, C')
3107	Value *CastedOp0 = Builder.CreateBitCast(V: BO->getOperand(i_nocapture: `0`), DestTy);
3108	Value *CastedC = Builder.CreateBitCast(V: C, DestTy);
3109	return BinaryOperator::Create(Op: BO->getOpcode(), S1: CastedOp0, S2: CastedC);
3110	}
3111
3112	return nullptr;
3113	}
3114
3115	/// Change the type of a select if we can eliminate a bitcast.
3116	static Instruction *foldBitCastSelect(BitCastInst &BitCast,
3117	InstCombiner::BuilderTy &Builder) {
3118	Value Cond, TVal, *FVal;
3119	if (!match(V: BitCast.getOperand(i_nocapture: `0`),
3120	P: m_OneUse(SubPattern: m_Select(C: m_Value(V&: Cond), L: m_Value(V&: TVal), R: m_Value(V&: FVal)))))
3121	return nullptr;
3122
3123	// A vector select must maintain the same number of elements in its operands.
3124	Type *CondTy = Cond->getType();
3125	Type *DestTy = BitCast.getType();
3126
3127	auto *DestVecTy = dyn_cast<VectorType>(Val: DestTy);
3128
3129	if (auto *CondVTy = dyn_cast<VectorType>(Val: CondTy))
3130	if (!DestVecTy \|\|
3131	CondVTy->getElementCount() != DestVecTy->getElementCount())
3132	return nullptr;
3133
3134	auto *Sel = cast<Instruction>(Val: BitCast.getOperand(i_nocapture: `0`));
3135	auto *SrcVecTy = dyn_cast<VectorType>(Val: TVal->getType());
3136
3137	if ((isa<Constant>(Val: TVal) \|\| isa<Constant>(Val: FVal)) &&
3138	(!DestVecTy \|\|
3139	(SrcVecTy && ElementCount::isKnownLE(LHS: DestVecTy->getElementCount(),
3140	RHS: SrcVecTy->getElementCount())))) {
3141	// Avoid introducing select of vector (or select of vector with more
3142	// elements) until the backend can undo this transformation.
3143	Value *CastedTVal = Builder.CreateBitCast(V: TVal, DestTy);
3144	Value *CastedFVal = Builder.CreateBitCast(V: FVal, DestTy);
3145	return SelectInst::Create(C: Cond, S1: CastedTVal, S2: CastedFVal, NameStr: "", InsertBefore: nullptr, MDFrom: Sel);
3146	}
3147
3148	// FIXME: This transform is restricted from changing the select between
3149	// scalars and vectors to avoid backend problems caused by creating
3150	// potentially illegal operations. If a fix-up is added to handle that
3151	// situation, we can remove this check.
3152	if ((DestVecTy != nullptr) != (SrcVecTy != nullptr))
3153	return nullptr;
3154
3155	Value *X;
3156	if (match(V: TVal, P: m_OneUse(SubPattern: m_BitCast(Op: m_SpecificType(RefTy: DestTy, V&: X)))) &&
3157	!isa<Constant>(Val: X)) {
3158	// bitcast(select(Cond, bitcast(X), Y)) --> select'(Cond, X, bitcast(Y))
3159	Value *CastedVal = Builder.CreateBitCast(V: FVal, DestTy);
3160	return SelectInst::Create(C: Cond, S1: X, S2: CastedVal, NameStr: "", InsertBefore: nullptr, MDFrom: Sel);
3161	}
3162
3163	if (match(V: FVal, P: m_OneUse(SubPattern: m_BitCast(Op: m_SpecificType(RefTy: DestTy, V&: X)))) &&
3164	!isa<Constant>(Val: X)) {
3165	// bitcast(select(Cond, Y, bitcast(X))) --> select'(Cond, bitcast(Y), X)
3166	Value *CastedVal = Builder.CreateBitCast(V: TVal, DestTy);
3167	return SelectInst::Create(C: Cond, S1: CastedVal, S2: X, NameStr: "", InsertBefore: nullptr, MDFrom: Sel);
3168	}
3169
3170	return nullptr;
3171	}
3172
3173	/// Check if all users of CI are StoreInsts.
3174	static bool hasStoreUsersOnly(CastInst &CI) {
3175	for (User *U : CI.users()) {
3176	if (!isa<StoreInst>(Val: U))
3177	return false;
3178	}
3179	return true;
3180	}
3181
3182	/// This function handles following case
3183	///
3184	/// A -> B cast
3185	/// PHI
3186	/// B -> A cast
3187	///
3188	/// All the related PHI nodes can be replaced by new PHI nodes with type A.
3189	/// The uses of \p CI can be changed to the new PHI node corresponding to \p PN.
3190	Instruction *InstCombinerImpl::optimizeBitCastFromPhi(CastInst &CI,
3191	PHINode *PN) {
3192	// BitCast used by Store can be handled in InstCombineLoadStoreAlloca.cpp.
3193	if (hasStoreUsersOnly(CI))
3194	return nullptr;
3195
3196	Value *Src = CI.getOperand(i_nocapture: `0`);
3197	Type SrcTy = Src->getType(); // Type B*
3198	Type DestTy = CI.getType(); // Type A*
3199
3200	SmallVector<PHINode *, `4`> PhiWorklist;
3201	SmallSetVector<PHINode *, `4`> OldPhiNodes;
3202
3203	// Find all of the A->B casts and PHI nodes.
3204	// We need to inspect all related PHI nodes, but PHIs can be cyclic, so
3205	// OldPhiNodes is used to track all known PHI nodes, before adding a new
3206	// PHI to PhiWorklist, it is checked against and added to OldPhiNodes first.
3207	PhiWorklist.push_back(Elt: PN);
3208	OldPhiNodes.insert(X: PN);
3209	while (!PhiWorklist.empty()) {
3210	auto *OldPN = PhiWorklist.pop_back_val();
3211	for (Value *IncValue : OldPN->incoming_values()) {
3212	if (isa<Constant>(Val: IncValue))
3213	continue;
3214
3215	if (auto *LI = dyn_cast<LoadInst>(Val: IncValue)) {
3216	// If there is a sequence of one or more load instructions, each loaded
3217	// value is used as address of later load instruction, bitcast is
3218	// necessary to change the value type, don't optimize it. For
3219	// simplicity we give up if the load address comes from another load.
3220	Value *Addr = LI->getOperand(i_nocapture: `0`);
3221	if (Addr == &CI \|\| isa<LoadInst>(Val: Addr))
3222	return nullptr;
3223	// Don't tranform "load <256 x i32>, <256 x i32>" to*
3224	// "load x86_amx, x86_amx", because x86_amx* is invalid.*
3225	// TODO: Remove this check when bitcast between vector and x86_amx
3226	// is replaced with a specific intrinsic.
3227	if (DestTy->isX86_AMXTy())
3228	return nullptr;
3229	if (LI->hasOneUse() && LI->isSimple())
3230	continue;
3231	// If a LoadInst has more than one use, changing the type of loaded
3232	// value may create another bitcast.
3233	return nullptr;
3234	}
3235
3236	if (auto *PNode = dyn_cast<PHINode>(Val: IncValue)) {
3237	if (OldPhiNodes.insert(X: PNode))
3238	PhiWorklist.push_back(Elt: PNode);
3239	continue;
3240	}
3241
3242	auto *BCI = dyn_cast<BitCastInst>(Val: IncValue);
3243	// We can't handle other instructions.
3244	if (!BCI)
3245	return nullptr;
3246
3247	// Verify it's a A->B cast.
3248	Type *TyA = BCI->getOperand(i_nocapture: `0`)->getType();
3249	Type *TyB = BCI->getType();
3250	if (TyA != DestTy \|\| TyB != SrcTy)
3251	return nullptr;
3252	}
3253	}
3254
3255	// Check that each user of each old PHI node is something that we can
3256	// rewrite, so that all of the old PHI nodes can be cleaned up afterwards.
3257	for (auto *OldPN : OldPhiNodes) {
3258	for (User *V : OldPN->users()) {
3259	if (auto *SI = dyn_cast<StoreInst>(Val: V)) {
3260	if (!SI->isSimple() \|\| SI->getOperand(i_nocapture: `0`) != OldPN)
3261	return nullptr;
3262	} else if (auto *BCI = dyn_cast<BitCastInst>(Val: V)) {
3263	// Verify it's a B->A cast.
3264	Type *TyB = BCI->getOperand(i_nocapture: `0`)->getType();
3265	Type *TyA = BCI->getType();
3266	if (TyA != DestTy \|\| TyB != SrcTy)
3267	return nullptr;
3268	} else if (auto *PHI = dyn_cast<PHINode>(Val: V)) {
3269	// As long as the user is another old PHI node, then even if we don't
3270	// rewrite it, the PHI web we're considering won't have any users
3271	// outside itself, so it'll be dead.
3272	if (!OldPhiNodes.contains(key: PHI))
3273	return nullptr;
3274	} else {
3275	return nullptr;
3276	}
3277	}
3278	}
3279
3280	// For each old PHI node, create a corresponding new PHI node with a type A.
3281	SmallDenseMap<PHINode , PHINode > NewPNodes;
3282	for (auto *OldPN : OldPhiNodes) {
3283	Builder.SetInsertPoint(OldPN);
3284	PHINode *NewPN = Builder.CreatePHI(Ty: DestTy, NumReservedValues: OldPN->getNumOperands());
3285	NewPNodes [OldPN] = NewPN;
3286	}
3287
3288	// Fill in the operands of new PHI nodes.
3289	for (auto *OldPN : OldPhiNodes) {
3290	PHINode *NewPN = NewPNodes [OldPN];
3291	for (unsigned j = `0`, e = OldPN->getNumOperands(); j != e; ++j) {
3292	Value *V = OldPN->getOperand(i_nocapture: j);
3293	Value NewV = nullptr*;
3294	if (auto *C = dyn_cast<Constant>(Val: V)) {
3295	NewV = ConstantExpr::getBitCast(C, Ty: DestTy);
3296	} else if (auto *LI = dyn_cast<LoadInst>(Val: V)) {
3297	// Explicitly perform load combine to make sure no opposing transform
3298	// can remove the bitcast in the meantime and trigger an infinite loop.
3299	Builder.SetInsertPoint(LI);
3300	NewV = combineLoadToNewType(LI&: *LI, NewTy: DestTy);
3301	// Remove the old load and its use in the old phi, which itself becomes
3302	// dead once the whole transform finishes.
3303	replaceInstUsesWith(I&: *LI, V: PoisonValue::get(T: LI->getType()));
3304	eraseInstFromFunction(I&: *LI);
3305	} else if (auto *BCI = dyn_cast<BitCastInst>(Val: V)) {
3306	NewV = BCI->getOperand(i_nocapture: `0`);
3307	} else if (auto *PrevPN = dyn_cast<PHINode>(Val: V)) {
3308	NewV = NewPNodes [PrevPN];
3309	}
3310	assert(NewV);
3311	NewPN->addIncoming(V: NewV, BB: OldPN->getIncomingBlock(i: j));
3312	}
3313	}
3314
3315	// Traverse all accumulated PHI nodes and process its users,
3316	// which are Stores and BitcCasts. Without this processing
3317	// NewPHI nodes could be replicated and could lead to extra
3318	// moves generated after DeSSA.
3319	// If there is a store with type B, change it to type A.
3320
3321
3322	// Replace users of BitCast B->A with NewPHI. These will help
3323	// later to get rid off a closure formed by OldPHI nodes.
3324	Instruction RetVal = nullptr*;
3325	for (auto *OldPN : OldPhiNodes) {
3326	PHINode *NewPN = NewPNodes [OldPN];
3327	for (User *V : make_early_inc_range(Range: OldPN->users())) {
3328	if (auto *SI = dyn_cast<StoreInst>(Val: V)) {
3329	assert(SI->isSimple() && SI->getOperand(`0`) == OldPN);
3330	Builder.SetInsertPoint(SI);
3331	auto *NewBC =
3332	cast<BitCastInst>(Val: Builder.CreateBitCast(V: NewPN, DestTy: SrcTy));
3333	SI->setOperand(i_nocapture: `0`, Val_nocapture: NewBC);
3334	Worklist.push(I: SI);
3335	assert(hasStoreUsersOnly(*NewBC));
3336	}
3337	else if (auto *BCI = dyn_cast<BitCastInst>(Val: V)) {
3338	Type *TyB = BCI->getOperand(i_nocapture: `0`)->getType();
3339	Type *TyA = BCI->getType();
3340	assert(TyA == DestTy && TyB == SrcTy);
3341	(void) TyA;
3342	(void) TyB;
3343	Instruction I = replaceInstUsesWith(I&: BCI, V: NewPN);
3344	if (BCI == &CI)
3345	RetVal = I;
3346	} else if (auto *PHI = dyn_cast<PHINode>(Val: V)) {
3347	assert(OldPhiNodes.contains(PHI));
3348	(void) PHI;
3349	} else {
3350	llvm_unreachable("all uses should be handled");
3351	}
3352	}
3353	}
3354
3355	return RetVal;
3356	}
3357
3358	/// Fold (bitcast (or (and (bitcast X to int), signmask), nneg Y) to fp) to
3359	/// copysign((bitcast Y to fp), X)
3360	static Value *foldCopySignIdioms(BitCastInst &CI,
3361	InstCombiner::BuilderTy &Builder,
3362	const SimplifyQuery &SQ) {
3363	Value X, Y;
3364	Type *FTy = CI.getType();
3365	if (!FTy->isFPOrFPVectorTy())
3366	return nullptr;
3367	if (!match(V: &CI, P: m_ElementWiseBitCast(Op: m_c_Or(
3368	L: m_And(L: m_ElementWiseBitCast(Op: m_Value(V&: X)), R: m_SignMask()),
3369	R: m_Value(V&: Y)))))
3370	return nullptr;
3371	if (X->getType() != FTy)
3372	return nullptr;
3373	if (!isKnownNonNegative(V: Y, SQ))
3374	return nullptr;
3375
3376	return Builder.CreateCopySign(LHS: Builder.CreateBitCast(V: Y, DestTy: FTy), RHS: X);
3377	}
3378
3379	Instruction *InstCombinerImpl::visitBitCast(BitCastInst &CI) {
3380	// If the operands are integer typed then apply the integer transforms,
3381	// otherwise just apply the common ones.
3382	Value *Src = CI.getOperand(i_nocapture: `0`);
3383	Type *SrcTy = Src->getType();
3384	Type *DestTy = CI.getType();
3385
3386	// Get rid of casts from one type to the same type. These are useless and can
3387	// be replaced by the operand.
3388	if (DestTy == Src->getType())
3389	return replaceInstUsesWith(I&: CI, V: Src);
3390
3391	if (isa<FixedVectorType>(Val: DestTy)) {
3392	if (isa<IntegerType>(Val: SrcTy)) {
3393	// If this is a cast from an integer to vector, check to see if the input
3394	// is a trunc or zext of a bitcast from vector. If so, we can replace all
3395	// the casts with a shuffle and (potentially) a bitcast.
3396	if (isa<TruncInst>(Val: Src) \|\| isa<ZExtInst>(Val: Src)) {
3397	CastInst *SrcCast = cast<CastInst>(Val: Src);
3398	if (BitCastInst *BCIn = dyn_cast<BitCastInst>(Val: SrcCast->getOperand(i_nocapture: `0`)))
3399	if (isa<VectorType>(Val: BCIn->getOperand(i_nocapture: `0`)->getType()))
3400	if (Instruction *I = optimizeVectorResizeWithIntegerBitCasts(
3401	InVal: BCIn->getOperand(i_nocapture: `0`), DestTy: cast<VectorType>(Val: DestTy), IC&: *this))
3402	return I;
3403	}
3404
3405	// If the input is an 'or' instruction, we may be doing shifts and ors to
3406	// assemble the elements of the vector manually. Try to rip the code out
3407	// and replace it with insertelements.
3408	if (Value V = optimizeIntegerToVectorInsertions(CI, IC&: this))
3409	return replaceInstUsesWith(I&: CI, V);
3410	}
3411	}
3412
3413	if (FixedVectorType *SrcVTy = dyn_cast<FixedVectorType>(Val: SrcTy)) {
3414	if (SrcVTy->getNumElements() == `1`) {
3415	// If our destination is not a vector, then make this a straight
3416	// scalar-scalar cast.
3417	if (!DestTy->isVectorTy()) {
3418	Value *Elem = Builder.CreateExtractElement(Vec: Src, Idx: uint64_t{`0`});
3419	return CastInst::Create(Instruction::BitCast, S: Elem, Ty: DestTy);
3420	}
3421
3422	// Otherwise, see if our source is an insert. If so, then use the scalar
3423	// component directly:
3424	// bitcast (inselt <1 x elt> V, X, 0) to <n x m> --> bitcast X to <n x m>
3425	if (auto *InsElt = dyn_cast<InsertElementInst>(Val: Src))
3426	return new BitCastInst (InsElt->getOperand(i_nocapture: `1`), DestTy);
3427	}
3428
3429	// Convert an artificial vector insert into more analyzable bitwise logic.
3430	unsigned BitWidth = DestTy->getScalarSizeInBits();
3431	Value X, Y;
3432	uint64_t IndexC;
3433	if (match(V: Src, P: m_OneUse(SubPattern: m_InsertElt(
3434	Val: m_OneUse(SubPattern: m_BitCast(Op: m_SpecificType(RefTy: DestTy, V&: X))),
3435	Elt: m_Value(V&: Y), Idx: m_ConstantInt(V&: IndexC)))) &&
3436	DestTy->isIntegerTy() && Y->getType()->isIntegerTy() &&
3437	isDesirableIntType(BitWidth)) {
3438	// Adjust for big endian - the LSBs are at the high index.
3439	if (DL.isBigEndian())
3440	IndexC = SrcVTy->getNumElements() - `1` - IndexC;
3441
3442	// We only handle (endian-normalized) insert to index 0. Any other insert
3443	// would require a left-shift, so that is an extra instruction.
3444	if (IndexC == `0`) {
3445	// bitcast (inselt (bitcast X), Y, 0) --> or (and X, MaskC), (zext Y)
3446	unsigned EltWidth = Y->getType()->getScalarSizeInBits();
3447	APInt MaskC = APInt::getHighBitsSet(numBits: BitWidth, hiBitsSet: BitWidth - EltWidth);
3448	Value *AndX = Builder.CreateAnd(LHS: X, RHS: MaskC);
3449	Value *ZextY = Builder.CreateZExt(V: Y, DestTy);
3450	return BinaryOperator::CreateOr(V1: AndX, V2: ZextY);
3451	}
3452	}
3453	}
3454
3455	if (auto *Shuf = dyn_cast<ShuffleVectorInst>(Val: Src)) {
3456	// Okay, we have (bitcast (shuffle ..)). Check to see if this is
3457	// a bitcast to a vector with the same # elts.
3458	Value *ShufOp0 = Shuf->getOperand(i_nocapture: `0`);
3459	Value *ShufOp1 = Shuf->getOperand(i_nocapture: `1`);
3460	auto ShufElts = cast<VectorType>(Val: Shuf->getType())->getElementCount();
3461	auto SrcVecElts = cast<VectorType>(Val: ShufOp0->getType())->getElementCount();
3462	if (Shuf->hasOneUse() && DestTy->isVectorTy() &&
3463	cast<VectorType>(Val: DestTy)->getElementCount() == ShufElts &&
3464	ShufElts == SrcVecElts) {
3465	BitCastInst *Tmp;
3466	// If either of the operands is a cast from CI.getType(), then
3467	// evaluating the shuffle in the casted destination's type will allow
3468	// us to eliminate at least one cast.
3469	if (((Tmp = dyn_cast<BitCastInst>(Val: ShufOp0)) &&
3470	Tmp->getOperand(i_nocapture: `0`)->getType() == DestTy) \|\|
3471	((Tmp = dyn_cast<BitCastInst>(Val: ShufOp1)) &&
3472	Tmp->getOperand(i_nocapture: `0`)->getType() == DestTy)) {
3473	Value *LHS = Builder.CreateBitCast(V: ShufOp0, DestTy);
3474	Value *RHS = Builder.CreateBitCast(V: ShufOp1, DestTy);
3475	// Return a new shuffle vector. Use the same element ID's, as we
3476	// know the vector types match #elts.
3477	return new ShuffleVectorInst (LHS, RHS, Shuf->getShuffleMask());
3478	}
3479	}
3480
3481	// A bitcasted-to-scalar and byte/bit reversing shuffle is better recognized
3482	// as a byte/bit swap:
3483	// bitcast <N x i8> (shuf X, undef, <N, N-1,...0>) -> bswap (bitcast X)
3484	// bitcast <N x i1> (shuf X, undef, <N, N-1,...0>) -> bitreverse (bitcast X)
3485	if (DestTy->isIntegerTy() && ShufElts.getKnownMinValue() % `2` == `0` &&
3486	Shuf->hasOneUse() && Shuf->isReverse() && match(V: ShufOp1, P: m_Poison())) {
3487	unsigned IntrinsicNum = `0`;
3488	if (DL.isLegalInteger(Width: DestTy->getScalarSizeInBits()) &&
3489	SrcTy->getScalarSizeInBits() == `8`) {
3490	IntrinsicNum = Intrinsic::bswap;
3491	} else if (SrcTy->getScalarSizeInBits() == `1`) {
3492	IntrinsicNum = Intrinsic::bitreverse;
3493	}
3494	if (IntrinsicNum != `0`) {
3495	assert(ShufOp0->getType() == SrcTy && "Unexpected shuffle mask");
3496	Function *BswapOrBitreverse = Intrinsic::getOrInsertDeclaration(
3497	M: CI.getModule(), id: IntrinsicNum, OverloadTys: DestTy);
3498	Value *ScalarX = Builder.CreateBitCast(V: ShufOp0, DestTy);
3499	return CallInst::Create(Func: BswapOrBitreverse, Args: {ScalarX});
3500	}
3501	}
3502	}
3503
3504	// Handle the A->B->A cast, and there is an intervening PHI node.
3505	if (PHINode *PN = dyn_cast<PHINode>(Val: Src))
3506	if (Instruction *I = optimizeBitCastFromPhi(CI, PN))
3507	return I;
3508
3509	if (Instruction I = canonicalizeBitCastExtElt(BitCast&: CI, IC&: this))
3510	return I;
3511
3512	if (Instruction *I = foldBitCastBitwiseLogic(BitCast&: CI, Builder))
3513	return I;
3514
3515	if (Instruction *I = foldBitCastSelect(BitCast&: CI, Builder))
3516	return I;
3517
3518	if (Value *V = foldCopySignIdioms(CI, Builder, SQ: SQ.getWithInstruction(I: &CI)))
3519	return replaceInstUsesWith(I&: CI, V);
3520
3521	return commonCastTransforms(CI);
3522	}
3523
3524	Instruction *InstCombinerImpl::visitAddrSpaceCast(AddrSpaceCastInst &CI) {
3525	return commonCastTransforms(CI);
3526	}
3527

Browse the source code of llvm_projects/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp