1 | //===- InstCombineCalls.cpp -----------------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file implements the visitCall, visitInvoke, and visitCallBr functions. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "InstCombineInternal.h" |
14 | #include "llvm/ADT/APFloat.h" |
15 | #include "llvm/ADT/APInt.h" |
16 | #include "llvm/ADT/APSInt.h" |
17 | #include "llvm/ADT/ArrayRef.h" |
18 | #include "llvm/ADT/STLFunctionalExtras.h" |
19 | #include "llvm/ADT/SmallBitVector.h" |
20 | #include "llvm/ADT/SmallVector.h" |
21 | #include "llvm/ADT/Statistic.h" |
22 | #include "llvm/Analysis/AliasAnalysis.h" |
23 | #include "llvm/Analysis/AssumeBundleQueries.h" |
24 | #include "llvm/Analysis/AssumptionCache.h" |
25 | #include "llvm/Analysis/InstructionSimplify.h" |
26 | #include "llvm/Analysis/Loads.h" |
27 | #include "llvm/Analysis/MemoryBuiltins.h" |
28 | #include "llvm/Analysis/ValueTracking.h" |
29 | #include "llvm/Analysis/VectorUtils.h" |
30 | #include "llvm/IR/AttributeMask.h" |
31 | #include "llvm/IR/Attributes.h" |
32 | #include "llvm/IR/BasicBlock.h" |
33 | #include "llvm/IR/Constant.h" |
34 | #include "llvm/IR/Constants.h" |
35 | #include "llvm/IR/DataLayout.h" |
36 | #include "llvm/IR/DebugInfo.h" |
37 | #include "llvm/IR/DerivedTypes.h" |
38 | #include "llvm/IR/Function.h" |
39 | #include "llvm/IR/GlobalVariable.h" |
40 | #include "llvm/IR/InlineAsm.h" |
41 | #include "llvm/IR/InstrTypes.h" |
42 | #include "llvm/IR/Instruction.h" |
43 | #include "llvm/IR/Instructions.h" |
44 | #include "llvm/IR/IntrinsicInst.h" |
45 | #include "llvm/IR/Intrinsics.h" |
46 | #include "llvm/IR/IntrinsicsAArch64.h" |
47 | #include "llvm/IR/IntrinsicsAMDGPU.h" |
48 | #include "llvm/IR/IntrinsicsARM.h" |
49 | #include "llvm/IR/IntrinsicsHexagon.h" |
50 | #include "llvm/IR/LLVMContext.h" |
51 | #include "llvm/IR/Metadata.h" |
52 | #include "llvm/IR/PatternMatch.h" |
53 | #include "llvm/IR/Statepoint.h" |
54 | #include "llvm/IR/Type.h" |
55 | #include "llvm/IR/User.h" |
56 | #include "llvm/IR/Value.h" |
57 | #include "llvm/IR/ValueHandle.h" |
58 | #include "llvm/Support/AtomicOrdering.h" |
59 | #include "llvm/Support/Casting.h" |
60 | #include "llvm/Support/CommandLine.h" |
61 | #include "llvm/Support/Compiler.h" |
62 | #include "llvm/Support/Debug.h" |
63 | #include "llvm/Support/ErrorHandling.h" |
64 | #include "llvm/Support/KnownBits.h" |
65 | #include "llvm/Support/MathExtras.h" |
66 | #include "llvm/Support/raw_ostream.h" |
67 | #include "llvm/Transforms/InstCombine/InstCombiner.h" |
68 | #include "llvm/Transforms/Utils/AssumeBundleBuilder.h" |
69 | #include "llvm/Transforms/Utils/Local.h" |
70 | #include "llvm/Transforms/Utils/SimplifyLibCalls.h" |
71 | #include <algorithm> |
72 | #include <cassert> |
73 | #include <cstdint> |
74 | #include <optional> |
75 | #include <utility> |
76 | #include <vector> |
77 | |
78 | #define DEBUG_TYPE "instcombine" |
79 | #include "llvm/Transforms/Utils/InstructionWorklist.h" |
80 | |
81 | using namespace llvm; |
82 | using namespace PatternMatch; |
83 | |
84 | STATISTIC(NumSimplified, "Number of library calls simplified" ); |
85 | |
86 | static cl::opt<unsigned> GuardWideningWindow( |
87 | "instcombine-guard-widening-window" , |
88 | cl::init(Val: 3), |
89 | cl::desc("How wide an instruction window to bypass looking for " |
90 | "another guard" )); |
91 | |
92 | /// Return the specified type promoted as it would be to pass through a va_arg |
93 | /// area. |
94 | static Type *getPromotedType(Type *Ty) { |
95 | if (IntegerType* ITy = dyn_cast<IntegerType>(Val: Ty)) { |
96 | if (ITy->getBitWidth() < 32) |
97 | return Type::getInt32Ty(C&: Ty->getContext()); |
98 | } |
99 | return Ty; |
100 | } |
101 | |
102 | /// Recognize a memcpy/memmove from an otherwise trivially unused alloca. |
103 | /// TODO: This should probably be integrated with visitAllocSites, but that |
104 | /// requires a deeper change to allow either unread or unwritten objects. |
105 | static bool hasUndefSource(AnyMemTransferInst *MI) { |
106 | auto *Src = MI->getRawSource(); |
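// Look through single-use GEPs and bitcasts to find the underlying object of
// the source pointer.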
107 | while (isa<GetElementPtrInst>(Val: Src) || isa<BitCastInst>(Val: Src)) { |
108 | if (!Src->hasOneUse()) |
109 | return false; |
110 | Src = cast<Instruction>(Val: Src)->getOperand(i: 0); |
111 | } |
112 | return isa<AllocaInst>(Val: Src) && Src->hasOneUse(); |
113 | } |
114 | |
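/// Simplify a memcpy/memmove (possibly atomic): improve the known alignments,
/// drop provably no-op transfers, and turn small constant-length copies into an
/// integer load/store pair. For example (illustrative IR; the resulting
/// alignments come from the call's align attributes):
///   call void @llvm.memcpy.p0.p0.i64(ptr %d, ptr %s, i64 4, i1 false)
/// becomes
///   %v = load i32, ptr %s, align 1
///   store i32 %v, ptr %d, align 1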
115 | Instruction *InstCombinerImpl::SimplifyAnyMemTransfer(AnyMemTransferInst *MI) { |
116 | Align DstAlign = getKnownAlignment(V: MI->getRawDest(), DL, CxtI: MI, AC: &AC, DT: &DT); |
117 | MaybeAlign CopyDstAlign = MI->getDestAlign(); |
118 | if (!CopyDstAlign || *CopyDstAlign < DstAlign) { |
119 | MI->setDestAlignment(DstAlign); |
120 | return MI; |
121 | } |
122 | |
123 | Align SrcAlign = getKnownAlignment(V: MI->getRawSource(), DL, CxtI: MI, AC: &AC, DT: &DT); |
124 | MaybeAlign CopySrcAlign = MI->getSourceAlign(); |
125 | if (!CopySrcAlign || *CopySrcAlign < SrcAlign) { |
126 | MI->setSourceAlignment(SrcAlign); |
127 | return MI; |
128 | } |
129 | |
130 | // If we have a store to a location which is known constant, we can conclude |
131 | // that the store must be storing the constant value (else the memory |
132 | // wouldn't be constant), and this must be a noop. |
133 | if (!isModSet(MRI: AA->getModRefInfoMask(P: MI->getDest()))) { |
134 | // Set the size of the copy to 0; it will be deleted on the next iteration. |
135 | MI->setLength(Constant::getNullValue(Ty: MI->getLength()->getType())); |
136 | return MI; |
137 | } |
138 | |
139 | // If the source is provably undef, the memcpy/memmove doesn't do anything |
140 | // (unless the transfer is volatile). |
141 | if (hasUndefSource(MI) && !MI->isVolatile()) { |
142 | // Set the size of the copy to 0; it will be deleted on the next iteration. |
143 | MI->setLength(Constant::getNullValue(Ty: MI->getLength()->getType())); |
144 | return MI; |
145 | } |
146 | |
147 | // If the transfer length is a constant 1/2/4/8 bytes, replace the |
148 | // memcpy/memmove with a single load/store pair. |
149 | ConstantInt *MemOpLength = dyn_cast<ConstantInt>(Val: MI->getLength()); |
150 | if (!MemOpLength) return nullptr; |
151 | |
152 | // Source and destination pointer types are always "i8*" for the intrinsic. See |
153 | // if the size is something we can handle with a single primitive load/store. |
154 | // A single load+store correctly handles overlapping memory in the memmove |
155 | // case. |
156 | uint64_t Size = MemOpLength->getLimitedValue(); |
157 | assert(Size && "0-sized memory transferring should be removed already." ); |
158 | |
159 | if (Size > 8 || (Size&(Size-1))) |
160 | return nullptr; // If not 1/2/4/8 bytes, exit. |
161 | |
162 | // If this is an atomic transfer and the alignment is less than the size, we |
163 | // would introduce an unaligned memory access, which CodeGen would later turn |
164 | // into a libcall. That is not an evident performance gain, so bail out for |
165 | // now. |
166 | if (isa<AtomicMemTransferInst>(Val: MI)) |
167 | if (*CopyDstAlign < Size || *CopySrcAlign < Size) |
168 | return nullptr; |
169 | |
170 | // Use an integer load+store unless we can find something better. |
171 | IntegerType* IntType = IntegerType::get(C&: MI->getContext(), NumBits: Size<<3); |
172 | |
173 | // If the memcpy has metadata describing the members, see if we can get the |
174 | // TBAA, scope and noalias tags describing our copy. |
175 | AAMDNodes AACopyMD = MI->getAAMetadata().adjustForAccess(AccessSize: Size); |
176 | |
177 | Value *Src = MI->getArgOperand(i: 1); |
178 | Value *Dest = MI->getArgOperand(i: 0); |
179 | LoadInst *L = Builder.CreateLoad(Ty: IntType, Ptr: Src); |
180 | // Alignment from the mem intrinsic will be better, so use it. |
181 | L->setAlignment(*CopySrcAlign); |
182 | L->setAAMetadata(AACopyMD); |
183 | MDNode *LoopMemParallelMD = |
184 | MI->getMetadata(KindID: LLVMContext::MD_mem_parallel_loop_access); |
185 | if (LoopMemParallelMD) |
186 | L->setMetadata(KindID: LLVMContext::MD_mem_parallel_loop_access, Node: LoopMemParallelMD); |
187 | MDNode *AccessGroupMD = MI->getMetadata(KindID: LLVMContext::MD_access_group); |
188 | if (AccessGroupMD) |
189 | L->setMetadata(KindID: LLVMContext::MD_access_group, Node: AccessGroupMD); |
190 | |
191 | StoreInst *S = Builder.CreateStore(Val: L, Ptr: Dest); |
192 | // Alignment from the mem intrinsic will be better, so use it. |
193 | S->setAlignment(*CopyDstAlign); |
194 | S->setAAMetadata(AACopyMD); |
195 | if (LoopMemParallelMD) |
196 | S->setMetadata(KindID: LLVMContext::MD_mem_parallel_loop_access, Node: LoopMemParallelMD); |
197 | if (AccessGroupMD) |
198 | S->setMetadata(KindID: LLVMContext::MD_access_group, Node: AccessGroupMD); |
199 | S->copyMetadata(SrcInst: *MI, WL: LLVMContext::MD_DIAssignID); |
200 | |
201 | if (auto *MT = dyn_cast<MemTransferInst>(Val: MI)) { |
202 | // non-atomics can be volatile |
203 | L->setVolatile(MT->isVolatile()); |
204 | S->setVolatile(MT->isVolatile()); |
205 | } |
206 | if (isa<AtomicMemTransferInst>(Val: MI)) { |
207 | // atomics have to be unordered |
208 | L->setOrdering(AtomicOrdering::Unordered); |
209 | S->setOrdering(AtomicOrdering::Unordered); |
210 | } |
211 | |
212 | // Set the size of the copy to 0; it will be deleted on the next iteration. |
213 | MI->setLength(Constant::getNullValue(Ty: MemOpLength->getType())); |
214 | return MI; |
215 | } |
216 | |
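/// Simplify a memset (possibly atomic): improve the known destination
/// alignment, drop provably no-op sets, and turn small constant-length sets
/// into a single store of the splatted fill value, e.g. (illustrative IR)
///   call void @llvm.memset.p0.i64(ptr %d, i8 7, i64 4, i1 false)
/// becomes
///   store i32 117901063, ptr %d, align 1   ; 117901063 == 0x07070707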
217 | Instruction *InstCombinerImpl::SimplifyAnyMemSet(AnyMemSetInst *MI) { |
218 | const Align KnownAlignment = |
219 | getKnownAlignment(V: MI->getDest(), DL, CxtI: MI, AC: &AC, DT: &DT); |
220 | MaybeAlign MemSetAlign = MI->getDestAlign(); |
221 | if (!MemSetAlign || *MemSetAlign < KnownAlignment) { |
222 | MI->setDestAlignment(KnownAlignment); |
223 | return MI; |
224 | } |
225 | |
226 | // If we have a store to a location which is known constant, we can conclude |
227 | // that the store must be storing the constant value (else the memory |
228 | // wouldn't be constant), and this must be a noop. |
229 | if (!isModSet(MRI: AA->getModRefInfoMask(P: MI->getDest()))) { |
230 | // Set the size of the memset to 0; it will be deleted on the next iteration. |
231 | MI->setLength(Constant::getNullValue(Ty: MI->getLength()->getType())); |
232 | return MI; |
233 | } |
234 | |
235 | // Remove memset with an undef value. |
236 | // FIXME: This is technically incorrect because it might overwrite a poison |
237 | // value. Change to PoisonValue once #52930 is resolved. |
238 | if (isa<UndefValue>(Val: MI->getValue())) { |
239 | // Set the size of the memset to 0; it will be deleted on the next iteration. |
240 | MI->setLength(Constant::getNullValue(Ty: MI->getLength()->getType())); |
241 | return MI; |
242 | } |
243 | |
244 | // Extract the length, alignment, and fill value if they are constant. |
245 | ConstantInt *LenC = dyn_cast<ConstantInt>(Val: MI->getLength()); |
246 | ConstantInt *FillC = dyn_cast<ConstantInt>(Val: MI->getValue()); |
247 | if (!LenC || !FillC || !FillC->getType()->isIntegerTy(Bitwidth: 8)) |
248 | return nullptr; |
249 | const uint64_t Len = LenC->getLimitedValue(); |
250 | assert(Len && "0-sized memory setting should be removed already." ); |
251 | const Align Alignment = MI->getDestAlign().valueOrOne(); |
252 | |
253 | // If this is an atomic memset and the alignment is less than the length, we |
254 | // would introduce an unaligned memory access, which CodeGen would later turn |
255 | // into a libcall. That is not an evident performance gain, so bail out for |
256 | // now. |
257 | if (isa<AtomicMemSetInst>(Val: MI)) |
258 | if (Alignment < Len) |
259 | return nullptr; |
260 | |
261 | // memset(s,c,n) -> store s, c (for n=1,2,4,8) |
262 | if (Len <= 8 && isPowerOf2_32(Value: (uint32_t)Len)) { |
263 | Type *ITy = IntegerType::get(C&: MI->getContext(), NumBits: Len*8); // n=1 -> i8. |
264 | |
265 | Value *Dest = MI->getDest(); |
266 | |
267 | // Extract the fill value and store. |
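// Splat the i8 fill value across all bytes of the wider integer by
// multiplying by 0x0101010101010101.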
268 | const uint64_t Fill = FillC->getZExtValue()*0x0101010101010101ULL; |
269 | Constant *FillVal = ConstantInt::get(Ty: ITy, V: Fill); |
270 | StoreInst *S = Builder.CreateStore(Val: FillVal, Ptr: Dest, isVolatile: MI->isVolatile()); |
271 | S->copyMetadata(SrcInst: *MI, WL: LLVMContext::MD_DIAssignID); |
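// If any debug assignment markers referred to the original i8 fill constant,
// point them at the new splatted constant instead.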
272 | auto replaceOpForAssignmentMarkers = [FillC, FillVal](auto *DbgAssign) { |
273 | if (llvm::is_contained(DbgAssign->location_ops(), FillC)) |
274 | DbgAssign->replaceVariableLocationOp(FillC, FillVal); |
275 | }; |
276 | for_each(Range: at::getAssignmentMarkers(Inst: S), F: replaceOpForAssignmentMarkers); |
277 | for_each(Range: at::getDVRAssignmentMarkers(Inst: S), F: replaceOpForAssignmentMarkers); |
278 | |
279 | S->setAlignment(Alignment); |
280 | if (isa<AtomicMemSetInst>(Val: MI)) |
281 | S->setOrdering(AtomicOrdering::Unordered); |
282 | |
283 | // Set the size of the memset to 0; it will be deleted on the next iteration. |
284 | MI->setLength(Constant::getNullValue(Ty: LenC->getType())); |
285 | return MI; |
286 | } |
287 | |
288 | return nullptr; |
289 | } |
290 | |
291 | // TODO: Obvious Missing Transforms: |
292 | // * Narrow width by halves excluding zero/undef lanes |
293 | Value *InstCombinerImpl::simplifyMaskedLoad(IntrinsicInst &II) { |
294 | Value *LoadPtr = II.getArgOperand(i: 0); |
295 | const Align Alignment = |
296 | cast<ConstantInt>(Val: II.getArgOperand(i: 1))->getAlignValue(); |
297 | |
298 | // If the mask is all ones or undefs, this is a plain vector load of the 1st |
299 | // argument. |
300 | if (maskIsAllOneOrUndef(Mask: II.getArgOperand(i: 2))) { |
301 | LoadInst *L = Builder.CreateAlignedLoad(Ty: II.getType(), Ptr: LoadPtr, Align: Alignment, |
302 | Name: "unmaskedload" ); |
303 | L->copyMetadata(SrcInst: II); |
304 | return L; |
305 | } |
306 | |
307 | // If we can unconditionally load from this address, replace with a |
308 | // load/select idiom. TODO: use DT for context-sensitive query |
309 | if (isDereferenceablePointer(V: LoadPtr, Ty: II.getType(), |
310 | DL: II.getDataLayout(), CtxI: &II, AC: &AC)) { |
311 | LoadInst *LI = Builder.CreateAlignedLoad(Ty: II.getType(), Ptr: LoadPtr, Align: Alignment, |
312 | Name: "unmaskedload" ); |
313 | LI->copyMetadata(SrcInst: II); |
314 | return Builder.CreateSelect(C: II.getArgOperand(i: 2), True: LI, False: II.getArgOperand(i: 3)); |
315 | } |
316 | |
317 | return nullptr; |
318 | } |
319 | |
320 | // TODO: Obvious Missing Transforms: |
321 | // * Single constant active lane -> store |
322 | // * Narrow width by halves excluding zero/undef lanes |
323 | Instruction *InstCombinerImpl::simplifyMaskedStore(IntrinsicInst &II) { |
324 | auto *ConstMask = dyn_cast<Constant>(Val: II.getArgOperand(i: 3)); |
325 | if (!ConstMask) |
326 | return nullptr; |
327 | |
328 | // If the mask is all zeros, this instruction does nothing. |
329 | if (ConstMask->isNullValue()) |
330 | return eraseInstFromFunction(I&: II); |
331 | |
332 | // If the mask is all ones, this is a plain vector store of the 1st argument. |
333 | if (ConstMask->isAllOnesValue()) { |
334 | Value *StorePtr = II.getArgOperand(i: 1); |
335 | Align Alignment = cast<ConstantInt>(Val: II.getArgOperand(i: 2))->getAlignValue(); |
336 | StoreInst *S = |
337 | new StoreInst(II.getArgOperand(i: 0), StorePtr, false, Alignment); |
338 | S->copyMetadata(SrcInst: II); |
339 | return S; |
340 | } |
341 | |
342 | if (isa<ScalableVectorType>(Val: ConstMask->getType())) |
343 | return nullptr; |
344 | |
345 | // Use masked off lanes to simplify operands via SimplifyDemandedVectorElts |
346 | APInt DemandedElts = possiblyDemandedEltsInMask(Mask: ConstMask); |
347 | APInt PoisonElts(DemandedElts.getBitWidth(), 0); |
348 | if (Value *V = SimplifyDemandedVectorElts(V: II.getOperand(i_nocapture: 0), DemandedElts, |
349 | PoisonElts)) |
350 | return replaceOperand(I&: II, OpNum: 0, V); |
351 | |
352 | return nullptr; |
353 | } |
354 | |
355 | // TODO: Obvious Missing Transforms: |
356 | // * Single constant active lane load -> load |
357 | // * Dereferenceable address & few lanes -> scalarize speculative load/selects |
358 | // * Adjacent vector addresses -> masked.load |
359 | // * Narrow width by halves excluding zero/undef lanes |
360 | // * Vector incrementing address -> vector masked load |
361 | Instruction *InstCombinerImpl::simplifyMaskedGather(IntrinsicInst &II) { |
362 | auto *ConstMask = dyn_cast<Constant>(Val: II.getArgOperand(i: 2)); |
363 | if (!ConstMask) |
364 | return nullptr; |
365 | |
366 | // Vector splat address w/known mask -> scalar load |
367 | // Fold the gather to a load of the first lane of the source vector, since |
368 | // every lane reloads the same value. |
369 | if (ConstMask->isAllOnesValue()) |
370 | if (auto *SplatPtr = getSplatValue(V: II.getArgOperand(i: 0))) { |
371 | auto *VecTy = cast<VectorType>(Val: II.getType()); |
372 | const Align Alignment = |
373 | cast<ConstantInt>(Val: II.getArgOperand(i: 1))->getAlignValue(); |
374 | LoadInst *L = Builder.CreateAlignedLoad(Ty: VecTy->getElementType(), Ptr: SplatPtr, |
375 | Align: Alignment, Name: "load.scalar" ); |
376 | Value *Shuf = |
377 | Builder.CreateVectorSplat(EC: VecTy->getElementCount(), V: L, Name: "broadcast" ); |
378 | return replaceInstUsesWith(I&: II, V: cast<Instruction>(Val: Shuf)); |
379 | } |
380 | |
381 | return nullptr; |
382 | } |
383 | |
384 | // TODO: Obvious Missing Transforms: |
385 | // * Single constant active lane -> store |
386 | // * Adjacent vector addresses -> masked.store |
387 | // * Narrow store width by halves excluding zero/undef lanes |
388 | // * Vector incrementing address -> vector masked store |
389 | Instruction *InstCombinerImpl::simplifyMaskedScatter(IntrinsicInst &II) { |
390 | auto *ConstMask = dyn_cast<Constant>(Val: II.getArgOperand(i: 3)); |
391 | if (!ConstMask) |
392 | return nullptr; |
393 | |
394 | // If the mask is all zeros, a scatter does nothing. |
395 | if (ConstMask->isNullValue()) |
396 | return eraseInstFromFunction(I&: II); |
397 | |
398 | // Vector splat address -> scalar store |
399 | if (auto *SplatPtr = getSplatValue(V: II.getArgOperand(i: 1))) { |
400 | // scatter(splat(value), splat(ptr), non-zero-mask) -> store value, ptr |
401 | if (auto *SplatValue = getSplatValue(V: II.getArgOperand(i: 0))) { |
402 | if (maskContainsAllOneOrUndef(Mask: ConstMask)) { |
403 | Align Alignment = |
404 | cast<ConstantInt>(Val: II.getArgOperand(i: 2))->getAlignValue(); |
405 | StoreInst *S = new StoreInst(SplatValue, SplatPtr, /*IsVolatile=*/false, |
406 | Alignment); |
407 | S->copyMetadata(SrcInst: II); |
408 | return S; |
409 | } |
410 | } |
411 | // scatter(vector, splat(ptr), splat(true)) -> store extract(vector, |
412 | // lastlane), ptr |
413 | if (ConstMask->isAllOnesValue()) { |
414 | Align Alignment = cast<ConstantInt>(Val: II.getArgOperand(i: 2))->getAlignValue(); |
415 | VectorType *WideLoadTy = cast<VectorType>(Val: II.getArgOperand(i: 1)->getType()); |
416 | ElementCount VF = WideLoadTy->getElementCount(); |
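// For scalable vectors the element count is only known at runtime, so
// materialize it to compute the index of the last lane.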
417 | Value *RunTimeVF = Builder.CreateElementCount(DstType: Builder.getInt32Ty(), EC: VF); |
418 | Value *LastLane = Builder.CreateSub(LHS: RunTimeVF, RHS: Builder.getInt32(C: 1)); |
419 | Value *Extract = |
420 | Builder.CreateExtractElement(Vec: II.getArgOperand(i: 0), Idx: LastLane); |
421 | StoreInst *S = |
422 | new StoreInst(Extract, SplatPtr, /*IsVolatile=*/false, Alignment); |
423 | S->copyMetadata(SrcInst: II); |
424 | return S; |
425 | } |
426 | } |
427 | if (isa<ScalableVectorType>(Val: ConstMask->getType())) |
428 | return nullptr; |
429 | |
430 | // Use masked off lanes to simplify operands via SimplifyDemandedVectorElts |
431 | APInt DemandedElts = possiblyDemandedEltsInMask(Mask: ConstMask); |
432 | APInt PoisonElts(DemandedElts.getBitWidth(), 0); |
433 | if (Value *V = SimplifyDemandedVectorElts(V: II.getOperand(i_nocapture: 0), DemandedElts, |
434 | PoisonElts)) |
435 | return replaceOperand(I&: II, OpNum: 0, V); |
436 | if (Value *V = SimplifyDemandedVectorElts(V: II.getOperand(i_nocapture: 1), DemandedElts, |
437 | PoisonElts)) |
438 | return replaceOperand(I&: II, OpNum: 1, V); |
439 | |
440 | return nullptr; |
441 | } |
442 | |
443 | /// This function transforms launder.invariant.group and strip.invariant.group |
444 | /// as follows: |
445 | /// launder(launder(%x)) -> launder(%x) (the result is not the argument) |
446 | /// launder(strip(%x)) -> launder(%x) |
447 | /// strip(strip(%x)) -> strip(%x) (the result is not the argument) |
448 | /// strip(launder(%x)) -> strip(%x) |
449 | /// This is legal because it preserves the most recent information about |
450 | /// the presence or absence of invariant.group. |
451 | static Instruction *simplifyInvariantGroupIntrinsic(IntrinsicInst &II, |
452 | InstCombinerImpl &IC) { |
453 | auto *Arg = II.getArgOperand(i: 0); |
454 | auto *StrippedArg = Arg->stripPointerCasts(); |
455 | auto *StrippedInvariantGroupsArg = StrippedArg; |
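// Peel off any chain of launder/strip calls (and pointer casts) feeding the
// argument to find the underlying pointer.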
456 | while (auto *Intr = dyn_cast<IntrinsicInst>(Val: StrippedInvariantGroupsArg)) { |
457 | if (Intr->getIntrinsicID() != Intrinsic::launder_invariant_group && |
458 | Intr->getIntrinsicID() != Intrinsic::strip_invariant_group) |
459 | break; |
460 | StrippedInvariantGroupsArg = Intr->getArgOperand(i: 0)->stripPointerCasts(); |
461 | } |
462 | if (StrippedArg == StrippedInvariantGroupsArg) |
463 | return nullptr; // No launders/strips to remove. |
464 | |
465 | Value *Result = nullptr; |
466 | |
467 | if (II.getIntrinsicID() == Intrinsic::launder_invariant_group) |
468 | Result = IC.Builder.CreateLaunderInvariantGroup(Ptr: StrippedInvariantGroupsArg); |
469 | else if (II.getIntrinsicID() == Intrinsic::strip_invariant_group) |
470 | Result = IC.Builder.CreateStripInvariantGroup(Ptr: StrippedInvariantGroupsArg); |
471 | else |
472 | llvm_unreachable( |
473 | "simplifyInvariantGroupIntrinsic only handles launder and strip" ); |
474 | if (Result->getType()->getPointerAddressSpace() != |
475 | II.getType()->getPointerAddressSpace()) |
476 | Result = IC.Builder.CreateAddrSpaceCast(V: Result, DestTy: II.getType()); |
477 | |
478 | return cast<Instruction>(Val: Result); |
479 | } |
480 | |
481 | static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombinerImpl &IC) { |
482 | assert((II.getIntrinsicID() == Intrinsic::cttz || |
483 | II.getIntrinsicID() == Intrinsic::ctlz) && |
484 | "Expected cttz or ctlz intrinsic" ); |
485 | bool IsTZ = II.getIntrinsicID() == Intrinsic::cttz; |
486 | Value *Op0 = II.getArgOperand(i: 0); |
487 | Value *Op1 = II.getArgOperand(i: 1); |
488 | Value *X; |
489 | // ctlz(bitreverse(x)) -> cttz(x) |
490 | // cttz(bitreverse(x)) -> ctlz(x) |
491 | if (match(V: Op0, P: m_BitReverse(Op0: m_Value(V&: X)))) { |
492 | Intrinsic::ID ID = IsTZ ? Intrinsic::ctlz : Intrinsic::cttz; |
493 | Function *F = Intrinsic::getDeclaration(M: II.getModule(), id: ID, Tys: II.getType()); |
494 | return CallInst::Create(Func: F, Args: {X, II.getArgOperand(i: 1)}); |
495 | } |
496 | |
497 | if (II.getType()->isIntOrIntVectorTy(BitWidth: 1)) { |
498 | // ctlz/cttz i1 Op0 --> not Op0 |
499 | if (match(V: Op1, P: m_Zero())) |
500 | return BinaryOperator::CreateNot(Op: Op0); |
501 | // If zero is poison, then the input can be assumed to be "true", so the |
502 | // instruction simplifies to "false". |
503 | assert(match(Op1, m_One()) && "Expected ctlz/cttz operand to be 0 or 1" ); |
504 | return IC.replaceInstUsesWith(I&: II, V: ConstantInt::getNullValue(Ty: II.getType())); |
505 | } |
506 | |
507 | // If ctlz/cttz is only used as a shift amount, set is_zero_poison to true. |
508 | if (II.hasOneUse() && match(V: Op1, P: m_Zero()) && |
509 | match(V: II.user_back(), P: m_Shift(L: m_Value(), R: m_Specific(V: &II)))) |
510 | return IC.replaceOperand(I&: II, OpNum: 1, V: IC.Builder.getTrue()); |
511 | |
512 | Constant *C; |
513 | |
514 | if (IsTZ) { |
515 | // cttz(-x) -> cttz(x) |
516 | if (match(V: Op0, P: m_Neg(V: m_Value(V&: X)))) |
517 | return IC.replaceOperand(I&: II, OpNum: 0, V: X); |
518 | |
519 | // cttz(-x & x) -> cttz(x) |
520 | if (match(V: Op0, P: m_c_And(L: m_Neg(V: m_Value(V&: X)), R: m_Deferred(V: X)))) |
521 | return IC.replaceOperand(I&: II, OpNum: 0, V: X); |
522 | |
523 | // cttz(sext(x)) -> cttz(zext(x)) |
524 | if (match(V: Op0, P: m_OneUse(SubPattern: m_SExt(Op: m_Value(V&: X))))) { |
525 | auto *Zext = IC.Builder.CreateZExt(V: X, DestTy: II.getType()); |
526 | auto *CttzZext = |
527 | IC.Builder.CreateBinaryIntrinsic(ID: Intrinsic::cttz, LHS: Zext, RHS: Op1); |
528 | return IC.replaceInstUsesWith(I&: II, V: CttzZext); |
529 | } |
530 | |
531 | // Zext doesn't change the number of trailing zeros, so narrow: |
532 | // cttz(zext(x)) -> zext(cttz(x)) if the 'ZeroIsPoison' parameter is 'true'. |
533 | if (match(V: Op0, P: m_OneUse(SubPattern: m_ZExt(Op: m_Value(V&: X)))) && match(V: Op1, P: m_One())) { |
534 | auto *Cttz = IC.Builder.CreateBinaryIntrinsic(ID: Intrinsic::cttz, LHS: X, |
535 | RHS: IC.Builder.getTrue()); |
536 | auto *ZextCttz = IC.Builder.CreateZExt(V: Cttz, DestTy: II.getType()); |
537 | return IC.replaceInstUsesWith(I&: II, V: ZextCttz); |
538 | } |
539 | |
540 | // cttz(abs(x)) -> cttz(x) |
541 | // cttz(nabs(x)) -> cttz(x) |
542 | Value *Y; |
543 | SelectPatternFlavor SPF = matchSelectPattern(V: Op0, LHS&: X, RHS&: Y).Flavor; |
544 | if (SPF == SPF_ABS || SPF == SPF_NABS) |
545 | return IC.replaceOperand(I&: II, OpNum: 0, V: X); |
546 | |
547 | if (match(V: Op0, P: m_Intrinsic<Intrinsic::abs>(Op0: m_Value(V&: X)))) |
548 | return IC.replaceOperand(I&: II, OpNum: 0, V: X); |
549 | |
550 | // cttz(shl(%const, %val), 1) --> add(cttz(%const, 1), %val) |
551 | if (match(V: Op0, P: m_Shl(L: m_ImmConstant(C), R: m_Value(V&: X))) && |
552 | match(V: Op1, P: m_One())) { |
553 | Value *ConstCttz = |
554 | IC.Builder.CreateBinaryIntrinsic(ID: Intrinsic::cttz, LHS: C, RHS: Op1); |
555 | return BinaryOperator::CreateAdd(V1: ConstCttz, V2: X); |
556 | } |
557 | |
558 | // cttz(lshr exact (%const, %val), 1) --> sub(cttz(%const, 1), %val) |
559 | if (match(V: Op0, P: m_Exact(SubPattern: m_LShr(L: m_ImmConstant(C), R: m_Value(V&: X)))) && |
560 | match(V: Op1, P: m_One())) { |
561 | Value *ConstCttz = |
562 | IC.Builder.CreateBinaryIntrinsic(ID: Intrinsic::cttz, LHS: C, RHS: Op1); |
563 | return BinaryOperator::CreateSub(V1: ConstCttz, V2: X); |
564 | } |
565 | |
566 | // cttz(add(lshr(UINT_MAX, %val), 1)) --> sub(width, %val) |
567 | if (match(V: Op0, P: m_Add(L: m_LShr(L: m_AllOnes(), R: m_Value(V&: X)), R: m_One()))) { |
568 | Value *Width = |
569 | ConstantInt::get(Ty: II.getType(), V: II.getType()->getScalarSizeInBits()); |
570 | return BinaryOperator::CreateSub(V1: Width, V2: X); |
571 | } |
572 | } else { |
573 | // ctlz(lshr(%const, %val), 1) --> add(ctlz(%const, 1), %val) |
574 | if (match(V: Op0, P: m_LShr(L: m_ImmConstant(C), R: m_Value(V&: X))) && |
575 | match(V: Op1, P: m_One())) { |
576 | Value *ConstCtlz = |
577 | IC.Builder.CreateBinaryIntrinsic(ID: Intrinsic::ctlz, LHS: C, RHS: Op1); |
578 | return BinaryOperator::CreateAdd(V1: ConstCtlz, V2: X); |
579 | } |
580 | |
581 | // ctlz(shl nuw (%const, %val), 1) --> sub(ctlz(%const, 1), %val) |
582 | if (match(V: Op0, P: m_NUWShl(L: m_ImmConstant(C), R: m_Value(V&: X))) && |
583 | match(V: Op1, P: m_One())) { |
584 | Value *ConstCtlz = |
585 | IC.Builder.CreateBinaryIntrinsic(ID: Intrinsic::ctlz, LHS: C, RHS: Op1); |
586 | return BinaryOperator::CreateSub(V1: ConstCtlz, V2: X); |
587 | } |
588 | } |
589 | |
590 | KnownBits Known = IC.computeKnownBits(V: Op0, Depth: 0, CxtI: &II); |
591 | |
592 | // Create a mask for bits above (ctlz) or below (cttz) the first known one. |
593 | unsigned PossibleZeros = IsTZ ? Known.countMaxTrailingZeros() |
594 | : Known.countMaxLeadingZeros(); |
595 | unsigned DefiniteZeros = IsTZ ? Known.countMinTrailingZeros() |
596 | : Known.countMinLeadingZeros(); |
597 | |
598 | // If all bits above (ctlz) or below (cttz) the first known one are known |
599 | // zero, this value is constant. |
600 | // FIXME: This should be in InstSimplify because we're replacing an |
601 | // instruction with a constant. |
602 | if (PossibleZeros == DefiniteZeros) { |
603 | auto *C = ConstantInt::get(Ty: Op0->getType(), V: DefiniteZeros); |
604 | return IC.replaceInstUsesWith(I&: II, V: C); |
605 | } |
606 | |
607 | // If the input to cttz/ctlz is known to be non-zero, |
608 | // then change the 'ZeroIsPoison' parameter to 'true' |
609 | // because we know the zero behavior can't affect the result. |
610 | if (!Known.One.isZero() || |
611 | isKnownNonZero(V: Op0, Q: IC.getSimplifyQuery().getWithInstruction(I: &II))) { |
612 | if (!match(V: II.getArgOperand(i: 1), P: m_One())) |
613 | return IC.replaceOperand(I&: II, OpNum: 1, V: IC.Builder.getTrue()); |
614 | } |
615 | |
616 | // Add range attribute since known bits can't completely reflect what we know. |
617 | unsigned BitWidth = Op0->getType()->getScalarSizeInBits(); |
618 | if (BitWidth != 1 && !II.hasRetAttr(Kind: Attribute::Range) && |
619 | !II.getMetadata(KindID: LLVMContext::MD_range)) { |
620 | ConstantRange Range(APInt(BitWidth, DefiniteZeros), |
621 | APInt(BitWidth, PossibleZeros + 1)); |
622 | II.addRangeRetAttr(CR: Range); |
623 | return &II; |
624 | } |
625 | |
626 | return nullptr; |
627 | } |
628 | |
629 | static Instruction *foldCtpop(IntrinsicInst &II, InstCombinerImpl &IC) { |
630 | assert(II.getIntrinsicID() == Intrinsic::ctpop && |
631 | "Expected ctpop intrinsic" ); |
632 | Type *Ty = II.getType(); |
633 | unsigned BitWidth = Ty->getScalarSizeInBits(); |
634 | Value *Op0 = II.getArgOperand(i: 0); |
635 | Value *X, *Y; |
636 | |
637 | // ctpop(bitreverse(x)) -> ctpop(x) |
638 | // ctpop(bswap(x)) -> ctpop(x) |
639 | if (match(V: Op0, P: m_BitReverse(Op0: m_Value(V&: X))) || match(V: Op0, P: m_BSwap(Op0: m_Value(V&: X)))) |
640 | return IC.replaceOperand(I&: II, OpNum: 0, V: X); |
641 | |
642 | // ctpop(rot(x)) -> ctpop(x) |
643 | if ((match(V: Op0, P: m_FShl(Op0: m_Value(V&: X), Op1: m_Value(V&: Y), Op2: m_Value())) || |
644 | match(V: Op0, P: m_FShr(Op0: m_Value(V&: X), Op1: m_Value(V&: Y), Op2: m_Value()))) && |
645 | X == Y) |
646 | return IC.replaceOperand(I&: II, OpNum: 0, V: X); |
647 | |
648 | // ctpop(x | -x) -> bitwidth - cttz(x, false) |
649 | if (Op0->hasOneUse() && |
650 | match(V: Op0, P: m_c_Or(L: m_Value(V&: X), R: m_Neg(V: m_Deferred(V: X))))) { |
651 | Function *F = |
652 | Intrinsic::getDeclaration(M: II.getModule(), id: Intrinsic::cttz, Tys: Ty); |
653 | auto *Cttz = IC.Builder.CreateCall(Callee: F, Args: {X, IC.Builder.getFalse()}); |
654 | auto *Bw = ConstantInt::get(Ty, V: APInt(BitWidth, BitWidth)); |
655 | return IC.replaceInstUsesWith(I&: II, V: IC.Builder.CreateSub(LHS: Bw, RHS: Cttz)); |
656 | } |
657 | |
658 | // ctpop(~x & (x - 1)) -> cttz(x, false) |
659 | if (match(V: Op0, |
660 | P: m_c_And(L: m_Not(V: m_Value(V&: X)), R: m_Add(L: m_Deferred(V: X), R: m_AllOnes())))) { |
661 | Function *F = |
662 | Intrinsic::getDeclaration(M: II.getModule(), id: Intrinsic::cttz, Tys: Ty); |
663 | return CallInst::Create(Func: F, Args: {X, IC.Builder.getFalse()}); |
664 | } |
665 | |
666 | // Zext doesn't change the number of set bits, so narrow: |
667 | // ctpop (zext X) --> zext (ctpop X) |
668 | if (match(V: Op0, P: m_OneUse(SubPattern: m_ZExt(Op: m_Value(V&: X))))) { |
669 | Value *NarrowPop = IC.Builder.CreateUnaryIntrinsic(ID: Intrinsic::ctpop, V: X); |
670 | return CastInst::Create(Instruction::ZExt, S: NarrowPop, Ty); |
671 | } |
672 | |
673 | KnownBits Known(BitWidth); |
674 | IC.computeKnownBits(V: Op0, Known, Depth: 0, CxtI: &II); |
675 | |
676 | // If all bits are zero except for exactly one fixed bit, then the result |
677 | // must be 0 or 1, and we can get that answer by shifting to LSB: |
678 | // ctpop (X & 32) --> (X & 32) >> 5 |
679 | // TODO: Investigate removing this, as it's likely unnecessary given the |
680 | // `isKnownToBeAPowerOfTwo` check below. |
681 | if ((~Known.Zero).isPowerOf2()) |
682 | return BinaryOperator::CreateLShr( |
683 | V1: Op0, V2: ConstantInt::get(Ty, V: (~Known.Zero).exactLogBase2())); |
684 | |
685 | // More generally we can also handle non-constant power of 2 patterns such as |
686 | // shl/shr(Pow2, X), (X & -X), etc... by transforming: |
687 | // ctpop(Pow2OrZero) --> icmp ne X, 0 |
688 | if (IC.isKnownToBeAPowerOfTwo(V: Op0, /* OrZero */ true)) |
689 | return CastInst::Create(Instruction::ZExt, |
690 | S: IC.Builder.CreateICmp(P: ICmpInst::ICMP_NE, LHS: Op0, |
691 | RHS: Constant::getNullValue(Ty)), |
692 | Ty); |
693 | |
694 | // Add range attribute since known bits can't completely reflect what we know. |
695 | if (BitWidth != 1 && !II.hasRetAttr(Kind: Attribute::Range) && |
696 | !II.getMetadata(KindID: LLVMContext::MD_range)) { |
697 | ConstantRange Range(APInt(BitWidth, Known.countMinPopulation()), |
698 | APInt(BitWidth, Known.countMaxPopulation() + 1)); |
699 | II.addRangeRetAttr(CR: Range); |
700 | return &II; |
701 | } |
702 | |
703 | return nullptr; |
704 | } |
705 | |
706 | /// Convert a table lookup to shufflevector if the mask is constant. |
707 | /// This could benefit tbl1 if the mask is { 7,6,5,4,3,2,1,0 }, in |
708 | /// which case we could lower the shufflevector with rev64 instructions |
709 | /// as it's actually a byte reverse. |
710 | static Value *simplifyNeonTbl1(const IntrinsicInst &II, |
711 | InstCombiner::BuilderTy &Builder) { |
712 | // Bail out if the mask is not a constant. |
713 | auto *C = dyn_cast<Constant>(Val: II.getArgOperand(i: 1)); |
714 | if (!C) |
715 | return nullptr; |
716 | |
717 | auto *VecTy = cast<FixedVectorType>(Val: II.getType()); |
718 | unsigned NumElts = VecTy->getNumElements(); |
719 | |
720 | // Only perform this transformation for <8 x i8> vector types. |
721 | if (!VecTy->getElementType()->isIntegerTy(Bitwidth: 8) || NumElts != 8) |
722 | return nullptr; |
723 | |
724 | int Indexes[8]; |
725 | |
726 | for (unsigned I = 0; I < NumElts; ++I) { |
727 | Constant *COp = C->getAggregateElement(Elt: I); |
728 | |
729 | if (!COp || !isa<ConstantInt>(Val: COp)) |
730 | return nullptr; |
731 | |
732 | Indexes[I] = cast<ConstantInt>(Val: COp)->getLimitedValue(); |
733 | |
734 | // Make sure the mask indices are in range. |
735 | if ((unsigned)Indexes[I] >= NumElts) |
736 | return nullptr; |
737 | } |
738 | |
739 | auto *V1 = II.getArgOperand(i: 0); |
740 | auto *V2 = Constant::getNullValue(Ty: V1->getType()); |
741 | return Builder.CreateShuffleVector(V1, V2, Mask: ArrayRef(Indexes)); |
742 | } |
743 | |
744 | // Returns true iff the 2 intrinsics have the same operands, limiting the |
745 | // comparison to the first NumOperands. |
746 | static bool haveSameOperands(const IntrinsicInst &I, const IntrinsicInst &E, |
747 | unsigned NumOperands) { |
748 | assert(I.arg_size() >= NumOperands && "Not enough operands" ); |
749 | assert(E.arg_size() >= NumOperands && "Not enough operands" ); |
750 | for (unsigned i = 0; i < NumOperands; i++) |
751 | if (I.getArgOperand(i) != E.getArgOperand(i)) |
752 | return false; |
753 | return true; |
754 | } |
755 | |
756 | // Remove trivially empty start/end intrinsic ranges, i.e. a start |
757 | // immediately followed by an end (ignoring debuginfo or other |
758 | // start/end intrinsics in between). As this handles only the most trivial |
759 | // cases, tracking the nesting level is not needed: |
760 | // |
761 | // call @llvm.foo.start(i1 0) |
762 | // call @llvm.foo.start(i1 0) ; This one won't be skipped: it will be removed |
763 | // call @llvm.foo.end(i1 0) |
764 | // call @llvm.foo.end(i1 0) ; &I |
765 | static bool |
766 | removeTriviallyEmptyRange(IntrinsicInst &EndI, InstCombinerImpl &IC, |
767 | std::function<bool(const IntrinsicInst &)> IsStart) { |
768 | // We start from the end intrinsic and scan backwards, so that InstCombine |
769 | // has already processed (and potentially removed) all the instructions |
770 | // before the end intrinsic. |
771 | BasicBlock::reverse_iterator BI(EndI), BE(EndI.getParent()->rend()); |
772 | for (; BI != BE; ++BI) { |
773 | if (auto *I = dyn_cast<IntrinsicInst>(Val: &*BI)) { |
774 | if (I->isDebugOrPseudoInst() || |
775 | I->getIntrinsicID() == EndI.getIntrinsicID()) |
776 | continue; |
777 | if (IsStart(*I)) { |
778 | if (haveSameOperands(I: EndI, E: *I, NumOperands: EndI.arg_size())) { |
779 | IC.eraseInstFromFunction(I&: *I); |
780 | IC.eraseInstFromFunction(I&: EndI); |
781 | return true; |
782 | } |
783 | // Skip start intrinsics that don't pair with this end intrinsic. |
784 | continue; |
785 | } |
786 | } |
787 | break; |
788 | } |
789 | |
790 | return false; |
791 | } |
792 | |
793 | Instruction *InstCombinerImpl::visitVAEndInst(VAEndInst &I) { |
794 | removeTriviallyEmptyRange(EndI&: I, IC&: *this, IsStart: [](const IntrinsicInst &I) { |
795 | return I.getIntrinsicID() == Intrinsic::vastart || |
796 | I.getIntrinsicID() == Intrinsic::vacopy; |
797 | }); |
798 | return nullptr; |
799 | } |
800 | |
801 | static CallInst *canonicalizeConstantArg0ToArg1(CallInst &Call) { |
802 | assert(Call.arg_size() > 1 && "Need at least 2 args to swap" ); |
803 | Value *Arg0 = Call.getArgOperand(i: 0), *Arg1 = Call.getArgOperand(i: 1); |
804 | if (isa<Constant>(Val: Arg0) && !isa<Constant>(Val: Arg1)) { |
805 | Call.setArgOperand(i: 0, v: Arg1); |
806 | Call.setArgOperand(i: 1, v: Arg0); |
807 | return &Call; |
808 | } |
809 | return nullptr; |
810 | } |
811 | |
812 | /// Creates a result tuple for an overflow intrinsic \p II with a given |
813 | /// \p Result and a constant \p Overflow value. |
814 | static Instruction *createOverflowTuple(IntrinsicInst *II, Value *Result, |
815 | Constant *Overflow) { |
816 | Constant *V[] = {PoisonValue::get(T: Result->getType()), Overflow}; |
817 | StructType *ST = cast<StructType>(Val: II->getType()); |
818 | Constant *Struct = ConstantStruct::get(T: ST, V); |
819 | return InsertValueInst::Create(Agg: Struct, Val: Result, Idxs: 0); |
820 | } |
821 | |
822 | Instruction * |
823 | InstCombinerImpl::foldIntrinsicWithOverflowCommon(IntrinsicInst *II) { |
824 | WithOverflowInst *WO = cast<WithOverflowInst>(Val: II); |
825 | Value *OperationResult = nullptr; |
826 | Constant *OverflowResult = nullptr; |
827 | if (OptimizeOverflowCheck(BinaryOp: WO->getBinaryOp(), IsSigned: WO->isSigned(), LHS: WO->getLHS(), |
828 | RHS: WO->getRHS(), CtxI&: *WO, OperationResult, OverflowResult)) |
829 | return createOverflowTuple(II: WO, Result: OperationResult, Overflow: OverflowResult); |
830 | return nullptr; |
831 | } |
832 | |
833 | static bool inputDenormalIsIEEE(const Function &F, const Type *Ty) { |
834 | Ty = Ty->getScalarType(); |
835 | return F.getDenormalMode(FPType: Ty->getFltSemantics()).Input == DenormalMode::IEEE; |
836 | } |
837 | |
838 | static bool inputDenormalIsDAZ(const Function &F, const Type *Ty) { |
839 | Ty = Ty->getScalarType(); |
840 | return F.getDenormalMode(FPType: Ty->getFltSemantics()).inputsAreZero(); |
841 | } |
842 | |
843 | /// \returns the compare predicate type if the test performed by |
844 | /// llvm.is.fpclass(x, \p Mask) is equivalent to fcmp o__ x, 0.0 with the |
845 | /// floating-point environment assumed for \p F for type \p Ty |
846 | static FCmpInst::Predicate fpclassTestIsFCmp0(FPClassTest Mask, |
847 | const Function &F, Type *Ty) { |
848 | switch (static_cast<unsigned>(Mask)) { |
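// Whether a compare against zero also accepts subnormal inputs depends on the
// function's input denormal mode: if denormal inputs are flushed to zero
// (DAZ), an fcmp with 0.0 matches subnormals as well.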
849 | case fcZero: |
850 | if (inputDenormalIsIEEE(F, Ty)) |
851 | return FCmpInst::FCMP_OEQ; |
852 | break; |
853 | case fcZero | fcSubnormal: |
854 | if (inputDenormalIsDAZ(F, Ty)) |
855 | return FCmpInst::FCMP_OEQ; |
856 | break; |
857 | case fcPositive | fcNegZero: |
858 | if (inputDenormalIsIEEE(F, Ty)) |
859 | return FCmpInst::FCMP_OGE; |
860 | break; |
861 | case fcPositive | fcNegZero | fcNegSubnormal: |
862 | if (inputDenormalIsDAZ(F, Ty)) |
863 | return FCmpInst::FCMP_OGE; |
864 | break; |
865 | case fcPosSubnormal | fcPosNormal | fcPosInf: |
866 | if (inputDenormalIsIEEE(F, Ty)) |
867 | return FCmpInst::FCMP_OGT; |
868 | break; |
869 | case fcNegative | fcPosZero: |
870 | if (inputDenormalIsIEEE(F, Ty)) |
871 | return FCmpInst::FCMP_OLE; |
872 | break; |
873 | case fcNegative | fcPosZero | fcPosSubnormal: |
874 | if (inputDenormalIsDAZ(F, Ty)) |
875 | return FCmpInst::FCMP_OLE; |
876 | break; |
877 | case fcNegSubnormal | fcNegNormal | fcNegInf: |
878 | if (inputDenormalIsIEEE(F, Ty)) |
879 | return FCmpInst::FCMP_OLT; |
880 | break; |
881 | case fcPosNormal | fcPosInf: |
882 | if (inputDenormalIsDAZ(F, Ty)) |
883 | return FCmpInst::FCMP_OGT; |
884 | break; |
885 | case fcNegNormal | fcNegInf: |
886 | if (inputDenormalIsDAZ(F, Ty)) |
887 | return FCmpInst::FCMP_OLT; |
888 | break; |
889 | case ~fcZero & ~fcNan: |
890 | if (inputDenormalIsIEEE(F, Ty)) |
891 | return FCmpInst::FCMP_ONE; |
892 | break; |
893 | case ~(fcZero | fcSubnormal) & ~fcNan: |
894 | if (inputDenormalIsDAZ(F, Ty)) |
895 | return FCmpInst::FCMP_ONE; |
896 | break; |
897 | default: |
898 | break; |
899 | } |
900 | |
901 | return FCmpInst::BAD_FCMP_PREDICATE; |
902 | } |
903 | |
904 | Instruction *InstCombinerImpl::foldIntrinsicIsFPClass(IntrinsicInst &II) { |
905 | Value *Src0 = II.getArgOperand(i: 0); |
906 | Value *Src1 = II.getArgOperand(i: 1); |
907 | const ConstantInt *CMask = cast<ConstantInt>(Val: Src1); |
908 | FPClassTest Mask = static_cast<FPClassTest>(CMask->getZExtValue()); |
909 | const bool IsUnordered = (Mask & fcNan) == fcNan; |
910 | const bool IsOrdered = (Mask & fcNan) == fcNone; |
911 | const FPClassTest OrderedMask = Mask & ~fcNan; |
912 | const FPClassTest OrderedInvertedMask = ~OrderedMask & ~fcNan; |
913 | |
914 | const bool IsStrict = |
915 | II.getFunction()->getAttributes().hasFnAttr(Kind: Attribute::StrictFP); |
916 | |
917 | Value *FNegSrc; |
918 | if (match(V: Src0, P: m_FNeg(X: m_Value(V&: FNegSrc)))) { |
919 | // is.fpclass (fneg x), mask -> is.fpclass x, (fneg mask) |
920 | |
921 | II.setArgOperand(i: 1, v: ConstantInt::get(Ty: Src1->getType(), V: fneg(Mask))); |
922 | return replaceOperand(I&: II, OpNum: 0, V: FNegSrc); |
923 | } |
924 | |
925 | Value *FAbsSrc; |
926 | if (match(V: Src0, P: m_FAbs(Op0: m_Value(V&: FAbsSrc)))) { |
927 | II.setArgOperand(i: 1, v: ConstantInt::get(Ty: Src1->getType(), V: inverse_fabs(Mask))); |
928 | return replaceOperand(I&: II, OpNum: 0, V: FAbsSrc); |
929 | } |
930 | |
931 | if ((OrderedMask == fcInf || OrderedInvertedMask == fcInf) && |
932 | (IsOrdered || IsUnordered) && !IsStrict) { |
933 | // is.fpclass(x, fcInf) -> fcmp oeq fabs(x), +inf |
934 | // is.fpclass(x, ~fcInf) -> fcmp one fabs(x), +inf |
935 | // is.fpclass(x, fcInf|fcNan) -> fcmp ueq fabs(x), +inf |
936 | // is.fpclass(x, ~(fcInf|fcNan)) -> fcmp une fabs(x), +inf |
937 | Constant *Inf = ConstantFP::getInfinity(Ty: Src0->getType()); |
938 | FCmpInst::Predicate Pred = |
939 | IsUnordered ? FCmpInst::FCMP_UEQ : FCmpInst::FCMP_OEQ; |
940 | if (OrderedInvertedMask == fcInf) |
941 | Pred = IsUnordered ? FCmpInst::FCMP_UNE : FCmpInst::FCMP_ONE; |
942 | |
943 | Value *Fabs = Builder.CreateUnaryIntrinsic(ID: Intrinsic::fabs, V: Src0); |
944 | Value *CmpInf = Builder.CreateFCmp(P: Pred, LHS: Fabs, RHS: Inf); |
945 | CmpInf->takeName(V: &II); |
946 | return replaceInstUsesWith(I&: II, V: CmpInf); |
947 | } |
948 | |
949 | if ((OrderedMask == fcPosInf || OrderedMask == fcNegInf) && |
950 | (IsOrdered || IsUnordered) && !IsStrict) { |
951 | // is.fpclass(x, fcPosInf) -> fcmp oeq x, +inf |
952 | // is.fpclass(x, fcNegInf) -> fcmp oeq x, -inf |
953 | // is.fpclass(x, fcPosInf|fcNan) -> fcmp ueq x, +inf |
954 | // is.fpclass(x, fcNegInf|fcNan) -> fcmp ueq x, -inf |
955 | Constant *Inf = |
956 | ConstantFP::getInfinity(Ty: Src0->getType(), Negative: OrderedMask == fcNegInf); |
957 | Value *EqInf = IsUnordered ? Builder.CreateFCmpUEQ(LHS: Src0, RHS: Inf) |
958 | : Builder.CreateFCmpOEQ(LHS: Src0, RHS: Inf); |
959 | |
960 | EqInf->takeName(V: &II); |
961 | return replaceInstUsesWith(I&: II, V: EqInf); |
962 | } |
963 | |
964 | if ((OrderedInvertedMask == fcPosInf || OrderedInvertedMask == fcNegInf) && |
965 | (IsOrdered || IsUnordered) && !IsStrict) { |
966 | // is.fpclass(x, ~fcPosInf) -> fcmp one x, +inf |
967 | // is.fpclass(x, ~fcNegInf) -> fcmp one x, -inf |
968 | // is.fpclass(x, ~fcPosInf|fcNan) -> fcmp une x, +inf |
969 | // is.fpclass(x, ~fcNegInf|fcNan) -> fcmp une x, -inf |
970 | Constant *Inf = ConstantFP::getInfinity(Ty: Src0->getType(), |
971 | Negative: OrderedInvertedMask == fcNegInf); |
972 | Value *NeInf = IsUnordered ? Builder.CreateFCmpUNE(LHS: Src0, RHS: Inf) |
973 | : Builder.CreateFCmpONE(LHS: Src0, RHS: Inf); |
974 | NeInf->takeName(V: &II); |
975 | return replaceInstUsesWith(I&: II, V: NeInf); |
976 | } |
977 | |
978 | if (Mask == fcNan && !IsStrict) { |
979 | // Equivalent of isnan. Replace with standard fcmp if we don't care about FP |
980 | // exceptions. |
981 | Value *IsNan = |
982 | Builder.CreateFCmpUNO(LHS: Src0, RHS: ConstantFP::getZero(Ty: Src0->getType())); |
983 | IsNan->takeName(V: &II); |
984 | return replaceInstUsesWith(I&: II, V: IsNan); |
985 | } |
986 | |
987 | if (Mask == (~fcNan & fcAllFlags) && !IsStrict) { |
988 | // Equivalent of !isnan. Replace with standard fcmp. |
989 | Value *FCmp = |
990 | Builder.CreateFCmpORD(LHS: Src0, RHS: ConstantFP::getZero(Ty: Src0->getType())); |
991 | FCmp->takeName(V: &II); |
992 | return replaceInstUsesWith(I&: II, V: FCmp); |
993 | } |
994 | |
995 | FCmpInst::Predicate PredType = FCmpInst::BAD_FCMP_PREDICATE; |
996 | |
997 | // Try to replace with an fcmp with 0 |
998 | // |
999 | // is.fpclass(x, fcZero) -> fcmp oeq x, 0.0 |
1000 | // is.fpclass(x, fcZero | fcNan) -> fcmp ueq x, 0.0 |
1001 | // is.fpclass(x, ~fcZero & ~fcNan) -> fcmp one x, 0.0 |
1002 | // is.fpclass(x, ~fcZero) -> fcmp une x, 0.0 |
1003 | // |
1004 | // is.fpclass(x, fcPosSubnormal | fcPosNormal | fcPosInf) -> fcmp ogt x, 0.0 |
1005 | // is.fpclass(x, fcPositive | fcNegZero) -> fcmp oge x, 0.0 |
1006 | // |
1007 | // is.fpclass(x, fcNegSubnormal | fcNegNormal | fcNegInf) -> fcmp olt x, 0.0 |
1008 | // is.fpclass(x, fcNegative | fcPosZero) -> fcmp ole x, 0.0 |
1009 | // |
1010 | if (!IsStrict && (IsOrdered || IsUnordered) && |
1011 | (PredType = fpclassTestIsFCmp0(Mask: OrderedMask, F: *II.getFunction(), |
1012 | Ty: Src0->getType())) != |
1013 | FCmpInst::BAD_FCMP_PREDICATE) { |
1014 | Constant *Zero = ConstantFP::getZero(Ty: Src0->getType()); |
1015 | // Equivalent of == 0. |
1016 | Value *FCmp = Builder.CreateFCmp( |
1017 | P: IsUnordered ? FCmpInst::getUnorderedPredicate(Pred: PredType) : PredType, |
1018 | LHS: Src0, RHS: Zero); |
1019 | |
1020 | FCmp->takeName(V: &II); |
1021 | return replaceInstUsesWith(I&: II, V: FCmp); |
1022 | } |
1023 | |
1024 | KnownFPClass Known = computeKnownFPClass(Val: Src0, Interested: Mask, CtxI: &II); |
1025 | |
1026 | // Clear test bits we know must be false from the source value. |
1027 | // fp_class (nnan x), qnan|snan|other -> fp_class (nnan x), other |
1028 | // fp_class (ninf x), ninf|pinf|other -> fp_class (ninf x), other |
1029 | if ((Mask & Known.KnownFPClasses) != Mask) { |
1030 | II.setArgOperand( |
1031 | i: 1, v: ConstantInt::get(Ty: Src1->getType(), V: Mask & Known.KnownFPClasses)); |
1032 | return &II; |
1033 | } |
1034 | |
1035 | // If none of the tests which can return false are possible, fold to true. |
1036 | // fp_class (nnan x), ~(qnan|snan) -> true |
1037 | // fp_class (ninf x), ~(ninf|pinf) -> true |
1038 | if (Mask == Known.KnownFPClasses) |
1039 | return replaceInstUsesWith(I&: II, V: ConstantInt::get(Ty: II.getType(), V: true)); |
1040 | |
1041 | return nullptr; |
1042 | } |
1043 | |
1044 | static std::optional<bool> getKnownSign(Value *Op, const SimplifyQuery &SQ) { |
1045 | KnownBits Known = computeKnownBits(V: Op, /*Depth=*/0, Q: SQ); |
1046 | if (Known.isNonNegative()) |
1047 | return false; |
1048 | if (Known.isNegative()) |
1049 | return true; |
1050 | |
1051 | Value *X, *Y; |
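// For a sub nsw X, Y the result is negative exactly when X is signed-less-than
// Y, so check whether a dominating condition already decides that comparison.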
1052 | if (match(V: Op, P: m_NSWSub(L: m_Value(V&: X), R: m_Value(V&: Y)))) |
1053 | return isImpliedByDomCondition(Pred: ICmpInst::ICMP_SLT, LHS: X, RHS: Y, ContextI: SQ.CxtI, DL: SQ.DL); |
1054 | |
1055 | return std::nullopt; |
1056 | } |
1057 | |
1058 | static std::optional<bool> getKnownSignOrZero(Value *Op, |
1059 | const SimplifyQuery &SQ) { |
1060 | if (std::optional<bool> Sign = getKnownSign(Op, SQ)) |
1061 | return Sign; |
1062 | |
1063 | Value *X, *Y; |
1064 | if (match(V: Op, P: m_NSWSub(L: m_Value(V&: X), R: m_Value(V&: Y)))) |
1065 | return isImpliedByDomCondition(Pred: ICmpInst::ICMP_SLE, LHS: X, RHS: Y, ContextI: SQ.CxtI, DL: SQ.DL); |
1066 | |
1067 | return std::nullopt; |
1068 | } |
1069 | |
1070 | /// Return true if two values \p Op0 and \p Op1 are known to have the same sign. |
1071 | static bool signBitMustBeTheSame(Value *Op0, Value *Op1, |
1072 | const SimplifyQuery &SQ) { |
1073 | std::optional<bool> Known1 = getKnownSign(Op: Op1, SQ); |
1074 | if (!Known1) |
1075 | return false; |
1076 | std::optional<bool> Known0 = getKnownSign(Op: Op0, SQ); |
1077 | if (!Known0) |
1078 | return false; |
1079 | return *Known0 == *Known1; |
1080 | } |
1081 | |
1082 | /// Try to canonicalize min/max(X + C0, C1) as min/max(X, C1 - C0) + C0. This |
1083 | /// can trigger other combines. |
1084 | static Instruction *moveAddAfterMinMax(IntrinsicInst *II, |
1085 | InstCombiner::BuilderTy &Builder) { |
1086 | Intrinsic::ID MinMaxID = II->getIntrinsicID(); |
1087 | assert((MinMaxID == Intrinsic::smax || MinMaxID == Intrinsic::smin || |
1088 | MinMaxID == Intrinsic::umax || MinMaxID == Intrinsic::umin) && |
1089 | "Expected a min or max intrinsic" ); |
1090 | |
1091 | // TODO: Match vectors with undef elements, but undef may not propagate. |
1092 | Value *Op0 = II->getArgOperand(i: 0), *Op1 = II->getArgOperand(i: 1); |
1093 | Value *X; |
1094 | const APInt *C0, *C1; |
1095 | if (!match(V: Op0, P: m_OneUse(SubPattern: m_Add(L: m_Value(V&: X), R: m_APInt(Res&: C0)))) || |
1096 | !match(V: Op1, P: m_APInt(Res&: C1))) |
1097 | return nullptr; |
1098 | |
1099 | // Check for necessary no-wrap and overflow constraints. |
1100 | bool IsSigned = MinMaxID == Intrinsic::smax || MinMaxID == Intrinsic::smin; |
1101 | auto *Add = cast<BinaryOperator>(Val: Op0); |
1102 | if ((IsSigned && !Add->hasNoSignedWrap()) || |
1103 | (!IsSigned && !Add->hasNoUnsignedWrap())) |
1104 | return nullptr; |
1105 | |
1106 | // If the constant difference overflows, then instsimplify should reduce the |
1107 | // min/max to the add or C1. |
1108 | bool Overflow; |
1109 | APInt CDiff = |
1110 | IsSigned ? C1->ssub_ov(RHS: *C0, Overflow) : C1->usub_ov(RHS: *C0, Overflow); |
1111 | assert(!Overflow && "Expected simplify of min/max" ); |
1112 | |
1113 | // min/max (add X, C0), C1 --> add (min/max X, C1 - C0), C0 |
1114 | // Note: the "mismatched" no-overflow setting does not propagate. |
1115 | Constant *NewMinMaxC = ConstantInt::get(Ty: II->getType(), V: CDiff); |
1116 | Value *NewMinMax = Builder.CreateBinaryIntrinsic(ID: MinMaxID, LHS: X, RHS: NewMinMaxC); |
1117 | return IsSigned ? BinaryOperator::CreateNSWAdd(V1: NewMinMax, V2: Add->getOperand(i_nocapture: 1)) |
1118 | : BinaryOperator::CreateNUWAdd(V1: NewMinMax, V2: Add->getOperand(i_nocapture: 1)); |
1119 | } |
1120 | /// Match a sadd_sat or ssub_sat which is using min/max to clamp the value. |
1121 | Instruction *InstCombinerImpl::matchSAddSubSat(IntrinsicInst &MinMax1) { |
1122 | Type *Ty = MinMax1.getType(); |
1123 | |
1124 | // We are looking for a tree of: |
1125 | // max(INT_MIN, min(INT_MAX, add(sext(A), sext(B)))) |
1126 | // Where the min and max could be reversed |
1127 | Instruction *MinMax2; |
1128 | BinaryOperator *AddSub; |
1129 | const APInt *MinValue, *MaxValue; |
1130 | if (match(V: &MinMax1, P: m_SMin(L: m_Instruction(I&: MinMax2), R: m_APInt(Res&: MaxValue)))) { |
1131 | if (!match(V: MinMax2, P: m_SMax(L: m_BinOp(I&: AddSub), R: m_APInt(Res&: MinValue)))) |
1132 | return nullptr; |
1133 | } else if (match(V: &MinMax1, |
1134 | P: m_SMax(L: m_Instruction(I&: MinMax2), R: m_APInt(Res&: MinValue)))) { |
1135 | if (!match(V: MinMax2, P: m_SMin(L: m_BinOp(I&: AddSub), R: m_APInt(Res&: MaxValue)))) |
1136 | return nullptr; |
1137 | } else |
1138 | return nullptr; |
1139 | |
1140 | // Check that the constants clamp a saturate, and that the new type would be |
1141 | // sensible to convert to. |
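// For an N-bit saturate the clamp bounds must be the INT_MAX and INT_MIN of
// the narrow type, i.e. MaxValue + 1 is a power of two and MinValue equals
// -(MaxValue + 1).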
1142 | if (!(*MaxValue + 1).isPowerOf2() || -*MinValue != *MaxValue + 1) |
1143 | return nullptr; |
1144 | // In what bitwidth can this be treated as saturating arithmetic? |
1145 | unsigned NewBitWidth = (*MaxValue + 1).logBase2() + 1; |
1146 | // FIXME: This isn't quite right for vectors, but using the scalar type is a |
1147 | // good first approximation for what should be done there. |
1148 | if (!shouldChangeType(FromBitWidth: Ty->getScalarType()->getIntegerBitWidth(), ToBitWidth: NewBitWidth)) |
1149 | return nullptr; |
1150 | |
1151 | // Also make sure that the inner min/max and the add/sub have one use. |
1152 | if (!MinMax2->hasOneUse() || !AddSub->hasOneUse()) |
1153 | return nullptr; |
1154 | |
1155 | // Create the new type (which can be a vector type) |
1156 | Type *NewTy = Ty->getWithNewBitWidth(NewBitWidth); |
1157 | |
1158 | Intrinsic::ID IntrinsicID; |
1159 | if (AddSub->getOpcode() == Instruction::Add) |
1160 | IntrinsicID = Intrinsic::sadd_sat; |
1161 | else if (AddSub->getOpcode() == Instruction::Sub) |
1162 | IntrinsicID = Intrinsic::ssub_sat; |
1163 | else |
1164 | return nullptr; |
1165 | |
1166 | // The two operands of the add/sub must be nsw-truncatable to the NewTy. This |
1167 | // is usually achieved via a sext from a smaller type. |
1168 | if (ComputeMaxSignificantBits(Op: AddSub->getOperand(i_nocapture: 0), Depth: 0, CxtI: AddSub) > |
1169 | NewBitWidth || |
1170 | ComputeMaxSignificantBits(Op: AddSub->getOperand(i_nocapture: 1), Depth: 0, CxtI: AddSub) > NewBitWidth) |
1171 | return nullptr; |
1172 | |
1173 | // Finally create and return the sat intrinsic, truncated to the new type |
1174 | Function *F = Intrinsic::getDeclaration(M: MinMax1.getModule(), id: IntrinsicID, Tys: NewTy); |
1175 | Value *AT = Builder.CreateTrunc(V: AddSub->getOperand(i_nocapture: 0), DestTy: NewTy); |
1176 | Value *BT = Builder.CreateTrunc(V: AddSub->getOperand(i_nocapture: 1), DestTy: NewTy); |
1177 | Value *Sat = Builder.CreateCall(Callee: F, Args: {AT, BT}); |
1178 | return CastInst::Create(Instruction::SExt, S: Sat, Ty); |
1179 | } |
1180 | |
1181 | |
1182 | /// If we have a clamp pattern like max (min X, 42), 41 -- where the output |
1183 | /// can only be one of two possible constant values -- turn that into a select |
1184 | /// of constants. |
1185 | static Instruction *foldClampRangeOfTwo(IntrinsicInst *II, |
1186 | InstCombiner::BuilderTy &Builder) { |
1187 | Value *I0 = II->getArgOperand(i: 0), *I1 = II->getArgOperand(i: 1); |
1188 | Value *X; |
1189 | const APInt *C0, *C1; |
1190 | if (!match(V: I1, P: m_APInt(Res&: C1)) || !I0->hasOneUse()) |
1191 | return nullptr; |
1192 | |
1193 | CmpInst::Predicate Pred = CmpInst::BAD_ICMP_PREDICATE; |
1194 | switch (II->getIntrinsicID()) { |
1195 | case Intrinsic::smax: |
1196 | if (match(V: I0, P: m_SMin(L: m_Value(V&: X), R: m_APInt(Res&: C0))) && *C0 == *C1 + 1) |
1197 | Pred = ICmpInst::ICMP_SGT; |
1198 | break; |
1199 | case Intrinsic::smin: |
1200 | if (match(V: I0, P: m_SMax(L: m_Value(V&: X), R: m_APInt(Res&: C0))) && *C1 == *C0 + 1) |
1201 | Pred = ICmpInst::ICMP_SLT; |
1202 | break; |
1203 | case Intrinsic::umax: |
1204 | if (match(V: I0, P: m_UMin(L: m_Value(V&: X), R: m_APInt(Res&: C0))) && *C0 == *C1 + 1) |
1205 | Pred = ICmpInst::ICMP_UGT; |
1206 | break; |
1207 | case Intrinsic::umin: |
1208 | if (match(V: I0, P: m_UMax(L: m_Value(V&: X), R: m_APInt(Res&: C0))) && *C1 == *C0 + 1) |
1209 | Pred = ICmpInst::ICMP_ULT; |
1210 | break; |
1211 | default: |
1212 | llvm_unreachable("Expected min/max intrinsic" ); |
1213 | } |
1214 | if (Pred == CmpInst::BAD_ICMP_PREDICATE) |
1215 | return nullptr; |
1216 | |
1217 | // max (min X, 42), 41 --> X > 41 ? 42 : 41 |
1218 | // min (max X, 42), 43 --> X < 43 ? 42 : 43 |
1219 | Value *Cmp = Builder.CreateICmp(P: Pred, LHS: X, RHS: I1); |
1220 | return SelectInst::Create(C: Cmp, S1: ConstantInt::get(Ty: II->getType(), V: *C0), S2: I1); |
1221 | } |
1222 | |
1223 | /// If this min/max has a constant operand and an operand that is a matching |
1224 | /// min/max with a constant operand, constant-fold the 2 constant operands. |
1225 | static Value *reassociateMinMaxWithConstants(IntrinsicInst *II, |
1226 | IRBuilderBase &Builder, |
1227 | const SimplifyQuery &SQ) { |
1228 | Intrinsic::ID MinMaxID = II->getIntrinsicID(); |
1229 | auto *LHS = dyn_cast<MinMaxIntrinsic>(Val: II->getArgOperand(i: 0)); |
1230 | if (!LHS) |
1231 | return nullptr; |
1232 | |
1233 | Constant *C0, *C1; |
1234 | if (!match(V: LHS->getArgOperand(i: 1), P: m_ImmConstant(C&: C0)) || |
1235 | !match(V: II->getArgOperand(i: 1), P: m_ImmConstant(C&: C1))) |
1236 | return nullptr; |
1237 | |
1238 | // max (max X, C0), C1 --> max X, (max C0, C1) |
1239 | // min (min X, C0), C1 --> min X, (min C0, C1) |
1240 | // umax (smax X, nneg C0), nneg C1 --> smax X, (umax C0, C1) |
1241 | // smin (umin X, nneg C0), nneg C1 --> umin X, (smin C0, C1) |
1242 | Intrinsic::ID InnerMinMaxID = LHS->getIntrinsicID(); |
1243 | if (InnerMinMaxID != MinMaxID && |
1244 | !(((MinMaxID == Intrinsic::umax && InnerMinMaxID == Intrinsic::smax) || |
1245 | (MinMaxID == Intrinsic::smin && InnerMinMaxID == Intrinsic::umin)) && |
1246 | isKnownNonNegative(V: C0, SQ) && isKnownNonNegative(V: C1, SQ))) |
1247 | return nullptr; |
1248 | |
1249 | ICmpInst::Predicate Pred = MinMaxIntrinsic::getPredicate(ID: MinMaxID); |
1250 | Value *CondC = Builder.CreateICmp(P: Pred, LHS: C0, RHS: C1); |
1251 | Value *NewC = Builder.CreateSelect(C: CondC, True: C0, False: C1); |
1252 | return Builder.CreateIntrinsic(ID: InnerMinMaxID, Types: II->getType(), |
1253 | Args: {LHS->getArgOperand(i: 0), NewC}); |
1254 | } |
1255 | |
1256 | /// If this min/max has a matching min/max operand with a constant, try to push |
1257 | /// the constant operand into this instruction. This can enable more folds. |
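/// For example: umax(umax(X, 42), Y) --> umax(umax(X, Y), 42), which exposes
/// the constant to the folds above.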
1258 | static Instruction * |
1259 | reassociateMinMaxWithConstantInOperand(IntrinsicInst *II, |
1260 | InstCombiner::BuilderTy &Builder) { |
1261 | // Match and capture a min/max operand candidate. |
1262 | Value *X, *Y; |
1263 | Constant *C; |
1264 | Instruction *Inner; |
1265 | if (!match(V: II, P: m_c_MaxOrMin(L: m_OneUse(SubPattern: m_CombineAnd( |
1266 | L: m_Instruction(I&: Inner), |
1267 | R: m_MaxOrMin(L: m_Value(V&: X), R: m_ImmConstant(C)))), |
1268 | R: m_Value(V&: Y)))) |
1269 | return nullptr; |
1270 | |
1271 | // The inner op must match. Check for constants to avoid infinite loops. |
1272 | Intrinsic::ID MinMaxID = II->getIntrinsicID(); |
1273 | auto *InnerMM = dyn_cast<IntrinsicInst>(Val: Inner); |
1274 | if (!InnerMM || InnerMM->getIntrinsicID() != MinMaxID || |
1275 | match(V: X, P: m_ImmConstant()) || match(V: Y, P: m_ImmConstant())) |
1276 | return nullptr; |
1277 | |
1278 | // max (max X, C), Y --> max (max X, Y), C |
1279 | Function *MinMax = |
1280 | Intrinsic::getDeclaration(M: II->getModule(), id: MinMaxID, Tys: II->getType()); |
1281 | Value *NewInner = Builder.CreateBinaryIntrinsic(ID: MinMaxID, LHS: X, RHS: Y); |
1282 | NewInner->takeName(V: Inner); |
1283 | return CallInst::Create(Func: MinMax, Args: {NewInner, C}); |
1284 | } |
1285 | |
1286 | /// Reduce a sequence of min/max intrinsics with a common operand. |
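/// For example: umin(umin(A, B), umin(B, C)) --> umin(umin(A, B), C) (or the
/// symmetric form, depending on which inner call has other uses).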
1287 | static Instruction *factorizeMinMaxTree(IntrinsicInst *II) { |
  // Match 3 of the same min/max ops. Example: umin(umin(A, B), umin(C, D)).
1289 | auto *LHS = dyn_cast<IntrinsicInst>(Val: II->getArgOperand(i: 0)); |
1290 | auto *RHS = dyn_cast<IntrinsicInst>(Val: II->getArgOperand(i: 1)); |
1291 | Intrinsic::ID MinMaxID = II->getIntrinsicID(); |
1292 | if (!LHS || !RHS || LHS->getIntrinsicID() != MinMaxID || |
1293 | RHS->getIntrinsicID() != MinMaxID || |
1294 | (!LHS->hasOneUse() && !RHS->hasOneUse())) |
1295 | return nullptr; |
1296 | |
1297 | Value *A = LHS->getArgOperand(i: 0); |
1298 | Value *B = LHS->getArgOperand(i: 1); |
1299 | Value *C = RHS->getArgOperand(i: 0); |
1300 | Value *D = RHS->getArgOperand(i: 1); |
1301 | |
1302 | // Look for a common operand. |
1303 | Value *MinMaxOp = nullptr; |
1304 | Value *ThirdOp = nullptr; |
1305 | if (LHS->hasOneUse()) { |
1306 | // If the LHS is only used in this chain and the RHS is used outside of it, |
1307 | // reuse the RHS min/max because that will eliminate the LHS. |
1308 | if (D == A || C == A) { |
1309 | // min(min(a, b), min(c, a)) --> min(min(c, a), b) |
1310 | // min(min(a, b), min(a, d)) --> min(min(a, d), b) |
1311 | MinMaxOp = RHS; |
1312 | ThirdOp = B; |
1313 | } else if (D == B || C == B) { |
1314 | // min(min(a, b), min(c, b)) --> min(min(c, b), a) |
1315 | // min(min(a, b), min(b, d)) --> min(min(b, d), a) |
1316 | MinMaxOp = RHS; |
1317 | ThirdOp = A; |
1318 | } |
1319 | } else { |
    assert(RHS->hasOneUse() && "Expected one-use operand");
1321 | // Reuse the LHS. This will eliminate the RHS. |
1322 | if (D == A || D == B) { |
1323 | // min(min(a, b), min(c, a)) --> min(min(a, b), c) |
1324 | // min(min(a, b), min(c, b)) --> min(min(a, b), c) |
1325 | MinMaxOp = LHS; |
1326 | ThirdOp = C; |
1327 | } else if (C == A || C == B) { |
      // min(min(a, b), min(a, d)) --> min(min(a, b), d)
      // min(min(a, b), min(b, d)) --> min(min(a, b), d)
1330 | MinMaxOp = LHS; |
1331 | ThirdOp = D; |
1332 | } |
1333 | } |
1334 | |
1335 | if (!MinMaxOp || !ThirdOp) |
1336 | return nullptr; |
1337 | |
1338 | Module *Mod = II->getModule(); |
1339 | Function *MinMax = Intrinsic::getDeclaration(M: Mod, id: MinMaxID, Tys: II->getType()); |
1340 | return CallInst::Create(Func: MinMax, Args: { MinMaxOp, ThirdOp }); |
1341 | } |
1342 | |
1343 | /// If all arguments of the intrinsic are unary shuffles with the same mask, |
1344 | /// try to shuffle after the intrinsic. |
1345 | static Instruction * |
1346 | foldShuffledIntrinsicOperands(IntrinsicInst *II, |
1347 | InstCombiner::BuilderTy &Builder) { |
  // TODO: This should be extended to handle other intrinsics like ctpop,
  // bswap, etc. Use llvm::isTriviallyVectorizable() and related to determine
  // which intrinsics are safe to shuffle?
1351 | switch (II->getIntrinsicID()) { |
1352 | case Intrinsic::smax: |
1353 | case Intrinsic::smin: |
1354 | case Intrinsic::umax: |
1355 | case Intrinsic::umin: |
1356 | case Intrinsic::fma: |
1357 | case Intrinsic::fshl: |
1358 | case Intrinsic::fshr: |
1359 | break; |
1360 | default: |
1361 | return nullptr; |
1362 | } |
1363 | |
1364 | Value *X; |
1365 | ArrayRef<int> Mask; |
1366 | if (!match(V: II->getArgOperand(i: 0), |
1367 | P: m_Shuffle(v1: m_Value(V&: X), v2: m_Undef(), mask: m_Mask(Mask)))) |
1368 | return nullptr; |
1369 | |
1370 | // At least 1 operand must have 1 use because we are creating 2 instructions. |
1371 | if (none_of(Range: II->args(), P: [](Value *V) { return V->hasOneUse(); })) |
1372 | return nullptr; |
1373 | |
1374 | // See if all arguments are shuffled with the same mask. |
1375 | SmallVector<Value *, 4> NewArgs(II->arg_size()); |
1376 | NewArgs[0] = X; |
1377 | Type *SrcTy = X->getType(); |
1378 | for (unsigned i = 1, e = II->arg_size(); i != e; ++i) { |
1379 | if (!match(V: II->getArgOperand(i), |
1380 | P: m_Shuffle(v1: m_Value(V&: X), v2: m_Undef(), mask: m_SpecificMask(Mask))) || |
1381 | X->getType() != SrcTy) |
1382 | return nullptr; |
1383 | NewArgs[i] = X; |
1384 | } |
1385 | |
1386 | // intrinsic (shuf X, M), (shuf Y, M), ... --> shuf (intrinsic X, Y, ...), M |
1387 | Instruction *FPI = isa<FPMathOperator>(Val: II) ? II : nullptr; |
1388 | Value *NewIntrinsic = |
1389 | Builder.CreateIntrinsic(ID: II->getIntrinsicID(), Types: SrcTy, Args: NewArgs, FMFSource: FPI); |
1390 | return new ShuffleVectorInst(NewIntrinsic, Mask); |
1391 | } |
1392 | |
/// Fold the following cases; this accepts both bswap and bitreverse intrinsics:
1394 | /// bswap(logic_op(bswap(x), y)) --> logic_op(x, bswap(y)) |
1395 | /// bswap(logic_op(bswap(x), bswap(y))) --> logic_op(x, y) (ignores multiuse) |
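/// For example: bswap(xor(bswap(X), Y)) --> xor(X, bswap(Y)).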
1396 | template <Intrinsic::ID IntrID> |
1397 | static Instruction *foldBitOrderCrossLogicOp(Value *V, |
1398 | InstCombiner::BuilderTy &Builder) { |
1399 | static_assert(IntrID == Intrinsic::bswap || IntrID == Intrinsic::bitreverse, |
1400 | "This helper only supports BSWAP and BITREVERSE intrinsics" ); |
1401 | |
1402 | Value *X, *Y; |
1403 | // Find bitwise logic op. Check that it is a BinaryOperator explicitly so we |
1404 | // don't match ConstantExpr that aren't meaningful for this transform. |
1405 | if (match(V, P: m_OneUse(SubPattern: m_BitwiseLogic(L: m_Value(V&: X), R: m_Value(V&: Y)))) && |
1406 | isa<BinaryOperator>(Val: V)) { |
1407 | Value *OldReorderX, *OldReorderY; |
1408 | BinaryOperator::BinaryOps Op = cast<BinaryOperator>(Val: V)->getOpcode(); |
1409 | |
1410 | // If both X and Y are bswap/bitreverse, the transform reduces the number |
1411 | // of instructions even if there's multiuse. |
    // If only one operand is bswap/bitreverse, we need to ensure that operand
    // has only one use.
1414 | if (match(X, m_Intrinsic<IntrID>(m_Value(V&: OldReorderX))) && |
1415 | match(Y, m_Intrinsic<IntrID>(m_Value(V&: OldReorderY)))) { |
1416 | return BinaryOperator::Create(Op, S1: OldReorderX, S2: OldReorderY); |
1417 | } |
1418 | |
1419 | if (match(X, m_OneUse(m_Intrinsic<IntrID>(m_Value(V&: OldReorderX))))) { |
1420 | Value *NewReorder = Builder.CreateUnaryIntrinsic(ID: IntrID, V: Y); |
1421 | return BinaryOperator::Create(Op, S1: OldReorderX, S2: NewReorder); |
1422 | } |
1423 | |
1424 | if (match(Y, m_OneUse(m_Intrinsic<IntrID>(m_Value(V&: OldReorderY))))) { |
1425 | Value *NewReorder = Builder.CreateUnaryIntrinsic(ID: IntrID, V: X); |
1426 | return BinaryOperator::Create(Op, S1: NewReorder, S2: OldReorderY); |
1427 | } |
1428 | } |
1429 | return nullptr; |
1430 | } |
1431 | |
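/// If \p Arg is a lane permutation that the caller is allowed to ignore (a
/// vector.reverse, or a single-source shuffle that uses every lane exactly
/// once), return the unshuffled vector; otherwise return nullptr. For example,
/// a lane-order-insensitive reduction over shufflevector X, poison, <3,2,1,0>
/// can instead be computed over X directly.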
1432 | static Value *simplifyReductionOperand(Value *Arg, bool CanReorderLanes) { |
1433 | if (!CanReorderLanes) |
1434 | return nullptr; |
1435 | |
1436 | Value *V; |
1437 | if (match(V: Arg, P: m_VecReverse(Op0: m_Value(V)))) |
1438 | return V; |
1439 | |
1440 | ArrayRef<int> Mask; |
1441 | if (!isa<FixedVectorType>(Val: Arg->getType()) || |
1442 | !match(V: Arg, P: m_Shuffle(v1: m_Value(V), v2: m_Undef(), mask: m_Mask(Mask))) || |
1443 | !cast<ShuffleVectorInst>(Val: Arg)->isSingleSource()) |
1444 | return nullptr; |
1445 | |
1446 | int Sz = Mask.size(); |
1447 | SmallBitVector UsedIndices(Sz); |
1448 | for (int Idx : Mask) { |
1449 | if (Idx == PoisonMaskElem || UsedIndices.test(Idx)) |
1450 | return nullptr; |
1451 | UsedIndices.set(Idx); |
1452 | } |
1453 | |
1454 | // Can remove shuffle iff just shuffled elements, no repeats, undefs, or |
1455 | // other changes. |
1456 | return UsedIndices.all() ? V : nullptr; |
1457 | } |
1458 | |
/// Fold an unsigned minimum of trailing or leading zero bit counts:
///   umin(cttz(CtOp, ZeroUndef), ConstOp) --> cttz(CtOp | (1 << ConstOp))
///   umin(ctlz(CtOp, ZeroUndef), ConstOp) --> ctlz(CtOp | (SignedMin >> ConstOp))
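/// For example, with i32 operands:
///   umin(cttz(X, ZeroUndef), 8) --> cttz(X | 0x100, /*ZeroUndef=*/true)
/// because setting bit 8 caps the trailing-zero count at 8 and guarantees the
/// argument is non-zero.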
1463 | template <Intrinsic::ID IntrID> |
1464 | static Value * |
1465 | foldMinimumOverTrailingOrLeadingZeroCount(Value *I0, Value *I1, |
1466 | const DataLayout &DL, |
1467 | InstCombiner::BuilderTy &Builder) { |
1468 | static_assert(IntrID == Intrinsic::cttz || IntrID == Intrinsic::ctlz, |
1469 | "This helper only supports cttz and ctlz intrinsics" ); |
1470 | |
1471 | Value *CtOp; |
1472 | Value *ZeroUndef; |
1473 | if (!match(I0, |
1474 | m_OneUse(m_Intrinsic<IntrID>(m_Value(V&: CtOp), m_Value(V&: ZeroUndef))))) |
1475 | return nullptr; |
1476 | |
1477 | unsigned BitWidth = I1->getType()->getScalarSizeInBits(); |
1478 | auto LessBitWidth = [BitWidth](auto &C) { return C.ult(BitWidth); }; |
1479 | if (!match(I1, m_CheckedInt(LessBitWidth))) |
1480 | // We have a constant >= BitWidth (which can be handled by CVP) |
1481 | // or a non-splat vector with elements < and >= BitWidth |
1482 | return nullptr; |
1483 | |
1484 | Type *Ty = I1->getType(); |
1485 | Constant *NewConst = ConstantFoldBinaryOpOperands( |
1486 | Opcode: IntrID == Intrinsic::cttz ? Instruction::Shl : Instruction::LShr, |
1487 | LHS: IntrID == Intrinsic::cttz |
1488 | ? ConstantInt::get(Ty, V: 1) |
1489 | : ConstantInt::get(Ty, V: APInt::getSignedMinValue(numBits: BitWidth)), |
1490 | RHS: cast<Constant>(Val: I1), DL); |
1491 | return Builder.CreateBinaryIntrinsic( |
1492 | ID: IntrID, LHS: Builder.CreateOr(LHS: CtOp, RHS: NewConst), |
1493 | RHS: ConstantInt::getTrue(Ty: ZeroUndef->getType())); |
1494 | } |
1495 | |
1496 | /// CallInst simplification. This mostly only handles folding of intrinsic |
1497 | /// instructions. For normal calls, it allows visitCallBase to do the heavy |
1498 | /// lifting. |
1499 | Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { |
1500 | // Don't try to simplify calls without uses. It will not do anything useful, |
1501 | // but will result in the following folds being skipped. |
1502 | if (!CI.use_empty()) { |
1503 | SmallVector<Value *, 4> Args; |
1504 | Args.reserve(N: CI.arg_size()); |
1505 | for (Value *Op : CI.args()) |
1506 | Args.push_back(Elt: Op); |
1507 | if (Value *V = simplifyCall(Call: &CI, Callee: CI.getCalledOperand(), Args, |
1508 | Q: SQ.getWithInstruction(I: &CI))) |
1509 | return replaceInstUsesWith(I&: CI, V); |
1510 | } |
1511 | |
1512 | if (Value *FreedOp = getFreedOperand(CB: &CI, TLI: &TLI)) |
1513 | return visitFree(FI&: CI, FreedOp); |
1514 | |
1515 | // If the caller function (i.e. us, the function that contains this CallInst) |
1516 | // is nounwind, mark the call as nounwind, even if the callee isn't. |
1517 | if (CI.getFunction()->doesNotThrow() && !CI.doesNotThrow()) { |
1518 | CI.setDoesNotThrow(); |
1519 | return &CI; |
1520 | } |
1521 | |
1522 | IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: &CI); |
1523 | if (!II) return visitCallBase(Call&: CI); |
1524 | |
  // For atomic unordered mem intrinsics, if the length is not positive or is
  // not a multiple of the element size, then the behavior is undefined.
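  // For example, an element-wise atomic memcpy with element size 4 and a
  // constant length of 7 can never be well-defined, so we insert a
  // non-terminator unreachable and erase the call.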
1527 | if (auto *AMI = dyn_cast<AtomicMemIntrinsic>(Val: II)) |
1528 | if (ConstantInt *NumBytes = dyn_cast<ConstantInt>(Val: AMI->getLength())) |
1529 | if (NumBytes->isNegative() || |
1530 | (NumBytes->getZExtValue() % AMI->getElementSizeInBytes() != 0)) { |
1531 | CreateNonTerminatorUnreachable(InsertAt: AMI); |
1532 | assert(AMI->getType()->isVoidTy() && |
1533 | "non void atomic unordered mem intrinsic" ); |
1534 | return eraseInstFromFunction(I&: *AMI); |
1535 | } |
1536 | |
1537 | // Intrinsics cannot occur in an invoke or a callbr, so handle them here |
1538 | // instead of in visitCallBase. |
1539 | if (auto *MI = dyn_cast<AnyMemIntrinsic>(Val: II)) { |
1540 | bool Changed = false; |
1541 | |
1542 | // memmove/cpy/set of zero bytes is a noop. |
1543 | if (Constant *NumBytes = dyn_cast<Constant>(Val: MI->getLength())) { |
1544 | if (NumBytes->isNullValue()) |
1545 | return eraseInstFromFunction(I&: CI); |
1546 | } |
1547 | |
1548 | // No other transformations apply to volatile transfers. |
1549 | if (auto *M = dyn_cast<MemIntrinsic>(Val: MI)) |
1550 | if (M->isVolatile()) |
1551 | return nullptr; |
1552 | |
    // If we have a memmove and the source operand is a constant global, then
    // the source and dest pointers can't alias, so we can change this into a
    // call to memcpy.
1556 | if (auto *MMI = dyn_cast<AnyMemMoveInst>(Val: MI)) { |
1557 | if (GlobalVariable *GVSrc = dyn_cast<GlobalVariable>(Val: MMI->getSource())) |
1558 | if (GVSrc->isConstant()) { |
1559 | Module *M = CI.getModule(); |
1560 | Intrinsic::ID MemCpyID = |
1561 | isa<AtomicMemMoveInst>(Val: MMI) |
1562 | ? Intrinsic::memcpy_element_unordered_atomic |
1563 | : Intrinsic::memcpy; |
1564 | Type *Tys[3] = { CI.getArgOperand(i: 0)->getType(), |
1565 | CI.getArgOperand(i: 1)->getType(), |
1566 | CI.getArgOperand(i: 2)->getType() }; |
1567 | CI.setCalledFunction(Intrinsic::getDeclaration(M, id: MemCpyID, Tys)); |
1568 | Changed = true; |
1569 | } |
1570 | } |
1571 | |
1572 | if (AnyMemTransferInst *MTI = dyn_cast<AnyMemTransferInst>(Val: MI)) { |
1573 | // memmove(x,x,size) -> noop. |
1574 | if (MTI->getSource() == MTI->getDest()) |
1575 | return eraseInstFromFunction(I&: CI); |
1576 | } |
1577 | |
1578 | // If we can determine a pointer alignment that is bigger than currently |
1579 | // set, update the alignment. |
1580 | if (auto *MTI = dyn_cast<AnyMemTransferInst>(Val: MI)) { |
1581 | if (Instruction *I = SimplifyAnyMemTransfer(MI: MTI)) |
1582 | return I; |
1583 | } else if (auto *MSI = dyn_cast<AnyMemSetInst>(Val: MI)) { |
1584 | if (Instruction *I = SimplifyAnyMemSet(MI: MSI)) |
1585 | return I; |
1586 | } |
1587 | |
1588 | if (Changed) return II; |
1589 | } |
1590 | |
1591 | // For fixed width vector result intrinsics, use the generic demanded vector |
1592 | // support. |
1593 | if (auto *IIFVTy = dyn_cast<FixedVectorType>(Val: II->getType())) { |
1594 | auto VWidth = IIFVTy->getNumElements(); |
1595 | APInt PoisonElts(VWidth, 0); |
1596 | APInt AllOnesEltMask(APInt::getAllOnes(numBits: VWidth)); |
1597 | if (Value *V = SimplifyDemandedVectorElts(V: II, DemandedElts: AllOnesEltMask, PoisonElts)) { |
1598 | if (V != II) |
1599 | return replaceInstUsesWith(I&: *II, V); |
1600 | return II; |
1601 | } |
1602 | } |
1603 | |
1604 | if (II->isCommutative()) { |
1605 | if (auto Pair = matchSymmetricPair(LHS: II->getOperand(i_nocapture: 0), RHS: II->getOperand(i_nocapture: 1))) { |
1606 | replaceOperand(I&: *II, OpNum: 0, V: Pair->first); |
1607 | replaceOperand(I&: *II, OpNum: 1, V: Pair->second); |
1608 | return II; |
1609 | } |
1610 | |
1611 | if (CallInst *NewCall = canonicalizeConstantArg0ToArg1(Call&: CI)) |
1612 | return NewCall; |
1613 | } |
1614 | |
  // An unused constrained FP intrinsic call may have a declared side effect,
  // which prevents it from being removed. In some cases, however, the side
  // effect is actually absent. To detect this case, call
  // simplifyConstrainedFPCall. If it returns a replacement, the call may be
  // removed.
1619 | if (CI.use_empty() && isa<ConstrainedFPIntrinsic>(Val: CI)) { |
1620 | if (simplifyConstrainedFPCall(Call: &CI, Q: SQ.getWithInstruction(I: &CI))) |
1621 | return eraseInstFromFunction(I&: CI); |
1622 | } |
1623 | |
1624 | Intrinsic::ID IID = II->getIntrinsicID(); |
1625 | switch (IID) { |
1626 | case Intrinsic::objectsize: { |
1627 | SmallVector<Instruction *> InsertedInstructions; |
1628 | if (Value *V = lowerObjectSizeCall(ObjectSize: II, DL, TLI: &TLI, AA, /*MustSucceed=*/false, |
1629 | InsertedInstructions: &InsertedInstructions)) { |
1630 | for (Instruction *Inserted : InsertedInstructions) |
1631 | Worklist.add(I: Inserted); |
1632 | return replaceInstUsesWith(I&: CI, V); |
1633 | } |
1634 | return nullptr; |
1635 | } |
1636 | case Intrinsic::abs: { |
1637 | Value *IIOperand = II->getArgOperand(i: 0); |
1638 | bool IntMinIsPoison = cast<Constant>(Val: II->getArgOperand(i: 1))->isOneValue(); |
1639 | |
1640 | // abs(-x) -> abs(x) |
1641 | // TODO: Copy nsw if it was present on the neg? |
1642 | Value *X; |
1643 | if (match(V: IIOperand, P: m_Neg(V: m_Value(V&: X)))) |
1644 | return replaceOperand(I&: *II, OpNum: 0, V: X); |
1645 | if (match(V: IIOperand, P: m_Select(C: m_Value(), L: m_Value(V&: X), R: m_Neg(V: m_Deferred(V: X))))) |
1646 | return replaceOperand(I&: *II, OpNum: 0, V: X); |
1647 | if (match(V: IIOperand, P: m_Select(C: m_Value(), L: m_Neg(V: m_Value(V&: X)), R: m_Deferred(V: X)))) |
1648 | return replaceOperand(I&: *II, OpNum: 0, V: X); |
1649 | |
1650 | Value *Y; |
1651 | // abs(a * abs(b)) -> abs(a * b) |
1652 | if (match(V: IIOperand, |
1653 | P: m_OneUse(SubPattern: m_c_Mul(L: m_Value(V&: X), |
1654 | R: m_Intrinsic<Intrinsic::abs>(Op0: m_Value(V&: Y)))))) { |
1655 | bool NSW = |
1656 | cast<Instruction>(Val: IIOperand)->hasNoSignedWrap() && IntMinIsPoison; |
1657 | auto *XY = NSW ? Builder.CreateNSWMul(LHS: X, RHS: Y) : Builder.CreateMul(LHS: X, RHS: Y); |
1658 | return replaceOperand(I&: *II, OpNum: 0, V: XY); |
1659 | } |
1660 | |
1661 | if (std::optional<bool> Known = |
1662 | getKnownSignOrZero(Op: IIOperand, SQ: SQ.getWithInstruction(I: II))) { |
      // abs(x) -> x if x >= 0 (includes abs(x-y) --> x - y where x >= y)
      // abs(x) -> x if x > 0 (includes abs(x-y) --> x - y where x > y)
1665 | if (!*Known) |
1666 | return replaceInstUsesWith(I&: *II, V: IIOperand); |
1667 | |
1668 | // abs(x) -> -x if x < 0 |
      // abs(x) -> -x if x <= 0 (includes abs(x-y) --> y - x where x <= y)
1670 | if (IntMinIsPoison) |
1671 | return BinaryOperator::CreateNSWNeg(Op: IIOperand); |
1672 | return BinaryOperator::CreateNeg(Op: IIOperand); |
1673 | } |
1674 | |
1675 | // abs (sext X) --> zext (abs X*) |
1676 | // Clear the IsIntMin (nsw) bit on the abs to allow narrowing. |
1677 | if (match(V: IIOperand, P: m_OneUse(SubPattern: m_SExt(Op: m_Value(V&: X))))) { |
1678 | Value *NarrowAbs = |
1679 | Builder.CreateBinaryIntrinsic(ID: Intrinsic::abs, LHS: X, RHS: Builder.getFalse()); |
1680 | return CastInst::Create(Instruction::ZExt, S: NarrowAbs, Ty: II->getType()); |
1681 | } |
1682 | |
1683 | // Match a complicated way to check if a number is odd/even: |
1684 | // abs (srem X, 2) --> and X, 1 |
1685 | const APInt *C; |
1686 | if (match(V: IIOperand, P: m_SRem(L: m_Value(V&: X), R: m_APInt(Res&: C))) && *C == 2) |
1687 | return BinaryOperator::CreateAnd(V1: X, V2: ConstantInt::get(Ty: II->getType(), V: 1)); |
1688 | |
1689 | break; |
1690 | } |
1691 | case Intrinsic::umin: { |
1692 | Value *I0 = II->getArgOperand(i: 0), *I1 = II->getArgOperand(i: 1); |
1693 | // umin(x, 1) == zext(x != 0) |
1694 | if (match(V: I1, P: m_One())) { |
1695 | assert(II->getType()->getScalarSizeInBits() != 1 && |
1696 | "Expected simplify of umin with max constant" ); |
1697 | Value *Zero = Constant::getNullValue(Ty: I0->getType()); |
1698 | Value *Cmp = Builder.CreateICmpNE(LHS: I0, RHS: Zero); |
1699 | return CastInst::Create(Instruction::ZExt, S: Cmp, Ty: II->getType()); |
1700 | } |
1701 | // umin(cttz(x), const) --> cttz(x | (1 << const)) |
1702 | if (Value *FoldedCttz = |
1703 | foldMinimumOverTrailingOrLeadingZeroCount<Intrinsic::cttz>( |
1704 | I0, I1, DL, Builder)) |
1705 | return replaceInstUsesWith(I&: *II, V: FoldedCttz); |
1706 | // umin(ctlz(x), const) --> ctlz(x | (SignedMin >> const)) |
1707 | if (Value *FoldedCtlz = |
1708 | foldMinimumOverTrailingOrLeadingZeroCount<Intrinsic::ctlz>( |
1709 | I0, I1, DL, Builder)) |
1710 | return replaceInstUsesWith(I&: *II, V: FoldedCtlz); |
1711 | [[fallthrough]]; |
1712 | } |
1713 | case Intrinsic::umax: { |
1714 | Value *I0 = II->getArgOperand(i: 0), *I1 = II->getArgOperand(i: 1); |
1715 | Value *X, *Y; |
1716 | if (match(V: I0, P: m_ZExt(Op: m_Value(V&: X))) && match(V: I1, P: m_ZExt(Op: m_Value(V&: Y))) && |
1717 | (I0->hasOneUse() || I1->hasOneUse()) && X->getType() == Y->getType()) { |
1718 | Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(ID: IID, LHS: X, RHS: Y); |
1719 | return CastInst::Create(Instruction::ZExt, S: NarrowMaxMin, Ty: II->getType()); |
1720 | } |
1721 | Constant *C; |
1722 | if (match(V: I0, P: m_ZExt(Op: m_Value(V&: X))) && match(V: I1, P: m_Constant(C)) && |
1723 | I0->hasOneUse()) { |
1724 | if (Constant *NarrowC = getLosslessUnsignedTrunc(C, TruncTy: X->getType())) { |
1725 | Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(ID: IID, LHS: X, RHS: NarrowC); |
1726 | return CastInst::Create(Instruction::ZExt, S: NarrowMaxMin, Ty: II->getType()); |
1727 | } |
1728 | } |
1729 | // If both operands of unsigned min/max are sign-extended, it is still ok |
1730 | // to narrow the operation. |
1731 | [[fallthrough]]; |
1732 | } |
1733 | case Intrinsic::smax: |
1734 | case Intrinsic::smin: { |
1735 | Value *I0 = II->getArgOperand(i: 0), *I1 = II->getArgOperand(i: 1); |
1736 | Value *X, *Y; |
1737 | if (match(V: I0, P: m_SExt(Op: m_Value(V&: X))) && match(V: I1, P: m_SExt(Op: m_Value(V&: Y))) && |
1738 | (I0->hasOneUse() || I1->hasOneUse()) && X->getType() == Y->getType()) { |
1739 | Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(ID: IID, LHS: X, RHS: Y); |
1740 | return CastInst::Create(Instruction::SExt, S: NarrowMaxMin, Ty: II->getType()); |
1741 | } |
1742 | |
1743 | Constant *C; |
1744 | if (match(V: I0, P: m_SExt(Op: m_Value(V&: X))) && match(V: I1, P: m_Constant(C)) && |
1745 | I0->hasOneUse()) { |
1746 | if (Constant *NarrowC = getLosslessSignedTrunc(C, TruncTy: X->getType())) { |
1747 | Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(ID: IID, LHS: X, RHS: NarrowC); |
1748 | return CastInst::Create(Instruction::SExt, S: NarrowMaxMin, Ty: II->getType()); |
1749 | } |
1750 | } |
1751 | |
1752 | // umin(i1 X, i1 Y) -> and i1 X, Y |
1753 | // smax(i1 X, i1 Y) -> and i1 X, Y |
1754 | if ((IID == Intrinsic::umin || IID == Intrinsic::smax) && |
1755 | II->getType()->isIntOrIntVectorTy(BitWidth: 1)) { |
1756 | return BinaryOperator::CreateAnd(V1: I0, V2: I1); |
1757 | } |
1758 | |
1759 | // umax(i1 X, i1 Y) -> or i1 X, Y |
1760 | // smin(i1 X, i1 Y) -> or i1 X, Y |
1761 | if ((IID == Intrinsic::umax || IID == Intrinsic::smin) && |
1762 | II->getType()->isIntOrIntVectorTy(BitWidth: 1)) { |
1763 | return BinaryOperator::CreateOr(V1: I0, V2: I1); |
1764 | } |
1765 | |
1766 | if (IID == Intrinsic::smax || IID == Intrinsic::smin) { |
1767 | // smax (neg nsw X), (neg nsw Y) --> neg nsw (smin X, Y) |
1768 | // smin (neg nsw X), (neg nsw Y) --> neg nsw (smax X, Y) |
1769 | // TODO: Canonicalize neg after min/max if I1 is constant. |
1770 | if (match(V: I0, P: m_NSWNeg(V: m_Value(V&: X))) && match(V: I1, P: m_NSWNeg(V: m_Value(V&: Y))) && |
1771 | (I0->hasOneUse() || I1->hasOneUse())) { |
1772 | Intrinsic::ID InvID = getInverseMinMaxIntrinsic(MinMaxID: IID); |
1773 | Value *InvMaxMin = Builder.CreateBinaryIntrinsic(ID: InvID, LHS: X, RHS: Y); |
1774 | return BinaryOperator::CreateNSWNeg(Op: InvMaxMin); |
1775 | } |
1776 | } |
1777 | |
1778 | // (umax X, (xor X, Pow2)) |
1779 | // -> (or X, Pow2) |
1780 | // (umin X, (xor X, Pow2)) |
1781 | // -> (and X, ~Pow2) |
1782 | // (smax X, (xor X, Pos_Pow2)) |
1783 | // -> (or X, Pos_Pow2) |
1784 | // (smin X, (xor X, Pos_Pow2)) |
1785 | // -> (and X, ~Pos_Pow2) |
1786 | // (smax X, (xor X, Neg_Pow2)) |
1787 | // -> (and X, ~Neg_Pow2) |
1788 | // (smin X, (xor X, Neg_Pow2)) |
1789 | // -> (or X, Neg_Pow2) |
1790 | if ((match(V: I0, P: m_c_Xor(L: m_Specific(V: I1), R: m_Value(V&: X))) || |
1791 | match(V: I1, P: m_c_Xor(L: m_Specific(V: I0), R: m_Value(V&: X)))) && |
1792 | isKnownToBeAPowerOfTwo(V: X, /* OrZero */ true)) { |
1793 | bool UseOr = IID == Intrinsic::smax || IID == Intrinsic::umax; |
1794 | bool UseAndN = IID == Intrinsic::smin || IID == Intrinsic::umin; |
1795 | |
1796 | if (IID == Intrinsic::smax || IID == Intrinsic::smin) { |
1797 | auto KnownSign = getKnownSign(Op: X, SQ: SQ.getWithInstruction(I: II)); |
1798 | if (KnownSign == std::nullopt) { |
1799 | UseOr = false; |
1800 | UseAndN = false; |
1801 | } else if (*KnownSign /* true is Signed. */) { |
1802 | UseOr ^= true; |
1803 | UseAndN ^= true; |
1804 | Type *Ty = I0->getType(); |
        // A negative power of 2 must be IntMin. It is possible to prove
        // "negative and a power of 2" without actually having the known bits,
        // so just construct the value by hand.
1808 | X = Constant::getIntegerValue( |
1809 | Ty, V: APInt::getSignedMinValue(numBits: Ty->getScalarSizeInBits())); |
1810 | } |
1811 | } |
1812 | if (UseOr) |
1813 | return BinaryOperator::CreateOr(V1: I0, V2: X); |
1814 | else if (UseAndN) |
1815 | return BinaryOperator::CreateAnd(V1: I0, V2: Builder.CreateNot(V: X)); |
1816 | } |
1817 | |
1818 | // If we can eliminate ~A and Y is free to invert: |
1819 | // max ~A, Y --> ~(min A, ~Y) |
1820 | // |
1821 | // Examples: |
1822 | // max ~A, ~Y --> ~(min A, Y) |
1823 | // max ~A, C --> ~(min A, ~C) |
1824 | // max ~A, (max ~Y, ~Z) --> ~min( A, (min Y, Z)) |
1825 | auto moveNotAfterMinMax = [&](Value *X, Value *Y) -> Instruction * { |
1826 | Value *A; |
1827 | if (match(V: X, P: m_OneUse(SubPattern: m_Not(V: m_Value(V&: A)))) && |
1828 | !isFreeToInvert(V: A, WillInvertAllUses: A->hasOneUse())) { |
1829 | if (Value *NotY = getFreelyInverted(V: Y, WillInvertAllUses: Y->hasOneUse(), Builder: &Builder)) { |
1830 | Intrinsic::ID InvID = getInverseMinMaxIntrinsic(MinMaxID: IID); |
1831 | Value *InvMaxMin = Builder.CreateBinaryIntrinsic(ID: InvID, LHS: A, RHS: NotY); |
1832 | return BinaryOperator::CreateNot(Op: InvMaxMin); |
1833 | } |
1834 | } |
1835 | return nullptr; |
1836 | }; |
1837 | |
1838 | if (Instruction *I = moveNotAfterMinMax(I0, I1)) |
1839 | return I; |
1840 | if (Instruction *I = moveNotAfterMinMax(I1, I0)) |
1841 | return I; |
1842 | |
1843 | if (Instruction *I = moveAddAfterMinMax(II, Builder)) |
1844 | return I; |
1845 | |
1846 | // minmax (X & NegPow2C, Y & NegPow2C) --> minmax(X, Y) & NegPow2C |
1847 | const APInt *RHSC; |
1848 | if (match(V: I0, P: m_OneUse(SubPattern: m_And(L: m_Value(V&: X), R: m_NegatedPower2(V&: RHSC)))) && |
1849 | match(V: I1, P: m_OneUse(SubPattern: m_And(L: m_Value(V&: Y), R: m_SpecificInt(V: *RHSC))))) |
1850 | return BinaryOperator::CreateAnd(V1: Builder.CreateBinaryIntrinsic(ID: IID, LHS: X, RHS: Y), |
1851 | V2: ConstantInt::get(Ty: II->getType(), V: *RHSC)); |
1852 | |
1853 | // smax(X, -X) --> abs(X) |
1854 | // smin(X, -X) --> -abs(X) |
1855 | // umax(X, -X) --> -abs(X) |
1856 | // umin(X, -X) --> abs(X) |
1857 | if (isKnownNegation(X: I0, Y: I1)) { |
1858 | // We can choose either operand as the input to abs(), but if we can |
1859 | // eliminate the only use of a value, that's better for subsequent |
1860 | // transforms/analysis. |
1861 | if (I0->hasOneUse() && !I1->hasOneUse()) |
1862 | std::swap(a&: I0, b&: I1); |
1863 | |
1864 | // This is some variant of abs(). See if we can propagate 'nsw' to the abs |
1865 | // operation and potentially its negation. |
1866 | bool IntMinIsPoison = isKnownNegation(X: I0, Y: I1, /* NeedNSW */ true); |
1867 | Value *Abs = Builder.CreateBinaryIntrinsic( |
1868 | ID: Intrinsic::abs, LHS: I0, |
1869 | RHS: ConstantInt::getBool(Context&: II->getContext(), V: IntMinIsPoison)); |
1870 | |
1871 | // We don't have a "nabs" intrinsic, so negate if needed based on the |
1872 | // max/min operation. |
1873 | if (IID == Intrinsic::smin || IID == Intrinsic::umax) |
      Abs = Builder.CreateNeg(V: Abs, Name: "nabs", HasNSW: IntMinIsPoison);
1875 | return replaceInstUsesWith(I&: CI, V: Abs); |
1876 | } |
1877 | |
1878 | if (Instruction *Sel = foldClampRangeOfTwo(II, Builder)) |
1879 | return Sel; |
1880 | |
1881 | if (Instruction *SAdd = matchSAddSubSat(MinMax1&: *II)) |
1882 | return SAdd; |
1883 | |
1884 | if (Value *NewMinMax = reassociateMinMaxWithConstants(II, Builder, SQ)) |
1885 | return replaceInstUsesWith(I&: *II, V: NewMinMax); |
1886 | |
1887 | if (Instruction *R = reassociateMinMaxWithConstantInOperand(II, Builder)) |
1888 | return R; |
1889 | |
1890 | if (Instruction *NewMinMax = factorizeMinMaxTree(II)) |
1891 | return NewMinMax; |
1892 | |
1893 | // Try to fold minmax with constant RHS based on range information |
1894 | if (match(V: I1, P: m_APIntAllowPoison(Res&: RHSC))) { |
1895 | ICmpInst::Predicate Pred = |
1896 | ICmpInst::getNonStrictPredicate(pred: MinMaxIntrinsic::getPredicate(ID: IID)); |
1897 | bool IsSigned = MinMaxIntrinsic::isSigned(ID: IID); |
1898 | ConstantRange LHS_CR = computeConstantRangeIncludingKnownBits( |
1899 | V: I0, ForSigned: IsSigned, SQ: SQ.getWithInstruction(I: II)); |
1900 | if (!LHS_CR.isFullSet()) { |
1901 | if (LHS_CR.icmp(Pred, Other: *RHSC)) |
1902 | return replaceInstUsesWith(I&: *II, V: I0); |
1903 | if (LHS_CR.icmp(Pred: ICmpInst::getSwappedPredicate(pred: Pred), Other: *RHSC)) |
1904 | return replaceInstUsesWith(I&: *II, |
1905 | V: ConstantInt::get(Ty: II->getType(), V: *RHSC)); |
1906 | } |
1907 | } |
1908 | |
1909 | break; |
1910 | } |
1911 | case Intrinsic::bitreverse: { |
1912 | Value *IIOperand = II->getArgOperand(i: 0); |
1913 | // bitrev (zext i1 X to ?) --> X ? SignBitC : 0 |
1914 | Value *X; |
1915 | if (match(V: IIOperand, P: m_ZExt(Op: m_Value(V&: X))) && |
1916 | X->getType()->isIntOrIntVectorTy(BitWidth: 1)) { |
1917 | Type *Ty = II->getType(); |
1918 | APInt SignBit = APInt::getSignMask(BitWidth: Ty->getScalarSizeInBits()); |
1919 | return SelectInst::Create(C: X, S1: ConstantInt::get(Ty, V: SignBit), |
1920 | S2: ConstantInt::getNullValue(Ty)); |
1921 | } |
1922 | |
1923 | if (Instruction *crossLogicOpFold = |
1924 | foldBitOrderCrossLogicOp<Intrinsic::bitreverse>(V: IIOperand, Builder)) |
1925 | return crossLogicOpFold; |
1926 | |
1927 | break; |
1928 | } |
1929 | case Intrinsic::bswap: { |
1930 | Value *IIOperand = II->getArgOperand(i: 0); |
1931 | |
1932 | // Try to canonicalize bswap-of-logical-shift-by-8-bit-multiple as |
1933 | // inverse-shift-of-bswap: |
1934 | // bswap (shl X, Y) --> lshr (bswap X), Y |
1935 | // bswap (lshr X, Y) --> shl (bswap X), Y |
1936 | Value *X, *Y; |
1937 | if (match(V: IIOperand, P: m_OneUse(SubPattern: m_LogicalShift(L: m_Value(V&: X), R: m_Value(V&: Y))))) { |
1938 | unsigned BitWidth = IIOperand->getType()->getScalarSizeInBits(); |
1939 | if (MaskedValueIsZero(V: Y, Mask: APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: 3))) { |
1940 | Value *NewSwap = Builder.CreateUnaryIntrinsic(ID: Intrinsic::bswap, V: X); |
1941 | BinaryOperator::BinaryOps InverseShift = |
1942 | cast<BinaryOperator>(Val: IIOperand)->getOpcode() == Instruction::Shl |
1943 | ? Instruction::LShr |
1944 | : Instruction::Shl; |
1945 | return BinaryOperator::Create(Op: InverseShift, S1: NewSwap, S2: Y); |
1946 | } |
1947 | } |
1948 | |
1949 | KnownBits Known = computeKnownBits(V: IIOperand, Depth: 0, CxtI: II); |
1950 | uint64_t LZ = alignDown(Value: Known.countMinLeadingZeros(), Align: 8); |
1951 | uint64_t TZ = alignDown(Value: Known.countMinTrailingZeros(), Align: 8); |
1952 | unsigned BW = Known.getBitWidth(); |
1953 | |
1954 | // bswap(x) -> shift(x) if x has exactly one "active byte" |
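    // For example, for an i32 value known to be 0x0000??00 (LZ = 16, TZ = 8),
    // bswap(x) is equivalent to shl(x, 8).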
1955 | if (BW - LZ - TZ == 8) { |
      assert(LZ != TZ && "active byte cannot be in the middle");
1957 | if (LZ > TZ) // -> shl(x) if the "active byte" is in the low part of x |
1958 | return BinaryOperator::CreateNUWShl( |
1959 | V1: IIOperand, V2: ConstantInt::get(Ty: IIOperand->getType(), V: LZ - TZ)); |
1960 | // -> lshr(x) if the "active byte" is in the high part of x |
1961 | return BinaryOperator::CreateExactLShr( |
1962 | V1: IIOperand, V2: ConstantInt::get(Ty: IIOperand->getType(), V: TZ - LZ)); |
1963 | } |
1964 | |
1965 | // bswap(trunc(bswap(x))) -> trunc(lshr(x, c)) |
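    // For example, bswap(trunc i16 (bswap i32 X)) selects the top two bytes of
    // X in their original order, i.e. trunc i16 (lshr X, 16).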
1966 | if (match(V: IIOperand, P: m_Trunc(Op: m_BSwap(Op0: m_Value(V&: X))))) { |
1967 | unsigned C = X->getType()->getScalarSizeInBits() - BW; |
1968 | Value *CV = ConstantInt::get(Ty: X->getType(), V: C); |
1969 | Value *V = Builder.CreateLShr(LHS: X, RHS: CV); |
1970 | return new TruncInst(V, IIOperand->getType()); |
1971 | } |
1972 | |
1973 | if (Instruction *crossLogicOpFold = |
1974 | foldBitOrderCrossLogicOp<Intrinsic::bswap>(V: IIOperand, Builder)) { |
1975 | return crossLogicOpFold; |
1976 | } |
1977 | |
1978 | // Try to fold into bitreverse if bswap is the root of the expression tree. |
1979 | if (Instruction *BitOp = matchBSwapOrBitReverse(I&: *II, /*MatchBSwaps*/ false, |
1980 | /*MatchBitReversals*/ true)) |
1981 | return BitOp; |
1982 | break; |
1983 | } |
1984 | case Intrinsic::masked_load: |
1985 | if (Value *SimplifiedMaskedOp = simplifyMaskedLoad(II&: *II)) |
1986 | return replaceInstUsesWith(I&: CI, V: SimplifiedMaskedOp); |
1987 | break; |
1988 | case Intrinsic::masked_store: |
1989 | return simplifyMaskedStore(II&: *II); |
1990 | case Intrinsic::masked_gather: |
1991 | return simplifyMaskedGather(II&: *II); |
1992 | case Intrinsic::masked_scatter: |
1993 | return simplifyMaskedScatter(II&: *II); |
1994 | case Intrinsic::launder_invariant_group: |
1995 | case Intrinsic::strip_invariant_group: |
1996 | if (auto *SkippedBarrier = simplifyInvariantGroupIntrinsic(II&: *II, IC&: *this)) |
1997 | return replaceInstUsesWith(I&: *II, V: SkippedBarrier); |
1998 | break; |
1999 | case Intrinsic::powi: |
2000 | if (ConstantInt *Power = dyn_cast<ConstantInt>(Val: II->getArgOperand(i: 1))) { |
2001 | // 0 and 1 are handled in instsimplify |
2002 | // powi(x, -1) -> 1/x |
2003 | if (Power->isMinusOne()) |
2004 | return BinaryOperator::CreateFDivFMF(V1: ConstantFP::get(Ty: CI.getType(), V: 1.0), |
2005 | V2: II->getArgOperand(i: 0), FMFSource: II); |
2006 | // powi(x, 2) -> x*x |
2007 | if (Power->equalsInt(V: 2)) |
2008 | return BinaryOperator::CreateFMulFMF(V1: II->getArgOperand(i: 0), |
2009 | V2: II->getArgOperand(i: 0), FMFSource: II); |
2010 | |
2011 | if (!Power->getValue()[0]) { |
2012 | Value *X; |
2013 | // If power is even: |
2014 | // powi(-x, p) -> powi(x, p) |
2015 | // powi(fabs(x), p) -> powi(x, p) |
2016 | // powi(copysign(x, y), p) -> powi(x, p) |
2017 | if (match(V: II->getArgOperand(i: 0), P: m_FNeg(X: m_Value(V&: X))) || |
2018 | match(V: II->getArgOperand(i: 0), P: m_FAbs(Op0: m_Value(V&: X))) || |
2019 | match(V: II->getArgOperand(i: 0), |
2020 | P: m_Intrinsic<Intrinsic::copysign>(Op0: m_Value(V&: X), Op1: m_Value()))) |
2021 | return replaceOperand(I&: *II, OpNum: 0, V: X); |
2022 | } |
2023 | } |
2024 | break; |
2025 | |
2026 | case Intrinsic::cttz: |
2027 | case Intrinsic::ctlz: |
2028 | if (auto *I = foldCttzCtlz(II&: *II, IC&: *this)) |
2029 | return I; |
2030 | break; |
2031 | |
2032 | case Intrinsic::ctpop: |
2033 | if (auto *I = foldCtpop(II&: *II, IC&: *this)) |
2034 | return I; |
2035 | break; |
2036 | |
2037 | case Intrinsic::fshl: |
2038 | case Intrinsic::fshr: { |
2039 | Value *Op0 = II->getArgOperand(i: 0), *Op1 = II->getArgOperand(i: 1); |
2040 | Type *Ty = II->getType(); |
2041 | unsigned BitWidth = Ty->getScalarSizeInBits(); |
2042 | Constant *ShAmtC; |
2043 | if (match(V: II->getArgOperand(i: 2), P: m_ImmConstant(C&: ShAmtC))) { |
2044 | // Canonicalize a shift amount constant operand to modulo the bit-width. |
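      // For example: fshl i32 X, Y, 40 --> fshl i32 X, Y, 8.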
2045 | Constant *WidthC = ConstantInt::get(Ty, V: BitWidth); |
2046 | Constant *ModuloC = |
2047 | ConstantFoldBinaryOpOperands(Opcode: Instruction::URem, LHS: ShAmtC, RHS: WidthC, DL); |
2048 | if (!ModuloC) |
2049 | return nullptr; |
2050 | if (ModuloC != ShAmtC) |
2051 | return replaceOperand(I&: *II, OpNum: 2, V: ModuloC); |
2052 | |
2053 | assert(match(ConstantFoldCompareInstOperands(ICmpInst::ICMP_UGT, WidthC, |
2054 | ShAmtC, DL), |
2055 | m_One()) && |
2056 | "Shift amount expected to be modulo bitwidth" ); |
2057 | |
2058 | // Canonicalize funnel shift right by constant to funnel shift left. This |
2059 | // is not entirely arbitrary. For historical reasons, the backend may |
2060 | // recognize rotate left patterns but miss rotate right patterns. |
2061 | if (IID == Intrinsic::fshr) { |
2062 | // fshr X, Y, C --> fshl X, Y, (BitWidth - C) if C is not zero. |
2063 | if (!isKnownNonZero(V: ShAmtC, Q: SQ.getWithInstruction(I: II))) |
2064 | return nullptr; |
2065 | |
2066 | Constant *LeftShiftC = ConstantExpr::getSub(C1: WidthC, C2: ShAmtC); |
2067 | Module *Mod = II->getModule(); |
2068 | Function *Fshl = Intrinsic::getDeclaration(M: Mod, id: Intrinsic::fshl, Tys: Ty); |
2069 | return CallInst::Create(Func: Fshl, Args: { Op0, Op1, LeftShiftC }); |
2070 | } |
2071 | assert(IID == Intrinsic::fshl && |
2072 | "All funnel shifts by simple constants should go left" ); |
2073 | |
2074 | // fshl(X, 0, C) --> shl X, C |
2075 | // fshl(X, undef, C) --> shl X, C |
2076 | if (match(V: Op1, P: m_ZeroInt()) || match(V: Op1, P: m_Undef())) |
2077 | return BinaryOperator::CreateShl(V1: Op0, V2: ShAmtC); |
2078 | |
2079 | // fshl(0, X, C) --> lshr X, (BW-C) |
2080 | // fshl(undef, X, C) --> lshr X, (BW-C) |
2081 | if (match(V: Op0, P: m_ZeroInt()) || match(V: Op0, P: m_Undef())) |
2082 | return BinaryOperator::CreateLShr(V1: Op1, |
2083 | V2: ConstantExpr::getSub(C1: WidthC, C2: ShAmtC)); |
2084 | |
2085 | // fshl i16 X, X, 8 --> bswap i16 X (reduce to more-specific form) |
2086 | if (Op0 == Op1 && BitWidth == 16 && match(V: ShAmtC, P: m_SpecificInt(V: 8))) { |
2087 | Module *Mod = II->getModule(); |
2088 | Function *Bswap = Intrinsic::getDeclaration(M: Mod, id: Intrinsic::bswap, Tys: Ty); |
2089 | return CallInst::Create(Func: Bswap, Args: { Op0 }); |
2090 | } |
2091 | if (Instruction *BitOp = |
2092 | matchBSwapOrBitReverse(I&: *II, /*MatchBSwaps*/ true, |
2093 | /*MatchBitReversals*/ true)) |
2094 | return BitOp; |
2095 | } |
2096 | |
2097 | // Left or right might be masked. |
2098 | if (SimplifyDemandedInstructionBits(Inst&: *II)) |
2099 | return &CI; |
2100 | |
2101 | // The shift amount (operand 2) of a funnel shift is modulo the bitwidth, |
2102 | // so only the low bits of the shift amount are demanded if the bitwidth is |
2103 | // a power-of-2. |
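    // For example, for a 32-bit funnel shift only the low 5 bits of the shift
    // amount are demanded.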
2104 | if (!isPowerOf2_32(Value: BitWidth)) |
2105 | break; |
2106 | APInt Op2Demanded = APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: Log2_32_Ceil(Value: BitWidth)); |
2107 | KnownBits Op2Known(BitWidth); |
2108 | if (SimplifyDemandedBits(I: II, OpNo: 2, DemandedMask: Op2Demanded, Known&: Op2Known)) |
2109 | return &CI; |
2110 | break; |
2111 | } |
2112 | case Intrinsic::ptrmask: { |
2113 | unsigned BitWidth = DL.getPointerTypeSizeInBits(II->getType()); |
2114 | KnownBits Known(BitWidth); |
2115 | if (SimplifyDemandedInstructionBits(Inst&: *II, Known)) |
2116 | return II; |
2117 | |
2118 | Value *InnerPtr, *InnerMask; |
2119 | bool Changed = false; |
2120 | // Combine: |
2121 | // (ptrmask (ptrmask p, A), B) |
2122 | // -> (ptrmask p, (and A, B)) |
2123 | if (match(V: II->getArgOperand(i: 0), |
2124 | P: m_OneUse(SubPattern: m_Intrinsic<Intrinsic::ptrmask>(Op0: m_Value(V&: InnerPtr), |
2125 | Op1: m_Value(V&: InnerMask))))) { |
2126 | assert(II->getArgOperand(1)->getType() == InnerMask->getType() && |
2127 | "Mask types must match" ); |
2128 | // TODO: If InnerMask == Op1, we could copy attributes from inner |
2129 | // callsite -> outer callsite. |
2130 | Value *NewMask = Builder.CreateAnd(LHS: II->getArgOperand(i: 1), RHS: InnerMask); |
2131 | replaceOperand(I&: CI, OpNum: 0, V: InnerPtr); |
2132 | replaceOperand(I&: CI, OpNum: 1, V: NewMask); |
2133 | Changed = true; |
2134 | } |
2135 | |
2136 | // See if we can deduce non-null. |
2137 | if (!CI.hasRetAttr(Kind: Attribute::NonNull) && |
2138 | (Known.isNonZero() || |
2139 | isKnownNonZero(V: II, Q: getSimplifyQuery().getWithInstruction(I: II)))) { |
2140 | CI.addRetAttr(Kind: Attribute::NonNull); |
2141 | Changed = true; |
2142 | } |
2143 | |
2144 | unsigned NewAlignmentLog = |
2145 | std::min(a: Value::MaxAlignmentExponent, |
2146 | b: std::min(a: BitWidth - 1, b: Known.countMinTrailingZeros())); |
2147 | // Known bits will capture if we had alignment information associated with |
2148 | // the pointer argument. |
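    // For example, if the low 3 bits of the result are known to be zero (say,
    // because the mask is a constant -8), the returned pointer is at least
    // 8-byte aligned.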
2149 | if (NewAlignmentLog > Log2(A: CI.getRetAlign().valueOrOne())) { |
2150 | CI.addRetAttr(Attr: Attribute::getWithAlignment( |
2151 | Context&: CI.getContext(), Alignment: Align(uint64_t(1) << NewAlignmentLog))); |
2152 | Changed = true; |
2153 | } |
2154 | if (Changed) |
2155 | return &CI; |
2156 | break; |
2157 | } |
2158 | case Intrinsic::uadd_with_overflow: |
2159 | case Intrinsic::sadd_with_overflow: { |
2160 | if (Instruction *I = foldIntrinsicWithOverflowCommon(II)) |
2161 | return I; |
2162 | |
2163 | // Given 2 constant operands whose sum does not overflow: |
2164 | // uaddo (X +nuw C0), C1 -> uaddo X, C0 + C1 |
2165 | // saddo (X +nsw C0), C1 -> saddo X, C0 + C1 |
2166 | Value *X; |
2167 | const APInt *C0, *C1; |
2168 | Value *Arg0 = II->getArgOperand(i: 0); |
2169 | Value *Arg1 = II->getArgOperand(i: 1); |
2170 | bool IsSigned = IID == Intrinsic::sadd_with_overflow; |
2171 | bool HasNWAdd = IsSigned |
2172 | ? match(V: Arg0, P: m_NSWAddLike(L: m_Value(V&: X), R: m_APInt(Res&: C0))) |
2173 | : match(V: Arg0, P: m_NUWAddLike(L: m_Value(V&: X), R: m_APInt(Res&: C0))); |
2174 | if (HasNWAdd && match(V: Arg1, P: m_APInt(Res&: C1))) { |
2175 | bool Overflow; |
2176 | APInt NewC = |
2177 | IsSigned ? C1->sadd_ov(RHS: *C0, Overflow) : C1->uadd_ov(RHS: *C0, Overflow); |
2178 | if (!Overflow) |
2179 | return replaceInstUsesWith( |
2180 | I&: *II, V: Builder.CreateBinaryIntrinsic( |
2181 | ID: IID, LHS: X, RHS: ConstantInt::get(Ty: Arg1->getType(), V: NewC))); |
2182 | } |
2183 | break; |
2184 | } |
2185 | |
2186 | case Intrinsic::umul_with_overflow: |
2187 | case Intrinsic::smul_with_overflow: |
2188 | case Intrinsic::usub_with_overflow: |
2189 | if (Instruction *I = foldIntrinsicWithOverflowCommon(II)) |
2190 | return I; |
2191 | break; |
2192 | |
2193 | case Intrinsic::ssub_with_overflow: { |
2194 | if (Instruction *I = foldIntrinsicWithOverflowCommon(II)) |
2195 | return I; |
2196 | |
2197 | Constant *C; |
2198 | Value *Arg0 = II->getArgOperand(i: 0); |
2199 | Value *Arg1 = II->getArgOperand(i: 1); |
2200 | // Given a constant C that is not the minimum signed value |
2201 | // for an integer of a given bit width: |
2202 | // |
2203 | // ssubo X, C -> saddo X, -C |
2204 | if (match(V: Arg1, P: m_Constant(C)) && C->isNotMinSignedValue()) { |
2205 | Value *NegVal = ConstantExpr::getNeg(C); |
2206 | // Build a saddo call that is equivalent to the discovered |
2207 | // ssubo call. |
2208 | return replaceInstUsesWith( |
2209 | I&: *II, V: Builder.CreateBinaryIntrinsic(ID: Intrinsic::sadd_with_overflow, |
2210 | LHS: Arg0, RHS: NegVal)); |
2211 | } |
2212 | |
2213 | break; |
2214 | } |
2215 | |
2216 | case Intrinsic::uadd_sat: |
2217 | case Intrinsic::sadd_sat: |
2218 | case Intrinsic::usub_sat: |
2219 | case Intrinsic::ssub_sat: { |
2220 | SaturatingInst *SI = cast<SaturatingInst>(Val: II); |
2221 | Type *Ty = SI->getType(); |
2222 | Value *Arg0 = SI->getLHS(); |
2223 | Value *Arg1 = SI->getRHS(); |
2224 | |
2225 | // Make use of known overflow information. |
2226 | OverflowResult OR = computeOverflow(BinaryOp: SI->getBinaryOp(), IsSigned: SI->isSigned(), |
2227 | LHS: Arg0, RHS: Arg1, CxtI: SI); |
2228 | switch (OR) { |
2229 | case OverflowResult::MayOverflow: |
2230 | break; |
2231 | case OverflowResult::NeverOverflows: |
2232 | if (SI->isSigned()) |
2233 | return BinaryOperator::CreateNSW(Opc: SI->getBinaryOp(), V1: Arg0, V2: Arg1); |
2234 | else |
2235 | return BinaryOperator::CreateNUW(Opc: SI->getBinaryOp(), V1: Arg0, V2: Arg1); |
2236 | case OverflowResult::AlwaysOverflowsLow: { |
2237 | unsigned BitWidth = Ty->getScalarSizeInBits(); |
2238 | APInt Min = APSInt::getMinValue(numBits: BitWidth, Unsigned: !SI->isSigned()); |
2239 | return replaceInstUsesWith(I&: *SI, V: ConstantInt::get(Ty, V: Min)); |
2240 | } |
2241 | case OverflowResult::AlwaysOverflowsHigh: { |
2242 | unsigned BitWidth = Ty->getScalarSizeInBits(); |
2243 | APInt Max = APSInt::getMaxValue(numBits: BitWidth, Unsigned: !SI->isSigned()); |
2244 | return replaceInstUsesWith(I&: *SI, V: ConstantInt::get(Ty, V: Max)); |
2245 | } |
2246 | } |
2247 | |
    // usub_sat((sub nuw C, A), C1) -> usub_sat(usub_sat(C, C1), A)
    // which then simplifies to:
    //   usub_sat((sub nuw C, A), C1) -> usub_sat(C - C1, A) if C1 u< C
    //   usub_sat((sub nuw C, A), C1) -> 0 otherwise
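    // For example: usub_sat((sub nuw 10, A), 3) --> usub_sat(7, A).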
2252 | Constant *C, *C1; |
2253 | Value *A; |
2254 | if (IID == Intrinsic::usub_sat && |
2255 | match(V: Arg0, P: m_NUWSub(L: m_ImmConstant(C), R: m_Value(V&: A))) && |
2256 | match(V: Arg1, P: m_ImmConstant(C&: C1))) { |
2257 | auto *NewC = Builder.CreateBinaryIntrinsic(ID: Intrinsic::usub_sat, LHS: C, RHS: C1); |
2258 | auto *NewSub = |
2259 | Builder.CreateBinaryIntrinsic(ID: Intrinsic::usub_sat, LHS: NewC, RHS: A); |
2260 | return replaceInstUsesWith(I&: *SI, V: NewSub); |
2261 | } |
2262 | |
2263 | // ssub.sat(X, C) -> sadd.sat(X, -C) if C != MIN |
2264 | if (IID == Intrinsic::ssub_sat && match(V: Arg1, P: m_Constant(C)) && |
2265 | C->isNotMinSignedValue()) { |
2266 | Value *NegVal = ConstantExpr::getNeg(C); |
2267 | return replaceInstUsesWith( |
2268 | I&: *II, V: Builder.CreateBinaryIntrinsic( |
2269 | ID: Intrinsic::sadd_sat, LHS: Arg0, RHS: NegVal)); |
2270 | } |
2271 | |
2272 | // sat(sat(X + Val2) + Val) -> sat(X + (Val+Val2)) |
2273 | // sat(sat(X - Val2) - Val) -> sat(X - (Val+Val2)) |
2274 | // if Val and Val2 have the same sign |
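    // For example: uadd_sat(uadd_sat(X, 20), 30) --> uadd_sat(X, 50).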
2275 | if (auto *Other = dyn_cast<IntrinsicInst>(Val: Arg0)) { |
2276 | Value *X; |
2277 | const APInt *Val, *Val2; |
2278 | APInt NewVal; |
2279 | bool IsUnsigned = |
2280 | IID == Intrinsic::uadd_sat || IID == Intrinsic::usub_sat; |
2281 | if (Other->getIntrinsicID() == IID && |
2282 | match(V: Arg1, P: m_APInt(Res&: Val)) && |
2283 | match(V: Other->getArgOperand(i: 0), P: m_Value(V&: X)) && |
2284 | match(V: Other->getArgOperand(i: 1), P: m_APInt(Res&: Val2))) { |
2285 | if (IsUnsigned) |
2286 | NewVal = Val->uadd_sat(RHS: *Val2); |
2287 | else if (Val->isNonNegative() == Val2->isNonNegative()) { |
2288 | bool Overflow; |
2289 | NewVal = Val->sadd_ov(RHS: *Val2, Overflow); |
2290 | if (Overflow) { |
2291 | // Both adds together may add more than SignedMaxValue |
2292 | // without saturating the final result. |
2293 | break; |
2294 | } |
2295 | } else { |
2296 | // Cannot fold saturated addition with different signs. |
2297 | break; |
2298 | } |
2299 | |
2300 | return replaceInstUsesWith( |
2301 | I&: *II, V: Builder.CreateBinaryIntrinsic( |
2302 | ID: IID, LHS: X, RHS: ConstantInt::get(Ty: II->getType(), V: NewVal))); |
2303 | } |
2304 | } |
2305 | break; |
2306 | } |
2307 | |
2308 | case Intrinsic::minnum: |
2309 | case Intrinsic::maxnum: |
2310 | case Intrinsic::minimum: |
2311 | case Intrinsic::maximum: { |
2312 | Value *Arg0 = II->getArgOperand(i: 0); |
2313 | Value *Arg1 = II->getArgOperand(i: 1); |
2314 | Value *X, *Y; |
2315 | if (match(V: Arg0, P: m_FNeg(X: m_Value(V&: X))) && match(V: Arg1, P: m_FNeg(X: m_Value(V&: Y))) && |
2316 | (Arg0->hasOneUse() || Arg1->hasOneUse())) { |
2317 | // If both operands are negated, invert the call and negate the result: |
2318 | // min(-X, -Y) --> -(max(X, Y)) |
2319 | // max(-X, -Y) --> -(min(X, Y)) |
2320 | Intrinsic::ID NewIID; |
2321 | switch (IID) { |
2322 | case Intrinsic::maxnum: |
2323 | NewIID = Intrinsic::minnum; |
2324 | break; |
2325 | case Intrinsic::minnum: |
2326 | NewIID = Intrinsic::maxnum; |
2327 | break; |
2328 | case Intrinsic::maximum: |
2329 | NewIID = Intrinsic::minimum; |
2330 | break; |
2331 | case Intrinsic::minimum: |
2332 | NewIID = Intrinsic::maximum; |
2333 | break; |
2334 | default: |
2335 | llvm_unreachable("unexpected intrinsic ID" ); |
2336 | } |
2337 | Value *NewCall = Builder.CreateBinaryIntrinsic(ID: NewIID, LHS: X, RHS: Y, FMFSource: II); |
2338 | Instruction *FNeg = UnaryOperator::CreateFNeg(V: NewCall); |
2339 | FNeg->copyIRFlags(V: II); |
2340 | return FNeg; |
2341 | } |
2342 | |
2343 | // m(m(X, C2), C1) -> m(X, C) |
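    // For example: maxnum(maxnum(X, 2.0), 4.0) --> maxnum(X, 4.0).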
2344 | const APFloat *C1, *C2; |
2345 | if (auto *M = dyn_cast<IntrinsicInst>(Val: Arg0)) { |
2346 | if (M->getIntrinsicID() == IID && match(V: Arg1, P: m_APFloat(Res&: C1)) && |
2347 | ((match(V: M->getArgOperand(i: 0), P: m_Value(V&: X)) && |
2348 | match(V: M->getArgOperand(i: 1), P: m_APFloat(Res&: C2))) || |
2349 | (match(V: M->getArgOperand(i: 1), P: m_Value(V&: X)) && |
2350 | match(V: M->getArgOperand(i: 0), P: m_APFloat(Res&: C2))))) { |
2351 | APFloat Res(0.0); |
2352 | switch (IID) { |
2353 | case Intrinsic::maxnum: |
2354 | Res = maxnum(A: *C1, B: *C2); |
2355 | break; |
2356 | case Intrinsic::minnum: |
2357 | Res = minnum(A: *C1, B: *C2); |
2358 | break; |
2359 | case Intrinsic::maximum: |
2360 | Res = maximum(A: *C1, B: *C2); |
2361 | break; |
2362 | case Intrinsic::minimum: |
2363 | Res = minimum(A: *C1, B: *C2); |
2364 | break; |
2365 | default: |
2366 | llvm_unreachable("unexpected intrinsic ID" ); |
2367 | } |
2368 | Value *V = Builder.CreateBinaryIntrinsic( |
2369 | ID: IID, LHS: X, RHS: ConstantFP::get(Ty: Arg0->getType(), V: Res), FMFSource: II); |
2370 | // TODO: Conservatively intersecting FMF. If Res == C2, the transform |
2371 | // was a simplification (so Arg0 and its original flags could |
2372 | // propagate?) |
2373 | if (auto *CI = dyn_cast<CallInst>(Val: V)) |
2374 | CI->andIRFlags(V: M); |
2375 | return replaceInstUsesWith(I&: *II, V); |
2376 | } |
2377 | } |
2378 | |
2379 | // m((fpext X), (fpext Y)) -> fpext (m(X, Y)) |
2380 | if (match(V: Arg0, P: m_OneUse(SubPattern: m_FPExt(Op: m_Value(V&: X)))) && |
2381 | match(V: Arg1, P: m_OneUse(SubPattern: m_FPExt(Op: m_Value(V&: Y)))) && |
2382 | X->getType() == Y->getType()) { |
2383 | Value *NewCall = |
2384 | Builder.CreateBinaryIntrinsic(ID: IID, LHS: X, RHS: Y, FMFSource: II, Name: II->getName()); |
2385 | return new FPExtInst(NewCall, II->getType()); |
2386 | } |
2387 | |
2388 | // max X, -X --> fabs X |
2389 | // min X, -X --> -(fabs X) |
    // TODO: Remove the one-use limitation? It is obviously better for max,
    //       which is why we don't check for one-use in that case. For min it
    //       would cost an extra instruction (fnabs), but that is still likely
    //       better for analysis and codegen.
2394 | auto IsMinMaxOrXNegX = [IID, &X](Value *Op0, Value *Op1) { |
2395 | if (match(V: Op0, P: m_FNeg(X: m_Value(V&: X))) && match(V: Op1, P: m_Specific(V: X))) |
2396 | return Op0->hasOneUse() || |
2397 | (IID != Intrinsic::minimum && IID != Intrinsic::minnum); |
2398 | return false; |
2399 | }; |
2400 | |
2401 | if (IsMinMaxOrXNegX(Arg0, Arg1) || IsMinMaxOrXNegX(Arg1, Arg0)) { |
2402 | Value *R = Builder.CreateUnaryIntrinsic(ID: Intrinsic::fabs, V: X, FMFSource: II); |
2403 | if (IID == Intrinsic::minimum || IID == Intrinsic::minnum) |
2404 | R = Builder.CreateFNegFMF(V: R, FMFSource: II); |
2405 | return replaceInstUsesWith(I&: *II, V: R); |
2406 | } |
2407 | |
2408 | break; |
2409 | } |
2410 | case Intrinsic::matrix_multiply: { |
2411 | // Optimize negation in matrix multiplication. |
2412 | |
2413 | // -A * -B -> A * B |
2414 | Value *A, *B; |
2415 | if (match(V: II->getArgOperand(i: 0), P: m_FNeg(X: m_Value(V&: A))) && |
2416 | match(V: II->getArgOperand(i: 1), P: m_FNeg(X: m_Value(V&: B)))) { |
2417 | replaceOperand(I&: *II, OpNum: 0, V: A); |
2418 | replaceOperand(I&: *II, OpNum: 1, V: B); |
2419 | return II; |
2420 | } |
2421 | |
2422 | Value *Op0 = II->getOperand(i_nocapture: 0); |
2423 | Value *Op1 = II->getOperand(i_nocapture: 1); |
2424 | Value *OpNotNeg, *NegatedOp; |
2425 | unsigned NegatedOpArg, OtherOpArg; |
2426 | if (match(V: Op0, P: m_FNeg(X: m_Value(V&: OpNotNeg)))) { |
2427 | NegatedOp = Op0; |
2428 | NegatedOpArg = 0; |
2429 | OtherOpArg = 1; |
2430 | } else if (match(V: Op1, P: m_FNeg(X: m_Value(V&: OpNotNeg)))) { |
2431 | NegatedOp = Op1; |
2432 | NegatedOpArg = 1; |
2433 | OtherOpArg = 0; |
2434 | } else |
2435 | // Multiplication doesn't have a negated operand. |
2436 | break; |
2437 | |
2438 | // Only optimize if the negated operand has only one use. |
2439 | if (!NegatedOp->hasOneUse()) |
2440 | break; |
2441 | |
2442 | Value *OtherOp = II->getOperand(i_nocapture: OtherOpArg); |
2443 | VectorType *RetTy = cast<VectorType>(Val: II->getType()); |
2444 | VectorType *NegatedOpTy = cast<VectorType>(Val: NegatedOp->getType()); |
2445 | VectorType *OtherOpTy = cast<VectorType>(Val: OtherOp->getType()); |
2446 | ElementCount NegatedCount = NegatedOpTy->getElementCount(); |
2447 | ElementCount OtherCount = OtherOpTy->getElementCount(); |
2448 | ElementCount RetCount = RetTy->getElementCount(); |
2449 | // (-A) * B -> A * (-B), if it is cheaper to negate B and vice versa. |
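    // For example, if the negated operand has 16 elements, the other operand 4,
    // and the result 8, negating the other operand costs 4 fnegs instead of 16.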
2450 | if (ElementCount::isKnownGT(LHS: NegatedCount, RHS: OtherCount) && |
2451 | ElementCount::isKnownLT(LHS: OtherCount, RHS: RetCount)) { |
2452 | Value *InverseOtherOp = Builder.CreateFNeg(V: OtherOp); |
2453 | replaceOperand(I&: *II, OpNum: NegatedOpArg, V: OpNotNeg); |
2454 | replaceOperand(I&: *II, OpNum: OtherOpArg, V: InverseOtherOp); |
2455 | return II; |
2456 | } |
2457 | // (-A) * B -> -(A * B), if it is cheaper to negate the result |
2458 | if (ElementCount::isKnownGT(LHS: NegatedCount, RHS: RetCount)) { |
2459 | SmallVector<Value *, 5> NewArgs(II->args()); |
2460 | NewArgs[NegatedOpArg] = OpNotNeg; |
2461 | Instruction *NewMul = |
2462 | Builder.CreateIntrinsic(RetTy: II->getType(), ID: IID, Args: NewArgs, FMFSource: II); |
2463 | return replaceInstUsesWith(I&: *II, V: Builder.CreateFNegFMF(V: NewMul, FMFSource: II)); |
2464 | } |
2465 | break; |
2466 | } |
2467 | case Intrinsic::fmuladd: { |
2468 | // Try to simplify the underlying FMul. |
2469 | if (Value *V = simplifyFMulInst(LHS: II->getArgOperand(i: 0), RHS: II->getArgOperand(i: 1), |
2470 | FMF: II->getFastMathFlags(), |
2471 | Q: SQ.getWithInstruction(I: II))) { |
2472 | auto *FAdd = BinaryOperator::CreateFAdd(V1: V, V2: II->getArgOperand(i: 2)); |
2473 | FAdd->copyFastMathFlags(I: II); |
2474 | return FAdd; |
2475 | } |
2476 | |
2477 | [[fallthrough]]; |
2478 | } |
2479 | case Intrinsic::fma: { |
2480 | // fma fneg(x), fneg(y), z -> fma x, y, z |
2481 | Value *Src0 = II->getArgOperand(i: 0); |
2482 | Value *Src1 = II->getArgOperand(i: 1); |
2483 | Value *X, *Y; |
2484 | if (match(V: Src0, P: m_FNeg(X: m_Value(V&: X))) && match(V: Src1, P: m_FNeg(X: m_Value(V&: Y)))) { |
2485 | replaceOperand(I&: *II, OpNum: 0, V: X); |
2486 | replaceOperand(I&: *II, OpNum: 1, V: Y); |
2487 | return II; |
2488 | } |
2489 | |
2490 | // fma fabs(x), fabs(x), z -> fma x, x, z |
2491 | if (match(V: Src0, P: m_FAbs(Op0: m_Value(V&: X))) && |
2492 | match(V: Src1, P: m_FAbs(Op0: m_Specific(V: X)))) { |
2493 | replaceOperand(I&: *II, OpNum: 0, V: X); |
2494 | replaceOperand(I&: *II, OpNum: 1, V: X); |
2495 | return II; |
2496 | } |
2497 | |
2498 | // Try to simplify the underlying FMul. We can only apply simplifications |
2499 | // that do not require rounding. |
2500 | if (Value *V = simplifyFMAFMul(LHS: II->getArgOperand(i: 0), RHS: II->getArgOperand(i: 1), |
2501 | FMF: II->getFastMathFlags(), |
2502 | Q: SQ.getWithInstruction(I: II))) { |
2503 | auto *FAdd = BinaryOperator::CreateFAdd(V1: V, V2: II->getArgOperand(i: 2)); |
2504 | FAdd->copyFastMathFlags(I: II); |
2505 | return FAdd; |
2506 | } |
2507 | |
2508 | // fma x, y, 0 -> fmul x, y |
2509 | // This is always valid for -0.0, but requires nsz for +0.0 as |
2510 | // -0.0 + 0.0 = 0.0, which would not be the same as the fmul on its own. |
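| // Illustrative IR (names and types assumed): |
| //   %r = call nsz float @llvm.fma.f32(float %x, float %y, float 0.0) |
| //   --> |
| //   %r = fmul nsz float %x, %y |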
2511 | if (match(V: II->getArgOperand(i: 2), P: m_NegZeroFP()) || |
2512 | (match(V: II->getArgOperand(i: 2), P: m_PosZeroFP()) && |
2513 | II->getFastMathFlags().noSignedZeros())) |
2514 | return BinaryOperator::CreateFMulFMF(V1: Src0, V2: Src1, FMFSource: II); |
2515 | |
2516 | break; |
2517 | } |
2518 | case Intrinsic::copysign: { |
2519 | Value *Mag = II->getArgOperand(i: 0), *Sign = II->getArgOperand(i: 1); |
2520 | if (std::optional<bool> KnownSignBit = computeKnownFPSignBit( |
2521 | V: Sign, /*Depth=*/0, SQ: getSimplifyQuery().getWithInstruction(I: II))) { |
2522 | if (*KnownSignBit) { |
2523 | // If we know that the sign argument is negative, reduce to FNABS: |
2524 | // copysign Mag, -Sign --> fneg (fabs Mag) |
2525 | Value *Fabs = Builder.CreateUnaryIntrinsic(ID: Intrinsic::fabs, V: Mag, FMFSource: II); |
2526 | return replaceInstUsesWith(I&: *II, V: Builder.CreateFNegFMF(V: Fabs, FMFSource: II)); |
2527 | } |
2528 | |
2529 | // If we know that the sign argument is positive, reduce to FABS: |
2530 | // copysign Mag, +Sign --> fabs Mag |
2531 | Value *Fabs = Builder.CreateUnaryIntrinsic(ID: Intrinsic::fabs, V: Mag, FMFSource: II); |
2532 | return replaceInstUsesWith(I&: *II, V: Fabs); |
2533 | } |
2534 | |
2535 | // Propagate sign argument through nested calls: |
2536 | // copysign Mag, (copysign ?, X) --> copysign Mag, X |
2537 | Value *X; |
2538 | if (match(V: Sign, P: m_Intrinsic<Intrinsic::copysign>(Op0: m_Value(), Op1: m_Value(V&: X)))) |
2539 | return replaceOperand(I&: *II, OpNum: 1, V: X); |
2540 | |
2541 | // Clear sign-bit of constant magnitude: |
2542 | // copysign -MagC, X --> copysign MagC, X |
2543 | // TODO: Support constant folding for fabs |
2544 | const APFloat *MagC; |
2545 | if (match(V: Mag, P: m_APFloat(Res&: MagC)) && MagC->isNegative()) { |
2546 | APFloat PosMagC = *MagC; |
2547 | PosMagC.clearSign(); |
2548 | return replaceOperand(I&: *II, OpNum: 0, V: ConstantFP::get(Ty: Mag->getType(), V: PosMagC)); |
2549 | } |
2550 | |
2551 | // Peek through changes of magnitude's sign-bit. This call rewrites those: |
2552 | // copysign (fabs X), Sign --> copysign X, Sign |
2553 | // copysign (fneg X), Sign --> copysign X, Sign |
2554 | if (match(V: Mag, P: m_FAbs(Op0: m_Value(V&: X))) || match(V: Mag, P: m_FNeg(X: m_Value(V&: X)))) |
2555 | return replaceOperand(I&: *II, OpNum: 0, V: X); |
2556 | |
2557 | break; |
2558 | } |
2559 | case Intrinsic::fabs: { |
2560 | Value *Cond, *TVal, *FVal; |
2561 | Value *Arg = II->getArgOperand(i: 0); |
2562 | Value *X; |
2563 | // fabs (-X) --> fabs (X) |
2564 | if (match(V: Arg, P: m_FNeg(X: m_Value(V&: X)))) { |
2565 | CallInst *Fabs = Builder.CreateUnaryIntrinsic(ID: Intrinsic::fabs, V: X, FMFSource: II); |
2566 | return replaceInstUsesWith(I&: CI, V: Fabs); |
2567 | } |
2568 | |
2569 | if (match(V: Arg, P: m_Select(C: m_Value(V&: Cond), L: m_Value(V&: TVal), R: m_Value(V&: FVal)))) { |
2570 | // fabs (select Cond, TrueC, FalseC) --> select Cond, AbsT, AbsF |
2571 | if (isa<Constant>(Val: TVal) || isa<Constant>(Val: FVal)) { |
2572 | CallInst *AbsT = Builder.CreateCall(Callee: II->getCalledFunction(), Args: {TVal}); |
2573 | CallInst *AbsF = Builder.CreateCall(Callee: II->getCalledFunction(), Args: {FVal}); |
2574 | SelectInst *SI = SelectInst::Create(C: Cond, S1: AbsT, S2: AbsF); |
2575 | FastMathFlags FMF1 = II->getFastMathFlags(); |
2576 | FastMathFlags FMF2 = cast<SelectInst>(Val: Arg)->getFastMathFlags(); |
2577 | FMF2.setNoSignedZeros(false); |
2578 | SI->setFastMathFlags(FMF1 | FMF2); |
2579 | return SI; |
2580 | } |
2581 | // fabs (select Cond, -FVal, FVal) --> fabs FVal |
2582 | if (match(V: TVal, P: m_FNeg(X: m_Specific(V: FVal)))) |
2583 | return replaceOperand(I&: *II, OpNum: 0, V: FVal); |
2584 | // fabs (select Cond, TVal, -TVal) --> fabs TVal |
2585 | if (match(V: FVal, P: m_FNeg(X: m_Specific(V: TVal)))) |
2586 | return replaceOperand(I&: *II, OpNum: 0, V: TVal); |
2587 | } |
2588 | |
2589 | Value *Magnitude, *Sign; |
2590 | if (match(V: II->getArgOperand(i: 0), |
2591 | P: m_CopySign(Op0: m_Value(V&: Magnitude), Op1: m_Value(V&: Sign)))) { |
2592 | // fabs (copysign x, y) -> (fabs x) |
2593 | CallInst *AbsSign = |
2594 | Builder.CreateCall(Callee: II->getCalledFunction(), Args: {Magnitude}); |
2595 | AbsSign->copyFastMathFlags(I: II); |
2596 | return replaceInstUsesWith(I&: *II, V: AbsSign); |
2597 | } |
2598 | |
2599 | [[fallthrough]]; |
2600 | } |
2601 | case Intrinsic::ceil: |
2602 | case Intrinsic::floor: |
2603 | case Intrinsic::round: |
2604 | case Intrinsic::roundeven: |
2605 | case Intrinsic::nearbyint: |
2606 | case Intrinsic::rint: |
2607 | case Intrinsic::trunc: { |
2608 | Value *ExtSrc; |
2609 | if (match(V: II->getArgOperand(i: 0), P: m_OneUse(SubPattern: m_FPExt(Op: m_Value(V&: ExtSrc))))) { |
2610 | // Narrow the call: intrinsic (fpext x) -> fpext (intrinsic x) |
2611 | Value *NarrowII = Builder.CreateUnaryIntrinsic(ID: IID, V: ExtSrc, FMFSource: II); |
2612 | return new FPExtInst(NarrowII, II->getType()); |
2613 | } |
2614 | break; |
2615 | } |
2616 | case Intrinsic::cos: |
2617 | case Intrinsic::amdgcn_cos: { |
2618 | Value *X, *Sign; |
2619 | Value *Src = II->getArgOperand(i: 0); |
2620 | if (match(V: Src, P: m_FNeg(X: m_Value(V&: X))) || match(V: Src, P: m_FAbs(Op0: m_Value(V&: X))) || |
2621 | match(V: Src, P: m_CopySign(Op0: m_Value(V&: X), Op1: m_Value(V&: Sign)))) { |
2622 | // cos(-x) --> cos(x) |
2623 | // cos(fabs(x)) --> cos(x) |
2624 | // cos(copysign(x, y)) --> cos(x) |
2625 | return replaceOperand(I&: *II, OpNum: 0, V: X); |
2626 | } |
2627 | break; |
2628 | } |
2629 | case Intrinsic::sin: |
2630 | case Intrinsic::amdgcn_sin: { |
2631 | Value *X; |
2632 | if (match(V: II->getArgOperand(i: 0), P: m_OneUse(SubPattern: m_FNeg(X: m_Value(V&: X))))) { |
2633 | // sin(-x) --> -sin(x) |
2634 | Value *NewSin = Builder.CreateUnaryIntrinsic(ID: IID, V: X, FMFSource: II); |
2635 | return UnaryOperator::CreateFNegFMF(Op: NewSin, FMFSource: II); |
2636 | } |
2637 | break; |
2638 | } |
2639 | case Intrinsic::ldexp: { |
2640 | // ldexp(ldexp(x, a), b) -> ldexp(x, a + b) |
2641 | // |
2642 | // The danger is that the first ldexp alone would overflow to infinity or |
2643 | // underflow to zero while the combined exponent avoids it. We accept this |
2644 | // risk when reassoc is set on both calls. |
2645 | // |
2646 | // It's also safe to fold if we know both exponents are >= 0 or <= 0 since |
2647 | // it would just double down on the overflow/underflow which would occur |
2648 | // anyway. |
2649 | // |
2650 | // TODO: Could do better if we had range tracking for the input value |
2651 | // exponent. Also could broaden sign check to cover == 0 case. |
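| // Illustrative IR (reassoc assumed on both calls): |
| //   %i = call reassoc float @llvm.ldexp.f32.i32(float %x, i32 %a) |
| //   %r = call reassoc float @llvm.ldexp.f32.i32(float %i, i32 %b) |
| //   --> |
| //   %e = add i32 %a, %b |
| //   %r = call reassoc float @llvm.ldexp.f32.i32(float %x, i32 %e) |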
2652 | Value *Src = II->getArgOperand(i: 0); |
2653 | Value *Exp = II->getArgOperand(i: 1); |
2654 | Value *InnerSrc; |
2655 | Value *InnerExp; |
2656 | if (match(V: Src, P: m_OneUse(SubPattern: m_Intrinsic<Intrinsic::ldexp>( |
2657 | Op0: m_Value(V&: InnerSrc), Op1: m_Value(V&: InnerExp)))) && |
2658 | Exp->getType() == InnerExp->getType()) { |
2659 | FastMathFlags FMF = II->getFastMathFlags(); |
2660 | FastMathFlags InnerFlags = cast<FPMathOperator>(Val: Src)->getFastMathFlags(); |
2661 | |
2662 | if ((FMF.allowReassoc() && InnerFlags.allowReassoc()) || |
2663 | signBitMustBeTheSame(Op0: Exp, Op1: InnerExp, SQ: SQ.getWithInstruction(I: II))) { |
2664 | // TODO: Add nsw/nuw probably safe if integer type exceeds exponent |
2665 | // width. |
2666 | Value *NewExp = Builder.CreateAdd(LHS: InnerExp, RHS: Exp); |
2667 | II->setArgOperand(i: 1, v: NewExp); |
2668 | II->setFastMathFlags(InnerFlags); // Or the inner flags. |
2669 | return replaceOperand(I&: *II, OpNum: 0, V: InnerSrc); |
2670 | } |
2671 | } |
2672 | |
2673 | // ldexp(x, zext(i1 y)) -> fmul x, (select y, 2.0, 1.0) |
2674 | // ldexp(x, sext(i1 y)) -> fmul x, (select y, 0.5, 1.0) |
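| // Rationale: a zext'd i1 exponent is 0 or 1, so the scale factor is |
| // 2^0 = 1.0 or 2^1 = 2.0; a sext'd i1 exponent is 0 or -1, giving |
| // 1.0 or 2^-1 = 0.5. |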
2675 | Value *ExtSrc; |
2676 | if (match(V: Exp, P: m_ZExt(Op: m_Value(V&: ExtSrc))) && |
2677 | ExtSrc->getType()->getScalarSizeInBits() == 1) { |
2678 | Value *Select = |
2679 | Builder.CreateSelect(C: ExtSrc, True: ConstantFP::get(Ty: II->getType(), V: 2.0), |
2680 | False: ConstantFP::get(Ty: II->getType(), V: 1.0)); |
2681 | return BinaryOperator::CreateFMulFMF(V1: Src, V2: Select, FMFSource: II); |
2682 | } |
2683 | if (match(V: Exp, P: m_SExt(Op: m_Value(V&: ExtSrc))) && |
2684 | ExtSrc->getType()->getScalarSizeInBits() == 1) { |
2685 | Value *Select = |
2686 | Builder.CreateSelect(C: ExtSrc, True: ConstantFP::get(Ty: II->getType(), V: 0.5), |
2687 | False: ConstantFP::get(Ty: II->getType(), V: 1.0)); |
2688 | return BinaryOperator::CreateFMulFMF(V1: Src, V2: Select, FMFSource: II); |
2689 | } |
2690 | |
2691 | // ldexp(x, c ? exp : 0) -> c ? ldexp(x, exp) : x |
2692 | // ldexp(x, c ? 0 : exp) -> c ? x : ldexp(x, exp) |
2693 | // |
2694 | // TODO: If we cared, should insert a canonicalize for x |
2695 | Value *SelectCond, *SelectLHS, *SelectRHS; |
2696 | if (match(V: II->getArgOperand(i: 1), |
2697 | P: m_OneUse(SubPattern: m_Select(C: m_Value(V&: SelectCond), L: m_Value(V&: SelectLHS), |
2698 | R: m_Value(V&: SelectRHS))))) { |
2699 | Value *NewLdexp = nullptr; |
2700 | Value *Select = nullptr; |
2701 | if (match(V: SelectRHS, P: m_ZeroInt())) { |
2702 | NewLdexp = Builder.CreateLdexp(Src, Exp: SelectLHS); |
2703 | Select = Builder.CreateSelect(C: SelectCond, True: NewLdexp, False: Src); |
2704 | } else if (match(V: SelectLHS, P: m_ZeroInt())) { |
2705 | NewLdexp = Builder.CreateLdexp(Src, Exp: SelectRHS); |
2706 | Select = Builder.CreateSelect(C: SelectCond, True: Src, False: NewLdexp); |
2707 | } |
2708 | |
2709 | if (NewLdexp) { |
2710 | Select->takeName(V: II); |
2711 | cast<Instruction>(Val: NewLdexp)->copyFastMathFlags(I: II); |
2712 | return replaceInstUsesWith(I&: *II, V: Select); |
2713 | } |
2714 | } |
2715 | |
2716 | break; |
2717 | } |
2718 | case Intrinsic::ptrauth_auth: |
2719 | case Intrinsic::ptrauth_resign: { |
2720 | // (sign|resign) + (auth|resign) can be folded by omitting the middle |
2721 | // sign+auth component if the key and discriminator match. |
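| // For example (illustrative): |
| //   auth(sign(p, k, d), k, d)            --> p |
| //   resign(sign(p, k, d), k, d, k2, d2)  --> sign(p, k2, d2) |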
2722 | bool NeedSign = II->getIntrinsicID() == Intrinsic::ptrauth_resign; |
2723 | Value *Ptr = II->getArgOperand(i: 0); |
2724 | Value *Key = II->getArgOperand(i: 1); |
2725 | Value *Disc = II->getArgOperand(i: 2); |
2726 | |
2727 | // AuthKey will be the key we need to end up authenticating against in |
2728 | // whatever we replace this sequence with. |
2729 | Value *AuthKey = nullptr, *AuthDisc = nullptr, *BasePtr; |
2730 | if (const auto *CI = dyn_cast<CallBase>(Val: Ptr)) { |
2731 | BasePtr = CI->getArgOperand(i: 0); |
2732 | if (CI->getIntrinsicID() == Intrinsic::ptrauth_sign) { |
2733 | if (CI->getArgOperand(i: 1) != Key || CI->getArgOperand(i: 2) != Disc) |
2734 | break; |
2735 | } else if (CI->getIntrinsicID() == Intrinsic::ptrauth_resign) { |
2736 | if (CI->getArgOperand(i: 3) != Key || CI->getArgOperand(i: 4) != Disc) |
2737 | break; |
2738 | AuthKey = CI->getArgOperand(i: 1); |
2739 | AuthDisc = CI->getArgOperand(i: 2); |
2740 | } else |
2741 | break; |
2742 | } else if (const auto *PtrToInt = dyn_cast<PtrToIntOperator>(Val: Ptr)) { |
2743 | // ptrauth constants are equivalent to a call to @llvm.ptrauth.sign for |
2744 | // our purposes, so check for that too. |
2745 | const auto *CPA = dyn_cast<ConstantPtrAuth>(Val: PtrToInt->getOperand(i_nocapture: 0)); |
2746 | if (!CPA || !CPA->isKnownCompatibleWith(Key, Discriminator: Disc, DL)) |
2747 | break; |
2748 | |
2749 | // resign(ptrauth(p,ks,ds),ks,ds,kr,dr) -> ptrauth(p,kr,dr) |
2750 | if (NeedSign && isa<ConstantInt>(Val: II->getArgOperand(i: 4))) { |
2751 | auto *SignKey = cast<ConstantInt>(Val: II->getArgOperand(i: 3)); |
2752 | auto *SignDisc = cast<ConstantInt>(Val: II->getArgOperand(i: 4)); |
2753 | auto *SignAddrDisc = ConstantPointerNull::get(T: Builder.getPtrTy()); |
2754 | auto *NewCPA = ConstantPtrAuth::get(Ptr: CPA->getPointer(), Key: SignKey, |
2755 | Disc: SignDisc, AddrDisc: SignAddrDisc); |
2756 | replaceInstUsesWith( |
2757 | I&: *II, V: ConstantExpr::getPointerCast(C: NewCPA, Ty: II->getType())); |
2758 | return eraseInstFromFunction(I&: *II); |
2759 | } |
2760 | |
2761 | // auth(ptrauth(p,k,d),k,d) -> p |
2762 | BasePtr = Builder.CreatePtrToInt(V: CPA->getPointer(), DestTy: II->getType()); |
2763 | } else |
2764 | break; |
2765 | |
2766 | unsigned NewIntrin; |
2767 | if (AuthKey && NeedSign) { |
2768 | // resign(0,1) + resign(1,2) = resign(0, 2) |
2769 | NewIntrin = Intrinsic::ptrauth_resign; |
2770 | } else if (AuthKey) { |
2771 | // resign(0,1) + auth(1) = auth(0) |
2772 | NewIntrin = Intrinsic::ptrauth_auth; |
2773 | } else if (NeedSign) { |
2774 | // sign(0) + resign(0, 1) = sign(1) |
2775 | NewIntrin = Intrinsic::ptrauth_sign; |
2776 | } else { |
2777 | // sign(0) + auth(0) = nop |
2778 | replaceInstUsesWith(I&: *II, V: BasePtr); |
2779 | return eraseInstFromFunction(I&: *II); |
2780 | } |
2781 | |
2782 | SmallVector<Value *, 4> CallArgs; |
2783 | CallArgs.push_back(Elt: BasePtr); |
2784 | if (AuthKey) { |
2785 | CallArgs.push_back(Elt: AuthKey); |
2786 | CallArgs.push_back(Elt: AuthDisc); |
2787 | } |
2788 | |
2789 | if (NeedSign) { |
2790 | CallArgs.push_back(Elt: II->getArgOperand(i: 3)); |
2791 | CallArgs.push_back(Elt: II->getArgOperand(i: 4)); |
2792 | } |
2793 | |
2794 | Function *NewFn = Intrinsic::getDeclaration(M: II->getModule(), id: NewIntrin); |
2795 | return CallInst::Create(Func: NewFn, Args: CallArgs); |
2796 | } |
2797 | case Intrinsic::arm_neon_vtbl1: |
2798 | case Intrinsic::aarch64_neon_tbl1: |
2799 | if (Value *V = simplifyNeonTbl1(II: *II, Builder)) |
2800 | return replaceInstUsesWith(I&: *II, V); |
2801 | break; |
2802 | |
2803 | case Intrinsic::arm_neon_vmulls: |
2804 | case Intrinsic::arm_neon_vmullu: |
2805 | case Intrinsic::aarch64_neon_smull: |
2806 | case Intrinsic::aarch64_neon_umull: { |
2807 | Value *Arg0 = II->getArgOperand(i: 0); |
2808 | Value *Arg1 = II->getArgOperand(i: 1); |
2809 | |
2810 | // Handle mul by zero first: |
2811 | if (isa<ConstantAggregateZero>(Val: Arg0) || isa<ConstantAggregateZero>(Val: Arg1)) { |
2812 | return replaceInstUsesWith(I&: CI, V: ConstantAggregateZero::get(Ty: II->getType())); |
2813 | } |
2814 | |
2815 | // Check for constant LHS & RHS - in this case we just simplify. |
2816 | bool Zext = (IID == Intrinsic::arm_neon_vmullu || |
2817 | IID == Intrinsic::aarch64_neon_umull); |
2818 | VectorType *NewVT = cast<VectorType>(Val: II->getType()); |
2819 | if (Constant *CV0 = dyn_cast<Constant>(Val: Arg0)) { |
2820 | if (Constant *CV1 = dyn_cast<Constant>(Val: Arg1)) { |
2821 | Value *V0 = Builder.CreateIntCast(V: CV0, DestTy: NewVT, /*isSigned=*/!Zext); |
2822 | Value *V1 = Builder.CreateIntCast(V: CV1, DestTy: NewVT, /*isSigned=*/!Zext); |
2823 | return replaceInstUsesWith(I&: CI, V: Builder.CreateMul(LHS: V0, RHS: V1)); |
2824 | } |
2825 | |
2826 | // Couldn't simplify - canonicalize constant to the RHS. |
2827 | std::swap(a&: Arg0, b&: Arg1); |
2828 | } |
2829 | |
2830 | // Handle mul by one: |
2831 | if (Constant *CV1 = dyn_cast<Constant>(Val: Arg1)) |
2832 | if (ConstantInt *Splat = |
2833 | dyn_cast_or_null<ConstantInt>(Val: CV1->getSplatValue())) |
2834 | if (Splat->isOne()) |
2835 | return CastInst::CreateIntegerCast(S: Arg0, Ty: II->getType(), |
2836 | /*isSigned=*/!Zext); |
2837 | |
2838 | break; |
2839 | } |
2840 | case Intrinsic::arm_neon_aesd: |
2841 | case Intrinsic::arm_neon_aese: |
2842 | case Intrinsic::aarch64_crypto_aesd: |
2843 | case Intrinsic::aarch64_crypto_aese: { |
2844 | Value *DataArg = II->getArgOperand(i: 0); |
2845 | Value *KeyArg = II->getArgOperand(i: 1); |
2846 | |
2847 | // Try to use the builtin XOR in AESE and AESD to eliminate a prior XOR |
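| // Rationale: AESE/AESD start with AddRoundKey, an XOR of data and key, so |
| // aese(xor(x, k), 0) computes the same result as aese(x, k). |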
2848 | Value *Data, *Key; |
2849 | if (match(V: KeyArg, P: m_ZeroInt()) && |
2850 | match(V: DataArg, P: m_Xor(L: m_Value(V&: Data), R: m_Value(V&: Key)))) { |
2851 | replaceOperand(I&: *II, OpNum: 0, V: Data); |
2852 | replaceOperand(I&: *II, OpNum: 1, V: Key); |
2853 | return II; |
2854 | } |
2855 | break; |
2856 | } |
2857 | case Intrinsic::hexagon_V6_vandvrt: |
2858 | case Intrinsic::hexagon_V6_vandvrt_128B: { |
2859 | // Simplify Q -> V -> Q conversion. |
2860 | if (auto Op0 = dyn_cast<IntrinsicInst>(Val: II->getArgOperand(i: 0))) { |
2861 | Intrinsic::ID ID0 = Op0->getIntrinsicID(); |
2862 | if (ID0 != Intrinsic::hexagon_V6_vandqrt && |
2863 | ID0 != Intrinsic::hexagon_V6_vandqrt_128B) |
2864 | break; |
2865 | Value *Bytes = Op0->getArgOperand(i: 1), *Mask = II->getArgOperand(i: 1); |
2866 | uint64_t Bytes1 = computeKnownBits(V: Bytes, Depth: 0, CxtI: Op0).One.getZExtValue(); |
2867 | uint64_t Mask1 = computeKnownBits(V: Mask, Depth: 0, CxtI: II).One.getZExtValue(); |
2868 | // Check if every byte has common bits in Bytes and Mask. |
2869 | uint64_t C = Bytes1 & Mask1; |
2870 | if ((C & 0xFF) && (C & 0xFF00) && (C & 0xFF0000) && (C & 0xFF000000)) |
2871 | return replaceInstUsesWith(I&: *II, V: Op0->getArgOperand(i: 0)); |
2872 | } |
2873 | break; |
2874 | } |
2875 | case Intrinsic::stackrestore: { |
2876 | enum class ClassifyResult { |
2877 | None, |
2878 | Alloca, |
2879 | StackRestore, |
2880 | CallWithSideEffects, |
2881 | }; |
2882 | auto Classify = [](const Instruction *I) { |
2883 | if (isa<AllocaInst>(Val: I)) |
2884 | return ClassifyResult::Alloca; |
2885 | |
2886 | if (auto *CI = dyn_cast<CallInst>(Val: I)) { |
2887 | if (auto *II = dyn_cast<IntrinsicInst>(Val: CI)) { |
2888 | if (II->getIntrinsicID() == Intrinsic::stackrestore) |
2889 | return ClassifyResult::StackRestore; |
2890 | |
2891 | if (II->mayHaveSideEffects()) |
2892 | return ClassifyResult::CallWithSideEffects; |
2893 | } else { |
2894 | // Treat all non-intrinsic calls as having side effects |
2895 | return ClassifyResult::CallWithSideEffects; |
2896 | } |
2897 | } |
2898 | |
2899 | return ClassifyResult::None; |
2900 | }; |
2901 | |
2902 | // If the stacksave and the stackrestore are in the same BB, and there is |
2903 | // no intervening call, alloca, or stackrestore of a different stacksave, |
2904 | // remove the restore. This can happen when variable allocas are DCE'd. |
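| // Illustrative IR (assuming nothing interesting in between): |
| //   %sp = call ptr @llvm.stacksave() |
| //   ... no allocas, calls, or other restores ... |
| //   call void @llvm.stackrestore(ptr %sp)   ; removable |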
2905 | if (IntrinsicInst *SS = dyn_cast<IntrinsicInst>(Val: II->getArgOperand(i: 0))) { |
2906 | if (SS->getIntrinsicID() == Intrinsic::stacksave && |
2907 | SS->getParent() == II->getParent()) { |
2908 | BasicBlock::iterator BI(SS); |
2909 | bool CannotRemove = false; |
2910 | for (++BI; &*BI != II; ++BI) { |
2911 | switch (Classify(&*BI)) { |
2912 | case ClassifyResult::None: |
2913 | // So far so good, look at next instructions. |
2914 | break; |
2915 | |
2916 | case ClassifyResult::StackRestore: |
2917 | // If we found an intervening stackrestore for a different |
2918 | // stacksave, we can't remove the stackrestore. Otherwise, continue. |
2919 | if (cast<IntrinsicInst>(Val&: *BI).getArgOperand(i: 0) != SS) |
2920 | CannotRemove = true; |
2921 | break; |
2922 | |
2923 | case ClassifyResult::Alloca: |
2924 | case ClassifyResult::CallWithSideEffects: |
2925 | // If we found an alloca, a non-intrinsic call, or an intrinsic |
2926 | // call with side effects, we can't remove the stackrestore. |
2927 | CannotRemove = true; |
2928 | break; |
2929 | } |
2930 | if (CannotRemove) |
2931 | break; |
2932 | } |
2933 | |
2934 | if (!CannotRemove) |
2935 | return eraseInstFromFunction(I&: CI); |
2936 | } |
2937 | } |
2938 | |
2939 | // Scan down this block to see if there is another stack restore in the |
2940 | // same block without an intervening call/alloca. |
2941 | BasicBlock::iterator BI(II); |
2942 | Instruction *TI = II->getParent()->getTerminator(); |
2943 | bool CannotRemove = false; |
2944 | for (++BI; &*BI != TI; ++BI) { |
2945 | switch (Classify(&*BI)) { |
2946 | case ClassifyResult::None: |
2947 | // So far so good, look at next instructions. |
2948 | break; |
2949 | |
2950 | case ClassifyResult::StackRestore: |
2951 | // If there is a stackrestore below this one, remove this one. |
2952 | return eraseInstFromFunction(I&: CI); |
2953 | |
2954 | case ClassifyResult::Alloca: |
2955 | case ClassifyResult::CallWithSideEffects: |
2956 | // If we found an alloca, a non-intrinsic call, or an intrinsic call |
2957 | // with side effects (such as llvm.stacksave and llvm.read_register), |
2958 | // we can't remove the stack restore. |
2959 | CannotRemove = true; |
2960 | break; |
2961 | } |
2962 | if (CannotRemove) |
2963 | break; |
2964 | } |
2965 | |
2966 | // If the stack restore is in a return, resume, or unwind block and if there |
2967 | // are no allocas or calls between the restore and the return, nuke the |
2968 | // restore. |
2969 | if (!CannotRemove && (isa<ReturnInst>(Val: TI) || isa<ResumeInst>(Val: TI))) |
2970 | return eraseInstFromFunction(I&: CI); |
2971 | break; |
2972 | } |
2973 | case Intrinsic::lifetime_end: |
2974 | // The sanitizers need to poison memory to detect invalid accesses, which is |
2975 | // possible even for an empty lifetime range. |
2976 | if (II->getFunction()->hasFnAttribute(Kind: Attribute::SanitizeAddress) || |
2977 | II->getFunction()->hasFnAttribute(Kind: Attribute::SanitizeMemory) || |
2978 | II->getFunction()->hasFnAttribute(Kind: Attribute::SanitizeHWAddress)) |
2979 | break; |
2980 | |
2981 | if (removeTriviallyEmptyRange(EndI&: *II, IC&: *this, IsStart: [](const IntrinsicInst &I) { |
2982 | return I.getIntrinsicID() == Intrinsic::lifetime_start; |
2983 | })) |
2984 | return nullptr; |
2985 | break; |
2986 | case Intrinsic::assume: { |
2987 | Value *IIOperand = II->getArgOperand(i: 0); |
2988 | SmallVector<OperandBundleDef, 4> OpBundles; |
2989 | II->getOperandBundlesAsDefs(Defs&: OpBundles); |
2990 | |
2991 | /// This will remove the boolean Condition from the assume given as |
2992 | /// argument and remove the assume if it becomes useless. |
2993 | /// Always returns nullptr for use as a return value. |
2994 | auto RemoveConditionFromAssume = [&](Instruction *Assume) -> Instruction * { |
2995 | assert(isa<AssumeInst>(Assume)); |
2996 | if (isAssumeWithEmptyBundle(Assume: *cast<AssumeInst>(Val: II))) |
2997 | return eraseInstFromFunction(I&: CI); |
2998 | replaceUse(U&: II->getOperandUse(i: 0), NewValue: ConstantInt::getTrue(Context&: II->getContext())); |
2999 | return nullptr; |
3000 | }; |
3001 | // Remove an assume if it is followed by an identical assume. |
3002 | // TODO: Do we need this? Unless there are conflicting assumptions, the |
3003 | // computeKnownBits(IIOperand) below here eliminates redundant assumes. |
3004 | Instruction *Next = II->getNextNonDebugInstruction(); |
3005 | if (match(V: Next, P: m_Intrinsic<Intrinsic::assume>(Op0: m_Specific(V: IIOperand)))) |
3006 | return RemoveConditionFromAssume(Next); |
3007 | |
3008 | // Canonicalize assume(a && b) -> assume(a); assume(b); |
3009 | // Note: New assumption intrinsics created here are registered by |
3010 | // the InstCombineIRInserter object. |
3011 | FunctionType *AssumeIntrinsicTy = II->getFunctionType(); |
3012 | Value *AssumeIntrinsic = II->getCalledOperand(); |
3013 | Value *A, *B; |
3014 | if (match(V: IIOperand, P: m_LogicalAnd(L: m_Value(V&: A), R: m_Value(V&: B)))) { |
3015 | Builder.CreateCall(FTy: AssumeIntrinsicTy, Callee: AssumeIntrinsic, Args: A, OpBundles, |
3016 | Name: II->getName()); |
3017 | Builder.CreateCall(FTy: AssumeIntrinsicTy, Callee: AssumeIntrinsic, Args: B, Name: II->getName()); |
3018 | return eraseInstFromFunction(I&: *II); |
3019 | } |
3020 | // assume(!(a || b)) -> assume(!a); assume(!b); |
3021 | if (match(V: IIOperand, P: m_Not(V: m_LogicalOr(L: m_Value(V&: A), R: m_Value(V&: B))))) { |
3022 | Builder.CreateCall(FTy: AssumeIntrinsicTy, Callee: AssumeIntrinsic, |
3023 | Args: Builder.CreateNot(V: A), OpBundles, Name: II->getName()); |
3024 | Builder.CreateCall(FTy: AssumeIntrinsicTy, Callee: AssumeIntrinsic, |
3025 | Args: Builder.CreateNot(V: B), Name: II->getName()); |
3026 | return eraseInstFromFunction(I&: *II); |
3027 | } |
3028 | |
3029 | // assume( (load addr) != null ) -> add 'nonnull' metadata to load |
3030 | // (if assume is valid at the load) |
3031 | CmpInst::Predicate Pred; |
3032 | Instruction *LHS; |
3033 | if (match(V: IIOperand, P: m_ICmp(Pred, L: m_Instruction(I&: LHS), R: m_Zero())) && |
3034 | Pred == ICmpInst::ICMP_NE && LHS->getOpcode() == Instruction::Load && |
3035 | LHS->getType()->isPointerTy() && |
3036 | isValidAssumeForContext(I: II, CxtI: LHS, DT: &DT)) { |
3037 | MDNode *MD = MDNode::get(Context&: II->getContext(), MDs: std::nullopt); |
3038 | LHS->setMetadata(KindID: LLVMContext::MD_nonnull, Node: MD); |
3039 | LHS->setMetadata(KindID: LLVMContext::MD_noundef, Node: MD); |
3040 | return RemoveConditionFromAssume(II); |
3041 | |
3042 | // TODO: apply nonnull return attributes to calls and invokes |
3043 | // TODO: apply range metadata for range check patterns? |
3044 | } |
3045 | |
3046 | // Separate storage assumptions apply to the underlying allocations, not any |
3047 | // particular pointer within them. When evaluating the hints for AA purposes, |
3048 | // we call getUnderlyingObject on them; by precomputing the answers here we |
3049 | // can avoid having to do so repeatedly there. |
3050 | for (unsigned Idx = 0; Idx < II->getNumOperandBundles(); Idx++) { |
3051 | OperandBundleUse OBU = II->getOperandBundleAt(Index: Idx); |
3052 | if (OBU.getTagName() == "separate_storage" ) { |
3053 | assert(OBU.Inputs.size() == 2); |
3054 | auto MaybeSimplifyHint = [&](const Use &U) { |
3055 | Value *Hint = U.get(); |
3056 | // Not having a limit is safe because InstCombine removes unreachable |
3057 | // code. |
3058 | Value *UnderlyingObject = getUnderlyingObject(V: Hint, /*MaxLookup*/ 0); |
3059 | if (Hint != UnderlyingObject) |
3060 | replaceUse(U&: const_cast<Use &>(U), NewValue: UnderlyingObject); |
3061 | }; |
3062 | MaybeSimplifyHint(OBU.Inputs[0]); |
3063 | MaybeSimplifyHint(OBU.Inputs[1]); |
3064 | } |
3065 | } |
3066 | |
3067 | // Convert nonnull assume like: |
3068 | // %A = icmp ne i32* %PTR, null |
3069 | // call void @llvm.assume(i1 %A) |
3070 | // into |
3071 | // call void @llvm.assume(i1 true) [ "nonnull"(i32* %PTR) ] |
3072 | if (EnableKnowledgeRetention && |
3073 | match(V: IIOperand, P: m_Cmp(Pred, L: m_Value(V&: A), R: m_Zero())) && |
3074 | Pred == CmpInst::ICMP_NE && A->getType()->isPointerTy()) { |
3075 | if (auto *Replacement = buildAssumeFromKnowledge( |
3076 | Knowledge: {RetainedKnowledge{.AttrKind: Attribute::NonNull, .ArgValue: 0, .WasOn: A}}, CtxI: Next, AC: &AC, DT: &DT)) { |
3077 | |
3078 | Replacement->insertBefore(InsertPos: Next); |
3079 | AC.registerAssumption(CI: Replacement); |
3080 | return RemoveConditionFromAssume(II); |
3081 | } |
3082 | } |
3083 | |
3084 | // Convert alignment assume like: |
3085 | // %B = ptrtoint i32* %A to i64 |
3086 | // %C = and i64 %B, Constant |
3087 | // %D = icmp eq i64 %C, 0 |
3088 | // call void @llvm.assume(i1 %D) |
3089 | // into |
3090 | // call void @llvm.assume(i1 true) [ "align"(i32* [[A]], i64 Constant + 1)] |
3091 | uint64_t AlignMask; |
3092 | if (EnableKnowledgeRetention && |
3093 | match(V: IIOperand, |
3094 | P: m_Cmp(Pred, L: m_And(L: m_Value(V&: A), R: m_ConstantInt(V&: AlignMask)), |
3095 | R: m_Zero())) && |
3096 | Pred == CmpInst::ICMP_EQ) { |
3097 | if (isPowerOf2_64(Value: AlignMask + 1)) { |
3098 | uint64_t Offset = 0; |
3099 | match(V: A, P: m_Add(L: m_Value(V&: A), R: m_ConstantInt(V&: Offset))); |
3100 | if (match(V: A, P: m_PtrToInt(Op: m_Value(V&: A)))) { |
3101 | /// Note: this doesn't preserve the offset information but merges |
3102 | /// offset and alignment. |
3103 | /// TODO: we can generate a GEP instead of merging the alignment with |
3104 | /// the offset. |
3105 | RetainedKnowledge RK{.AttrKind: Attribute::Alignment, |
3106 | .ArgValue: (unsigned)MinAlign(A: Offset, B: AlignMask + 1), .WasOn: A}; |
3107 | if (auto *Replacement = |
3108 | buildAssumeFromKnowledge(Knowledge: RK, CtxI: Next, AC: &AC, DT: &DT)) { |
3109 | |
3110 | Replacement->insertAfter(InsertPos: II); |
3111 | AC.registerAssumption(CI: Replacement); |
3112 | } |
3113 | return RemoveConditionFromAssume(II); |
3114 | } |
3115 | } |
3116 | } |
3117 | |
3118 | /// Canonicalize Knowledge in operand bundles. |
3119 | if (EnableKnowledgeRetention && II->hasOperandBundles()) { |
3120 | for (unsigned Idx = 0; Idx < II->getNumOperandBundles(); Idx++) { |
3121 | auto &BOI = II->bundle_op_info_begin()[Idx]; |
3122 | RetainedKnowledge RK = |
3123 | llvm::getKnowledgeFromBundle(Assume&: cast<AssumeInst>(Val&: *II), BOI); |
3124 | if (BOI.End - BOI.Begin > 2) |
3125 | continue; // Prevent reducing knowledge in an align with offset since |
3126 | // extracting a RetainedKnowledge from it loses the offset |
3127 | // information. |
3128 | RetainedKnowledge CanonRK = |
3129 | llvm::simplifyRetainedKnowledge(Assume: cast<AssumeInst>(Val: II), RK, |
3130 | AC: &getAssumptionCache(), |
3131 | DT: &getDominatorTree()); |
3132 | if (CanonRK == RK) |
3133 | continue; |
3134 | if (!CanonRK) { |
3135 | if (BOI.End - BOI.Begin > 0) { |
3136 | Worklist.pushValue(V: II->op_begin()[BOI.Begin]); |
3137 | Value::dropDroppableUse(U&: II->op_begin()[BOI.Begin]); |
3138 | } |
3139 | continue; |
3140 | } |
3141 | assert(RK.AttrKind == CanonRK.AttrKind); |
3142 | if (BOI.End - BOI.Begin > 0) |
3143 | II->op_begin()[BOI.Begin].set(CanonRK.WasOn); |
3144 | if (BOI.End - BOI.Begin > 1) |
3145 | II->op_begin()[BOI.Begin + 1].set(ConstantInt::get( |
3146 | Ty: Type::getInt64Ty(C&: II->getContext()), V: CanonRK.ArgValue)); |
3147 | if (RK.WasOn) |
3148 | Worklist.pushValue(V: RK.WasOn); |
3149 | return II; |
3150 | } |
3151 | } |
3152 | |
3153 | // If there is a dominating assume with the same condition as this one, |
3154 | // then this one is redundant, and should be removed. |
3155 | KnownBits Known(1); |
3156 | computeKnownBits(V: IIOperand, Known, Depth: 0, CxtI: II); |
3157 | if (Known.isAllOnes() && isAssumeWithEmptyBundle(Assume: cast<AssumeInst>(Val&: *II))) |
3158 | return eraseInstFromFunction(I&: *II); |
3159 | |
3160 | // assume(false) is unreachable. |
3161 | if (match(V: IIOperand, P: m_CombineOr(L: m_Zero(), R: m_Undef()))) { |
3162 | CreateNonTerminatorUnreachable(InsertAt: II); |
3163 | return eraseInstFromFunction(I&: *II); |
3164 | } |
3165 | |
3166 | // Update the cache of affected values for this assumption (we might be |
3167 | // here because we just simplified the condition). |
3168 | AC.updateAffectedValues(CI: cast<AssumeInst>(Val: II)); |
3169 | break; |
3170 | } |
3171 | case Intrinsic::experimental_guard: { |
3172 | // Is this guard followed by another guard? We scan forward over a small |
3173 | // fixed window of instructions to handle common cases with conditions |
3174 | // computed between guards. |
3175 | Instruction *NextInst = II->getNextNonDebugInstruction(); |
3176 | for (unsigned i = 0; i < GuardWideningWindow; i++) { |
3177 | // Note: Using context-free form to avoid compile time blow up |
3178 | if (!isSafeToSpeculativelyExecute(I: NextInst)) |
3179 | break; |
3180 | NextInst = NextInst->getNextNonDebugInstruction(); |
3181 | } |
3182 | Value *NextCond = nullptr; |
3183 | if (match(V: NextInst, |
3184 | P: m_Intrinsic<Intrinsic::experimental_guard>(Op0: m_Value(V&: NextCond)))) { |
3185 | Value *CurrCond = II->getArgOperand(i: 0); |
3186 | |
3187 | // Remove a guard that is immediately preceded by an identical guard. |
3188 | // Otherwise canonicalize guard(a); guard(b) -> guard(a & b). |
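| // The instructions between the two guards compute NextCond; hoist them |
| // above the first guard so the merged condition is defined there. |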
3189 | if (CurrCond != NextCond) { |
3190 | Instruction *MoveI = II->getNextNonDebugInstruction(); |
3191 | while (MoveI != NextInst) { |
3192 | auto *Temp = MoveI; |
3193 | MoveI = MoveI->getNextNonDebugInstruction(); |
3194 | Temp->moveBefore(MovePos: II); |
3195 | } |
3196 | replaceOperand(I&: *II, OpNum: 0, V: Builder.CreateAnd(LHS: CurrCond, RHS: NextCond)); |
3197 | } |
3198 | eraseInstFromFunction(I&: *NextInst); |
3199 | return II; |
3200 | } |
3201 | break; |
3202 | } |
3203 | case Intrinsic::vector_insert: { |
3204 | Value *Vec = II->getArgOperand(i: 0); |
3205 | Value *SubVec = II->getArgOperand(i: 1); |
3206 | Value *Idx = II->getArgOperand(i: 2); |
3207 | auto *DstTy = dyn_cast<FixedVectorType>(Val: II->getType()); |
3208 | auto *VecTy = dyn_cast<FixedVectorType>(Val: Vec->getType()); |
3209 | auto *SubVecTy = dyn_cast<FixedVectorType>(Val: SubVec->getType()); |
3210 | |
3211 | // Only canonicalize if the destination vector, Vec, and SubVec are all |
3212 | // fixed vectors. |
3213 | if (DstTy && VecTy && SubVecTy) { |
3214 | unsigned DstNumElts = DstTy->getNumElements(); |
3215 | unsigned VecNumElts = VecTy->getNumElements(); |
3216 | unsigned SubVecNumElts = SubVecTy->getNumElements(); |
3217 | unsigned IdxN = cast<ConstantInt>(Val: Idx)->getZExtValue(); |
3218 | |
3219 | // An insert that entirely overwrites Vec with SubVec is a nop. |
3220 | if (VecNumElts == SubVecNumElts) |
3221 | return replaceInstUsesWith(I&: CI, V: SubVec); |
3222 | |
3223 | // Widen SubVec into a vector of the same width as Vec, since |
3224 | // shufflevector requires the two input vectors to be the same width. |
3225 | // Elements beyond the bounds of SubVec within the widened vector are |
3226 | // undefined. |
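| // Worked example (hypothetical sizes): Vec = <4 x i32>, SubVec = <2 x i32>, |
| // Idx = 2: WidenMask = <0, 1, poison, poison> widens SubVec to <4 x i32>, |
| // and Mask = <0, 1, 4, 5> keeps elements 0-1 of Vec and places the widened |
| // SubVec's elements (second-operand indices 4-5) into positions 2-3. |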
3227 | SmallVector<int, 8> WidenMask; |
3228 | unsigned i; |
3229 | for (i = 0; i != SubVecNumElts; ++i) |
3230 | WidenMask.push_back(Elt: i); |
3231 | for (; i != VecNumElts; ++i) |
3232 | WidenMask.push_back(Elt: PoisonMaskElem); |
3233 | |
3234 | Value *WidenShuffle = Builder.CreateShuffleVector(V: SubVec, Mask: WidenMask); |
3235 | |
3236 | SmallVector<int, 8> Mask; |
3237 | for (unsigned i = 0; i != IdxN; ++i) |
3238 | Mask.push_back(Elt: i); |
3239 | for (unsigned i = DstNumElts; i != DstNumElts + SubVecNumElts; ++i) |
3240 | Mask.push_back(Elt: i); |
3241 | for (unsigned i = IdxN + SubVecNumElts; i != DstNumElts; ++i) |
3242 | Mask.push_back(Elt: i); |
3243 | |
3244 | Value *Shuffle = Builder.CreateShuffleVector(V1: Vec, V2: WidenShuffle, Mask); |
3245 | return replaceInstUsesWith(I&: CI, V: Shuffle); |
3246 | } |
3247 | break; |
3248 | } |
3249 | case Intrinsic::vector_extract: { |
3250 | Value *Vec = II->getArgOperand(i: 0); |
3251 | Value *Idx = II->getArgOperand(i: 1); |
3252 | |
3253 | Type *ReturnType = II->getType(); |
3254 | // (extract_vector (insert_vector InsertTuple, InsertValue, InsertIdx), |
3255 | // ExtractIdx) |
3256 | unsigned ExtractIdx = cast<ConstantInt>(Val: Idx)->getZExtValue(); |
3257 | Value *InsertTuple, *InsertIdx, *InsertValue; |
3258 | if (match(V: Vec, P: m_Intrinsic<Intrinsic::vector_insert>(Op0: m_Value(V&: InsertTuple), |
3259 | Op1: m_Value(V&: InsertValue), |
3260 | Op2: m_Value(V&: InsertIdx))) && |
3261 | InsertValue->getType() == ReturnType) { |
3262 | unsigned Index = cast<ConstantInt>(Val: InsertIdx)->getZExtValue(); |
3263 | // Case where we get the same index right after setting it. |
3264 | // extract.vector(insert.vector(InsertTuple, InsertValue, Idx), Idx) --> |
3265 | // InsertValue |
3266 | if (ExtractIdx == Index) |
3267 | return replaceInstUsesWith(I&: CI, V: InsertValue); |
3268 | // If we are getting a different index than what was set in the |
3269 | // insert.vector intrinsic, we can just set the input tuple to the one up |
3270 | // in the chain: |
3271 | //   extract.vector(insert.vector(InsertTuple, InsertValue, InsertIndex), ExtractIndex) |
3272 | //   --> extract.vector(InsertTuple, ExtractIndex) |
3273 | else |
3274 | return replaceOperand(I&: CI, OpNum: 0, V: InsertTuple); |
3275 | } |
3276 | |
3277 | auto *DstTy = dyn_cast<VectorType>(Val: ReturnType); |
3278 | auto *VecTy = dyn_cast<VectorType>(Val: Vec->getType()); |
3279 | |
3280 | if (DstTy && VecTy) { |
3281 | auto DstEltCnt = DstTy->getElementCount(); |
3282 | auto VecEltCnt = VecTy->getElementCount(); |
3283 | unsigned IdxN = cast<ConstantInt>(Val: Idx)->getZExtValue(); |
3284 | |
3285 | // Extracting the entirety of Vec is a nop. |
3286 | if (DstEltCnt == VecEltCnt) { |
3287 | replaceInstUsesWith(I&: CI, V: Vec); |
3288 | return eraseInstFromFunction(I&: CI); |
3289 | } |
3290 | |
3291 | // Only canonicalize to shufflevector if the destination vector and |
3292 | // Vec are fixed vectors. |
3293 | if (VecEltCnt.isScalable() || DstEltCnt.isScalable()) |
3294 | break; |
3295 | |
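| // e.g. (hypothetical sizes): extracting <2 x i32> at index 2 from a |
| // <4 x i32> source yields a shufflevector with mask <2, 3>. |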
3296 | SmallVector<int, 8> Mask; |
3297 | for (unsigned i = 0; i != DstEltCnt.getKnownMinValue(); ++i) |
3298 | Mask.push_back(Elt: IdxN + i); |
3299 | |
3300 | Value *Shuffle = Builder.CreateShuffleVector(V: Vec, Mask); |
3301 | return replaceInstUsesWith(I&: CI, V: Shuffle); |
3302 | } |
3303 | break; |
3304 | } |
3305 | case Intrinsic::vector_reverse: { |
3306 | Value *BO0, *BO1, *X, *Y; |
3307 | Value *Vec = II->getArgOperand(i: 0); |
3308 | if (match(V: Vec, P: m_OneUse(SubPattern: m_BinOp(L: m_Value(V&: BO0), R: m_Value(V&: BO1))))) { |
3309 | auto *OldBinOp = cast<BinaryOperator>(Val: Vec); |
3310 | if (match(V: BO0, P: m_VecReverse(Op0: m_Value(V&: X)))) { |
3311 | // rev(binop rev(X), rev(Y)) --> binop X, Y |
3312 | if (match(V: BO1, P: m_VecReverse(Op0: m_Value(V&: Y)))) |
3313 | return replaceInstUsesWith(I&: CI, V: BinaryOperator::CreateWithCopiedFlags( |
3314 | Opc: OldBinOp->getOpcode(), V1: X, V2: Y, |
3315 | CopyO: OldBinOp, Name: OldBinOp->getName(), |
3316 | InsertBefore: II->getIterator())); |
3317 | // rev(binop rev(X), BO1Splat) --> binop X, BO1Splat |
3318 | if (isSplatValue(V: BO1)) |
3319 | return replaceInstUsesWith(I&: CI, V: BinaryOperator::CreateWithCopiedFlags( |
3320 | Opc: OldBinOp->getOpcode(), V1: X, V2: BO1, |
3321 | CopyO: OldBinOp, Name: OldBinOp->getName(), |
3322 | InsertBefore: II->getIterator())); |
3323 | } |
3324 | // rev(binop BO0Splat, rev(Y)) --> binop BO0Splat, Y |
3325 | if (match(V: BO1, P: m_VecReverse(Op0: m_Value(V&: Y))) && isSplatValue(V: BO0)) |
3326 | return replaceInstUsesWith(I&: CI, |
3327 | V: BinaryOperator::CreateWithCopiedFlags( |
3328 | Opc: OldBinOp->getOpcode(), V1: BO0, V2: Y, CopyO: OldBinOp, |
3329 | Name: OldBinOp->getName(), InsertBefore: II->getIterator())); |
3330 | } |
3331 | // rev(unop rev(X)) --> unop X |
3332 | if (match(V: Vec, P: m_OneUse(SubPattern: m_UnOp(X: m_VecReverse(Op0: m_Value(V&: X)))))) { |
3333 | auto *OldUnOp = cast<UnaryOperator>(Val: Vec); |
3334 | auto *NewUnOp = UnaryOperator::CreateWithCopiedFlags( |
3335 | Opc: OldUnOp->getOpcode(), V: X, CopyO: OldUnOp, Name: OldUnOp->getName(), |
3336 | InsertBefore: II->getIterator()); |
3337 | return replaceInstUsesWith(I&: CI, V: NewUnOp); |
3338 | } |
3339 | break; |
3340 | } |
3341 | case Intrinsic::vector_reduce_or: |
3342 | case Intrinsic::vector_reduce_and: { |
3343 | // Canonicalize logical or/and reductions: |
3344 | // Or reduction for i1 is represented as: |
3345 | // %val = bitcast <ReduxWidth x i1> to iReduxWidth |
3346 | // %res = cmp ne iReduxWidth %val, 0 |
3347 | // And reduction for i1 is represented as: |
3348 | // %val = bitcast <ReduxWidth x i1> to iReduxWidth |
3349 | // %res = cmp eq iReduxWidth %val, -1 (i.e. all bits set) |
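| // Illustrative instance (<4 x i1> assumed) for the 'or' form: |
| //   %val = bitcast <4 x i1> %v to i4 |
| //   %res = icmp ne i4 %val, 0 |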
3350 | Value *Arg = II->getArgOperand(i: 0); |
3351 | Value *Vect; |
3352 | |
3353 | if (Value *NewOp = |
3354 | simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) { |
3355 | replaceUse(U&: II->getOperandUse(i: 0), NewValue: NewOp); |
3356 | return II; |
3357 | } |
3358 | |
3359 | if (match(V: Arg, P: m_ZExtOrSExtOrSelf(Op: m_Value(V&: Vect)))) { |
3360 | if (auto *FTy = dyn_cast<FixedVectorType>(Val: Vect->getType())) |
3361 | if (FTy->getElementType() == Builder.getInt1Ty()) { |
3362 | Value *Res = Builder.CreateBitCast( |
3363 | V: Vect, DestTy: Builder.getIntNTy(N: FTy->getNumElements())); |
3364 | if (IID == Intrinsic::vector_reduce_and) { |
3365 | Res = Builder.CreateICmpEQ( |
3366 | LHS: Res, RHS: ConstantInt::getAllOnesValue(Ty: Res->getType())); |
3367 | } else { |
3368 | assert(IID == Intrinsic::vector_reduce_or && |
3369 | "Expected or reduction." ); |
3370 | Res = Builder.CreateIsNotNull(Arg: Res); |
3371 | } |
3372 | if (Arg != Vect) |
3373 | Res = Builder.CreateCast(Op: cast<CastInst>(Val: Arg)->getOpcode(), V: Res, |
3374 | DestTy: II->getType()); |
3375 | return replaceInstUsesWith(I&: CI, V: Res); |
3376 | } |
3377 | } |
3378 | [[fallthrough]]; |
3379 | } |
3380 | case Intrinsic::vector_reduce_add: { |
3381 | if (IID == Intrinsic::vector_reduce_add) { |
3382 | // Convert vector_reduce_add(ZExt(<n x i1>)) to |
3383 | // ZExtOrTrunc(ctpop(bitcast <n x i1> to in)). |
3384 | // Convert vector_reduce_add(SExt(<n x i1>)) to |
3385 | // -ZExtOrTrunc(ctpop(bitcast <n x i1> to in)). |
3386 | // Convert vector_reduce_add(<n x i1>) to |
3387 | // Trunc(ctpop(bitcast <n x i1> to in)). |
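| // Illustrative IR (<8 x i1> input, i32 result assumed) for the ZExt case: |
| //   %b = bitcast <8 x i1> %v to i8 |
| //   %p = call i8 @llvm.ctpop.i8(i8 %b) |
| //   %r = zext i8 %p to i32    ; negated instead for the SExt case |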
3388 | Value *Arg = II->getArgOperand(i: 0); |
3389 | Value *Vect; |
3390 | |
3391 | if (Value *NewOp = |
3392 | simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) { |
3393 | replaceUse(U&: II->getOperandUse(i: 0), NewValue: NewOp); |
3394 | return II; |
3395 | } |
3396 | |
3397 | if (match(V: Arg, P: m_ZExtOrSExtOrSelf(Op: m_Value(V&: Vect)))) { |
3398 | if (auto *FTy = dyn_cast<FixedVectorType>(Val: Vect->getType())) |
3399 | if (FTy->getElementType() == Builder.getInt1Ty()) { |
3400 | Value *V = Builder.CreateBitCast( |
3401 | V: Vect, DestTy: Builder.getIntNTy(N: FTy->getNumElements())); |
3402 | Value *Res = Builder.CreateUnaryIntrinsic(ID: Intrinsic::ctpop, V); |
3403 | if (Res->getType() != II->getType()) |
3404 | Res = Builder.CreateZExtOrTrunc(V: Res, DestTy: II->getType()); |
3405 | if (Arg != Vect && |
3406 | cast<Instruction>(Val: Arg)->getOpcode() == Instruction::SExt) |
3407 | Res = Builder.CreateNeg(V: Res); |
3408 | return replaceInstUsesWith(I&: CI, V: Res); |
3409 | } |
3410 | } |
3411 | } |
3412 | [[fallthrough]]; |
3413 | } |
3414 | case Intrinsic::vector_reduce_xor: { |
3415 | if (IID == Intrinsic::vector_reduce_xor) { |
3416 | // Exclusive disjunction reduction over the vector with |
3417 | // (potentially-extended) i1 element type is actually a |
3418 | // (potentially-extended) arithmetic `add` reduction over the original |
3419 | // non-extended value: |
3420 | // vector_reduce_xor(?ext(<n x i1>)) |
3421 | // --> |
3422 | // ?ext(vector_reduce_add(<n x i1>)) |
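| // Rationale: xor over i1 values is addition modulo 2, and an i1 add |
| // reduction wraps modulo 2, so the two reductions agree. |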
3423 | Value *Arg = II->getArgOperand(i: 0); |
3424 | Value *Vect; |
3425 | |
3426 | if (Value *NewOp = |
3427 | simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) { |
3428 | replaceUse(U&: II->getOperandUse(i: 0), NewValue: NewOp); |
3429 | return II; |
3430 | } |
3431 | |
3432 | if (match(V: Arg, P: m_ZExtOrSExtOrSelf(Op: m_Value(V&: Vect)))) { |
3433 | if (auto *VTy = dyn_cast<VectorType>(Val: Vect->getType())) |
3434 | if (VTy->getElementType() == Builder.getInt1Ty()) { |
3435 | Value *Res = Builder.CreateAddReduce(Src: Vect); |
3436 | if (Arg != Vect) |
3437 | Res = Builder.CreateCast(Op: cast<CastInst>(Val: Arg)->getOpcode(), V: Res, |
3438 | DestTy: II->getType()); |
3439 | return replaceInstUsesWith(I&: CI, V: Res); |
3440 | } |
3441 | } |
3442 | } |
3443 | [[fallthrough]]; |
3444 | } |
3445 | case Intrinsic::vector_reduce_mul: { |
3446 | if (IID == Intrinsic::vector_reduce_mul) { |
3447 | // Multiplicative reduction over the vector with (potentially-extended) |
3448 | // i1 element type is actually a (potentially zero-extended) |
3449 | // logical `and` reduction over the original non-extended value: |
3450 | // vector_reduce_mul(?ext(<n x i1>)) |
3451 | // --> |
3452 | // zext(vector_reduce_and(<n x i1>)) |
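| // Rationale: a product of i1 (0/1) values is 1 iff every element is 1, |
| // which is exactly the 'and' reduction. |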
3453 | Value *Arg = II->getArgOperand(i: 0); |
3454 | Value *Vect; |
3455 | |
3456 | if (Value *NewOp = |
3457 | simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) { |
3458 | replaceUse(U&: II->getOperandUse(i: 0), NewValue: NewOp); |
3459 | return II; |
3460 | } |
3461 | |
3462 | if (match(V: Arg, P: m_ZExtOrSExtOrSelf(Op: m_Value(V&: Vect)))) { |
3463 | if (auto *VTy = dyn_cast<VectorType>(Val: Vect->getType())) |
3464 | if (VTy->getElementType() == Builder.getInt1Ty()) { |
3465 | Value *Res = Builder.CreateAndReduce(Src: Vect); |
3466 | if (Res->getType() != II->getType()) |
3467 | Res = Builder.CreateZExt(V: Res, DestTy: II->getType()); |
3468 | return replaceInstUsesWith(I&: CI, V: Res); |
3469 | } |
3470 | } |
3471 | } |
3472 | [[fallthrough]]; |
3473 | } |
3474 | case Intrinsic::vector_reduce_umin: |
3475 | case Intrinsic::vector_reduce_umax: { |
3476 | if (IID == Intrinsic::vector_reduce_umin || |
3477 | IID == Intrinsic::vector_reduce_umax) { |
3478 | // UMin/UMax reduction over the vector with (potentially-extended) |
3479 | // i1 element type is actually a (potentially-extended) |
3480 | // logical `and`/`or` reduction over the original non-extended value: |
3481 | // vector_reduce_u{min,max}(?ext(<n x i1>)) |
3482 | // --> |
3483 | // ?ext(vector_reduce_{and,or}(<n x i1>)) |
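| // Rationale: for unsigned i1 values, umin is 'and' and umax is 'or'. |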
3484 | Value *Arg = II->getArgOperand(i: 0); |
3485 | Value *Vect; |
3486 | |
3487 | if (Value *NewOp = |
3488 | simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) { |
3489 | replaceUse(U&: II->getOperandUse(i: 0), NewValue: NewOp); |
3490 | return II; |
3491 | } |
3492 | |
3493 | if (match(V: Arg, P: m_ZExtOrSExtOrSelf(Op: m_Value(V&: Vect)))) { |
3494 | if (auto *VTy = dyn_cast<VectorType>(Val: Vect->getType())) |
3495 | if (VTy->getElementType() == Builder.getInt1Ty()) { |
3496 | Value *Res = IID == Intrinsic::vector_reduce_umin |
3497 | ? Builder.CreateAndReduce(Src: Vect) |
3498 | : Builder.CreateOrReduce(Src: Vect); |
3499 | if (Arg != Vect) |
3500 | Res = Builder.CreateCast(Op: cast<CastInst>(Val: Arg)->getOpcode(), V: Res, |
3501 | DestTy: II->getType()); |
3502 | return replaceInstUsesWith(I&: CI, V: Res); |
3503 | } |
3504 | } |
3505 | } |
3506 | [[fallthrough]]; |
3507 | } |
3508 | case Intrinsic::vector_reduce_smin: |
3509 | case Intrinsic::vector_reduce_smax: { |
3510 | if (IID == Intrinsic::vector_reduce_smin || |
3511 | IID == Intrinsic::vector_reduce_smax) { |
3512 | // SMin/SMax reduction over the vector with (potentially-extended) |
3513 | // i1 element type is actually a (potentially-extended) |
3514 | // logical `and`/`or` reduction over the original non-extended value: |
3515 | // vector_reduce_s{min,max}(<n x i1>) |
3516 | // --> |
3517 | // vector_reduce_{or,and}(<n x i1>) |
3518 | // and |
3519 | // vector_reduce_s{min,max}(sext(<n x i1>)) |
3520 | // --> |
3521 | // sext(vector_reduce_{or,and}(<n x i1>)) |
3522 | // and |
3523 | // vector_reduce_s{min,max}(zext(<n x i1>)) |
3524 | // --> |
3525 | // zext(vector_reduce_{and,or}(<n x i1>)) |
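| // Rationale: as signed i1, true (-1) is less than false (0), so smin is |
| // 'or' and smax is 'and'; after a zext the values are 0/1, flipping this |
| // to smin = 'and' and smax = 'or', which the ExtOpc comparison below picks. |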
3526 | Value *Arg = II->getArgOperand(i: 0); |
3527 | Value *Vect; |
3528 | |
3529 | if (Value *NewOp = |
3530 | simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) { |
3531 | replaceUse(U&: II->getOperandUse(i: 0), NewValue: NewOp); |
3532 | return II; |
3533 | } |
3534 | |
3535 | if (match(V: Arg, P: m_ZExtOrSExtOrSelf(Op: m_Value(V&: Vect)))) { |
3536 | if (auto *VTy = dyn_cast<VectorType>(Val: Vect->getType())) |
3537 | if (VTy->getElementType() == Builder.getInt1Ty()) { |
3538 | Instruction::CastOps ExtOpc = Instruction::CastOps::CastOpsEnd; |
3539 | if (Arg != Vect) |
3540 | ExtOpc = cast<CastInst>(Val: Arg)->getOpcode(); |
3541 | Value *Res = ((IID == Intrinsic::vector_reduce_smin) == |
3542 | (ExtOpc == Instruction::CastOps::ZExt)) |
3543 | ? Builder.CreateAndReduce(Src: Vect) |
3544 | : Builder.CreateOrReduce(Src: Vect); |
3545 | if (Arg != Vect) |
3546 | Res = Builder.CreateCast(Op: ExtOpc, V: Res, DestTy: II->getType()); |
3547 | return replaceInstUsesWith(I&: CI, V: Res); |
3548 | } |
3549 | } |
3550 | } |
3551 | [[fallthrough]]; |
3552 | } |
3553 | case Intrinsic::vector_reduce_fmax: |
3554 | case Intrinsic::vector_reduce_fmin: |
3555 | case Intrinsic::vector_reduce_fadd: |
3556 | case Intrinsic::vector_reduce_fmul: { |
3557 | bool CanReorderLanes = (IID != Intrinsic::vector_reduce_fadd && |
3558 | IID != Intrinsic::vector_reduce_fmul) || |
3559 | II->hasAllowReassoc(); |
3560 | const unsigned ArgIdx = (IID == Intrinsic::vector_reduce_fadd || |
3561 | IID == Intrinsic::vector_reduce_fmul) |
3562 | ? 1 |
3563 | : 0; |
3564 | Value *Arg = II->getArgOperand(i: ArgIdx); |
3565 | if (Value *NewOp = simplifyReductionOperand(Arg, CanReorderLanes)) { |
3566 | replaceUse(U&: II->getOperandUse(i: ArgIdx), NewValue: NewOp); |
3567 | return nullptr; |
3568 | } |
3569 | break; |
3570 | } |
3571 | case Intrinsic::is_fpclass: { |
3572 | if (Instruction *I = foldIntrinsicIsFPClass(II&: *II)) |
3573 | return I; |
3574 | break; |
3575 | } |
3576 | case Intrinsic::threadlocal_address: { |
3577 | Align MinAlign = getKnownAlignment(V: II->getArgOperand(i: 0), DL, CxtI: II, AC: &AC, DT: &DT); |
3578 | MaybeAlign Align = II->getRetAlign(); |
3579 | if (MinAlign > Align.valueOrOne()) { |
3580 | II->addRetAttr(Attr: Attribute::getWithAlignment(Context&: II->getContext(), Alignment: MinAlign)); |
3581 | return II; |
3582 | } |
3583 | break; |
3584 | } |
3585 | default: { |
3586 | // Handle target specific intrinsics |
3587 | std::optional<Instruction *> V = targetInstCombineIntrinsic(II&: *II); |
3588 | if (V) |
3589 | return *V; |
3590 | break; |
3591 | } |
3592 | } |
3593 | |
3594 | // Try to fold intrinsic into select operands. This is legal if: |
3595 | // * The intrinsic is speculatable. |
3596 | // * The select condition is not a vector, or the intrinsic does not |
3597 | // perform cross-lane operations. |
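| // e.g. (illustrative, both select arms fold to constants): |
| //   ctpop(select %c, i32 4, i32 0) --> select %c, i32 1, i32 0 |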
3598 | switch (IID) { |
3599 | case Intrinsic::ctlz: |
3600 | case Intrinsic::cttz: |
3601 | case Intrinsic::ctpop: |
3602 | case Intrinsic::umin: |
3603 | case Intrinsic::umax: |
3604 | case Intrinsic::smin: |
3605 | case Intrinsic::smax: |
3606 | case Intrinsic::usub_sat: |
3607 | case Intrinsic::uadd_sat: |
3608 | case Intrinsic::ssub_sat: |
3609 | case Intrinsic::sadd_sat: |
3610 | for (Value *Op : II->args()) |
3611 | if (auto *Sel = dyn_cast<SelectInst>(Val: Op)) |
3612 | if (Instruction *R = FoldOpIntoSelect(Op&: *II, SI: Sel)) |
3613 | return R; |
3614 | [[fallthrough]]; |
3615 | default: |
3616 | break; |
3617 | } |
3618 | |
3619 | if (Instruction *Shuf = foldShuffledIntrinsicOperands(II, Builder)) |
3620 | return Shuf; |
3621 | |
3622 | // Some intrinsics (like experimental_gc_statepoint) can be used in an invoke |
3623 | // context, so they are handled in visitCallBase, which we should trigger here. |
3624 | return visitCallBase(Call&: *II); |
3625 | } |
3626 | |
3627 | // Fence instruction simplification |
3628 | Instruction *InstCombinerImpl::visitFenceInst(FenceInst &FI) { |
3629 | auto *NFI = dyn_cast<FenceInst>(Val: FI.getNextNonDebugInstruction()); |
3630 | // This check is solely here to handle arbitrary target-dependent syncscopes. |
3631 | // TODO: Can remove if it does not matter in practice. |
3632 | if (NFI && FI.isIdenticalTo(I: NFI)) |
3633 | return eraseInstFromFunction(I&: FI); |
3634 | |
3635 | // Returns true if FI1 is identical to, or a stronger fence than, FI2. |
3636 | auto isIdenticalOrStrongerFence = [](FenceInst *FI1, FenceInst *FI2) { |
3637 | auto FI1SyncScope = FI1->getSyncScopeID(); |
3638 | // Consider same scope, where scope is global or single-thread. |
3639 | if (FI1SyncScope != FI2->getSyncScopeID() || |
3640 | (FI1SyncScope != SyncScope::System && |
3641 | FI1SyncScope != SyncScope::SingleThread)) |
3642 | return false; |
3643 | |
3644 | return isAtLeastOrStrongerThan(AO: FI1->getOrdering(), Other: FI2->getOrdering()); |
3645 | }; |
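| // e.g. (illustrative): an adjacent 'fence seq_cst' makes a neighbouring |
| // 'fence acquire' in the same scope redundant, so the weaker fence is erased. |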
3646 | if (NFI && isIdenticalOrStrongerFence(NFI, &FI)) |
3647 | return eraseInstFromFunction(I&: FI); |
3648 | |
3649 | if (auto *PFI = dyn_cast_or_null<FenceInst>(Val: FI.getPrevNonDebugInstruction())) |
3650 | if (isIdenticalOrStrongerFence(PFI, &FI)) |
3651 | return eraseInstFromFunction(I&: FI); |
3652 | return nullptr; |
3653 | } |
3654 | |
3655 | // InvokeInst simplification |
3656 | Instruction *InstCombinerImpl::visitInvokeInst(InvokeInst &II) { |
3657 | return visitCallBase(Call&: II); |
3658 | } |
3659 | |
3660 | // CallBrInst simplification |
3661 | Instruction *InstCombinerImpl::visitCallBrInst(CallBrInst &CBI) { |
3662 | return visitCallBase(Call&: CBI); |
3663 | } |
3664 | |
3665 | Instruction *InstCombinerImpl::tryOptimizeCall(CallInst *CI) { |
3666 | if (!CI->getCalledFunction()) return nullptr; |
3667 | |
3668 | // Skip optimizing notail and musttail calls so |
3669 | // LibCallSimplifier::optimizeCall doesn't have to preserve those invariants. |
3670 | // LibCallSimplifier::optimizeCall should try to preserve tail calls, though. |
3671 | if (CI->isMustTailCall() || CI->isNoTailCall()) |
3672 | return nullptr; |
3673 | |
3674 | auto InstCombineRAUW = [this](Instruction *From, Value *With) { |
3675 | replaceInstUsesWith(I&: *From, V: With); |
3676 | }; |
3677 | auto InstCombineErase = [this](Instruction *I) { |
3678 | eraseInstFromFunction(I&: *I); |
3679 | }; |
3680 | LibCallSimplifier Simplifier(DL, &TLI, &AC, ORE, BFI, PSI, InstCombineRAUW, |
3681 | InstCombineErase); |
3682 | if (Value *With = Simplifier.optimizeCall(CI, B&: Builder)) { |
3683 | ++NumSimplified; |
3684 | return CI->use_empty() ? CI : replaceInstUsesWith(I&: *CI, V: With); |
3685 | } |
3686 | |
3687 | return nullptr; |
3688 | } |
3689 | |
3690 | static IntrinsicInst *findInitTrampolineFromAlloca(Value *TrampMem) { |
3691 | // Strip off at most one level of pointer casts, looking for an alloca. This |
3692 | // is good enough in practice and simpler than handling any number of casts. |
3693 | Value *Underlying = TrampMem->stripPointerCasts(); |
3694 | if (Underlying != TrampMem && |
3695 | (!Underlying->hasOneUse() || Underlying->user_back() != TrampMem)) |
3696 | return nullptr; |
3697 | if (!isa<AllocaInst>(Val: Underlying)) |
3698 | return nullptr; |
3699 | |
3700 | IntrinsicInst *InitTrampoline = nullptr; |
3701 | for (User *U : TrampMem->users()) { |
3702 | IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: U); |
3703 | if (!II) |
3704 | return nullptr; |
3705 | if (II->getIntrinsicID() == Intrinsic::init_trampoline) { |
3706 | if (InitTrampoline) |
3707 | // More than one init_trampoline writes to this value. Give up. |
3708 | return nullptr; |
3709 | InitTrampoline = II; |
3710 | continue; |
3711 | } |
3712 | if (II->getIntrinsicID() == Intrinsic::adjust_trampoline) |
3713 | // Allow any number of calls to adjust.trampoline. |
3714 | continue; |
3715 | return nullptr; |
3716 | } |
3717 | |
3718 | // No call to init.trampoline found. |
3719 | if (!InitTrampoline) |
3720 | return nullptr; |
3721 | |
3722 | // Check that the alloca is being used in the expected way. |
3723 | if (InitTrampoline->getOperand(i_nocapture: 0) != TrampMem) |
3724 | return nullptr; |
3725 | |
3726 | return InitTrampoline; |
3727 | } |
3728 | |
3729 | static IntrinsicInst *findInitTrampolineFromBB(IntrinsicInst *AdjustTramp, |
3730 | Value *TrampMem) { |
3731 | // Visit all the previous instructions in the basic block, and try to find an |
3732 | // init.trampoline which has a direct path to the adjust.trampoline. |
3733 | for (BasicBlock::iterator I = AdjustTramp->getIterator(), |
3734 | E = AdjustTramp->getParent()->begin(); |
3735 | I != E;) { |
3736 | Instruction *Inst = &*--I; |
3737 | if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val&: I)) |
3738 | if (II->getIntrinsicID() == Intrinsic::init_trampoline && |
3739 | II->getOperand(i_nocapture: 0) == TrampMem) |
3740 | return II; |
3741 | if (Inst->mayWriteToMemory()) |
3742 | return nullptr; |
3743 | } |
3744 | return nullptr; |
3745 | } |
3746 | |
3747 | // Given a call to llvm.adjust.trampoline, find and return the corresponding |
3748 | // call to llvm.init.trampoline if the call to the trampoline can be optimized |
3749 | // to a direct call to a function. Otherwise return NULL. |
3750 | static IntrinsicInst *findInitTrampoline(Value *Callee) { |
3751 | Callee = Callee->stripPointerCasts(); |
3752 | IntrinsicInst *AdjustTramp = dyn_cast<IntrinsicInst>(Val: Callee); |
3753 | if (!AdjustTramp || |
3754 | AdjustTramp->getIntrinsicID() != Intrinsic::adjust_trampoline) |
3755 | return nullptr; |
3756 | |
3757 | Value *TrampMem = AdjustTramp->getOperand(i_nocapture: 0); |
3758 | |
3759 | if (IntrinsicInst *IT = findInitTrampolineFromAlloca(TrampMem)) |
3760 | return IT; |
3761 | if (IntrinsicInst *IT = findInitTrampolineFromBB(AdjustTramp, TrampMem)) |
3762 | return IT; |
3763 | return nullptr; |
3764 | } |
3765 | |
3766 | bool InstCombinerImpl::annotateAnyAllocSite(CallBase &Call, |
3767 | const TargetLibraryInfo *TLI) { |
3768 | // Note: We only handle cases which can't be driven from generic attributes |
3769 | // here. So, for example, nonnull and noalias (which are common properties |
3770 | // of some allocation functions) are expected to be handled via annotation |
3771 | // of the respective allocator declaration with generic attributes. |
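// Illustrative example (the exact attributes depend on the allocator's
// declaration and arguments): for a call such as
//   %p = call ptr @malloc(i64 40)
// a known non-zero constant size lets us add dereferenceable_or_null(40) to
// the return value (or dereferenceable(40) if the return is already known
// nonnull), and a constant power-of-two alignment argument on an aligned
// allocator becomes an align return attribute.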
3772 | bool Changed = false; |
3773 | |
3774 | if (!Call.getType()->isPointerTy()) |
3775 | return Changed; |
3776 | |
3777 | std::optional<APInt> Size = getAllocSize(CB: &Call, TLI); |
3778 | if (Size && *Size != 0) { |
3779 | // TODO: We really should just emit deref_or_null here and then |
3780 | // let the generic inference code combine that with nonnull. |
3781 | if (Call.hasRetAttr(Kind: Attribute::NonNull)) { |
3782 | Changed = !Call.hasRetAttr(Kind: Attribute::Dereferenceable); |
3783 | Call.addRetAttr(Attr: Attribute::getWithDereferenceableBytes( |
3784 | Context&: Call.getContext(), Bytes: Size->getLimitedValue())); |
3785 | } else { |
3786 | Changed = !Call.hasRetAttr(Kind: Attribute::DereferenceableOrNull); |
3787 | Call.addRetAttr(Attr: Attribute::getWithDereferenceableOrNullBytes( |
3788 | Context&: Call.getContext(), Bytes: Size->getLimitedValue())); |
3789 | } |
3790 | } |
3791 | |
3792 | // Add alignment attribute if alignment is a power of two constant. |
3793 | Value *Alignment = getAllocAlignment(V: &Call, TLI); |
3794 | if (!Alignment) |
3795 | return Changed; |
3796 | |
3797 | ConstantInt *AlignOpC = dyn_cast<ConstantInt>(Val: Alignment); |
3798 | if (AlignOpC && AlignOpC->getValue().ult(RHS: llvm::Value::MaximumAlignment)) { |
3799 | uint64_t AlignmentVal = AlignOpC->getZExtValue(); |
3800 | if (llvm::isPowerOf2_64(Value: AlignmentVal)) { |
3801 | Align ExistingAlign = Call.getRetAlign().valueOrOne(); |
3802 | Align NewAlign = Align(AlignmentVal); |
3803 | if (NewAlign > ExistingAlign) { |
3804 | Call.addRetAttr( |
3805 | Attr: Attribute::getWithAlignment(Context&: Call.getContext(), Alignment: NewAlign)); |
3806 | Changed = true; |
3807 | } |
3808 | } |
3809 | } |
3810 | return Changed; |
3811 | } |
3812 | |
3813 | /// Improvements for call, callbr and invoke instructions. |
3814 | Instruction *InstCombinerImpl::visitCallBase(CallBase &Call) { |
3815 | bool Changed = annotateAnyAllocSite(Call, TLI: &TLI); |
3816 | |
3817 | // Mark any parameters that are known to be non-null with the nonnull |
3818 | // attribute. This is helpful for inlining calls to functions with null |
3819 | // checks on their arguments. |
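// Illustrative example (hypothetical callee): in
//   call void @use(ptr %p, ptr %q)
// if %q can be proven non-null at this call site, the second argument gets
// the nonnull attribute, i.e. 'call void @use(ptr %p, ptr nonnull %q)', which
// lets null checks inside @use fold away once the call is inlined.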
3820 | SmallVector<unsigned, 4> ArgNos; |
3821 | unsigned ArgNo = 0; |
3822 | |
3823 | for (Value *V : Call.args()) { |
3824 | if (V->getType()->isPointerTy() && |
3825 | !Call.paramHasAttr(ArgNo, Kind: Attribute::NonNull) && |
3826 | isKnownNonZero(V, Q: getSimplifyQuery().getWithInstruction(I: &Call))) |
3827 | ArgNos.push_back(Elt: ArgNo); |
3828 | ArgNo++; |
3829 | } |
3830 | |
3831 | assert(ArgNo == Call.arg_size() && "Call arguments not processed correctly." ); |
3832 | |
3833 | if (!ArgNos.empty()) { |
3834 | AttributeList AS = Call.getAttributes(); |
3835 | LLVMContext &Ctx = Call.getContext(); |
3836 | AS = AS.addParamAttribute(C&: Ctx, ArgNos, |
3837 | A: Attribute::get(Context&: Ctx, Kind: Attribute::NonNull)); |
3838 | Call.setAttributes(AS); |
3839 | Changed = true; |
3840 | } |
3841 | |
3842 | // If the callee is a pointer to a function, attempt to move any casts to the |
3843 | // arguments of the call/callbr/invoke. |
3844 | Value *Callee = Call.getCalledOperand(); |
3845 | Function *CalleeF = dyn_cast<Function>(Val: Callee); |
3846 | if ((!CalleeF || CalleeF->getFunctionType() != Call.getFunctionType()) && |
3847 | transformConstExprCastCall(Call)) |
3848 | return nullptr; |
3849 | |
3850 | if (CalleeF) { |
3851 | // Remove the convergent attr on calls when the callee is not convergent. |
3852 | if (Call.isConvergent() && !CalleeF->isConvergent() && |
3853 | !CalleeF->isIntrinsic()) { |
3854 | LLVM_DEBUG(dbgs() << "Removing convergent attr from instr " << Call |
3855 | << "\n" ); |
3856 | Call.setNotConvergent(); |
3857 | return &Call; |
3858 | } |
3859 | |
3860 | // If the call and callee calling conventions don't match, and neither one
3861 | // of the calling conventions is compatible with the C calling convention,
3862 | // this call must be unreachable, as the call is undefined.
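// For example (illustrative), a ccc call to a function defined with fastcc
// takes this path: the call is undefined, so it is replaced by a
// non-terminator unreachable and its result (if any) by poison.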
3863 | if ((CalleeF->getCallingConv() != Call.getCallingConv() && |
3864 | !(CalleeF->getCallingConv() == llvm::CallingConv::C && |
3865 | TargetLibraryInfoImpl::isCallingConvCCompatible(CI: &Call)) && |
3866 | !(Call.getCallingConv() == llvm::CallingConv::C && |
3867 | TargetLibraryInfoImpl::isCallingConvCCompatible(Callee: CalleeF))) && |
3868 | // Only do this for calls to a function with a body. A prototype may |
3869 | // not actually end up matching the implementation's calling conv for a |
3870 | // variety of reasons (e.g. it may be written in assembly). |
3871 | !CalleeF->isDeclaration()) { |
3872 | Instruction *OldCall = &Call; |
3873 | CreateNonTerminatorUnreachable(InsertAt: OldCall); |
3874 | // If OldCall does not return void then replaceInstUsesWith poison.
3875 | // This allows value handles and custom metadata to adjust themselves.
3876 | if (!OldCall->getType()->isVoidTy()) |
3877 | replaceInstUsesWith(I&: *OldCall, V: PoisonValue::get(T: OldCall->getType())); |
3878 | if (isa<CallInst>(Val: OldCall)) |
3879 | return eraseInstFromFunction(I&: *OldCall); |
3880 | |
3881 | // We cannot remove an invoke or a callbr, because that would change the
3882 | // CFG; just change the callee to a null pointer instead.
3883 | cast<CallBase>(Val: OldCall)->setCalledFunction( |
3884 | FTy: CalleeF->getFunctionType(), |
3885 | Fn: Constant::getNullValue(Ty: CalleeF->getType())); |
3886 | return nullptr; |
3887 | } |
3888 | } |
3889 | |
3890 | // Calling a null function pointer is undefined if a null address isn't |
3891 | // dereferenceable. |
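// (NullPointerIsDefined is true e.g. for functions carrying the
// "null-pointer-is-valid" attribute, where address 0 may legitimately be
// dereferenced, so the fold below is skipped there.)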
3892 | if ((isa<ConstantPointerNull>(Val: Callee) && |
3893 | !NullPointerIsDefined(F: Call.getFunction())) || |
3894 | isa<UndefValue>(Val: Callee)) { |
3895 | // If Call does not return void then replaceInstUsesWith poison.
3896 | // This allows value handles and custom metadata to adjust themselves.
3897 | if (!Call.getType()->isVoidTy()) |
3898 | replaceInstUsesWith(I&: Call, V: PoisonValue::get(T: Call.getType())); |
3899 | |
3900 | if (Call.isTerminator()) { |
3901 | // Can't remove an invoke or callbr because we cannot change the CFG. |
3902 | return nullptr; |
3903 | } |
3904 | |
3905 | // This instruction is not reachable, just remove it. |
3906 | CreateNonTerminatorUnreachable(InsertAt: &Call); |
3907 | return eraseInstFromFunction(I&: Call); |
3908 | } |
3909 | |
3910 | if (IntrinsicInst *II = findInitTrampoline(Callee)) |
3911 | return transformCallThroughTrampoline(Call, Tramp&: *II); |
3912 | |
3913 | if (isa<InlineAsm>(Val: Callee) && !Call.doesNotThrow()) { |
3914 | InlineAsm *IA = cast<InlineAsm>(Val: Callee); |
3915 | if (!IA->canThrow()) { |
3916 | // Normal inline asm calls cannot throw - mark them |
3917 | // 'nounwind'. |
3918 | Call.setDoesNotThrow(); |
3919 | Changed = true; |
3920 | } |
3921 | } |
3922 | |
3923 | // Try to optimize the call if possible; we require DataLayout for most of
3924 | // this. None of these calls is considered possibly dead, so go ahead and
3925 | // delete the instruction now.
3926 | if (CallInst *CI = dyn_cast<CallInst>(Val: &Call)) { |
3927 | Instruction *I = tryOptimizeCall(CI); |
3928 | // If we changed something, return the result. Otherwise fall through to
3929 | // the remaining checks below.
3930 | if (I) return eraseInstFromFunction(I&: *I); |
3931 | } |
3932 | |
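// If some argument carries the 'returned' attribute, the call is known to
// return that argument, so forward it to the call's users (the call itself
// stays, since it may have side effects). Skipped for musttail calls.
// Illustrative example (hypothetical callee): for
//   %r = call ptr @copy_like(ptr returned %dst, ptr %src, i64 %n)
// uses of %r can be rewritten to use %dst directly.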
3933 | if (!Call.use_empty() && !Call.isMustTailCall()) |
3934 | if (Value *ReturnedArg = Call.getReturnedArgOperand()) { |
3935 | Type *CallTy = Call.getType(); |
3936 | Type *RetArgTy = ReturnedArg->getType(); |
3937 | if (RetArgTy->canLosslesslyBitCastTo(Ty: CallTy)) |
3938 | return replaceInstUsesWith( |
3939 | I&: Call, V: Builder.CreateBitOrPointerCast(V: ReturnedArg, DestTy: CallTy)); |
3940 | } |
3941 | |
3942 | // Drop unnecessary kcfi operand bundles from calls that were converted |
3943 | // into direct calls. |
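// (KCFI type checks are only emitted for indirect calls, so the bundle
// carries no information once the callee is a known function; the debug
// output below merely reports a type-id mismatch before the bundle is
// dropped.)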
3944 | auto Bundle = Call.getOperandBundle(ID: LLVMContext::OB_kcfi); |
3945 | if (Bundle && !Call.isIndirectCall()) { |
3946 | DEBUG_WITH_TYPE(DEBUG_TYPE "-kcfi" , { |
3947 | if (CalleeF) { |
3948 | ConstantInt *FunctionType = nullptr; |
3949 | ConstantInt *ExpectedType = cast<ConstantInt>(Bundle->Inputs[0]); |
3950 | |
3951 | if (MDNode *MD = CalleeF->getMetadata(LLVMContext::MD_kcfi_type)) |
3952 | FunctionType = mdconst::extract<ConstantInt>(MD->getOperand(0)); |
3953 | |
3954 | if (FunctionType && |
3955 | FunctionType->getZExtValue() != ExpectedType->getZExtValue()) |
3956 | dbgs() << Call.getModule()->getName() |
3957 | << ": warning: kcfi: " << Call.getCaller()->getName() |
3958 | << ": call to " << CalleeF->getName() |
3959 | << " using a mismatching function pointer type\n" ; |
3960 | } |
3961 | }); |
3962 | |
3963 | return CallBase::removeOperandBundle(CB: &Call, ID: LLVMContext::OB_kcfi); |
3964 | } |
3965 | |
3966 | if (isRemovableAlloc(V: &Call, TLI: &TLI)) |
3967 | return visitAllocSite(FI&: Call); |
3968 | |
3969 | // Handle intrinsics which can be used in both call and invoke context. |
3970 | switch (Call.getIntrinsicID()) { |
3971 | case Intrinsic::experimental_gc_statepoint: { |
3972 | GCStatepointInst &GCSP = *cast<GCStatepointInst>(Val: &Call); |
3973 | SmallPtrSet<Value *, 32> LiveGcValues; |
3974 | for (const GCRelocateInst *Reloc : GCSP.getGCRelocates()) { |
3975 | GCRelocateInst &GCR = *const_cast<GCRelocateInst *>(Reloc); |
3976 | |
3977 | // Remove the relocation if unused. |
3978 | if (GCR.use_empty()) { |
3979 | eraseInstFromFunction(I&: GCR); |
3980 | continue; |
3981 | } |
3982 | |
3983 | Value *DerivedPtr = GCR.getDerivedPtr(); |
3984 | Value *BasePtr = GCR.getBasePtr(); |
3985 | |
3986 | // Undef is undef, even after relocation. |
3987 | if (isa<UndefValue>(Val: DerivedPtr) || isa<UndefValue>(Val: BasePtr)) { |
3988 | replaceInstUsesWith(I&: GCR, V: UndefValue::get(T: GCR.getType())); |
3989 | eraseInstFromFunction(I&: GCR); |
3990 | continue; |
3991 | } |
3992 | |
3993 | if (auto *PT = dyn_cast<PointerType>(Val: GCR.getType())) { |
3994 | // The relocation of null will be null for most any collector. |
3995 | // TODO: provide a hook for this in GCStrategy. There might be some |
3996 | // weird collector this property does not hold for. |
3997 | if (isa<ConstantPointerNull>(Val: DerivedPtr)) { |
3998 | // Use null-pointer of gc_relocate's type to replace it. |
3999 | replaceInstUsesWith(I&: GCR, V: ConstantPointerNull::get(T: PT)); |
4000 | eraseInstFromFunction(I&: GCR); |
4001 | continue; |
4002 | } |
4003 | |
4004 | // isKnownNonNull -> nonnull attribute |
4005 | if (!GCR.hasRetAttr(Kind: Attribute::NonNull) && |
4006 | isKnownNonZero(V: DerivedPtr, |
4007 | Q: getSimplifyQuery().getWithInstruction(I: &Call))) { |
4008 | GCR.addRetAttr(Kind: Attribute::NonNull); |
4009 | // We discovered a new fact; re-check the users.
4010 | Worklist.pushUsersToWorkList(I&: GCR); |
4011 | } |
4012 | } |
4013 | |
4014 | // If we have two copies of the same pointer in the statepoint argument |
4015 | // list, canonicalize to one. This may let us common gc.relocates. |
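// Illustrative example (intrinsic name suffixes elided): if the gc-live
// bundle holds %p at both index 0 and index 1, then
//   %r = call ptr @llvm.experimental.gc.relocate(token %tok, i32 0, i32 1)
// is rewritten to use i32 0 for both the base and the derived index, so that
// identical relocates can later be deduplicated.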
4016 | if (GCR.getBasePtr() == GCR.getDerivedPtr() && |
4017 | GCR.getBasePtrIndex() != GCR.getDerivedPtrIndex()) { |
4018 | auto *OpIntTy = GCR.getOperand(i_nocapture: 2)->getType(); |
4019 | GCR.setOperand(i_nocapture: 2, Val_nocapture: ConstantInt::get(Ty: OpIntTy, V: GCR.getBasePtrIndex())); |
4020 | } |
4021 | |
4022 | // TODO: bitcast(relocate(p)) -> relocate(bitcast(p)) |
4023 | // Canonicalize on the type from the uses to the defs |
4024 | |
4025 | // TODO: relocate((gep p, C, C2, ...)) -> gep(relocate(p), C, C2, ...) |
4026 | LiveGcValues.insert(Ptr: BasePtr); |
4027 | LiveGcValues.insert(Ptr: DerivedPtr); |
4028 | } |
4029 | std::optional<OperandBundleUse> Bundle = |
4030 | GCSP.getOperandBundle(ID: LLVMContext::OB_gc_live); |
4031 | unsigned NumOfGCLives = LiveGcValues.size(); |
4032 | if (!Bundle || NumOfGCLives == Bundle->Inputs.size()) |
4033 | break; |
4034 | // We can reduce the size of the gc-live bundle.
4035 | DenseMap<Value *, unsigned> Val2Idx; |
4036 | std::vector<Value *> NewLiveGc; |
4037 | for (Value *V : Bundle->Inputs) { |
4038 | if (Val2Idx.count(Val: V)) |
4039 | continue; |
4040 | if (LiveGcValues.count(Ptr: V)) { |
4041 | Val2Idx[V] = NewLiveGc.size(); |
4042 | NewLiveGc.push_back(x: V); |
4043 | } else |
4044 | Val2Idx[V] = NumOfGCLives; |
4045 | } |
4046 | // Update all gc.relocates |
4047 | for (const GCRelocateInst *Reloc : GCSP.getGCRelocates()) { |
4048 | GCRelocateInst &GCR = *const_cast<GCRelocateInst *>(Reloc); |
4049 | Value *BasePtr = GCR.getBasePtr(); |
4050 | assert(Val2Idx.count(BasePtr) && Val2Idx[BasePtr] != NumOfGCLives && |
4051 | "Missed live gc for base pointer" ); |
4052 | auto *OpIntTy1 = GCR.getOperand(i_nocapture: 1)->getType(); |
4053 | GCR.setOperand(i_nocapture: 1, Val_nocapture: ConstantInt::get(Ty: OpIntTy1, V: Val2Idx[BasePtr])); |
4054 | Value *DerivedPtr = GCR.getDerivedPtr(); |
4055 | assert(Val2Idx.count(DerivedPtr) && Val2Idx[DerivedPtr] != NumOfGCLives && |
4056 | "Missed live gc for derived pointer" ); |
4057 | auto *OpIntTy2 = GCR.getOperand(i_nocapture: 2)->getType(); |
4058 | GCR.setOperand(i_nocapture: 2, Val_nocapture: ConstantInt::get(Ty: OpIntTy2, V: Val2Idx[DerivedPtr])); |
4059 | } |
4060 | // Create new statepoint instruction. |
4061 | OperandBundleDef NewBundle("gc-live" , NewLiveGc); |
4062 | return CallBase::Create(CB: &Call, Bundle: NewBundle); |
4063 | } |
4064 | default: { break; } |
4065 | } |
4066 | |
4067 | return Changed ? &Call : nullptr; |
4068 | } |
4069 | |
4070 | /// If the callee is a constexpr cast of a function, attempt to move the cast to |
4071 | /// the arguments of the call/invoke. |
4072 | /// CallBrInst is not supported. |
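/// Illustrative example: a K&R-style caller can leave us with
///   call void @f(i32 %x)
/// for a function that is actually defined as 'define i32 @f(i32 %x)'. Since
/// the result is unused, the call can be rewritten against @f's real
/// signature; mismatched argument or return types are only handled when a
/// bitcast or no-op pointer cast suffices.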
4073 | bool InstCombinerImpl::transformConstExprCastCall(CallBase &Call) { |
4074 | auto *Callee = |
4075 | dyn_cast<Function>(Val: Call.getCalledOperand()->stripPointerCasts()); |
4076 | if (!Callee) |
4077 | return false; |
4078 | |
4079 | assert(!isa<CallBrInst>(Call) && |
4080 | "CallBr's don't have a single point after a def to insert at" ); |
4081 | |
4082 | // If this is a call to a thunk function, don't remove the cast. Thunks are |
4083 | // used to transparently forward all incoming parameters and outgoing return |
4084 | // values, so it's important to leave the cast in place. |
4085 | if (Callee->hasFnAttribute(Kind: "thunk" )) |
4086 | return false; |
4087 | |
4088 | // If this is a call to a naked function, the assembly might be using an
4089 | // argument or otherwise relying on the frame layout; the function
4090 | // prototype would then mismatch.
4091 | if (Callee->hasFnAttribute(Kind: Attribute::Naked)) |
4092 | return false; |
4093 | |
4094 | // If this is a musttail call, the callee's prototype must match the caller's |
4095 | // prototype with the exception of pointee types. The code below doesn't |
4096 | // implement that, so we can't do this transform. |
4097 | // TODO: Do the transform if it only requires adding pointer casts. |
4098 | if (Call.isMustTailCall()) |
4099 | return false; |
4100 | |
4101 | Instruction *Caller = &Call; |
4102 | const AttributeList &CallerPAL = Call.getAttributes(); |
4103 | |
4104 | // Okay, this is a cast from a function to a different type. Unless doing so
4105 | // would cause a type conversion of one of our arguments, change this call to
4106 | // be a direct call with arguments cast to the appropriate types.
4107 | FunctionType *FT = Callee->getFunctionType(); |
4108 | Type *OldRetTy = Caller->getType(); |
4109 | Type *NewRetTy = FT->getReturnType(); |
4110 | |
4111 | // Check to see if we are changing the return type... |
4112 | if (OldRetTy != NewRetTy) { |
4113 | |
4114 | if (NewRetTy->isStructTy()) |
4115 | return false; // TODO: Handle multiple return values. |
4116 | |
4117 | if (!CastInst::isBitOrNoopPointerCastable(SrcTy: NewRetTy, DestTy: OldRetTy, DL)) { |
4118 | if (Callee->isDeclaration()) |
4119 | return false; // Cannot transform this return value. |
4120 | |
4121 | if (!Caller->use_empty() && |
4122 | // void -> non-void is handled specially |
4123 | !NewRetTy->isVoidTy()) |
4124 | return false; // Cannot transform this return value. |
4125 | } |
4126 | |
4127 | if (!CallerPAL.isEmpty() && !Caller->use_empty()) { |
4128 | AttrBuilder RAttrs(FT->getContext(), CallerPAL.getRetAttrs()); |
4129 | if (RAttrs.overlaps(AM: AttributeFuncs::typeIncompatible(Ty: NewRetTy))) |
4130 | return false; // Attribute not compatible with transformed value. |
4131 | } |
4132 | |
4133 | // If the callbase is an invoke instruction, and the return value is |
4134 | // used by a PHI node in a successor, we cannot change the return type of |
4135 | // the call because there is no place to put the cast instruction (without |
4136 | // breaking the critical edge). Bail out in this case. |
4137 | if (!Caller->use_empty()) { |
4138 | BasicBlock *PhisNotSupportedBlock = nullptr; |
4139 | if (auto *II = dyn_cast<InvokeInst>(Val: Caller)) |
4140 | PhisNotSupportedBlock = II->getNormalDest(); |
4141 | if (PhisNotSupportedBlock) |
4142 | for (User *U : Caller->users()) |
4143 | if (PHINode *PN = dyn_cast<PHINode>(Val: U)) |
4144 | if (PN->getParent() == PhisNotSupportedBlock) |
4145 | return false; |
4146 | } |
4147 | } |
4148 | |
4149 | unsigned NumActualArgs = Call.arg_size(); |
4150 | unsigned NumCommonArgs = std::min(a: FT->getNumParams(), b: NumActualArgs); |
4151 | |
4152 | // Prevent us turning: |
4153 | // declare void @takes_i32_inalloca(i32* inalloca) |
4154 | // call void bitcast (void (i32*)* @takes_i32_inalloca to void (i32)*)(i32 0) |
4155 | // |
4156 | // into: |
4157 | // call void @takes_i32_inalloca(i32* null) |
4158 | // |
4159 | // Similarly, avoid folding away bitcasts of byval calls. |
4160 | if (Callee->getAttributes().hasAttrSomewhere(Kind: Attribute::InAlloca) || |
4161 | Callee->getAttributes().hasAttrSomewhere(Kind: Attribute::Preallocated)) |
4162 | return false; |
4163 | |
4164 | auto AI = Call.arg_begin(); |
4165 | for (unsigned i = 0, e = NumCommonArgs; i != e; ++i, ++AI) { |
4166 | Type *ParamTy = FT->getParamType(i); |
4167 | Type *ActTy = (*AI)->getType(); |
4168 | |
4169 | if (!CastInst::isBitOrNoopPointerCastable(SrcTy: ActTy, DestTy: ParamTy, DL)) |
4170 | return false; // Cannot transform this parameter value. |
4171 | |
4172 | // Check if there are any incompatible attributes we cannot drop safely. |
4173 | if (AttrBuilder(FT->getContext(), CallerPAL.getParamAttrs(ArgNo: i)) |
4174 | .overlaps(AM: AttributeFuncs::typeIncompatible( |
4175 | Ty: ParamTy, ASK: AttributeFuncs::ASK_UNSAFE_TO_DROP))) |
4176 | return false; // Attribute not compatible with transformed value. |
4177 | |
4178 | if (Call.isInAllocaArgument(ArgNo: i) || |
4179 | CallerPAL.hasParamAttr(ArgNo: i, Kind: Attribute::Preallocated)) |
4180 | return false; // Cannot transform to and from inalloca/preallocated. |
4181 | |
4182 | if (CallerPAL.hasParamAttr(ArgNo: i, Kind: Attribute::SwiftError)) |
4183 | return false; |
4184 | |
4185 | if (CallerPAL.hasParamAttr(ArgNo: i, Kind: Attribute::ByVal) != |
4186 | Callee->getAttributes().hasParamAttr(ArgNo: i, Kind: Attribute::ByVal)) |
4187 | return false; // Cannot transform to or from byval. |
4188 | } |
4189 | |
4190 | if (Callee->isDeclaration()) { |
4191 | // Do not delete arguments unless we have a function body. |
4192 | if (FT->getNumParams() < NumActualArgs && !FT->isVarArg()) |
4193 | return false; |
4194 | |
4195 | // If the callee is just a declaration, don't change the varargsness of the |
4196 | // call. We don't want to introduce a varargs call where one doesn't |
4197 | // already exist. |
4198 | if (FT->isVarArg() != Call.getFunctionType()->isVarArg()) |
4199 | return false; |
4200 | |
4201 | // If both the callee and the cast type are varargs, we still have to make |
4202 | // sure the number of fixed parameters are the same or we have the same |
4203 | // ABI issues as if we introduce a varargs call. |
4204 | if (FT->isVarArg() && Call.getFunctionType()->isVarArg() && |
4205 | FT->getNumParams() != Call.getFunctionType()->getNumParams()) |
4206 | return false; |
4207 | } |
4208 | |
4209 | if (FT->getNumParams() < NumActualArgs && FT->isVarArg() && |
4210 | !CallerPAL.isEmpty()) { |
4211 | // In this case we have more arguments than the new function type, but we |
4212 | // won't be dropping them. Check that these extra arguments have attributes |
4213 | // that are compatible with being a vararg call argument. |
4214 | unsigned SRetIdx; |
4215 | if (CallerPAL.hasAttrSomewhere(Kind: Attribute::StructRet, Index: &SRetIdx) && |
4216 | SRetIdx - AttributeList::FirstArgIndex >= FT->getNumParams()) |
4217 | return false; |
4218 | } |
4219 | |
4220 | // Okay, we decided that this is a safe thing to do: go ahead and start |
4221 | // inserting cast instructions as necessary. |
4222 | SmallVector<Value *, 8> Args; |
4223 | SmallVector<AttributeSet, 8> ArgAttrs; |
4224 | Args.reserve(N: NumActualArgs); |
4225 | ArgAttrs.reserve(N: NumActualArgs); |
4226 | |
4227 | // Get any return attributes. |
4228 | AttrBuilder RAttrs(FT->getContext(), CallerPAL.getRetAttrs()); |
4229 | |
4230 | // If the return value is not being used, the type may not be compatible |
4231 | // with the existing attributes. Wipe out any problematic attributes. |
4232 | RAttrs.remove(AM: AttributeFuncs::typeIncompatible(Ty: NewRetTy)); |
4233 | |
4234 | LLVMContext &Ctx = Call.getContext(); |
4235 | AI = Call.arg_begin(); |
4236 | for (unsigned i = 0; i != NumCommonArgs; ++i, ++AI) { |
4237 | Type *ParamTy = FT->getParamType(i); |
4238 | |
4239 | Value *NewArg = *AI; |
4240 | if ((*AI)->getType() != ParamTy) |
4241 | NewArg = Builder.CreateBitOrPointerCast(V: *AI, DestTy: ParamTy); |
4242 | Args.push_back(Elt: NewArg); |
4243 | |
4244 | // Add any parameter attributes except the ones incompatible with the new |
4245 | // type. Note that we made sure all incompatible ones are safe to drop. |
4246 | AttributeMask IncompatibleAttrs = AttributeFuncs::typeIncompatible( |
4247 | Ty: ParamTy, ASK: AttributeFuncs::ASK_SAFE_TO_DROP); |
4248 | ArgAttrs.push_back( |
4249 | Elt: CallerPAL.getParamAttrs(ArgNo: i).removeAttributes(C&: Ctx, AttrsToRemove: IncompatibleAttrs)); |
4250 | } |
4251 | |
4252 | // If the function takes more arguments than the call was taking, add them |
4253 | // now. |
4254 | for (unsigned i = NumCommonArgs; i != FT->getNumParams(); ++i) { |
4255 | Args.push_back(Elt: Constant::getNullValue(Ty: FT->getParamType(i))); |
4256 | ArgAttrs.push_back(Elt: AttributeSet()); |
4257 | } |
4258 | |
4259 | // If the call supplies extra arguments, keep them only for vararg callees.
4260 | if (FT->getNumParams() < NumActualArgs) { |
4261 | // TODO: if (!FT->isVarArg()) this call may be unreachable. PR14722 |
4262 | if (FT->isVarArg()) { |
4263 | // Add all of the arguments in their promoted form to the arg list. |
4264 | for (unsigned i = FT->getNumParams(); i != NumActualArgs; ++i, ++AI) { |
4265 | Type *PTy = getPromotedType(Ty: (*AI)->getType()); |
4266 | Value *NewArg = *AI; |
4267 | if (PTy != (*AI)->getType()) { |
4268 | // Must promote to pass through va_arg area! |
4269 | Instruction::CastOps opcode = |
4270 | CastInst::getCastOpcode(Val: *AI, SrcIsSigned: false, Ty: PTy, DstIsSigned: false); |
4271 | NewArg = Builder.CreateCast(Op: opcode, V: *AI, DestTy: PTy); |
4272 | } |
4273 | Args.push_back(Elt: NewArg); |
4274 | |
4275 | // Add any parameter attributes. |
4276 | ArgAttrs.push_back(Elt: CallerPAL.getParamAttrs(ArgNo: i)); |
4277 | } |
4278 | } |
4279 | } |
4280 | |
4281 | AttributeSet FnAttrs = CallerPAL.getFnAttrs(); |
4282 | |
4283 | if (NewRetTy->isVoidTy()) |
4284 | Caller->setName("" ); // Void type should not have a name. |
4285 | |
4286 | assert((ArgAttrs.size() == FT->getNumParams() || FT->isVarArg()) && |
4287 | "missing argument attributes" ); |
4288 | AttributeList NewCallerPAL = AttributeList::get( |
4289 | C&: Ctx, FnAttrs, RetAttrs: AttributeSet::get(C&: Ctx, B: RAttrs), ArgAttrs); |
4290 | |
4291 | SmallVector<OperandBundleDef, 1> OpBundles; |
4292 | Call.getOperandBundlesAsDefs(Defs&: OpBundles); |
4293 | |
4294 | CallBase *NewCall; |
4295 | if (InvokeInst *II = dyn_cast<InvokeInst>(Val: Caller)) { |
4296 | NewCall = Builder.CreateInvoke(Callee, NormalDest: II->getNormalDest(), |
4297 | UnwindDest: II->getUnwindDest(), Args, OpBundles); |
4298 | } else { |
4299 | NewCall = Builder.CreateCall(Callee, Args, OpBundles); |
4300 | cast<CallInst>(Val: NewCall)->setTailCallKind( |
4301 | cast<CallInst>(Val: Caller)->getTailCallKind()); |
4302 | } |
4303 | NewCall->takeName(V: Caller); |
4304 | NewCall->setCallingConv(Call.getCallingConv()); |
4305 | NewCall->setAttributes(NewCallerPAL); |
4306 | |
4307 | // Preserve prof metadata if any. |
4308 | NewCall->copyMetadata(SrcInst: *Caller, WL: {LLVMContext::MD_prof}); |
4309 | |
4310 | // Insert a cast of the return type as necessary. |
4311 | Instruction *NC = NewCall; |
4312 | Value *NV = NC; |
4313 | if (OldRetTy != NV->getType() && !Caller->use_empty()) { |
4314 | if (!NV->getType()->isVoidTy()) { |
4315 | NV = NC = CastInst::CreateBitOrPointerCast(S: NC, Ty: OldRetTy); |
4316 | NC->setDebugLoc(Caller->getDebugLoc()); |
4317 | |
4318 | auto OptInsertPt = NewCall->getInsertionPointAfterDef(); |
4319 | assert(OptInsertPt && "No place to insert cast" ); |
4320 | InsertNewInstBefore(New: NC, Old: *OptInsertPt); |
4321 | Worklist.pushUsersToWorkList(I&: *Caller); |
4322 | } else { |
4323 | NV = PoisonValue::get(T: Caller->getType()); |
4324 | } |
4325 | } |
4326 | |
4327 | if (!Caller->use_empty()) |
4328 | replaceInstUsesWith(I&: *Caller, V: NV); |
4329 | else if (Caller->hasValueHandle()) { |
4330 | if (OldRetTy == NV->getType()) |
4331 | ValueHandleBase::ValueIsRAUWd(Old: Caller, New: NV); |
4332 | else |
4333 | // We cannot call ValueIsRAUWd with a different type, and the |
4334 | // actual tracked value will disappear. |
4335 | ValueHandleBase::ValueIsDeleted(V: Caller); |
4336 | } |
4337 | |
4338 | eraseInstFromFunction(I&: *Caller); |
4339 | return true; |
4340 | } |
4341 | |
4342 | /// Turn a call to a function created by init_trampoline / adjust_trampoline |
4343 | /// intrinsic pair into a direct call to the underlying function. |
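/// Illustrative example: given
///   declare void @f(ptr nest, i32)
/// an adjusted-trampoline call 'call void %fp(i32 7)' becomes
///   call void @f(ptr %nval, i32 7)
/// where %nval is the third argument of the matching llvm.init.trampoline.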
4344 | Instruction * |
4345 | InstCombinerImpl::transformCallThroughTrampoline(CallBase &Call, |
4346 | IntrinsicInst &Tramp) { |
4347 | FunctionType *FTy = Call.getFunctionType(); |
4348 | AttributeList Attrs = Call.getAttributes(); |
4349 | |
4350 | // If the call already has the 'nest' attribute somewhere then give up - |
4351 | // otherwise 'nest' would occur twice after splicing in the chain. |
4352 | if (Attrs.hasAttrSomewhere(Kind: Attribute::Nest)) |
4353 | return nullptr; |
4354 | |
4355 | Function *NestF = cast<Function>(Val: Tramp.getArgOperand(i: 1)->stripPointerCasts()); |
4356 | FunctionType *NestFTy = NestF->getFunctionType(); |
4357 | |
4358 | AttributeList NestAttrs = NestF->getAttributes(); |
4359 | if (!NestAttrs.isEmpty()) { |
4360 | unsigned NestArgNo = 0; |
4361 | Type *NestTy = nullptr; |
4362 | AttributeSet NestAttr; |
4363 | |
4364 | // Look for a parameter marked with the 'nest' attribute. |
4365 | for (FunctionType::param_iterator I = NestFTy->param_begin(), |
4366 | E = NestFTy->param_end(); |
4367 | I != E; ++NestArgNo, ++I) { |
4368 | AttributeSet AS = NestAttrs.getParamAttrs(ArgNo: NestArgNo); |
4369 | if (AS.hasAttribute(Kind: Attribute::Nest)) { |
4370 | // Record the parameter type and any other attributes. |
4371 | NestTy = *I; |
4372 | NestAttr = AS; |
4373 | break; |
4374 | } |
4375 | } |
4376 | |
4377 | if (NestTy) { |
4378 | std::vector<Value*> NewArgs; |
4379 | std::vector<AttributeSet> NewArgAttrs; |
4380 | NewArgs.reserve(n: Call.arg_size() + 1); |
4381 | NewArgAttrs.reserve(n: Call.arg_size()); |
4382 | |
4383 | // Insert the nest argument into the call argument list, which may |
4384 | // mean appending it. Likewise for attributes. |
4385 | |
4386 | { |
4387 | unsigned ArgNo = 0; |
4388 | auto I = Call.arg_begin(), E = Call.arg_end(); |
4389 | do { |
4390 | if (ArgNo == NestArgNo) { |
4391 | // Add the chain argument and attributes. |
4392 | Value *NestVal = Tramp.getArgOperand(i: 2); |
4393 | if (NestVal->getType() != NestTy) |
4394 | NestVal = Builder.CreateBitCast(V: NestVal, DestTy: NestTy, Name: "nest" ); |
4395 | NewArgs.push_back(x: NestVal); |
4396 | NewArgAttrs.push_back(x: NestAttr); |
4397 | } |
4398 | |
4399 | if (I == E) |
4400 | break; |
4401 | |
4402 | // Add the original argument and attributes. |
4403 | NewArgs.push_back(x: *I); |
4404 | NewArgAttrs.push_back(x: Attrs.getParamAttrs(ArgNo)); |
4405 | |
4406 | ++ArgNo; |
4407 | ++I; |
4408 | } while (true); |
4409 | } |
4410 | |
4411 | // The trampoline may have been bitcast to a bogus type (FTy). |
4412 | // Handle this by synthesizing a new function type, equal to FTy |
4413 | // with the chain parameter inserted. |
4414 | |
4415 | std::vector<Type*> NewTypes; |
4416 | NewTypes.reserve(n: FTy->getNumParams()+1); |
4417 | |
4418 | // Insert the chain's type into the list of parameter types, which may |
4419 | // mean appending it. |
4420 | { |
4421 | unsigned ArgNo = 0; |
4422 | FunctionType::param_iterator I = FTy->param_begin(), |
4423 | E = FTy->param_end(); |
4424 | |
4425 | do { |
4426 | if (ArgNo == NestArgNo) |
4427 | // Add the chain's type. |
4428 | NewTypes.push_back(x: NestTy); |
4429 | |
4430 | if (I == E) |
4431 | break; |
4432 | |
4433 | // Add the original type. |
4434 | NewTypes.push_back(x: *I); |
4435 | |
4436 | ++ArgNo; |
4437 | ++I; |
4438 | } while (true); |
4439 | } |
4440 | |
4441 | // Replace the trampoline call with a direct call. Let the generic |
4442 | // code sort out any function type mismatches. |
4443 | FunctionType *NewFTy = |
4444 | FunctionType::get(Result: FTy->getReturnType(), Params: NewTypes, isVarArg: FTy->isVarArg()); |
4445 | AttributeList NewPAL = |
4446 | AttributeList::get(C&: FTy->getContext(), FnAttrs: Attrs.getFnAttrs(), |
4447 | RetAttrs: Attrs.getRetAttrs(), ArgAttrs: NewArgAttrs); |
4448 | |
4449 | SmallVector<OperandBundleDef, 1> OpBundles; |
4450 | Call.getOperandBundlesAsDefs(Defs&: OpBundles); |
4451 | |
4452 | Instruction *NewCaller; |
4453 | if (InvokeInst *II = dyn_cast<InvokeInst>(Val: &Call)) { |
4454 | NewCaller = InvokeInst::Create(Ty: NewFTy, Func: NestF, IfNormal: II->getNormalDest(), |
4455 | IfException: II->getUnwindDest(), Args: NewArgs, Bundles: OpBundles); |
4456 | cast<InvokeInst>(Val: NewCaller)->setCallingConv(II->getCallingConv()); |
4457 | cast<InvokeInst>(Val: NewCaller)->setAttributes(NewPAL); |
4458 | } else if (CallBrInst *CBI = dyn_cast<CallBrInst>(Val: &Call)) { |
4459 | NewCaller = |
4460 | CallBrInst::Create(Ty: NewFTy, Func: NestF, DefaultDest: CBI->getDefaultDest(), |
4461 | IndirectDests: CBI->getIndirectDests(), Args: NewArgs, Bundles: OpBundles); |
4462 | cast<CallBrInst>(Val: NewCaller)->setCallingConv(CBI->getCallingConv()); |
4463 | cast<CallBrInst>(Val: NewCaller)->setAttributes(NewPAL); |
4464 | } else { |
4465 | NewCaller = CallInst::Create(Ty: NewFTy, Func: NestF, Args: NewArgs, Bundles: OpBundles); |
4466 | cast<CallInst>(Val: NewCaller)->setTailCallKind( |
4467 | cast<CallInst>(Val&: Call).getTailCallKind()); |
4468 | cast<CallInst>(Val: NewCaller)->setCallingConv( |
4469 | cast<CallInst>(Val&: Call).getCallingConv()); |
4470 | cast<CallInst>(Val: NewCaller)->setAttributes(NewPAL); |
4471 | } |
4472 | NewCaller->setDebugLoc(Call.getDebugLoc()); |
4473 | |
4474 | return NewCaller; |
4475 | } |
4476 | } |
4477 | |
4478 | // Replace the trampoline call with a direct call. Since there is no 'nest' |
4479 | // parameter, there is no need to adjust the argument list. Let the generic |
4480 | // code sort out any function type mismatches. |
4481 | Call.setCalledFunction(FTy, Fn: NestF); |
4482 | return &Call; |
4483 | } |
4484 | |