InstCombineCalls.cpp source code [llvm_projects/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp]

1	//===- InstCombineCalls.cpp -----------------------------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This file implements the visitCall, visitInvoke, and visitCallBr functions.
10	//
11	//===----------------------------------------------------------------------===//
12
13	#include "InstCombineInternal.h"
14	#include "llvm/ADT/APFloat.h"
15	#include "llvm/ADT/APInt.h"
16	#include "llvm/ADT/APSInt.h"
17	#include "llvm/ADT/ArrayRef.h"
18	#include "llvm/ADT/STLFunctionalExtras.h"
19	#include "llvm/ADT/SmallBitVector.h"
20	#include "llvm/ADT/SmallVector.h"
21	#include "llvm/ADT/Statistic.h"
22	#include "llvm/ADT/StringExtras.h"
23	#include "llvm/Analysis/AliasAnalysis.h"
24	#include "llvm/Analysis/AssumeBundleQueries.h"
25	#include "llvm/Analysis/AssumptionCache.h"
26	#include "llvm/Analysis/InstructionSimplify.h"
27	#include "llvm/Analysis/Loads.h"
28	#include "llvm/Analysis/MemoryBuiltins.h"
29	#include "llvm/Analysis/ValueTracking.h"
30	#include "llvm/Analysis/VectorUtils.h"
31	#include "llvm/IR/AttributeMask.h"
32	#include "llvm/IR/Attributes.h"
33	#include "llvm/IR/BasicBlock.h"
34	#include "llvm/IR/Constant.h"
35	#include "llvm/IR/Constants.h"
36	#include "llvm/IR/DataLayout.h"
37	#include "llvm/IR/DebugInfo.h"
38	#include "llvm/IR/DerivedTypes.h"
39	#include "llvm/IR/Function.h"
40	#include "llvm/IR/GlobalVariable.h"
41	#include "llvm/IR/InlineAsm.h"
42	#include "llvm/IR/InstrTypes.h"
43	#include "llvm/IR/Instruction.h"
44	#include "llvm/IR/Instructions.h"
45	#include "llvm/IR/IntrinsicInst.h"
46	#include "llvm/IR/Intrinsics.h"
47	#include "llvm/IR/IntrinsicsAArch64.h"
48	#include "llvm/IR/IntrinsicsAMDGPU.h"
49	#include "llvm/IR/IntrinsicsARM.h"
50	#include "llvm/IR/IntrinsicsHexagon.h"
51	#include "llvm/IR/LLVMContext.h"
52	#include "llvm/IR/Metadata.h"
53	#include "llvm/IR/PatternMatch.h"
54	#include "llvm/IR/ProfDataUtils.h"
55	#include "llvm/IR/Statepoint.h"
56	#include "llvm/IR/Type.h"
57	#include "llvm/IR/User.h"
58	#include "llvm/IR/Value.h"
59	#include "llvm/IR/ValueHandle.h"
60	#include "llvm/Support/AtomicOrdering.h"
61	#include "llvm/Support/Casting.h"
62	#include "llvm/Support/CommandLine.h"
63	#include "llvm/Support/Compiler.h"
64	#include "llvm/Support/Debug.h"
65	#include "llvm/Support/ErrorHandling.h"
66	#include "llvm/Support/KnownBits.h"
67	#include "llvm/Support/KnownFPClass.h"
68	#include "llvm/Support/MathExtras.h"
69	#include "llvm/Support/TypeSize.h"
70	#include "llvm/Support/raw_ostream.h"
71	#include "llvm/Transforms/InstCombine/InstCombiner.h"
72	#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
73	#include "llvm/Transforms/Utils/Local.h"
74	#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
75	#include <algorithm>
76	#include <cassert>
77	#include <cstdint>
78	#include <optional>
79	#include <utility>
80	#include <vector>
81
82	#define DEBUG_TYPE "instcombine"
83	#include "llvm/Transforms/Utils/InstructionWorklist.h"
84
85	using namespace llvm;
86	using namespace PatternMatch;
87
88	STATISTIC(NumSimplified, "Number of library calls simplified");
89
90	static cl::opt<unsigned> GuardWideningWindow(
91	"instcombine-guard-widening-window",
92	cl::init(Val: `3`),
93	cl::desc ("How wide an instruction window to bypass looking for "
94	"another guard"));
95
96	/// Return the specified type promoted as it would be to pass though a va_arg
97	/// area.
98	static Type getPromotedType(Type Ty) {
99	if (IntegerType* ITy = dyn_cast<IntegerType>(Val: Ty)) {
100	if (ITy->getBitWidth() < `32`)
101	return Type::getInt32Ty(C&: Ty->getContext());
102	}
103	return Ty;
104	}
105
106	/// Recognize a memcpy/memmove from a trivially otherwise unused alloca.
107	/// TODO: This should probably be integrated with visitAllocSites, but that
108	/// requires a deeper change to allow either unread or unwritten objects.
109	static bool hasUndefSource(AnyMemTransferInst *MI) {
110	auto *Src = MI->getRawSource();
111	while (isa<GetElementPtrInst>(Val: Src)) {
112	if (!Src->hasOneUse())
113	return false;
114	Src = cast<Instruction>(Val: Src)->getOperand(i: `0`);
115	}
116	return isa<AllocaInst>(Val: Src) && Src->hasOneUse();
117	}
118
119	Instruction InstCombinerImpl::SimplifyAnyMemTransfer(AnyMemTransferInst MI) {
120	Align DstAlign = getKnownAlignment(V: MI->getRawDest(), DL, CxtI: MI, AC: &AC, DT: &DT);
121	MaybeAlign CopyDstAlign = MI->getDestAlign();
122	if (!CopyDstAlign \|\| *CopyDstAlign < DstAlign) {
123	MI->setDestAlignment(DstAlign);
124	return MI;
125	}
126
127	Align SrcAlign = getKnownAlignment(V: MI->getRawSource(), DL, CxtI: MI, AC: &AC, DT: &DT);
128	MaybeAlign CopySrcAlign = MI->getSourceAlign();
129	if (!CopySrcAlign \|\| *CopySrcAlign < SrcAlign) {
130	MI->setSourceAlignment(SrcAlign);
131	return MI;
132	}
133
134	// If we have a store to a location which is known constant, we can conclude
135	// that the store must be storing the constant value (else the memory
136	// wouldn't be constant), and this must be a noop.
137	if (!isModSet(MRI: AA->getModRefInfoMask(P: MI->getDest()))) {
138	// Set the size of the copy to 0, it will be deleted on the next iteration.
139	MI->setLength((uint64_t)`0`);
140	return MI;
141	}
142
143	// If the source is provably undef, the memcpy/memmove doesn't do anything
144	// (unless the transfer is volatile).
145	if (hasUndefSource(MI) && !MI->isVolatile()) {
146	// Set the size of the copy to 0, it will be deleted on the next iteration.
147	MI->setLength((uint64_t)`0`);
148	return MI;
149	}
150
151	// If MemCpyInst length is 1/2/4/8 bytes then replace memcpy with
152	// load/store.
153	ConstantInt *MemOpLength = dyn_cast<ConstantInt>(Val: MI->getLength());
154	if (!MemOpLength) return nullptr;
155
156	// Source and destination pointer types are always "i8" for intrinsic. See*
157	// if the size is something we can handle with a single primitive load/store.
158	// A single load+store correctly handles overlapping memory in the memmove
159	// case.
160	uint64_t Size = MemOpLength->getLimitedValue();
161	assert(Size && "0-sized memory transferring should be removed already.");
162
163	if (Size > `8` \|\| (Size&(Size-`1`)))
164	return nullptr; // If not 1/2/4/8 bytes, exit.
165
166	// If it is an atomic and alignment is less than the size then we will
167	// introduce the unaligned memory access which will be later transformed
168	// into libcall in CodeGen. This is not evident performance gain so disable
169	// it now.
170	if (MI->isAtomic())
171	if (CopyDstAlign < Size \|\| CopySrcAlign < Size)
172	return nullptr;
173
174	// Use an integer load+store unless we can find something better.
175	IntegerType* IntType = IntegerType::get(C&: MI->getContext(), NumBits: Size<<`3`);
176
177	// If the memcpy has metadata describing the members, see if we can get the
178	// TBAA, scope and noalias tags describing our copy.
179	AAMDNodes AACopyMD = MI->getAAMetadata().adjustForAccess(AccessSize: Size);
180
181	Value *Src = MI->getArgOperand(i: `1`);
182	Value *Dest = MI->getArgOperand(i: `0`);
183	LoadInst *L = Builder.CreateLoad(Ty: IntType, Ptr: Src);
184	// Alignment from the mem intrinsic will be better, so use it.
185	L->setAlignment(*CopySrcAlign);
186	L->setAAMetadata(AACopyMD);
187	MDNode *LoopMemParallelMD =
188	MI->getMetadata(KindID: LLVMContext::MD_mem_parallel_loop_access);
189	if (LoopMemParallelMD)
190	L->setMetadata(KindID: LLVMContext::MD_mem_parallel_loop_access, Node: LoopMemParallelMD);
191	MDNode *AccessGroupMD = MI->getMetadata(KindID: LLVMContext::MD_access_group);
192	if (AccessGroupMD)
193	L->setMetadata(KindID: LLVMContext::MD_access_group, Node: AccessGroupMD);
194
195	StoreInst *S = Builder.CreateStore(Val: L, Ptr: Dest);
196	// Alignment from the mem intrinsic will be better, so use it.
197	S->setAlignment(*CopyDstAlign);
198	S->setAAMetadata(AACopyMD);
199	if (LoopMemParallelMD)
200	S->setMetadata(KindID: LLVMContext::MD_mem_parallel_loop_access, Node: LoopMemParallelMD);
201	if (AccessGroupMD)
202	S->setMetadata(KindID: LLVMContext::MD_access_group, Node: AccessGroupMD);
203	S->copyMetadata(SrcInst: *MI, WL: LLVMContext::MD_DIAssignID);
204
205	if (auto *MT = dyn_cast<MemTransferInst>(Val: MI)) {
206	// non-atomics can be volatile
207	L->setVolatile(MT->isVolatile());
208	S->setVolatile(MT->isVolatile());
209	}
210	if (MI->isAtomic()) {
211	// atomics have to be unordered
212	L->setOrdering(AtomicOrdering::Unordered);
213	S->setOrdering(AtomicOrdering::Unordered);
214	}
215
216	// Set the size of the copy to 0, it will be deleted on the next iteration.
217	MI->setLength((uint64_t)`0`);
218	return MI;
219	}
220
221	Instruction InstCombinerImpl::SimplifyAnyMemSet(AnyMemSetInst MI) {
222	const Align KnownAlignment =
223	getKnownAlignment(V: MI->getDest(), DL, CxtI: MI, AC: &AC, DT: &DT);
224	MaybeAlign MemSetAlign = MI->getDestAlign();
225	if (!MemSetAlign \|\| *MemSetAlign < KnownAlignment) {
226	MI->setDestAlignment(KnownAlignment);
227	return MI;
228	}
229
230	// If we have a store to a location which is known constant, we can conclude
231	// that the store must be storing the constant value (else the memory
232	// wouldn't be constant), and this must be a noop.
233	if (!isModSet(MRI: AA->getModRefInfoMask(P: MI->getDest()))) {
234	// Set the size of the copy to 0, it will be deleted on the next iteration.
235	MI->setLength((uint64_t)`0`);
236	return MI;
237	}
238
239	// Remove memset with an undef value.
240	// FIXME: This is technically incorrect because it might overwrite a poison
241	// value. Change to PoisonValue once #52930 is resolved.
242	if (isa<UndefValue>(Val: MI->getValue())) {
243	// Set the size of the copy to 0, it will be deleted on the next iteration.
244	MI->setLength((uint64_t)`0`);
245	return MI;
246	}
247
248	// Extract the length and alignment and fill if they are constant.
249	ConstantInt *LenC = dyn_cast<ConstantInt>(Val: MI->getLength());
250	ConstantInt *FillC = dyn_cast<ConstantInt>(Val: MI->getValue());
251	if (!LenC \|\| !FillC \|\| !FillC->getType()->isIntegerTy(Bitwidth: `8`))
252	return nullptr;
253	const uint64_t Len = LenC->getLimitedValue();
254	assert(Len && "0-sized memory setting should be removed already.");
255	const Align Alignment = MI->getDestAlign().valueOrOne();
256
257	// If it is an atomic and alignment is less than the size then we will
258	// introduce the unaligned memory access which will be later transformed
259	// into libcall in CodeGen. This is not evident performance gain so disable
260	// it now.
261	if (MI->isAtomic() && Alignment < Len)
262	return nullptr;
263
264	// memset(s,c,n) -> store s, c (for n=1,2,4,8)
265	if (Len <= `8` && isPowerOf2_32(Value: (uint32_t)Len)) {
266	Value *Dest = MI->getDest();
267
268	// Extract the fill value and store.
269	Constant *FillVal = ConstantInt::get(
270	Context&: MI->getContext(), V: APInt::getSplat(NewLen: Len * `8`, V: FillC->getValue()));
271	StoreInst *S = Builder.CreateStore(Val: FillVal, Ptr: Dest, isVolatile: MI->isVolatile());
272	S->copyMetadata(SrcInst: *MI, WL: LLVMContext::MD_DIAssignID);
273	for (DbgVariableRecord *DbgAssign : at::getDVRAssignmentMarkers(Inst: S)) {
274	if (llvm::is_contained(Range: DbgAssign->location_ops(), Element: FillC))
275	DbgAssign->replaceVariableLocationOp(OldValue: FillC, NewValue: FillVal);
276	}
277
278	S->setAlignment(Alignment);
279	if (MI->isAtomic())
280	S->setOrdering(AtomicOrdering::Unordered);
281
282	// Set the size of the copy to 0, it will be deleted on the next iteration.
283	MI->setLength((uint64_t)`0`);
284	return MI;
285	}
286
287	return nullptr;
288	}
289
290	// TODO, Obvious Missing Transforms:
291	// Narrow width by halfs excluding zero/undef lanes*
292	Value *InstCombinerImpl::simplifyMaskedLoad(IntrinsicInst &II) {
293	Value *LoadPtr = II.getArgOperand(i: `0`);
294	const Align Alignment = II.getParamAlign(ArgNo: `0`).valueOrOne();
295
296	// If the mask is all ones or undefs, this is a plain vector load of the 1st
297	// argument.
298	if (maskIsAllOneOrUndef(Mask: II.getArgOperand(i: `1`))) {
299	LoadInst *L = Builder.CreateAlignedLoad(Ty: II.getType(), Ptr: LoadPtr, Align: Alignment,
300	Name: "unmaskedload");
301	L->copyMetadata(SrcInst: II);
302	return L;
303	}
304
305	// If we can unconditionally load from this address, replace with a
306	// load/select idiom. TODO: use DT for context sensitive query
307	if (isDereferenceablePointer(V: LoadPtr, Ty: II.getType(),
308	DL: II.getDataLayout(), CtxI: &II, AC: &AC)) {
309	LoadInst *LI = Builder.CreateAlignedLoad(Ty: II.getType(), Ptr: LoadPtr, Align: Alignment,
310	Name: "unmaskedload");
311	LI->copyMetadata(SrcInst: II);
312	return Builder.CreateSelect(C: II.getArgOperand(i: `1`), True: LI, False: II.getArgOperand(i: `2`));
313	}
314
315	return nullptr;
316	}
317
318	// TODO, Obvious Missing Transforms:
319	// Single constant active lane -> store*
320	// Narrow width by halfs excluding zero/undef lanes*
321	Instruction *InstCombinerImpl::simplifyMaskedStore(IntrinsicInst &II) {
322	Value *StorePtr = II.getArgOperand(i: `1`);
323	Align Alignment = II.getParamAlign(ArgNo: `1`).valueOrOne();
324	auto *ConstMask = dyn_cast<Constant>(Val: II.getArgOperand(i: `2`));
325	if (!ConstMask)
326	return nullptr;
327
328	// If the mask is all zeros, this instruction does nothing.
329	if (maskIsAllZeroOrUndef(Mask: ConstMask))
330	return eraseInstFromFunction(I&: II);
331
332	// If the mask is all ones, this is a plain vector store of the 1st argument.
333	if (maskIsAllOneOrUndef(Mask: ConstMask)) {
334	StoreInst *S =
335	new StoreInst (II.getArgOperand(i: `0`), StorePtr, false, Alignment);
336	S->copyMetadata(SrcInst: II);
337	return S;
338	}
339
340	if (isa<ScalableVectorType>(Val: ConstMask->getType()))
341	return nullptr;
342
343	// Use masked off lanes to simplify operands via SimplifyDemandedVectorElts
344	APInt DemandedElts = possiblyDemandedEltsInMask(Mask: ConstMask);
345	APInt PoisonElts(DemandedElts.getBitWidth(), `0`);
346	if (Value *V = SimplifyDemandedVectorElts(V: II.getOperand(i_nocapture: `0`), DemandedElts,
347	PoisonElts))
348	return replaceOperand(I&: II, OpNum: `0`, V);
349
350	return nullptr;
351	}
352
353	// TODO, Obvious Missing Transforms:
354	// Single constant active lane load -> load*
355	// Dereferenceable address & few lanes -> scalarize speculative load/selects*
356	// Adjacent vector addresses -> masked.load*
357	// Narrow width by halfs excluding zero/undef lanes*
358	// Vector incrementing address -> vector masked load*
359	Instruction *InstCombinerImpl::simplifyMaskedGather(IntrinsicInst &II) {
360	auto *ConstMask = dyn_cast<Constant>(Val: II.getArgOperand(i: `1`));
361	if (!ConstMask)
362	return nullptr;
363
364	// Vector splat address w/known mask -> scalar load
365	// Fold the gather to load the source vector first lane
366	// because it is reloading the same value each time
367	if (ConstMask->isAllOnesValue())
368	if (auto *SplatPtr = getSplatValue(V: II.getArgOperand(i: `0`))) {
369	auto *VecTy = cast<VectorType>(Val: II.getType());
370	const Align Alignment = II.getParamAlign(ArgNo: `0`).valueOrOne();
371	LoadInst *L = Builder.CreateAlignedLoad(Ty: VecTy->getElementType(), Ptr: SplatPtr,
372	Align: Alignment, Name: "load.scalar");
373	Value *Shuf =
374	Builder.CreateVectorSplat(EC: VecTy->getElementCount(), V: L, Name: "broadcast");
375	return replaceInstUsesWith(I&: II, V: cast<Instruction>(Val: Shuf));
376	}
377
378	return nullptr;
379	}
380
381	// TODO, Obvious Missing Transforms:
382	// Single constant active lane -> store*
383	// Adjacent vector addresses -> masked.store*
384	// Narrow store width by halfs excluding zero/undef lanes*
385	// Vector incrementing address -> vector masked store*
386	Instruction *InstCombinerImpl::simplifyMaskedScatter(IntrinsicInst &II) {
387	auto *ConstMask = dyn_cast<Constant>(Val: II.getArgOperand(i: `2`));
388	if (!ConstMask)
389	return nullptr;
390
391	// If the mask is all zeros, a scatter does nothing.
392	if (maskIsAllZeroOrUndef(Mask: ConstMask))
393	return eraseInstFromFunction(I&: II);
394
395	// Vector splat address -> scalar store
396	if (auto *SplatPtr = getSplatValue(V: II.getArgOperand(i: `1`))) {
397	// scatter(splat(value), splat(ptr), non-zero-mask) -> store value, ptr
398	if (auto *SplatValue = getSplatValue(V: II.getArgOperand(i: `0`))) {
399	if (maskContainsAllOneOrUndef(Mask: ConstMask)) {
400	Align Alignment = II.getParamAlign(ArgNo: `1`).valueOrOne();
401	StoreInst S = new* StoreInst (SplatValue, SplatPtr, /IsVolatile=/false,
402	Alignment);
403	S->copyMetadata(SrcInst: II);
404	return S;
405	}
406	}
407	// scatter(vector, splat(ptr), splat(true)) -> store extract(vector,
408	// lastlane), ptr
409	if (ConstMask->isAllOnesValue()) {
410	Align Alignment = II.getParamAlign(ArgNo: `1`).valueOrOne();
411	VectorType *WideLoadTy = cast<VectorType>(Val: II.getArgOperand(i: `1`)->getType());
412	ElementCount VF = WideLoadTy->getElementCount();
413	Value *RunTimeVF = Builder.CreateElementCount(Ty: Builder.getInt32Ty(), EC: VF);
414	Value *LastLane = Builder.CreateSub(LHS: RunTimeVF, RHS: Builder.getInt32(C: `1`));
415	Value *Extract =
416	Builder.CreateExtractElement(Vec: II.getArgOperand(i: `0`), Idx: LastLane);
417	StoreInst *S =
418	new StoreInst (Extract, SplatPtr, /IsVolatile=/false, Alignment);
419	S->copyMetadata(SrcInst: II);
420	return S;
421	}
422	}
423	if (isa<ScalableVectorType>(Val: ConstMask->getType()))
424	return nullptr;
425
426	// Use masked off lanes to simplify operands via SimplifyDemandedVectorElts
427	APInt DemandedElts = possiblyDemandedEltsInMask(Mask: ConstMask);
428	APInt PoisonElts(DemandedElts.getBitWidth(), `0`);
429	if (Value *V = SimplifyDemandedVectorElts(V: II.getOperand(i_nocapture: `0`), DemandedElts,
430	PoisonElts))
431	return replaceOperand(I&: II, OpNum: `0`, V);
432	if (Value *V = SimplifyDemandedVectorElts(V: II.getOperand(i_nocapture: `1`), DemandedElts,
433	PoisonElts))
434	return replaceOperand(I&: II, OpNum: `1`, V);
435
436	return nullptr;
437	}
438
439	/// This function transforms launder.invariant.group and strip.invariant.group
440	/// like:
441	/// launder(launder(%x)) -> launder(%x) (the result is not the argument)
442	/// launder(strip(%x)) -> launder(%x)
443	/// strip(strip(%x)) -> strip(%x) (the result is not the argument)
444	/// strip(launder(%x)) -> strip(%x)
445	/// This is legal because it preserves the most recent information about
446	/// the presence or absence of invariant.group.
447	static Instruction *simplifyInvariantGroupIntrinsic(IntrinsicInst &II,
448	InstCombinerImpl &IC) {
449	auto *Arg = II.getArgOperand(i: `0`);
450	auto *StrippedArg = Arg->stripPointerCasts();
451	auto *StrippedInvariantGroupsArg = StrippedArg;
452	while (auto *Intr = dyn_cast<IntrinsicInst>(Val: StrippedInvariantGroupsArg)) {
453	if (Intr->getIntrinsicID() != Intrinsic::launder_invariant_group &&
454	Intr->getIntrinsicID() != Intrinsic::strip_invariant_group)
455	break;
456	StrippedInvariantGroupsArg = Intr->getArgOperand(i: `0`)->stripPointerCasts();
457	}
458	if (StrippedArg == StrippedInvariantGroupsArg)
459	return nullptr; // No launders/strips to remove.
460
461	Value Result = nullptr*;
462
463	if (II.getIntrinsicID() == Intrinsic::launder_invariant_group)
464	Result = IC.Builder.CreateLaunderInvariantGroup(Ptr: StrippedInvariantGroupsArg);
465	else if (II.getIntrinsicID() == Intrinsic::strip_invariant_group)
466	Result = IC.Builder.CreateStripInvariantGroup(Ptr: StrippedInvariantGroupsArg);
467	else
468	llvm_unreachable(
469	"simplifyInvariantGroupIntrinsic only handles launder and strip");
470	if (Result->getType()->getPointerAddressSpace() !=
471	II.getType()->getPointerAddressSpace())
472	Result = IC.Builder.CreateAddrSpaceCast(V: Result, DestTy: II.getType());
473
474	return cast<Instruction>(Val: Result);
475	}
476
477	static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombinerImpl &IC) {
478	assert((II.getIntrinsicID() == Intrinsic::cttz \|\|
479	II.getIntrinsicID() == Intrinsic::ctlz) &&
480	"Expected cttz or ctlz intrinsic");
481	bool IsTZ = II.getIntrinsicID() == Intrinsic::cttz;
482	Value *Op0 = II.getArgOperand(i: `0`);
483	Value *Op1 = II.getArgOperand(i: `1`);
484	Value *X;
485	// ctlz(bitreverse(x)) -> cttz(x)
486	// cttz(bitreverse(x)) -> ctlz(x)
487	if (match(V: Op0, P: m_BitReverse(Op0: m_Value(V&: X)))) {
488	Intrinsic::ID ID = IsTZ ? Intrinsic::ctlz : Intrinsic::cttz;
489	Function *F =
490	Intrinsic::getOrInsertDeclaration(M: II.getModule(), id: ID, Tys: II.getType());
491	return CallInst::Create(Func: F, Args: {X, II.getArgOperand(i: `1`)});
492	}
493
494	if (II.getType()->isIntOrIntVectorTy(BitWidth: `1`)) {
495	// ctlz/cttz i1 Op0 --> not Op0
496	if (match(V: Op1, P: m_Zero()))
497	return BinaryOperator::CreateNot(Op: Op0);
498	// If zero is poison, then the input can be assumed to be "true", so the
499	// instruction simplifies to "false".
500	assert(match(Op1, m_One()) && "Expected ctlz/cttz operand to be 0 or 1");
501	return IC.replaceInstUsesWith(I&: II, V: ConstantInt::getNullValue(Ty: II.getType()));
502	}
503
504	// If ctlz/cttz is only used as a shift amount, set is_zero_poison to true.
505	if (II.hasOneUse() && match(V: Op1, P: m_Zero()) &&
506	match(V: II.user_back(), P: m_Shift(L: m_Value(), R: m_Specific(V: &II)))) {
507	II.dropUBImplyingAttrsAndMetadata();
508	return IC.replaceOperand(I&: II, OpNum: `1`, V: IC.Builder.getTrue());
509	}
510
511	Constant *C;
512
513	if (IsTZ) {
514	// cttz(-x) -> cttz(x)
515	if (match(V: Op0, P: m_Neg(V: m_Value(V&: X))))
516	return IC.replaceOperand(I&: II, OpNum: `0`, V: X);
517
518	// cttz(-x & x) -> cttz(x)
519	if (match(V: Op0, P: m_c_And(L: m_Neg(V: m_Value(V&: X)), R: m_Deferred(V: X))))
520	return IC.replaceOperand(I&: II, OpNum: `0`, V: X);
521
522	// cttz(sext(x)) -> cttz(zext(x))
523	if (match(V: Op0, P: m_OneUse(SubPattern: m_SExt(Op: m_Value(V&: X))))) {
524	auto *Zext = IC.Builder.CreateZExt(V: X, DestTy: II.getType());
525	auto *CttzZext =
526	IC.Builder.CreateBinaryIntrinsic(ID: Intrinsic::cttz, LHS: Zext, RHS: Op1);
527	return IC.replaceInstUsesWith(I&: II, V: CttzZext);
528	}
529
530	// Zext doesn't change the number of trailing zeros, so narrow:
531	// cttz(zext(x)) -> zext(cttz(x)) if the 'ZeroIsPoison' parameter is 'true'.
532	if (match(V: Op0, P: m_OneUse(SubPattern: m_ZExt(Op: m_Value(V&: X)))) && match(V: Op1, P: m_One())) {
533	auto *Cttz = IC.Builder.CreateBinaryIntrinsic(ID: Intrinsic::cttz, LHS: X,
534	RHS: IC.Builder.getTrue());
535	auto *ZextCttz = IC.Builder.CreateZExt(V: Cttz, DestTy: II.getType());
536	return IC.replaceInstUsesWith(I&: II, V: ZextCttz);
537	}
538
539	// cttz(abs(x)) -> cttz(x)
540	// cttz(nabs(x)) -> cttz(x)
541	Value *Y;
542	SelectPatternFlavor SPF = matchSelectPattern(V: Op0, LHS&: X, RHS&: Y).Flavor;
543	if (SPF == SPF_ABS \|\| SPF == SPF_NABS)
544	return IC.replaceOperand(I&: II, OpNum: `0`, V: X);
545
546	if (match(V: Op0, P: m_Intrinsic<Intrinsic::abs>(Op0: m_Value(V&: X))))
547	return IC.replaceOperand(I&: II, OpNum: `0`, V: X);
548
549	// cttz(shl(%const, %val), 1) --> add(cttz(%const, 1), %val)
550	if (match(V: Op0, P: m_Shl(L: m_ImmConstant(C), R: m_Value(V&: X))) &&
551	match(V: Op1, P: m_One())) {
552	Value *ConstCttz =
553	IC.Builder.CreateBinaryIntrinsic(ID: Intrinsic::cttz, LHS: C, RHS: Op1);
554	return BinaryOperator::CreateAdd(V1: ConstCttz, V2: X);
555	}
556
557	// cttz(lshr exact (%const, %val), 1) --> sub(cttz(%const, 1), %val)
558	if (match(V: Op0, P: m_Exact(SubPattern: m_LShr(L: m_ImmConstant(C), R: m_Value(V&: X)))) &&
559	match(V: Op1, P: m_One())) {
560	Value *ConstCttz =
561	IC.Builder.CreateBinaryIntrinsic(ID: Intrinsic::cttz, LHS: C, RHS: Op1);
562	return BinaryOperator::CreateSub(V1: ConstCttz, V2: X);
563	}
564
565	// cttz(add(lshr(UINT_MAX, %val), 1)) --> sub(width, %val)
566	if (match(V: Op0, P: m_Add(L: m_LShr(L: m_AllOnes(), R: m_Value(V&: X)), R: m_One()))) {
567	Value *Width =
568	ConstantInt::get(Ty: II.getType(), V: II.getType()->getScalarSizeInBits());
569	return BinaryOperator::CreateSub(V1: Width, V2: X);
570	}
571	} else {
572	// ctlz(lshr(%const, %val), 1) --> add(ctlz(%const, 1), %val)
573	if (match(V: Op0, P: m_LShr(L: m_ImmConstant(C), R: m_Value(V&: X))) &&
574	match(V: Op1, P: m_One())) {
575	Value *ConstCtlz =
576	IC.Builder.CreateBinaryIntrinsic(ID: Intrinsic::ctlz, LHS: C, RHS: Op1);
577	return BinaryOperator::CreateAdd(V1: ConstCtlz, V2: X);
578	}
579
580	// ctlz(shl nuw (%const, %val), 1) --> sub(ctlz(%const, 1), %val)
581	if (match(V: Op0, P: m_NUWShl(L: m_ImmConstant(C), R: m_Value(V&: X))) &&
582	match(V: Op1, P: m_One())) {
583	Value *ConstCtlz =
584	IC.Builder.CreateBinaryIntrinsic(ID: Intrinsic::ctlz, LHS: C, RHS: Op1);
585	return BinaryOperator::CreateSub(V1: ConstCtlz, V2: X);
586	}
587
588	// ctlz(~x & (x - 1)) -> bitwidth - cttz(x, false)
589	if (Op0->hasOneUse() &&
590	match(V: Op0,
591	P: m_c_And(L: m_Not(V: m_Value(V&: X)), R: m_Add(L: m_Deferred(V: X), R: m_AllOnes())))) {
592	Type *Ty = II.getType();
593	unsigned BitWidth = Ty->getScalarSizeInBits();
594	auto *Cttz = IC.Builder.CreateIntrinsic(ID: Intrinsic::cttz, Types: Ty,
595	Args: {X, IC.Builder.getFalse()});
596	auto *Bw = ConstantInt::get(Ty, V: APInt (BitWidth, BitWidth));
597	return IC.replaceInstUsesWith(I&: II, V: IC.Builder.CreateSub(LHS: Bw, RHS: Cttz));
598	}
599	}
600
601	// cttz(Pow2) -> Log2(Pow2)
602	// ctlz(Pow2) -> BitWidth - 1 - Log2(Pow2)
603	if (auto *R = IC.tryGetLog2(Op: Op0, AssumeNonZero: match(V: Op1, P: m_One()))) {
604	if (IsTZ)
605	return IC.replaceInstUsesWith(I&: II, V: R);
606	BinaryOperator *BO = BinaryOperator::CreateSub(
607	V1: ConstantInt::get(Ty: R->getType(), V: R->getType()->getScalarSizeInBits() - `1`),
608	V2: R);
609	BO->setHasNoSignedWrap();
610	BO->setHasNoUnsignedWrap();
611	return BO;
612	}
613
614	KnownBits Known = IC.computeKnownBits(V: Op0, CxtI: &II);
615
616	// Create a mask for bits above (ctlz) or below (cttz) the first known one.
617	unsigned PossibleZeros = IsTZ ? Known.countMaxTrailingZeros()
618	: Known.countMaxLeadingZeros();
619	unsigned DefiniteZeros = IsTZ ? Known.countMinTrailingZeros()
620	: Known.countMinLeadingZeros();
621
622	// If all bits above (ctlz) or below (cttz) the first known one are known
623	// zero, this value is constant.
624	// FIXME: This should be in InstSimplify because we're replacing an
625	// instruction with a constant.
626	if (PossibleZeros == DefiniteZeros) {
627	auto *C = ConstantInt::get(Ty: Op0->getType(), V: DefiniteZeros);
628	return IC.replaceInstUsesWith(I&: II, V: C);
629	}
630
631	// If the input to cttz/ctlz is known to be non-zero,
632	// then change the 'ZeroIsPoison' parameter to 'true'
633	// because we know the zero behavior can't affect the result.
634	if (!Known.One.isZero() \|\|
635	isKnownNonZero(V: Op0, Q: IC.getSimplifyQuery().getWithInstruction(I: &II))) {
636	if (!match(V: II.getArgOperand(i: `1`), P: m_One()))
637	return IC.replaceOperand(I&: II, OpNum: `1`, V: IC.Builder.getTrue());
638	}
639
640	// Add range attribute since known bits can't completely reflect what we know.
641	unsigned BitWidth = Op0->getType()->getScalarSizeInBits();
642	if (BitWidth != `1` && !II.hasRetAttr(Kind: Attribute::Range) &&
643	!II.getMetadata(KindID: LLVMContext::MD_range)) {
644	ConstantRange Range(APInt (BitWidth, DefiniteZeros),
645	APInt (BitWidth, PossibleZeros + `1`));
646	II.addRangeRetAttr(CR: Range);
647	return &II;
648	}
649
650	return nullptr;
651	}
652
653	static Instruction *foldCtpop(IntrinsicInst &II, InstCombinerImpl &IC) {
654	assert(II.getIntrinsicID() == Intrinsic::ctpop &&
655	"Expected ctpop intrinsic");
656	Type *Ty = II.getType();
657	unsigned BitWidth = Ty->getScalarSizeInBits();
658	Value *Op0 = II.getArgOperand(i: `0`);
659	Value X, Y;
660
661	// ctpop(bitreverse(x)) -> ctpop(x)
662	// ctpop(bswap(x)) -> ctpop(x)
663	if (match(V: Op0, P: m_BitReverse(Op0: m_Value(V&: X))) \|\| match(V: Op0, P: m_BSwap(Op0: m_Value(V&: X))))
664	return IC.replaceOperand(I&: II, OpNum: `0`, V: X);
665
666	// ctpop(rot(x)) -> ctpop(x)
667	if ((match(V: Op0, P: m_FShl(Op0: m_Value(V&: X), Op1: m_Value(V&: Y), Op2: m_Value())) \|\|
668	match(V: Op0, P: m_FShr(Op0: m_Value(V&: X), Op1: m_Value(V&: Y), Op2: m_Value()))) &&
669	X == Y)
670	return IC.replaceOperand(I&: II, OpNum: `0`, V: X);
671
672	// ctpop(x \| -x) -> bitwidth - cttz(x, false)
673	if (Op0->hasOneUse() &&
674	match(V: Op0, P: m_c_Or(L: m_Value(V&: X), R: m_Neg(V: m_Deferred(V: X))))) {
675	auto *Cttz = IC.Builder.CreateIntrinsic(ID: Intrinsic::cttz, Types: Ty,
676	Args: {X, IC.Builder.getFalse()});
677	auto *Bw = ConstantInt::get(Ty, V: APInt (BitWidth, BitWidth));
678	return IC.replaceInstUsesWith(I&: II, V: IC.Builder.CreateSub(LHS: Bw, RHS: Cttz));
679	}
680
681	// ctpop(~x & (x - 1)) -> cttz(x, false)
682	if (match(V: Op0,
683	P: m_c_And(L: m_Not(V: m_Value(V&: X)), R: m_Add(L: m_Deferred(V: X), R: m_AllOnes())))) {
684	Function *F =
685	Intrinsic::getOrInsertDeclaration(M: II.getModule(), id: Intrinsic::cttz, Tys: Ty);
686	return CallInst::Create(Func: F, Args: {X, IC.Builder.getFalse()});
687	}
688
689	// Zext doesn't change the number of set bits, so narrow:
690	// ctpop (zext X) --> zext (ctpop X)
691	if (match(V: Op0, P: m_OneUse(SubPattern: m_ZExt(Op: m_Value(V&: X))))) {
692	Value *NarrowPop = IC.Builder.CreateUnaryIntrinsic(ID: Intrinsic::ctpop, V: X);
693	return CastInst::Create(Instruction::ZExt, S: NarrowPop, Ty);
694	}
695
696	KnownBits Known(BitWidth);
697	IC.computeKnownBits(V: Op0, Known, CxtI: &II);
698
699	// If all bits are zero except for exactly one fixed bit, then the result
700	// must be 0 or 1, and we can get that answer by shifting to LSB:
701	// ctpop (X & 32) --> (X & 32) >> 5
702	// TODO: Investigate removing this as its likely unnecessary given the below
703	// `isKnownToBeAPowerOfTwo` check.
704	if ((~Known.Zero).isPowerOf2())
705	return BinaryOperator::CreateLShr(
706	V1: Op0, V2: ConstantInt::get(Ty, V: (~Known.Zero).exactLogBase2()));
707
708	// More generally we can also handle non-constant power of 2 patterns such as
709	// shl/shr(Pow2, X), (X & -X), etc... by transforming:
710	// ctpop(Pow2OrZero) --> icmp ne X, 0
711	if (IC.isKnownToBeAPowerOfTwo(V: Op0, / OrZero / true))
712	return CastInst::Create(Instruction::ZExt,
713	S: IC.Builder.CreateICmp(P: ICmpInst::ICMP_NE, LHS: Op0,
714	RHS: Constant::getNullValue(Ty)),
715	Ty);
716
717	// Add range attribute since known bits can't completely reflect what we know.
718	if (BitWidth != `1`) {
719	ConstantRange OldRange =
720	II.getRange().value_or(u: ConstantRange::getFull(BitWidth));
721
722	unsigned Lower = Known.countMinPopulation();
723	unsigned Upper = Known.countMaxPopulation() + `1`;
724
725	if (Lower == `0` && OldRange.contains(Val: APInt::getZero(numBits: BitWidth)) &&
726	isKnownNonZero(V: Op0, Q: IC.getSimplifyQuery().getWithInstruction(I: &II)))
727	Lower = `1`;
728
729	ConstantRange Range(APInt (BitWidth, Lower), APInt (BitWidth, Upper));
730	Range = Range.intersectWith(CR: OldRange, Type: ConstantRange::Unsigned);
731
732	if (Range != OldRange) {
733	II.addRangeRetAttr(CR: Range);
734	return &II;
735	}
736	}
737
738	return nullptr;
739	}
740
741	/// Convert `tbl`/`tbx` intrinsics to shufflevector if the mask is constant, and
742	/// at most two source operands are actually referenced.
743	static Instruction *simplifyNeonTbl(IntrinsicInst &II, InstCombiner &IC,
744	bool IsExtension) {
745	// Bail out if the mask is not a constant.
746	auto *C = dyn_cast<Constant>(Val: II.getArgOperand(i: II.arg_size() - `1`));
747	if (!C)
748	return nullptr;
749
750	auto *RetTy = cast<FixedVectorType>(Val: II.getType());
751	unsigned NumIndexes = RetTy->getNumElements();
752
753	// Only perform this transformation for <8 x i8> and <16 x i8> vector types.
754	if (!RetTy->getElementType()->isIntegerTy(Bitwidth: `8`) \|\|
755	(NumIndexes != `8` && NumIndexes != `16`))
756	return nullptr;
757
758	// For tbx instructions, the first argument is the "fallback" vector, which
759	// has the same length as the mask and return type.
760	unsigned int StartIndex = (unsigned)IsExtension;
761	auto *SourceTy =
762	cast<FixedVectorType>(Val: II.getArgOperand(i: StartIndex)->getType());
763	// Note that the element count of each source vector does not* need to be the*
764	// same as the element count of the return type and mask! All source vectors
765	// must have the same element count as each other, though.
766	unsigned NumElementsPerSource = SourceTy->getNumElements();
767
768	// There are no tbl/tbx intrinsics for which the destination size exceeds the
769	// source size. However, our definitions of the intrinsics, at least in
770	// IntrinsicsAArch64.td, allow for arbitrary destination vector sizes, so it
771	// could* technically happen.*
772	if (NumIndexes > NumElementsPerSource)
773	return nullptr;
774
775	// The tbl/tbx intrinsics take several source operands followed by a mask
776	// operand.
777	unsigned int NumSourceOperands = II.arg_size() - `1` - (unsigned)IsExtension;
778
779	// Map input operands to shuffle indices. This also helpfully deduplicates the
780	// input arguments, in case the same value is passed as an argument multiple
781	// times.
782	SmallDenseMap<Value , unsigned*, `2`> ValueToShuffleSlot;
783	Value *ShuffleOperands[`2`] = {PoisonValue::get(T: SourceTy),
784	PoisonValue::get(T: SourceTy)};
785
786	int Indexes[`16`];
787	for (unsigned I = `0`; I < NumIndexes; ++I) {
788	Constant *COp = C->getAggregateElement(Elt: I);
789
790	if (!COp \|\| (!isa<UndefValue>(Val: COp) && !isa<ConstantInt>(Val: COp)))
791	return nullptr;
792
793	if (isa<UndefValue>(Val: COp)) {
794	Indexes[I] = -`1`;
795	continue;
796	}
797
798	uint64_t Index = cast<ConstantInt>(Val: COp)->getZExtValue();
799	// The index of the input argument that this index references (0 = first
800	// source argument, etc).
801	unsigned SourceOperandIndex = Index / NumElementsPerSource;
802	// The index of the element at that source operand.
803	unsigned SourceOperandElementIndex = Index % NumElementsPerSource;
804
805	Value *SourceOperand;
806	if (SourceOperandIndex >= NumSourceOperands) {
807	// This index is out of bounds. Map it to index into either the fallback
808	// vector (tbx) or vector of zeroes (tbl).
809	SourceOperandIndex = NumSourceOperands;
810	if (IsExtension) {
811	// For out-of-bounds indices in tbx, choose the `I`th element of the
812	// fallback.
813	SourceOperand = II.getArgOperand(i: `0`);
814	SourceOperandElementIndex = I;
815	} else {
816	// Otherwise, choose some element from the dummy vector of zeroes (we'll
817	// always choose the first).
818	SourceOperand = Constant::getNullValue(Ty: SourceTy);
819	SourceOperandElementIndex = `0`;
820	}
821	} else {
822	SourceOperand = II.getArgOperand(i: SourceOperandIndex + StartIndex);
823	}
824
825	// The source operand may be the fallback vector, which may not have the
826	// same number of elements as the source vector. In that case, we could
827	// choose to extend its length with another shufflevector, but it's simpler
828	// to just bail instead.
829	if (cast<FixedVectorType>(Val: SourceOperand->getType())->getNumElements() !=
830	NumElementsPerSource)
831	return nullptr;
832
833	// We now know the source operand referenced by this index. Make it a
834	// shufflevector operand, if it isn't already.
835	unsigned NumSlots = ValueToShuffleSlot.size();
836	// This shuffle references more than two sources, and hence cannot be
837	// represented as a shufflevector.
838	if (NumSlots == `2` && !ValueToShuffleSlot.contains(Val: SourceOperand))
839	return nullptr;
840
841	auto [It, Inserted] =
842	ValueToShuffleSlot.try_emplace(Key: SourceOperand, Args&: NumSlots);
843	if (Inserted)
844	ShuffleOperands[It ->getSecond()] = SourceOperand;
845
846	unsigned RemappedIndex =
847	(It ->getSecond() * NumElementsPerSource) + SourceOperandElementIndex;
848	Indexes[I] = RemappedIndex;
849	}
850
851	Value *Shuf = IC.Builder.CreateShuffleVector(
852	V1: ShuffleOperands[`0`], V2: ShuffleOperands[`1`], Mask: ArrayRef(Indexes, NumIndexes));
853	return IC.replaceInstUsesWith(I&: II, V: Shuf);
854	}
855
856	// Returns true iff the 2 intrinsics have the same operands, limiting the
857	// comparison to the first NumOperands.
858	static bool haveSameOperands(const IntrinsicInst &I, const IntrinsicInst &E,
859	unsigned NumOperands) {
860	assert(I.arg_size() >= NumOperands && "Not enough operands");
861	assert(E.arg_size() >= NumOperands && "Not enough operands");
862	for (unsigned i = `0`; i < NumOperands; i++)
863	if (I.getArgOperand(i) != E.getArgOperand(i))
864	return false;
865	return true;
866	}
867
868	// Remove trivially empty start/end intrinsic ranges, i.e. a start
869	// immediately followed by an end (ignoring debuginfo or other
870	// start/end intrinsics in between). As this handles only the most trivial
871	// cases, tracking the nesting level is not needed:
872	//
873	// call @llvm.foo.start(i1 0)
874	// call @llvm.foo.start(i1 0) ; This one won't be skipped: it will be removed
875	// call @llvm.foo.end(i1 0)
876	// call @llvm.foo.end(i1 0) ; &I
877	static bool
878	removeTriviallyEmptyRange(IntrinsicInst &EndI, InstCombinerImpl &IC,
879	std::function<bool(const IntrinsicInst &)> IsStart) {
880	// We start from the end intrinsic and scan backwards, so that InstCombine
881	// has already processed (and potentially removed) all the instructions
882	// before the end intrinsic.
883	BasicBlock::reverse_iterator BI(EndI), BE(EndI.getParent()->rend());
884	for (; BI != BE; ++BI) {
885	if (auto I = dyn_cast<IntrinsicInst>(Val: &BI)) {
886	if (I->isDebugOrPseudoInst() \|\|
887	I->getIntrinsicID() == EndI.getIntrinsicID())
888	continue;
889	if (IsStart (*I)) {
890	if (haveSameOperands(I: EndI, E: *I, NumOperands: EndI.arg_size())) {
891	IC.eraseInstFromFunction(I&: *I);
892	IC.eraseInstFromFunction(I&: EndI);
893	return true;
894	}
895	// Skip start intrinsics that don't pair with this end intrinsic.
896	continue;
897	}
898	}
899	break;
900	}
901
902	return false;
903	}
904
905	Instruction *InstCombinerImpl::visitVAEndInst(VAEndInst &I) {
906	removeTriviallyEmptyRange(EndI&: I, IC&: *this, IsStart: [&I](const IntrinsicInst &II) {
907	// Bail out on the case where the source va_list of a va_copy is destroyed
908	// immediately by a follow-up va_end.
909	return II.getIntrinsicID() == Intrinsic::vastart \|\|
910	(II.getIntrinsicID() == Intrinsic::vacopy &&
911	I.getArgOperand(i: `0`) != II.getArgOperand(i: `1`));
912	});
913	return nullptr;
914	}
915
916	static CallInst *canonicalizeConstantArg0ToArg1(CallInst &Call) {
917	assert(Call.arg_size() > `1` && "Need at least 2 args to swap");
918	Value Arg0 = Call.getArgOperand(i: `0`), Arg1 = Call.getArgOperand(i: `1`);
919	if (isa<Constant>(Val: Arg0) && !isa<Constant>(Val: Arg1)) {
920	Call.setArgOperand(i: `0`, v: Arg1);
921	Call.setArgOperand(i: `1`, v: Arg0);
922	return &Call;
923	}
924	return nullptr;
925	}
926
927	/// Creates a result tuple for an overflow intrinsic \p II with a given
928	/// \p Result and a constant \p Overflow value.
929	static Instruction createOverflowTuple(IntrinsicInst II, Value *Result,
930	Constant *Overflow) {
931	Constant *V[] = {PoisonValue::get(T: Result->getType()), Overflow};
932	StructType *ST = cast<StructType>(Val: II->getType());
933	Constant *Struct = ConstantStruct::get(T: ST, V);
934	return InsertValueInst::Create(Agg: Struct, Val: Result, Idxs: `0`);
935	}
936
937	Instruction *
938	InstCombinerImpl::foldIntrinsicWithOverflowCommon(IntrinsicInst *II) {
939	WithOverflowInst *WO = cast<WithOverflowInst>(Val: II);
940	Value OperationResult = nullptr*;
941	Constant OverflowResult = nullptr*;
942	if (OptimizeOverflowCheck(BinaryOp: WO->getBinaryOp(), IsSigned: WO->isSigned(), LHS: WO->getLHS(),
943	RHS: WO->getRHS(), CtxI&: *WO, OperationResult, OverflowResult))
944	return createOverflowTuple(II: WO, Result: OperationResult, Overflow: OverflowResult);
945
946	// See whether we can optimize the overflow check with assumption information.
947	for (User *U : WO->users()) {
948	if (!match(V: U, P: m_ExtractValue<`1`>(V: m_Value())))
949	continue;
950
951	for (auto &AssumeVH : AC.assumptionsFor(V: U)) {
952	if (!AssumeVH)
953	continue;
954	CallInst *I = cast<CallInst>(Val&: AssumeVH);
955	if (!match(V: I->getArgOperand(i: `0`), P: m_Not(V: m_Specific(V: U))))
956	continue;
957	if (!isValidAssumeForContext(I, CxtI: II, /DT=/nullptr,
958	/AllowEphemerals=/true))
959	continue;
960	Value *Result =
961	Builder.CreateBinOp(Opc: WO->getBinaryOp(), LHS: WO->getLHS(), RHS: WO->getRHS());
962	Result->takeName(V: WO);
963	if (auto *Inst = dyn_cast<Instruction>(Val: Result)) {
964	if (WO->isSigned())
965	Inst->setHasNoSignedWrap();
966	else
967	Inst->setHasNoUnsignedWrap();
968	}
969	return createOverflowTuple(II: WO, Result,
970	Overflow: ConstantInt::getFalse(Ty: U->getType()));
971	}
972	}
973
974	return nullptr;
975	}
976
977	static bool inputDenormalIsIEEE(const Function &F, const Type *Ty) {
978	Ty = Ty->getScalarType();
979	return F.getDenormalMode(FPType: Ty->getFltSemantics()).Input == DenormalMode::IEEE;
980	}
981
982	static bool inputDenormalIsDAZ(const Function &F, const Type *Ty) {
983	Ty = Ty->getScalarType();
984	return F.getDenormalMode(FPType: Ty->getFltSemantics()).inputsAreZero();
985	}
986
987	/// \returns the compare predicate type if the test performed by
988	/// llvm.is.fpclass(x, \p Mask) is equivalent to fcmp o__ x, 0.0 with the
989	/// floating-point environment assumed for \p F for type \p Ty
990	static FCmpInst::Predicate fpclassTestIsFCmp0(FPClassTest Mask,
991	const Function &F, Type *Ty) {
992	switch (static_cast<unsigned>(Mask)) {
993	case fcZero:
994	if (inputDenormalIsIEEE(F, Ty))
995	return FCmpInst::FCMP_OEQ;
996	break;
997	case fcZero \| fcSubnormal:
998	if (inputDenormalIsDAZ(F, Ty))
999	return FCmpInst::FCMP_OEQ;
1000	break;
1001	case fcPositive \| fcNegZero:
1002	if (inputDenormalIsIEEE(F, Ty))
1003	return FCmpInst::FCMP_OGE;
1004	break;
1005	case fcPositive \| fcNegZero \| fcNegSubnormal:
1006	if (inputDenormalIsDAZ(F, Ty))
1007	return FCmpInst::FCMP_OGE;
1008	break;
1009	case fcPosSubnormal \| fcPosNormal \| fcPosInf:
1010	if (inputDenormalIsIEEE(F, Ty))
1011	return FCmpInst::FCMP_OGT;
1012	break;
1013	case fcNegative \| fcPosZero:
1014	if (inputDenormalIsIEEE(F, Ty))
1015	return FCmpInst::FCMP_OLE;
1016	break;
1017	case fcNegative \| fcPosZero \| fcPosSubnormal:
1018	if (inputDenormalIsDAZ(F, Ty))
1019	return FCmpInst::FCMP_OLE;
1020	break;
1021	case fcNegSubnormal \| fcNegNormal \| fcNegInf:
1022	if (inputDenormalIsIEEE(F, Ty))
1023	return FCmpInst::FCMP_OLT;
1024	break;
1025	case fcPosNormal \| fcPosInf:
1026	if (inputDenormalIsDAZ(F, Ty))
1027	return FCmpInst::FCMP_OGT;
1028	break;
1029	case fcNegNormal \| fcNegInf:
1030	if (inputDenormalIsDAZ(F, Ty))
1031	return FCmpInst::FCMP_OLT;
1032	break;
1033	case ~fcZero & ~fcNan:
1034	if (inputDenormalIsIEEE(F, Ty))
1035	return FCmpInst::FCMP_ONE;
1036	break;
1037	case ~(fcZero \| fcSubnormal) & ~fcNan:
1038	if (inputDenormalIsDAZ(F, Ty))
1039	return FCmpInst::FCMP_ONE;
1040	break;
1041	default:
1042	break;
1043	}
1044
1045	return FCmpInst::BAD_FCMP_PREDICATE;
1046	}
1047
1048	Instruction *InstCombinerImpl::foldIntrinsicIsFPClass(IntrinsicInst &II) {
1049	Value *Src0 = II.getArgOperand(i: `0`);
1050	Value *Src1 = II.getArgOperand(i: `1`);
1051	const ConstantInt *CMask = cast<ConstantInt>(Val: Src1);
1052	FPClassTest Mask = static_cast<FPClassTest>(CMask->getZExtValue());
1053	const bool IsUnordered = (Mask & fcNan) == fcNan;
1054	const bool IsOrdered = (Mask & fcNan) == fcNone;
1055	const FPClassTest OrderedMask = Mask & ~fcNan;
1056	const FPClassTest OrderedInvertedMask = ~OrderedMask & ~fcNan;
1057
1058	const bool IsStrict =
1059	II.getFunction()->getAttributes().hasFnAttr(Kind: Attribute::StrictFP);
1060
1061	Value *FNegSrc;
1062	if (match(V: Src0, P: m_FNeg(X: m_Value(V&: FNegSrc)))) {
1063	// is.fpclass (fneg x), mask -> is.fpclass x, (fneg mask)
1064
1065	II.setArgOperand(i: `1`, v: ConstantInt::get(Ty: Src1->getType(), V: fneg(Mask)));
1066	return replaceOperand(I&: II, OpNum: `0`, V: FNegSrc);
1067	}
1068
1069	Value *FAbsSrc;
1070	if (match(V: Src0, P: m_FAbs(Op0: m_Value(V&: FAbsSrc)))) {
1071	II.setArgOperand(i: `1`, v: ConstantInt::get(Ty: Src1->getType(), V: inverse_fabs(Mask)));
1072	return replaceOperand(I&: II, OpNum: `0`, V: FAbsSrc);
1073	}
1074
1075	if ((OrderedMask == fcInf \|\| OrderedInvertedMask == fcInf) &&
1076	(IsOrdered \|\| IsUnordered) && !IsStrict) {
1077	// is.fpclass(x, fcInf) -> fcmp oeq fabs(x), +inf
1078	// is.fpclass(x, ~fcInf) -> fcmp one fabs(x), +inf
1079	// is.fpclass(x, fcInf\|fcNan) -> fcmp ueq fabs(x), +inf
1080	// is.fpclass(x, ~(fcInf\|fcNan)) -> fcmp une fabs(x), +inf
1081	Constant *Inf = ConstantFP::getInfinity(Ty: Src0->getType());
1082	FCmpInst::Predicate Pred =
1083	IsUnordered ? FCmpInst::FCMP_UEQ : FCmpInst::FCMP_OEQ;
1084	if (OrderedInvertedMask == fcInf)
1085	Pred = IsUnordered ? FCmpInst::FCMP_UNE : FCmpInst::FCMP_ONE;
1086
1087	Value *Fabs = Builder.CreateUnaryIntrinsic(ID: Intrinsic::fabs, V: Src0);
1088	Value *CmpInf = Builder.CreateFCmp(P: Pred, LHS: Fabs, RHS: Inf);
1089	CmpInf->takeName(V: &II);
1090	return replaceInstUsesWith(I&: II, V: CmpInf);
1091	}
1092
1093	if ((OrderedMask == fcPosInf \|\| OrderedMask == fcNegInf) &&
1094	(IsOrdered \|\| IsUnordered) && !IsStrict) {
1095	// is.fpclass(x, fcPosInf) -> fcmp oeq x, +inf
1096	// is.fpclass(x, fcNegInf) -> fcmp oeq x, -inf
1097	// is.fpclass(x, fcPosInf\|fcNan) -> fcmp ueq x, +inf
1098	// is.fpclass(x, fcNegInf\|fcNan) -> fcmp ueq x, -inf
1099	Constant *Inf =
1100	ConstantFP::getInfinity(Ty: Src0->getType(), Negative: OrderedMask == fcNegInf);
1101	Value *EqInf = IsUnordered ? Builder.CreateFCmpUEQ(LHS: Src0, RHS: Inf)
1102	: Builder.CreateFCmpOEQ(LHS: Src0, RHS: Inf);
1103
1104	EqInf->takeName(V: &II);
1105	return replaceInstUsesWith(I&: II, V: EqInf);
1106	}
1107
1108	if ((OrderedInvertedMask == fcPosInf \|\| OrderedInvertedMask == fcNegInf) &&
1109	(IsOrdered \|\| IsUnordered) && !IsStrict) {
1110	// is.fpclass(x, ~fcPosInf) -> fcmp one x, +inf
1111	// is.fpclass(x, ~fcNegInf) -> fcmp one x, -inf
1112	// is.fpclass(x, ~fcPosInf\|fcNan) -> fcmp une x, +inf
1113	// is.fpclass(x, ~fcNegInf\|fcNan) -> fcmp une x, -inf
1114	Constant *Inf = ConstantFP::getInfinity(Ty: Src0->getType(),
1115	Negative: OrderedInvertedMask == fcNegInf);
1116	Value *NeInf = IsUnordered ? Builder.CreateFCmpUNE(LHS: Src0, RHS: Inf)
1117	: Builder.CreateFCmpONE(LHS: Src0, RHS: Inf);
1118	NeInf->takeName(V: &II);
1119	return replaceInstUsesWith(I&: II, V: NeInf);
1120	}
1121
1122	if (Mask == fcNan && !IsStrict) {
1123	// Equivalent of isnan. Replace with standard fcmp if we don't care about FP
1124	// exceptions.
1125	Value *IsNan =
1126	Builder.CreateFCmpUNO(LHS: Src0, RHS: ConstantFP::getZero(Ty: Src0->getType()));
1127	IsNan->takeName(V: &II);
1128	return replaceInstUsesWith(I&: II, V: IsNan);
1129	}
1130
1131	if (Mask == (~fcNan & fcAllFlags) && !IsStrict) {
1132	// Equivalent of !isnan. Replace with standard fcmp.
1133	Value *FCmp =
1134	Builder.CreateFCmpORD(LHS: Src0, RHS: ConstantFP::getZero(Ty: Src0->getType()));
1135	FCmp->takeName(V: &II);
1136	return replaceInstUsesWith(I&: II, V: FCmp);
1137	}
1138
1139	FCmpInst::Predicate PredType = FCmpInst::BAD_FCMP_PREDICATE;
1140
1141	// Try to replace with an fcmp with 0
1142	//
1143	// is.fpclass(x, fcZero) -> fcmp oeq x, 0.0
1144	// is.fpclass(x, fcZero \| fcNan) -> fcmp ueq x, 0.0
1145	// is.fpclass(x, ~fcZero & ~fcNan) -> fcmp one x, 0.0
1146	// is.fpclass(x, ~fcZero) -> fcmp une x, 0.0
1147	//
1148	// is.fpclass(x, fcPosSubnormal \| fcPosNormal \| fcPosInf) -> fcmp ogt x, 0.0
1149	// is.fpclass(x, fcPositive \| fcNegZero) -> fcmp oge x, 0.0
1150	//
1151	// is.fpclass(x, fcNegSubnormal \| fcNegNormal \| fcNegInf) -> fcmp olt x, 0.0
1152	// is.fpclass(x, fcNegative \| fcPosZero) -> fcmp ole x, 0.0
1153	//
1154	if (!IsStrict && (IsOrdered \|\| IsUnordered) &&
1155	(PredType = fpclassTestIsFCmp0(Mask: OrderedMask, F: *II.getFunction(),
1156	Ty: Src0->getType())) !=
1157	FCmpInst::BAD_FCMP_PREDICATE) {
1158	Constant *Zero = ConstantFP::getZero(Ty: Src0->getType());
1159	// Equivalent of == 0.
1160	Value *FCmp = Builder.CreateFCmp(
1161	P: IsUnordered ? FCmpInst::getUnorderedPredicate(Pred: PredType) : PredType,
1162	LHS: Src0, RHS: Zero);
1163
1164	FCmp->takeName(V: &II);
1165	return replaceInstUsesWith(I&: II, V: FCmp);
1166	}
1167
1168	KnownFPClass Known =
1169	computeKnownFPClass(V: Src0, InterestedClasses: Mask, SQ: SQ.getWithInstruction(I: &II));
1170
1171	// Clear test bits we know must be false from the source value.
1172	// fp_class (nnan x), qnan\|snan\|other -> fp_class (nnan x), other
1173	// fp_class (ninf x), ninf\|pinf\|other -> fp_class (ninf x), other
1174	if ((Mask & Known.KnownFPClasses) != Mask) {
1175	II.setArgOperand(
1176	i: `1`, v: ConstantInt::get(Ty: Src1->getType(), V: Mask & Known.KnownFPClasses));
1177	return &II;
1178	}
1179
1180	// If none of the tests which can return false are possible, fold to true.
1181	// fp_class (nnan x), ~(qnan\|snan) -> true
1182	// fp_class (ninf x), ~(ninf\|pinf) -> true
1183	if (Mask == Known.KnownFPClasses)
1184	return replaceInstUsesWith(I&: II, V: ConstantInt::get(Ty: II.getType(), V: true));
1185
1186	return nullptr;
1187	}
1188
1189	static std::optional<bool> getKnownSign(Value Op, const* SimplifyQuery &SQ) {
1190	KnownBits Known = computeKnownBits(V: Op, Q: SQ);
1191	if (Known.isNonNegative())
1192	return false;
1193	if (Known.isNegative())
1194	return true;
1195
1196	Value X, Y;
1197	if (match(V: Op, P: m_NSWSub(L: m_Value(V&: X), R: m_Value(V&: Y))))
1198	return isImpliedByDomCondition(Pred: ICmpInst::ICMP_SLT, LHS: X, RHS: Y, ContextI: SQ.CxtI, DL: SQ.DL);
1199
1200	return std::nullopt;
1201	}
1202
1203	static std::optional<bool> getKnownSignOrZero(Value *Op,
1204	const SimplifyQuery &SQ) {
1205	if (std::optional<bool> Sign = getKnownSign(Op, SQ))
1206	return Sign;
1207
1208	Value X, Y;
1209	if (match(V: Op, P: m_NSWSub(L: m_Value(V&: X), R: m_Value(V&: Y))))
1210	return isImpliedByDomCondition(Pred: ICmpInst::ICMP_SLE, LHS: X, RHS: Y, ContextI: SQ.CxtI, DL: SQ.DL);
1211
1212	return std::nullopt;
1213	}
1214
1215	/// Return true if two values \p Op0 and \p Op1 are known to have the same sign.
1216	static bool signBitMustBeTheSame(Value Op0, Value Op1,
1217	const SimplifyQuery &SQ) {
1218	std::optional<bool> Known1 = getKnownSign(Op: Op1, SQ);
1219	if (!Known1)
1220	return false;
1221	std::optional<bool> Known0 = getKnownSign(Op: Op0, SQ);
1222	if (!Known0)
1223	return false;
1224	return Known0 == Known1;
1225	}
1226
1227	/// Try to canonicalize min/max(X + C0, C1) as min/max(X, C1 - C0) + C0. This
1228	/// can trigger other combines.
1229	static Instruction moveAddAfterMinMax(IntrinsicInst II,
1230	InstCombiner::BuilderTy &Builder) {
1231	Intrinsic::ID MinMaxID = II->getIntrinsicID();
1232	assert((MinMaxID == Intrinsic::smax \|\| MinMaxID == Intrinsic::smin \|\|
1233	MinMaxID == Intrinsic::umax \|\| MinMaxID == Intrinsic::umin) &&
1234	"Expected a min or max intrinsic");
1235
1236	// TODO: Match vectors with undef elements, but undef may not propagate.
1237	Value Op0 = II->getArgOperand(i: `0`), Op1 = II->getArgOperand(i: `1`);
1238	Value *X;
1239	const APInt C0, C1;
1240	if (!match(V: Op0, P: m_OneUse(SubPattern: m_Add(L: m_Value(V&: X), R: m_APInt(Res&: C0)))) \|\|
1241	!match(V: Op1, P: m_APInt(Res&: C1)))
1242	return nullptr;
1243
1244	// Check for necessary no-wrap and overflow constraints.
1245	bool IsSigned = MinMaxID == Intrinsic::smax \|\| MinMaxID == Intrinsic::smin;
1246	auto *Add = cast<BinaryOperator>(Val: Op0);
1247	if ((IsSigned && !Add->hasNoSignedWrap()) \|\|
1248	(!IsSigned && !Add->hasNoUnsignedWrap()))
1249	return nullptr;
1250
1251	// If the constant difference overflows, then instsimplify should reduce the
1252	// min/max to the add or C1.
1253	bool Overflow;
1254	APInt CDiff =
1255	IsSigned ? C1->ssub_ov(RHS: C0, Overflow) : C1->usub_ov(RHS: C0, Overflow);
1256	assert(!Overflow && "Expected simplify of min/max");
1257
1258	// min/max (add X, C0), C1 --> add (min/max X, C1 - C0), C0
1259	// Note: the "mismatched" no-overflow setting does not propagate.
1260	Constant *NewMinMaxC = ConstantInt::get(Ty: II->getType(), V: CDiff);
1261	Value *NewMinMax = Builder.CreateBinaryIntrinsic(ID: MinMaxID, LHS: X, RHS: NewMinMaxC);
1262	return IsSigned ? BinaryOperator::CreateNSWAdd(V1: NewMinMax, V2: Add->getOperand(i_nocapture: `1`))
1263	: BinaryOperator::CreateNUWAdd(V1: NewMinMax, V2: Add->getOperand(i_nocapture: `1`));
1264	}
1265	/// Match a sadd_sat or ssub_sat which is using min/max to clamp the value.
1266	Instruction *InstCombinerImpl::matchSAddSubSat(IntrinsicInst &MinMax1) {
1267	Type *Ty = MinMax1.getType();
1268
1269	// We are looking for a tree of:
1270	// max(INT_MIN, min(INT_MAX, add(sext(A), sext(B))))
1271	// Where the min and max could be reversed
1272	Instruction *MinMax2;
1273	BinaryOperator *AddSub;
1274	const APInt MinValue, MaxValue;
1275	if (match(V: &MinMax1, P: m_SMin(L: m_Instruction(I&: MinMax2), R: m_APInt(Res&: MaxValue)))) {
1276	if (!match(V: MinMax2, P: m_SMax(L: m_BinOp(I&: AddSub), R: m_APInt(Res&: MinValue))))
1277	return nullptr;
1278	} else if (match(V: &MinMax1,
1279	P: m_SMax(L: m_Instruction(I&: MinMax2), R: m_APInt(Res&: MinValue)))) {
1280	if (!match(V: MinMax2, P: m_SMin(L: m_BinOp(I&: AddSub), R: m_APInt(Res&: MaxValue))))
1281	return nullptr;
1282	} else
1283	return nullptr;
1284
1285	// Check that the constants clamp a saturate, and that the new type would be
1286	// sensible to convert to.
1287	if (!(MaxValue + `1`).isPowerOf2() \|\| -MinValue != *MaxValue + `1`)
1288	return nullptr;
1289	// In what bitwidth can this be treated as saturating arithmetics?
1290	unsigned NewBitWidth = (*MaxValue + `1`).logBase2() + `1`;
1291	// FIXME: This isn't quite right for vectors, but using the scalar type is a
1292	// good first approximation for what should be done there.
1293	if (!shouldChangeType(FromBitWidth: Ty->getScalarType()->getIntegerBitWidth(), ToBitWidth: NewBitWidth))
1294	return nullptr;
1295
1296	// Also make sure that the inner min/max and the add/sub have one use.
1297	if (!MinMax2->hasOneUse() \|\| !AddSub->hasOneUse())
1298	return nullptr;
1299
1300	// Create the new type (which can be a vector type)
1301	Type *NewTy = Ty->getWithNewBitWidth(NewBitWidth);
1302
1303	Intrinsic::ID IntrinsicID;
1304	if (AddSub->getOpcode() == Instruction::Add)
1305	IntrinsicID = Intrinsic::sadd_sat;
1306	else if (AddSub->getOpcode() == Instruction::Sub)
1307	IntrinsicID = Intrinsic::ssub_sat;
1308	else
1309	return nullptr;
1310
1311	// The two operands of the add/sub must be nsw-truncatable to the NewTy. This
1312	// is usually achieved via a sext from a smaller type.
1313	if (ComputeMaxSignificantBits(Op: AddSub->getOperand(i_nocapture: `0`), CxtI: AddSub) > NewBitWidth \|\|
1314	ComputeMaxSignificantBits(Op: AddSub->getOperand(i_nocapture: `1`), CxtI: AddSub) > NewBitWidth)
1315	return nullptr;
1316
1317	// Finally create and return the sat intrinsic, truncated to the new type
1318	Value *AT = Builder.CreateTrunc(V: AddSub->getOperand(i_nocapture: `0`), DestTy: NewTy);
1319	Value *BT = Builder.CreateTrunc(V: AddSub->getOperand(i_nocapture: `1`), DestTy: NewTy);
1320	Value *Sat = Builder.CreateIntrinsic(ID: IntrinsicID, Types: NewTy, Args: {AT, BT});
1321	return CastInst::Create(Instruction::SExt, S: Sat, Ty);
1322	}
1323
1324
1325	/// If we have a clamp pattern like max (min X, 42), 41 -- where the output
1326	/// can only be one of two possible constant values -- turn that into a select
1327	/// of constants.
1328	static Instruction foldClampRangeOfTwo(IntrinsicInst II,
1329	InstCombiner::BuilderTy &Builder) {
1330	Value I0 = II->getArgOperand(i: `0`), I1 = II->getArgOperand(i: `1`);
1331	Value *X;
1332	const APInt C0, C1;
1333	if (!match(V: I1, P: m_APInt(Res&: C1)) \|\| !I0->hasOneUse())
1334	return nullptr;
1335
1336	CmpInst::Predicate Pred = CmpInst::BAD_ICMP_PREDICATE;
1337	switch (II->getIntrinsicID()) {
1338	case Intrinsic::smax:
1339	if (match(V: I0, P: m_SMin(L: m_Value(V&: X), R: m_APInt(Res&: C0))) && C0 == C1 + `1`)
1340	Pred = ICmpInst::ICMP_SGT;
1341	break;
1342	case Intrinsic::smin:
1343	if (match(V: I0, P: m_SMax(L: m_Value(V&: X), R: m_APInt(Res&: C0))) && C1 == C0 + `1`)
1344	Pred = ICmpInst::ICMP_SLT;
1345	break;
1346	case Intrinsic::umax:
1347	if (match(V: I0, P: m_UMin(L: m_Value(V&: X), R: m_APInt(Res&: C0))) && C0 == C1 + `1`)
1348	Pred = ICmpInst::ICMP_UGT;
1349	break;
1350	case Intrinsic::umin:
1351	if (match(V: I0, P: m_UMax(L: m_Value(V&: X), R: m_APInt(Res&: C0))) && C1 == C0 + `1`)
1352	Pred = ICmpInst::ICMP_ULT;
1353	break;
1354	default:
1355	llvm_unreachable("Expected min/max intrinsic");
1356	}
1357	if (Pred == CmpInst::BAD_ICMP_PREDICATE)
1358	return nullptr;
1359
1360	// max (min X, 42), 41 --> X > 41 ? 42 : 41
1361	// min (max X, 42), 43 --> X < 43 ? 42 : 43
1362	Value *Cmp = Builder.CreateICmp(P: Pred, LHS: X, RHS: I1);
1363	return SelectInst::Create(C: Cmp, S1: ConstantInt::get(Ty: II->getType(), V: *C0), S2: I1);
1364	}
1365
1366	/// If this min/max has a constant operand and an operand that is a matching
1367	/// min/max with a constant operand, constant-fold the 2 constant operands.
1368	static Value reassociateMinMaxWithConstants(IntrinsicInst II,
1369	IRBuilderBase &Builder,
1370	const SimplifyQuery &SQ) {
1371	Intrinsic::ID MinMaxID = II->getIntrinsicID();
1372	auto *LHS = dyn_cast<MinMaxIntrinsic>(Val: II->getArgOperand(i: `0`));
1373	if (!LHS)
1374	return nullptr;
1375
1376	Constant C0, C1;
1377	if (!match(V: LHS->getArgOperand(i: `1`), P: m_ImmConstant(C&: C0)) \|\|
1378	!match(V: II->getArgOperand(i: `1`), P: m_ImmConstant(C&: C1)))
1379	return nullptr;
1380
1381	// max (max X, C0), C1 --> max X, (max C0, C1)
1382	// min (min X, C0), C1 --> min X, (min C0, C1)
1383	// umax (smax X, nneg C0), nneg C1 --> smax X, (umax C0, C1)
1384	// smin (umin X, nneg C0), nneg C1 --> umin X, (smin C0, C1)
1385	Intrinsic::ID InnerMinMaxID = LHS->getIntrinsicID();
1386	if (InnerMinMaxID != MinMaxID &&
1387	!(((MinMaxID == Intrinsic::umax && InnerMinMaxID == Intrinsic::smax) \|\|
1388	(MinMaxID == Intrinsic::smin && InnerMinMaxID == Intrinsic::umin)) &&
1389	isKnownNonNegative(V: C0, SQ) && isKnownNonNegative(V: C1, SQ)))
1390	return nullptr;
1391
1392	ICmpInst::Predicate Pred = MinMaxIntrinsic::getPredicate(ID: MinMaxID);
1393	Value *CondC = Builder.CreateICmp(P: Pred, LHS: C0, RHS: C1);
1394	Value *NewC = Builder.CreateSelect(C: CondC, True: C0, False: C1);
1395	return Builder.CreateIntrinsic(ID: InnerMinMaxID, Types: II->getType(),
1396	Args: {LHS->getArgOperand(i: `0`), NewC});
1397	}
1398
1399	/// If this min/max has a matching min/max operand with a constant, try to push
1400	/// the constant operand into this instruction. This can enable more folds.
1401	static Instruction *
1402	reassociateMinMaxWithConstantInOperand(IntrinsicInst *II,
1403	InstCombiner::BuilderTy &Builder) {
1404	// Match and capture a min/max operand candidate.
1405	Value X, Y;
1406	Constant *C;
1407	Instruction *Inner;
1408	if (!match(V: II, P: m_c_MaxOrMin(L: m_OneUse(SubPattern: m_CombineAnd(
1409	L: m_Instruction(I&: Inner),
1410	R: m_MaxOrMin(L: m_Value(V&: X), R: m_ImmConstant(C)))),
1411	R: m_Value(V&: Y))))
1412	return nullptr;
1413
1414	// The inner op must match. Check for constants to avoid infinite loops.
1415	Intrinsic::ID MinMaxID = II->getIntrinsicID();
1416	auto *InnerMM = dyn_cast<IntrinsicInst>(Val: Inner);
1417	if (!InnerMM \|\| InnerMM->getIntrinsicID() != MinMaxID \|\|
1418	match(V: X, P: m_ImmConstant()) \|\| match(V: Y, P: m_ImmConstant()))
1419	return nullptr;
1420
1421	// max (max X, C), Y --> max (max X, Y), C
1422	Function *MinMax = Intrinsic::getOrInsertDeclaration(M: II->getModule(),
1423	id: MinMaxID, Tys: II->getType());
1424	Value *NewInner = Builder.CreateBinaryIntrinsic(ID: MinMaxID, LHS: X, RHS: Y);
1425	NewInner->takeName(V: Inner);
1426	return CallInst::Create(Func: MinMax, Args: {NewInner, C});
1427	}
1428
1429	/// Reduce a sequence of min/max intrinsics with a common operand.
1430	static Instruction factorizeMinMaxTree(IntrinsicInst II) {
1431	// Match 3 of the same min/max ops. Example: umin(umin(), umin()).
1432	auto *LHS = dyn_cast<IntrinsicInst>(Val: II->getArgOperand(i: `0`));
1433	auto *RHS = dyn_cast<IntrinsicInst>(Val: II->getArgOperand(i: `1`));
1434	Intrinsic::ID MinMaxID = II->getIntrinsicID();
1435	if (!LHS \|\| !RHS \|\| LHS->getIntrinsicID() != MinMaxID \|\|
1436	RHS->getIntrinsicID() != MinMaxID \|\|
1437	(!LHS->hasOneUse() && !RHS->hasOneUse()))
1438	return nullptr;
1439
1440	Value *A = LHS->getArgOperand(i: `0`);
1441	Value *B = LHS->getArgOperand(i: `1`);
1442	Value *C = RHS->getArgOperand(i: `0`);
1443	Value *D = RHS->getArgOperand(i: `1`);
1444
1445	// Look for a common operand.
1446	Value MinMaxOp = nullptr*;
1447	Value ThirdOp = nullptr*;
1448	if (LHS->hasOneUse()) {
1449	// If the LHS is only used in this chain and the RHS is used outside of it,
1450	// reuse the RHS min/max because that will eliminate the LHS.
1451	if (D == A \|\| C == A) {
1452	// min(min(a, b), min(c, a)) --> min(min(c, a), b)
1453	// min(min(a, b), min(a, d)) --> min(min(a, d), b)
1454	MinMaxOp = RHS;
1455	ThirdOp = B;
1456	} else if (D == B \|\| C == B) {
1457	// min(min(a, b), min(c, b)) --> min(min(c, b), a)
1458	// min(min(a, b), min(b, d)) --> min(min(b, d), a)
1459	MinMaxOp = RHS;
1460	ThirdOp = A;
1461	}
1462	} else {
1463	assert(RHS->hasOneUse() && "Expected one-use operand");
1464	// Reuse the LHS. This will eliminate the RHS.
1465	if (D == A \|\| D == B) {
1466	// min(min(a, b), min(c, a)) --> min(min(a, b), c)
1467	// min(min(a, b), min(c, b)) --> min(min(a, b), c)
1468	MinMaxOp = LHS;
1469	ThirdOp = C;
1470	} else if (C == A \|\| C == B) {
1471	// min(min(a, b), min(b, d)) --> min(min(a, b), d)
1472	// min(min(a, b), min(c, b)) --> min(min(a, b), d)
1473	MinMaxOp = LHS;
1474	ThirdOp = D;
1475	}
1476	}
1477
1478	if (!MinMaxOp \|\| !ThirdOp)
1479	return nullptr;
1480
1481	Module *Mod = II->getModule();
1482	Function *MinMax =
1483	Intrinsic::getOrInsertDeclaration(M: Mod, id: MinMaxID, Tys: II->getType());
1484	return CallInst::Create(Func: MinMax, Args: { MinMaxOp, ThirdOp });
1485	}
1486
1487	/// If all arguments of the intrinsic are unary shuffles with the same mask,
1488	/// try to shuffle after the intrinsic.
1489	Instruction *
1490	InstCombinerImpl::foldShuffledIntrinsicOperands(IntrinsicInst *II) {
1491	if (!II->getType()->isVectorTy() \|\|
1492	!isTriviallyVectorizable(ID: II->getIntrinsicID()) \|\|
1493	!II->getCalledFunction()->isSpeculatable())
1494	return nullptr;
1495
1496	Value *X;
1497	Constant *C;
1498	ArrayRef<int> Mask;
1499	auto *NonConstArg = find_if_not(Range: II->args(), P: [&II](Use &Arg) {
1500	return isa<Constant>(Val: Arg.get()) \|\|
1501	isVectorIntrinsicWithScalarOpAtArg(ID: II->getIntrinsicID(),
1502	ScalarOpdIdx: Arg.getOperandNo(), TTI: nullptr);
1503	});
1504	if (!NonConstArg \|\|
1505	!match(V: NonConstArg, P: m_Shuffle(v1: m_Value(V&: X), v2: m_Poison(), mask: m_Mask (Mask))))
1506	return nullptr;
1507
1508	// At least 1 operand must be a shuffle with 1 use because we are creating 2
1509	// instructions.
1510	if (none_of(Range: II->args(), P: match_fn(P: m_OneUse(SubPattern: m_Shuffle(v1: m_Value(), v2: m_Value())))))
1511	return nullptr;
1512
1513	// See if all arguments are shuffled with the same mask.
1514	SmallVector<Value *, `4`> NewArgs;
1515	Type *SrcTy = X->getType();
1516	for (Use &Arg : II->args()) {
1517	if (isVectorIntrinsicWithScalarOpAtArg(ID: II->getIntrinsicID(),
1518	ScalarOpdIdx: Arg.getOperandNo(), TTI: nullptr))
1519	NewArgs.push_back(Elt: Arg);
1520	else if (match(V: &Arg,
1521	P: m_Shuffle(v1: m_Value(V&: X), v2: m_Poison(), mask: m_SpecificMask (Mask))) &&
1522	X->getType() == SrcTy)
1523	NewArgs.push_back(Elt: X);
1524	else if (match(V: &Arg, P: m_ImmConstant(C))) {
1525	// If it's a constant, try find the constant that would be shuffled to C.
1526	if (Constant *ShuffledC =
1527	unshuffleConstant(ShMask: Mask, C, NewCTy: cast<VectorType>(Val: SrcTy)))
1528	NewArgs.push_back(Elt: ShuffledC);
1529	else
1530	return nullptr;
1531	} else
1532	return nullptr;
1533	}
1534
1535	// intrinsic (shuf X, M), (shuf Y, M), ... --> shuf (intrinsic X, Y, ...), M
1536	Instruction FPI = isa<FPMathOperator>(Val: II) ? II : nullptr*;
1537	// Result type might be a different vector width.
1538	// TODO: Check that the result type isn't widened?
1539	VectorType *ResTy =
1540	VectorType::get(ElementType: II->getType()->getScalarType(), Other: cast<VectorType>(Val: SrcTy));
1541	Value *NewIntrinsic =
1542	Builder.CreateIntrinsic(RetTy: ResTy, ID: II->getIntrinsicID(), Args: NewArgs, FMFSource: FPI);
1543	return new ShuffleVectorInst (NewIntrinsic, Mask);
1544	}
1545
1546	/// If all arguments of the intrinsic are reverses, try to pull the reverse
1547	/// after the intrinsic.
1548	Value InstCombinerImpl::foldReversedIntrinsicOperands(IntrinsicInst II) {
1549	if (!II->getType()->isVectorTy() \|\|
1550	!isTriviallyVectorizable(ID: II->getIntrinsicID()))
1551	return nullptr;
1552
1553	// At least 1 operand must be a reverse with 1 use because we are creating 2
1554	// instructions.
1555	if (none_of(Range: II->args(), P: [](Value *V) {
1556	return match(V, P: m_OneUse(SubPattern: m_VecReverse(Op0: m_Value())));
1557	}))
1558	return nullptr;
1559
1560	Value *X;
1561	Constant *C;
1562	SmallVector<Value *> NewArgs;
1563	for (Use &Arg : II->args()) {
1564	if (isVectorIntrinsicWithScalarOpAtArg(ID: II->getIntrinsicID(),
1565	ScalarOpdIdx: Arg.getOperandNo(), TTI: nullptr))
1566	NewArgs.push_back(Elt: Arg);
1567	else if (match(V: &Arg, P: m_VecReverse(Op0: m_Value(V&: X))))
1568	NewArgs.push_back(Elt: X);
1569	else if (isSplatValue(V: Arg))
1570	NewArgs.push_back(Elt: Arg);
1571	else if (match(V: &Arg, P: m_ImmConstant(C)))
1572	NewArgs.push_back(Elt: Builder.CreateVectorReverse(V: C));
1573	else
1574	return nullptr;
1575	}
1576
1577	// intrinsic (reverse X), (reverse Y), ... --> reverse (intrinsic X, Y, ...)
1578	Instruction FPI = isa<FPMathOperator>(Val: II) ? II : nullptr*;
1579	Instruction *NewIntrinsic = Builder.CreateIntrinsic(
1580	RetTy: II->getType(), ID: II->getIntrinsicID(), Args: NewArgs, FMFSource: FPI);
1581	return Builder.CreateVectorReverse(V: NewIntrinsic);
1582	}
1583
1584	/// Fold the following cases and accepts bswap and bitreverse intrinsics:
1585	/// bswap(logic_op(bswap(x), y)) --> logic_op(x, bswap(y))
1586	/// bswap(logic_op(bswap(x), bswap(y))) --> logic_op(x, y) (ignores multiuse)
1587	template <Intrinsic::ID IntrID>
1588	static Instruction foldBitOrderCrossLogicOp(Value V,
1589	InstCombiner::BuilderTy &Builder) {
1590	static_assert(IntrID == Intrinsic::bswap \|\| IntrID == Intrinsic::bitreverse,
1591	"This helper only supports BSWAP and BITREVERSE intrinsics");
1592
1593	Value X, Y;
1594	// Find bitwise logic op. Check that it is a BinaryOperator explicitly so we
1595	// don't match ConstantExpr that aren't meaningful for this transform.
1596	if (match(V, P: m_OneUse(SubPattern: m_BitwiseLogic(L: m_Value(V&: X), R: m_Value(V&: Y)))) &&
1597	isa<BinaryOperator>(Val: V)) {
1598	Value OldReorderX, OldReorderY;
1599	BinaryOperator::BinaryOps Op = cast<BinaryOperator>(Val: V)->getOpcode();
1600
1601	// If both X and Y are bswap/bitreverse, the transform reduces the number
1602	// of instructions even if there's multiuse.
1603	// If only one operand is bswap/bitreverse, we need to ensure the operand
1604	// have only one use.
1605	if (match(X, m_Intrinsic<IntrID>(m_Value(V&: OldReorderX))) &&
1606	match(Y, m_Intrinsic<IntrID>(m_Value(V&: OldReorderY)))) {
1607	return BinaryOperator::Create(Op, S1: OldReorderX, S2: OldReorderY);
1608	}
1609
1610	if (match(X, m_OneUse(m_Intrinsic<IntrID>(m_Value(V&: OldReorderX))))) {
1611	Value *NewReorder = Builder.CreateUnaryIntrinsic(ID: IntrID, V: Y);
1612	return BinaryOperator::Create(Op, S1: OldReorderX, S2: NewReorder);
1613	}
1614
1615	if (match(Y, m_OneUse(m_Intrinsic<IntrID>(m_Value(V&: OldReorderY))))) {
1616	Value *NewReorder = Builder.CreateUnaryIntrinsic(ID: IntrID, V: X);
1617	return BinaryOperator::Create(Op, S1: NewReorder, S2: OldReorderY);
1618	}
1619	}
1620	return nullptr;
1621	}
1622
1623	/// Helper to match idempotent binary intrinsics, namely, intrinsics where
1624	/// `f(f(x, y), y) == f(x, y)` holds.
1625	static bool isIdempotentBinaryIntrinsic(Intrinsic::ID IID) {
1626	switch (IID) {
1627	case Intrinsic::smax:
1628	case Intrinsic::smin:
1629	case Intrinsic::umax:
1630	case Intrinsic::umin:
1631	case Intrinsic::maximum:
1632	case Intrinsic::minimum:
1633	case Intrinsic::maximumnum:
1634	case Intrinsic::minimumnum:
1635	case Intrinsic::maxnum:
1636	case Intrinsic::minnum:
1637	return true;
1638	default:
1639	return false;
1640	}
1641	}
1642
1643	/// Attempt to simplify value-accumulating recurrences of kind:
1644	/// %umax.acc = phi i8 [ %umax, %backedge ], [ %a, %entry ]
1645	/// %umax = call i8 @llvm.umax.i8(i8 %umax.acc, i8 %b)
1646	/// And let the idempotent binary intrinsic be hoisted, when the operands are
1647	/// known to be loop-invariant.
1648	static Value *foldIdempotentBinaryIntrinsicRecurrence(InstCombinerImpl &IC,
1649	IntrinsicInst *II) {
1650	PHINode *PN;
1651	Value Init, OtherOp;
1652
1653	// A binary intrinsic recurrence with loop-invariant operands is equivalent to
1654	// `call @llvm.binary.intrinsic(Init, OtherOp)`.
1655	auto IID = II->getIntrinsicID();
1656	if (!isIdempotentBinaryIntrinsic(IID) \|\|
1657	!matchSimpleBinaryIntrinsicRecurrence(I: II, P&: PN, Init, OtherOp) \|\|
1658	!IC.getDominatorTree().dominates(Def: OtherOp, User: PN))
1659	return nullptr;
1660
1661	auto *InvariantBinaryInst =
1662	IC.Builder.CreateBinaryIntrinsic(ID: IID, LHS: Init, RHS: OtherOp);
1663	if (isa<FPMathOperator>(Val: InvariantBinaryInst))
1664	cast<Instruction>(Val: InvariantBinaryInst)->copyFastMathFlags(I: II);
1665	return InvariantBinaryInst;
1666	}
1667
1668	static Value simplifyReductionOperand(Value Arg, bool CanReorderLanes) {
1669	if (!CanReorderLanes)
1670	return nullptr;
1671
1672	Value *V;
1673	if (match(V: Arg, P: m_VecReverse(Op0: m_Value(V))))
1674	return V;
1675
1676	ArrayRef<int> Mask;
1677	if (!isa<FixedVectorType>(Val: Arg->getType()) \|\|
1678	!match(V: Arg, P: m_Shuffle(v1: m_Value(V), v2: m_Undef(), mask: m_Mask (Mask))) \|\|
1679	!cast<ShuffleVectorInst>(Val: Arg)->isSingleSource())
1680	return nullptr;
1681
1682	int Sz = Mask.size();
1683	SmallBitVector UsedIndices(Sz);
1684	for (int Idx : Mask) {
1685	if (Idx == PoisonMaskElem \|\| UsedIndices.test(Idx))
1686	return nullptr;
1687	UsedIndices.set(Idx);
1688	}
1689
1690	// Can remove shuffle iff just shuffled elements, no repeats, undefs, or
1691	// other changes.
1692	return UsedIndices.all() ? V : nullptr;
1693	}
1694
1695	/// Fold an unsigned minimum of trailing or leading zero bits counts:
1696	/// umin(cttz(CtOp1, ZeroUndef), ConstOp) --> cttz(CtOp1 \| (1 << ConstOp))
1697	/// umin(ctlz(CtOp1, ZeroUndef), ConstOp) --> ctlz(CtOp1 \| (SignedMin
1698	/// >> ConstOp))
1699	/// umin(cttz(CtOp1), cttz(CtOp2)) --> cttz(CtOp1 \| CtOp2)
1700	/// umin(ctlz(CtOp1), ctlz(CtOp2)) --> ctlz(CtOp1 \| CtOp2)
1701	template <Intrinsic::ID IntrID>
1702	static Value *
1703	foldMinimumOverTrailingOrLeadingZeroCount(Value I0, Value I1,
1704	const DataLayout &DL,
1705	InstCombiner::BuilderTy &Builder) {
1706	static_assert(IntrID == Intrinsic::cttz \|\| IntrID == Intrinsic::ctlz,
1707	"This helper only supports cttz and ctlz intrinsics");
1708
1709	Value CtOp1, CtOp2;
1710	Value ZeroUndef1, ZeroUndef2;
1711	if (!match(I0, m_OneUse(
1712	m_Intrinsic<IntrID>(m_Value(V&: CtOp1), m_Value(V&: ZeroUndef1)))))
1713	return nullptr;
1714
1715	if (match(I1,
1716	m_OneUse(m_Intrinsic<IntrID>(m_Value(V&: CtOp2), m_Value(V&: ZeroUndef2)))))
1717	return Builder.CreateBinaryIntrinsic(
1718	ID: IntrID, LHS: Builder.CreateOr(LHS: CtOp1, RHS: CtOp2),
1719	RHS: Builder.CreateOr(LHS: ZeroUndef1, RHS: ZeroUndef2));
1720
1721	unsigned BitWidth = I1->getType()->getScalarSizeInBits();
1722	auto LessBitWidth = [BitWidth](auto &C) { return C.ult(BitWidth); };
1723	if (!match(I1, m_CheckedInt(LessBitWidth)))
1724	// We have a constant >= BitWidth (which can be handled by CVP)
1725	// or a non-splat vector with elements < and >= BitWidth
1726	return nullptr;
1727
1728	Type *Ty = I1->getType();
1729	Constant *NewConst = ConstantFoldBinaryOpOperands(
1730	Opcode: IntrID == Intrinsic::cttz ? Instruction::Shl : Instruction::LShr,
1731	LHS: IntrID == Intrinsic::cttz
1732	? ConstantInt::get(Ty, V: `1`)
1733	: ConstantInt::get(Ty, V: APInt::getSignedMinValue(numBits: BitWidth)),
1734	RHS: cast<Constant>(Val: I1), DL);
1735	return Builder.CreateBinaryIntrinsic(
1736	ID: IntrID, LHS: Builder.CreateOr(LHS: CtOp1, RHS: NewConst),
1737	RHS: ConstantInt::getTrue(Ty: ZeroUndef1->getType()));
1738	}
1739
1740	/// Return whether "X LOp (Y ROp Z)" is always equal to
1741	/// "(X LOp Y) ROp (X LOp Z)".
1742	static bool leftDistributesOverRight(Instruction::BinaryOps LOp, bool HasNUW,
1743	bool HasNSW, Intrinsic::ID ROp) {
1744	switch (ROp) {
1745	case Intrinsic::umax:
1746	case Intrinsic::umin:
1747	if (HasNUW && LOp == Instruction::Add)
1748	return true;
1749	if (HasNUW && LOp == Instruction::Shl)
1750	return true;
1751	return false;
1752	case Intrinsic::smax:
1753	case Intrinsic::smin:
1754	return HasNSW && LOp == Instruction::Add;
1755	default:
1756	return false;
1757	}
1758	}
1759
1760	/// Return whether "(X ROp Y) LOp Z" is always equal to
1761	/// "(X LOp Z) ROp (Y LOp Z)".
1762	static bool rightDistributesOverLeft(Instruction::BinaryOps LOp, bool HasNUW,
1763	bool HasNSW, Intrinsic::ID ROp) {
1764	if (Instruction::isCommutative(Opcode: LOp) \|\| LOp == Instruction::Shl)
1765	return leftDistributesOverRight(LOp, HasNUW, HasNSW, ROp);
1766	switch (ROp) {
1767	case Intrinsic::umax:
1768	case Intrinsic::umin:
1769	return HasNUW && LOp == Instruction::Sub;
1770	case Intrinsic::smax:
1771	case Intrinsic::smin:
1772	return HasNSW && LOp == Instruction::Sub;
1773	default:
1774	return false;
1775	}
1776	}
1777
1778	// Attempts to factorise a common term
1779	// in an instruction that has the form "(A op' B) op (C op' D)
1780	// where op is an intrinsic and op' is a binop
1781	static Value *
1782	foldIntrinsicUsingDistributiveLaws(IntrinsicInst *II,
1783	InstCombiner::BuilderTy &Builder) {
1784	Value LHS = II->getOperand(i_nocapture: `0`), RHS = II->getOperand(i_nocapture: `1`);
1785	Intrinsic::ID TopLevelOpcode = II->getIntrinsicID();
1786
1787	OverflowingBinaryOperator *Op0 = dyn_cast<OverflowingBinaryOperator>(Val: LHS);
1788	OverflowingBinaryOperator *Op1 = dyn_cast<OverflowingBinaryOperator>(Val: RHS);
1789
1790	if (!Op0 \|\| !Op1)
1791	return nullptr;
1792
1793	if (Op0->getOpcode() != Op1->getOpcode())
1794	return nullptr;
1795
1796	if (!Op0->hasOneUse() \|\| !Op1->hasOneUse())
1797	return nullptr;
1798
1799	Instruction::BinaryOps InnerOpcode =
1800	static_cast<Instruction::BinaryOps>(Op0->getOpcode());
1801	bool HasNUW = Op0->hasNoUnsignedWrap() && Op1->hasNoUnsignedWrap();
1802	bool HasNSW = Op0->hasNoSignedWrap() && Op1->hasNoSignedWrap();
1803
1804	Value *A = Op0->getOperand(i_nocapture: `0`);
1805	Value *B = Op0->getOperand(i_nocapture: `1`);
1806	Value *C = Op1->getOperand(i_nocapture: `0`);
1807	Value *D = Op1->getOperand(i_nocapture: `1`);
1808
1809	// Attempts to swap variables such that A equals C or B equals D,
1810	// if the inner operation is commutative.
1811	if (Op0->isCommutative() && A != C && B != D) {
1812	if (A == D \|\| B == C)
1813	std::swap(a&: C, b&: D);
1814	else
1815	return nullptr;
1816	}
1817
1818	BinaryOperator *NewBinop;
1819	if (A == C &&
1820	leftDistributesOverRight(LOp: InnerOpcode, HasNUW, HasNSW, ROp: TopLevelOpcode)) {
1821	Value *NewIntrinsic = Builder.CreateBinaryIntrinsic(ID: TopLevelOpcode, LHS: B, RHS: D);
1822	NewBinop =
1823	cast<BinaryOperator>(Val: Builder.CreateBinOp(Opc: InnerOpcode, LHS: A, RHS: NewIntrinsic));
1824	} else if (B == D && rightDistributesOverLeft(LOp: InnerOpcode, HasNUW, HasNSW,
1825	ROp: TopLevelOpcode)) {
1826	Value *NewIntrinsic = Builder.CreateBinaryIntrinsic(ID: TopLevelOpcode, LHS: A, RHS: C);
1827	NewBinop =
1828	cast<BinaryOperator>(Val: Builder.CreateBinOp(Opc: InnerOpcode, LHS: NewIntrinsic, RHS: B));
1829	} else {
1830	return nullptr;
1831	}
1832
1833	NewBinop->setHasNoUnsignedWrap(HasNUW);
1834	NewBinop->setHasNoSignedWrap(HasNSW);
1835
1836	return NewBinop;
1837	}
1838
1839	static Instruction foldNeonShift(IntrinsicInst II, InstCombinerImpl &IC) {
1840	Value *Arg0 = II->getArgOperand(i: `0`);
1841	auto *ShiftConst = dyn_cast<Constant>(Val: II->getArgOperand(i: `1`));
1842	if (!ShiftConst)
1843	return nullptr;
1844
1845	int ElemBits = Arg0->getType()->getScalarSizeInBits();
1846	bool AllPositive = true;
1847	bool AllNegative = true;
1848
1849	auto Check = [&](Constant C) -> bool* {
1850	if (auto *CI = dyn_cast_or_null<ConstantInt>(Val: C)) {
1851	const APInt &V = CI->getValue();
1852	if (V.isNonNegative()) {
1853	AllNegative = false;
1854	return AllPositive && V.ult(RHS: ElemBits);
1855	}
1856	AllPositive = false;
1857	return AllNegative && V.sgt(RHS: -ElemBits);
1858	}
1859	return false;
1860	};
1861
1862	if (auto *VTy = dyn_cast<FixedVectorType>(Val: Arg0->getType())) {
1863	for (unsigned I = `0`, E = VTy->getNumElements(); I < E; ++I) {
1864	if (!Check (ShiftConst->getAggregateElement(Elt: I)))
1865	return nullptr;
1866	}
1867
1868	} else if (!Check (ShiftConst))
1869	return nullptr;
1870
1871	IRBuilderBase &B = IC.Builder;
1872	if (AllPositive)
1873	return IC.replaceInstUsesWith(I&: *II, V: B.CreateShl(LHS: Arg0, RHS: ShiftConst));
1874
1875	Value *NegAmt = B.CreateNeg(V: ShiftConst);
1876	Intrinsic::ID IID = II->getIntrinsicID();
1877	const bool IsSigned =
1878	IID == Intrinsic::arm_neon_vshifts \|\| IID == Intrinsic::aarch64_neon_sshl;
1879	Value *Result =
1880	IsSigned ? B.CreateAShr(LHS: Arg0, RHS: NegAmt) : B.CreateLShr(LHS: Arg0, RHS: NegAmt);
1881	return IC.replaceInstUsesWith(I&: *II, V: Result);
1882	}
1883
1884	/// CallInst simplification. This mostly only handles folding of intrinsic
1885	/// instructions. For normal calls, it allows visitCallBase to do the heavy
1886	/// lifting.
1887	Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
1888	// Don't try to simplify calls without uses. It will not do anything useful,
1889	// but will result in the following folds being skipped.
1890	if (!CI.use_empty()) {
1891	SmallVector<Value *, `8`> Args(CI.args());
1892	if (Value *V = simplifyCall(Call: &CI, Callee: CI.getCalledOperand(), Args,
1893	Q: SQ.getWithInstruction(I: &CI)))
1894	return replaceInstUsesWith(I&: CI, V);
1895	}
1896
1897	if (Value *FreedOp = getFreedOperand(CB: &CI, TLI: &TLI))
1898	return visitFree(FI&: CI, FreedOp);
1899
1900	// If the caller function (i.e. us, the function that contains this CallInst)
1901	// is nounwind, mark the call as nounwind, even if the callee isn't.
1902	if (CI.getFunction()->doesNotThrow() && !CI.doesNotThrow()) {
1903	CI.setDoesNotThrow();
1904	return &CI;
1905	}
1906
1907	IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: &CI);
1908	if (!II)
1909	return visitCallBase(Call&: CI);
1910
1911	// Intrinsics cannot occur in an invoke or a callbr, so handle them here
1912	// instead of in visitCallBase.
1913	if (auto *MI = dyn_cast<AnyMemIntrinsic>(Val: II)) {
1914	if (auto NumBytes = MI->getLengthInBytes()) {
1915	// memmove/cpy/set of zero bytes is a noop.
1916	if (NumBytes ->isZero())
1917	return eraseInstFromFunction(I&: CI);
1918
1919	// For atomic unordered mem intrinsics if len is not a positive or
1920	// not a multiple of element size then behavior is undefined.
1921	if (MI->isAtomic() &&
1922	(NumBytes ->isNegative() \|\|
1923	(NumBytes ->getZExtValue() % MI->getElementSizeInBytes() != `0`))) {
1924	CreateNonTerminatorUnreachable(InsertAt: MI);
1925	assert(MI->getType()->isVoidTy() &&
1926	"non void atomic unordered mem intrinsic");
1927	return eraseInstFromFunction(I&: *MI);
1928	}
1929	}
1930
1931	// No other transformations apply to volatile transfers.
1932	if (MI->isVolatile())
1933	return nullptr;
1934
1935	if (AnyMemTransferInst *MTI = dyn_cast<AnyMemTransferInst>(Val: MI)) {
1936	// memmove(x,x,size) -> noop.
1937	if (MTI->getSource() == MTI->getDest())
1938	return eraseInstFromFunction(I&: CI);
1939	}
1940
1941	auto IsPointerUndefined = [MI](Value *Ptr) {
1942	return isa<ConstantPointerNull>(Val: Ptr) &&
1943	!NullPointerIsDefined(
1944	F: MI->getFunction(),
1945	AS: cast<PointerType>(Val: Ptr->getType())->getAddressSpace());
1946	};
1947	bool SrcIsUndefined = false;
1948	// If we can determine a pointer alignment that is bigger than currently
1949	// set, update the alignment.
1950	if (auto *MTI = dyn_cast<AnyMemTransferInst>(Val: MI)) {
1951	if (Instruction *I = SimplifyAnyMemTransfer(MI: MTI))
1952	return I;
1953	SrcIsUndefined = IsPointerUndefined (MTI->getRawSource());
1954	} else if (auto *MSI = dyn_cast<AnyMemSetInst>(Val: MI)) {
1955	if (Instruction *I = SimplifyAnyMemSet(MI: MSI))
1956	return I;
1957	}
1958
1959	// If src/dest is null, this memory intrinsic must be a noop.
1960	if (SrcIsUndefined \|\| IsPointerUndefined (MI->getRawDest())) {
1961	Builder.CreateAssumption(Cond: Builder.CreateIsNull(Arg: MI->getLength()));
1962	return eraseInstFromFunction(I&: CI);
1963	}
1964
1965	// If we have a memmove and the source operation is a constant global,
1966	// then the source and dest pointers can't alias, so we can change this
1967	// into a call to memcpy.
1968	if (auto *MMI = dyn_cast<AnyMemMoveInst>(Val: MI)) {
1969	if (GlobalVariable *GVSrc = dyn_cast<GlobalVariable>(Val: MMI->getSource()))
1970	if (GVSrc->isConstant()) {
1971	Module *M = CI.getModule();
1972	Intrinsic::ID MemCpyID =
1973	MMI->isAtomic()
1974	? Intrinsic::memcpy_element_unordered_atomic
1975	: Intrinsic::memcpy;
1976	Type *Tys[`3`] = { CI.getArgOperand(i: `0`)->getType(),
1977	CI.getArgOperand(i: `1`)->getType(),
1978	CI.getArgOperand(i: `2`)->getType() };
1979	CI.setCalledFunction(
1980	Intrinsic::getOrInsertDeclaration(M, id: MemCpyID, Tys));
1981	return II;
1982	}
1983	}
1984	}
1985
1986	// For fixed width vector result intrinsics, use the generic demanded vector
1987	// support.
1988	if (auto *IIFVTy = dyn_cast<FixedVectorType>(Val: II->getType())) {
1989	auto VWidth = IIFVTy->getNumElements();
1990	APInt PoisonElts(VWidth, `0`);
1991	APInt AllOnesEltMask(APInt::getAllOnes(numBits: VWidth));
1992	if (Value *V = SimplifyDemandedVectorElts(V: II, DemandedElts: AllOnesEltMask, PoisonElts)) {
1993	if (V != II)
1994	return replaceInstUsesWith(I&: *II, V);
1995	return II;
1996	}
1997	}
1998
1999	if (II->isCommutative()) {
2000	if (auto Pair = matchSymmetricPair(LHS: II->getOperand(i_nocapture: `0`), RHS: II->getOperand(i_nocapture: `1`))) {
2001	replaceOperand(I&: *II, OpNum: `0`, V: Pair ->first);
2002	replaceOperand(I&: *II, OpNum: `1`, V: Pair ->second);
2003	return II;
2004	}
2005
2006	if (CallInst *NewCall = canonicalizeConstantArg0ToArg1(Call&: CI))
2007	return NewCall;
2008	}
2009
2010	// Unused constrained FP intrinsic calls may have declared side effect, which
2011	// prevents it from being removed. In some cases however the side effect is
2012	// actually absent. To detect this case, call SimplifyConstrainedFPCall. If it
2013	// returns a replacement, the call may be removed.
2014	if (CI.use_empty() && isa<ConstrainedFPIntrinsic>(Val: CI)) {
2015	if (simplifyConstrainedFPCall(Call: &CI, Q: SQ.getWithInstruction(I: &CI)))
2016	return eraseInstFromFunction(I&: CI);
2017	}
2018
2019	Intrinsic::ID IID = II->getIntrinsicID();
2020	switch (IID) {
2021	case Intrinsic::objectsize: {
2022	SmallVector<Instruction *> InsertedInstructions;
2023	if (Value V = lowerObjectSizeCall(ObjectSize: II, DL, TLI: &TLI, AA, /MustSucceed=/*false,
2024	InsertedInstructions: &InsertedInstructions)) {
2025	for (Instruction *Inserted : InsertedInstructions)
2026	Worklist.add(I: Inserted);
2027	return replaceInstUsesWith(I&: CI, V);
2028	}
2029	return nullptr;
2030	}
2031	case Intrinsic::abs: {
2032	Value *IIOperand = II->getArgOperand(i: `0`);
2033	bool IntMinIsPoison = cast<Constant>(Val: II->getArgOperand(i: `1`))->isOneValue();
2034
2035	// abs(-x) -> abs(x)
2036	Value *X;
2037	if (match(V: IIOperand, P: m_Neg(V: m_Value(V&: X)))) {
2038	if (cast<Instruction>(Val: IIOperand)->hasNoSignedWrap() \|\| IntMinIsPoison)
2039	replaceOperand(I&: *II, OpNum: `1`, V: Builder.getTrue());
2040	return replaceOperand(I&: *II, OpNum: `0`, V: X);
2041	}
2042	if (match(V: IIOperand, P: m_c_Select(L: m_Neg(V: m_Value(V&: X)), R: m_Deferred(V: X))))
2043	return replaceOperand(I&: *II, OpNum: `0`, V: X);
2044
2045	Value *Y;
2046	// abs(a abs(b)) -> abs(a * b)*
2047	if (match(V: IIOperand,
2048	P: m_OneUse(SubPattern: m_c_Mul(L: m_Value(V&: X),
2049	R: m_Intrinsic<Intrinsic::abs>(Op0: m_Value(V&: Y)))))) {
2050	bool NSW =
2051	cast<Instruction>(Val: IIOperand)->hasNoSignedWrap() && IntMinIsPoison;
2052	auto *XY = NSW ? Builder.CreateNSWMul(LHS: X, RHS: Y) : Builder.CreateMul(LHS: X, RHS: Y);
2053	return replaceOperand(I&: *II, OpNum: `0`, V: XY);
2054	}
2055
2056	if (std::optional<bool> Known =
2057	getKnownSignOrZero(Op: IIOperand, SQ: SQ.getWithInstruction(I: II))) {
2058	// abs(x) -> x if x >= 0 (include abs(x-y) --> x - y where x >= y)
2059	// abs(x) -> x if x > 0 (include abs(x-y) --> x - y where x > y)
2060	if (!*Known)
2061	return replaceInstUsesWith(I&: *II, V: IIOperand);
2062
2063	// abs(x) -> -x if x < 0
2064	// abs(x) -> -x if x < = 0 (include abs(x-y) --> y - x where x <= y)
2065	if (IntMinIsPoison)
2066	return BinaryOperator::CreateNSWNeg(Op: IIOperand);
2067	return BinaryOperator::CreateNeg(Op: IIOperand);
2068	}
2069
2070	// abs (sext X) --> zext (abs X)*
2071	// Clear the IsIntMin (nsw) bit on the abs to allow narrowing.
2072	if (match(V: IIOperand, P: m_OneUse(SubPattern: m_SExt(Op: m_Value(V&: X))))) {
2073	Value *NarrowAbs =
2074	Builder.CreateBinaryIntrinsic(ID: Intrinsic::abs, LHS: X, RHS: Builder.getFalse());
2075	return CastInst::Create(Instruction::ZExt, S: NarrowAbs, Ty: II->getType());
2076	}
2077
2078	// Match a complicated way to check if a number is odd/even:
2079	// abs (srem X, 2) --> and X, 1
2080	const APInt *C;
2081	if (match(V: IIOperand, P: m_SRem(L: m_Value(V&: X), R: m_APInt(Res&: C))) && *C == `2`)
2082	return BinaryOperator::CreateAnd(V1: X, V2: ConstantInt::get(Ty: II->getType(), V: `1`));
2083
2084	break;
2085	}
2086	case Intrinsic::umin: {
2087	Value I0 = II->getArgOperand(i: `0`), I1 = II->getArgOperand(i: `1`);
2088	// umin(x, 1) == zext(x != 0)
2089	if (match(V: I1, P: m_One())) {
2090	assert(II->getType()->getScalarSizeInBits() != `1` &&
2091	"Expected simplify of umin with max constant");
2092	Value *Zero = Constant::getNullValue(Ty: I0->getType());
2093	Value *Cmp = Builder.CreateICmpNE(LHS: I0, RHS: Zero);
2094	return CastInst::Create(Instruction::ZExt, S: Cmp, Ty: II->getType());
2095	}
2096	// umin(cttz(x), const) --> cttz(x \| (1 << const))
2097	if (Value *FoldedCttz =
2098	foldMinimumOverTrailingOrLeadingZeroCount<Intrinsic::cttz>(
2099	I0, I1, DL, Builder))
2100	return replaceInstUsesWith(I&: *II, V: FoldedCttz);
2101	// umin(ctlz(x), const) --> ctlz(x \| (SignedMin >> const))
2102	if (Value *FoldedCtlz =
2103	foldMinimumOverTrailingOrLeadingZeroCount<Intrinsic::ctlz>(
2104	I0, I1, DL, Builder))
2105	return replaceInstUsesWith(I&: *II, V: FoldedCtlz);
2106	[[fallthrough]];
2107	}
2108	case Intrinsic::umax: {
2109	Value I0 = II->getArgOperand(i: `0`), I1 = II->getArgOperand(i: `1`);
2110	Value X, Y;
2111	if (match(V: I0, P: m_ZExt(Op: m_Value(V&: X))) && match(V: I1, P: m_ZExt(Op: m_Value(V&: Y))) &&
2112	(I0->hasOneUse() \|\| I1->hasOneUse()) && X->getType() == Y->getType()) {
2113	Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(ID: IID, LHS: X, RHS: Y);
2114	return CastInst::Create(Instruction::ZExt, S: NarrowMaxMin, Ty: II->getType());
2115	}
2116	Constant *C;
2117	if (match(V: I0, P: m_ZExt(Op: m_Value(V&: X))) && match(V: I1, P: m_Constant(C)) &&
2118	I0->hasOneUse()) {
2119	if (Constant *NarrowC = getLosslessUnsignedTrunc(C, DestTy: X->getType(), DL)) {
2120	Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(ID: IID, LHS: X, RHS: NarrowC);
2121	return CastInst::Create(Instruction::ZExt, S: NarrowMaxMin, Ty: II->getType());
2122	}
2123	}
2124	// If C is not 0:
2125	// umax(nuw_shl(x, C), x + 1) -> x == 0 ? 1 : nuw_shl(x, C)
2126	// If C is not 0 or 1:
2127	// umax(nuw_mul(x, C), x + 1) -> x == 0 ? 1 : nuw_mul(x, C)
2128	auto foldMaxMulShift = [&](Value A, Value B) -> Instruction * {
2129	const APInt *C;
2130	Value *X;
2131	if (!match(V: A, P: m_NUWShl(L: m_Value(V&: X), R: m_APInt(Res&: C))) &&
2132	!(match(V: A, P: m_NUWMul(L: m_Value(V&: X), R: m_APInt(Res&: C))) && !C->isOne()))
2133	return nullptr;
2134	if (C->isZero())
2135	return nullptr;
2136	if (!match(V: B, P: m_OneUse(SubPattern: m_Add(L: m_Specific(V: X), R: m_One()))))
2137	return nullptr;
2138
2139	Value *Cmp = Builder.CreateICmpEQ(LHS: X, RHS: ConstantInt::get(Ty: X->getType(), V: `0`));
2140	Value NewSelect = nullptr*;
2141	NewSelect = Builder.CreateSelectWithUnknownProfile(
2142	C: Cmp, True: ConstantInt::get(Ty: X->getType(), V: `1`), False: A, DEBUG_TYPE);
2143	return replaceInstUsesWith(I&: *II, V: NewSelect);
2144	};
2145
2146	if (IID == Intrinsic::umax) {
2147	if (Instruction *I = foldMaxMulShift (I0, I1))
2148	return I;
2149	if (Instruction *I = foldMaxMulShift (I1, I0))
2150	return I;
2151	}
2152
2153	// If both operands of unsigned min/max are sign-extended, it is still ok
2154	// to narrow the operation.
2155	[[fallthrough]];
2156	}
2157	case Intrinsic::smax:
2158	case Intrinsic::smin: {
2159	Value I0 = II->getArgOperand(i: `0`), I1 = II->getArgOperand(i: `1`);
2160	Value X, Y;
2161	if (match(V: I0, P: m_SExt(Op: m_Value(V&: X))) && match(V: I1, P: m_SExt(Op: m_Value(V&: Y))) &&
2162	(I0->hasOneUse() \|\| I1->hasOneUse()) && X->getType() == Y->getType()) {
2163	Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(ID: IID, LHS: X, RHS: Y);
2164	return CastInst::Create(Instruction::SExt, S: NarrowMaxMin, Ty: II->getType());
2165	}
2166
2167	Constant *C;
2168	if (match(V: I0, P: m_SExt(Op: m_Value(V&: X))) && match(V: I1, P: m_Constant(C)) &&
2169	I0->hasOneUse()) {
2170	if (Constant *NarrowC = getLosslessSignedTrunc(C, DestTy: X->getType(), DL)) {
2171	Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(ID: IID, LHS: X, RHS: NarrowC);
2172	return CastInst::Create(Instruction::SExt, S: NarrowMaxMin, Ty: II->getType());
2173	}
2174	}
2175
2176	// smax(smin(X, MinC), MaxC) -> smin(smax(X, MaxC), MinC) if MinC s>= MaxC
2177	// umax(umin(X, MinC), MaxC) -> umin(umax(X, MaxC), MinC) if MinC u>= MaxC
2178	const APInt MinC, MaxC;
2179	auto CreateCanonicalClampForm = [&](bool IsSigned) {
2180	auto MaxIID = IsSigned ? Intrinsic::smax : Intrinsic::umax;
2181	auto MinIID = IsSigned ? Intrinsic::smin : Intrinsic::umin;
2182	Value *NewMax = Builder.CreateBinaryIntrinsic(
2183	ID: MaxIID, LHS: X, RHS: ConstantInt::get(Ty: X->getType(), V: *MaxC));
2184	return replaceInstUsesWith(
2185	I&: *II, V: Builder.CreateBinaryIntrinsic(
2186	ID: MinIID, LHS: NewMax, RHS: ConstantInt::get(Ty: X->getType(), V: *MinC)));
2187	};
2188	if (IID == Intrinsic::smax &&
2189	match(V: I0, P: m_OneUse(SubPattern: m_Intrinsic<Intrinsic::smin>(Op0: m_Value(V&: X),
2190	Op1: m_APInt(Res&: MinC)))) &&
2191	match(V: I1, P: m_APInt(Res&: MaxC)) && MinC->sgt(RHS: *MaxC))
2192	return CreateCanonicalClampForm (true);
2193	if (IID == Intrinsic::umax &&
2194	match(V: I0, P: m_OneUse(SubPattern: m_Intrinsic<Intrinsic::umin>(Op0: m_Value(V&: X),
2195	Op1: m_APInt(Res&: MinC)))) &&
2196	match(V: I1, P: m_APInt(Res&: MaxC)) && MinC->ugt(RHS: *MaxC))
2197	return CreateCanonicalClampForm (false);
2198
2199	// umin(i1 X, i1 Y) -> and i1 X, Y
2200	// smax(i1 X, i1 Y) -> and i1 X, Y
2201	if ((IID == Intrinsic::umin \|\| IID == Intrinsic::smax) &&
2202	II->getType()->isIntOrIntVectorTy(BitWidth: `1`)) {
2203	return BinaryOperator::CreateAnd(V1: I0, V2: I1);
2204	}
2205
2206	// umax(i1 X, i1 Y) -> or i1 X, Y
2207	// smin(i1 X, i1 Y) -> or i1 X, Y
2208	if ((IID == Intrinsic::umax \|\| IID == Intrinsic::smin) &&
2209	II->getType()->isIntOrIntVectorTy(BitWidth: `1`)) {
2210	return BinaryOperator::CreateOr(V1: I0, V2: I1);
2211	}
2212
2213	// smin(smax(X, -1), 1) -> scmp(X, 0)
2214	// smax(smin(X, 1), -1) -> scmp(X, 0)
2215	// At this point, smax(smin(X, 1), -1) is changed to smin(smax(X, -1)
2216	// And i1's have been changed to and/ors
2217	// So we only need to check for smin
2218	if (IID == Intrinsic::smin) {
2219	if (match(V: I0, P: m_OneUse(SubPattern: m_SMax(L: m_Value(V&: X), R: m_AllOnes()))) &&
2220	match(V: I1, P: m_One())) {
2221	Value *Zero = ConstantInt::get(Ty: X->getType(), V: `0`);
2222	return replaceInstUsesWith(
2223	I&: CI,
2224	V: Builder.CreateIntrinsic(RetTy: II->getType(), ID: Intrinsic::scmp, Args: {X, Zero}));
2225	}
2226	}
2227
2228	if (IID == Intrinsic::smax \|\| IID == Intrinsic::smin) {
2229	// smax (neg nsw X), (neg nsw Y) --> neg nsw (smin X, Y)
2230	// smin (neg nsw X), (neg nsw Y) --> neg nsw (smax X, Y)
2231	// TODO: Canonicalize neg after min/max if I1 is constant.
2232	if (match(V: I0, P: m_NSWNeg(V: m_Value(V&: X))) && match(V: I1, P: m_NSWNeg(V: m_Value(V&: Y))) &&
2233	(I0->hasOneUse() \|\| I1->hasOneUse())) {
2234	Intrinsic::ID InvID = getInverseMinMaxIntrinsic(MinMaxID: IID);
2235	Value *InvMaxMin = Builder.CreateBinaryIntrinsic(ID: InvID, LHS: X, RHS: Y);
2236	return BinaryOperator::CreateNSWNeg(Op: InvMaxMin);
2237	}
2238	}
2239
2240	// (umax X, (xor X, Pow2))
2241	// -> (or X, Pow2)
2242	// (umin X, (xor X, Pow2))
2243	// -> (and X, ~Pow2)
2244	// (smax X, (xor X, Pos_Pow2))
2245	// -> (or X, Pos_Pow2)
2246	// (smin X, (xor X, Pos_Pow2))
2247	// -> (and X, ~Pos_Pow2)
2248	// (smax X, (xor X, Neg_Pow2))
2249	// -> (and X, ~Neg_Pow2)
2250	// (smin X, (xor X, Neg_Pow2))
2251	// -> (or X, Neg_Pow2)
2252	if ((match(V: I0, P: m_c_Xor(L: m_Specific(V: I1), R: m_Value(V&: X))) \|\|
2253	match(V: I1, P: m_c_Xor(L: m_Specific(V: I0), R: m_Value(V&: X)))) &&
2254	isKnownToBeAPowerOfTwo(V: X, / OrZero / true)) {
2255	bool UseOr = IID == Intrinsic::smax \|\| IID == Intrinsic::umax;
2256	bool UseAndN = IID == Intrinsic::smin \|\| IID == Intrinsic::umin;
2257
2258	if (IID == Intrinsic::smax \|\| IID == Intrinsic::smin) {
2259	auto KnownSign = getKnownSign(Op: X, SQ: SQ.getWithInstruction(I: II));
2260	if (KnownSign == std::nullopt) {
2261	UseOr = false;
2262	UseAndN = false;
2263	} else if (KnownSign /* true is Signed. /) {
2264	UseOr ^= true;
2265	UseAndN ^= true;
2266	Type *Ty = I0->getType();
2267	// Negative power of 2 must be IntMin. It's possible to be able to
2268	// prove negative / power of 2 without actually having known bits, so
2269	// just get the value by hand.
2270	X = Constant::getIntegerValue(
2271	Ty, V: APInt::getSignedMinValue(numBits: Ty->getScalarSizeInBits()));
2272	}
2273	}
2274	if (UseOr)
2275	return BinaryOperator::CreateOr(V1: I0, V2: X);
2276	else if (UseAndN)
2277	return BinaryOperator::CreateAnd(V1: I0, V2: Builder.CreateNot(V: X));
2278	}
2279
2280	// If we can eliminate ~A and Y is free to invert:
2281	// max ~A, Y --> ~(min A, ~Y)
2282	//
2283	// Examples:
2284	// max ~A, ~Y --> ~(min A, Y)
2285	// max ~A, C --> ~(min A, ~C)
2286	// max ~A, (max ~Y, ~Z) --> ~min( A, (min Y, Z))
2287	auto moveNotAfterMinMax = [&](Value X, Value Y) -> Instruction * {
2288	Value *A;
2289	if (match(V: X, P: m_OneUse(SubPattern: m_Not(V: m_Value(V&: A)))) &&
2290	!isFreeToInvert(V: A, WillInvertAllUses: A->hasOneUse())) {
2291	if (Value *NotY = getFreelyInverted(V: Y, WillInvertAllUses: Y->hasOneUse(), Builder: &Builder)) {
2292	Intrinsic::ID InvID = getInverseMinMaxIntrinsic(MinMaxID: IID);
2293	Value *InvMaxMin = Builder.CreateBinaryIntrinsic(ID: InvID, LHS: A, RHS: NotY);
2294	return BinaryOperator::CreateNot(Op: InvMaxMin);
2295	}
2296	}
2297	return nullptr;
2298	};
2299
2300	if (Instruction *I = moveNotAfterMinMax (I0, I1))
2301	return I;
2302	if (Instruction *I = moveNotAfterMinMax (I1, I0))
2303	return I;
2304
2305	if (Instruction *I = moveAddAfterMinMax(II, Builder))
2306	return I;
2307
2308	// minmax (X & NegPow2C, Y & NegPow2C) --> minmax(X, Y) & NegPow2C
2309	const APInt *RHSC;
2310	if (match(V: I0, P: m_OneUse(SubPattern: m_And(L: m_Value(V&: X), R: m_NegatedPower2(V&: RHSC)))) &&
2311	match(V: I1, P: m_OneUse(SubPattern: m_And(L: m_Value(V&: Y), R: m_SpecificInt(V: *RHSC)))))
2312	return BinaryOperator::CreateAnd(V1: Builder.CreateBinaryIntrinsic(ID: IID, LHS: X, RHS: Y),
2313	V2: ConstantInt::get(Ty: II->getType(), V: *RHSC));
2314
2315	// smax(X, -X) --> abs(X)
2316	// smin(X, -X) --> -abs(X)
2317	// umax(X, -X) --> -abs(X)
2318	// umin(X, -X) --> abs(X)
2319	if (isKnownNegation(X: I0, Y: I1)) {
2320	// We can choose either operand as the input to abs(), but if we can
2321	// eliminate the only use of a value, that's better for subsequent
2322	// transforms/analysis.
2323	if (I0->hasOneUse() && !I1->hasOneUse())
2324	std::swap(a&: I0, b&: I1);
2325
2326	// This is some variant of abs(). See if we can propagate 'nsw' to the abs
2327	// operation and potentially its negation.
2328	bool IntMinIsPoison = isKnownNegation(X: I0, Y: I1, / NeedNSW / true);
2329	Value *Abs = Builder.CreateBinaryIntrinsic(
2330	ID: Intrinsic::abs, LHS: I0,
2331	RHS: ConstantInt::getBool(Context&: II->getContext(), V: IntMinIsPoison));
2332
2333	// We don't have a "nabs" intrinsic, so negate if needed based on the
2334	// max/min operation.
2335	if (IID == Intrinsic::smin \|\| IID == Intrinsic::umax)
2336	Abs = Builder.CreateNeg(V: Abs, Name: "nabs", HasNSW: IntMinIsPoison);
2337	return replaceInstUsesWith(I&: CI, V: Abs);
2338	}
2339
2340	if (Instruction *Sel = foldClampRangeOfTwo(II, Builder))
2341	return Sel;
2342
2343	if (Instruction SAdd = matchSAddSubSat(MinMax1&: II))
2344	return SAdd;
2345
2346	if (Value *NewMinMax = reassociateMinMaxWithConstants(II, Builder, SQ))
2347	return replaceInstUsesWith(I&: *II, V: NewMinMax);
2348
2349	if (Instruction *R = reassociateMinMaxWithConstantInOperand(II, Builder))
2350	return R;
2351
2352	if (Instruction *NewMinMax = factorizeMinMaxTree(II))
2353	return NewMinMax;
2354
2355	// Try to fold minmax with constant RHS based on range information
2356	if (match(V: I1, P: m_APIntAllowPoison(Res&: RHSC))) {
2357	ICmpInst::Predicate Pred =
2358	ICmpInst::getNonStrictPredicate(pred: MinMaxIntrinsic::getPredicate(ID: IID));
2359	bool IsSigned = MinMaxIntrinsic::isSigned(ID: IID);
2360	ConstantRange LHS_CR = computeConstantRangeIncludingKnownBits(
2361	V: I0, ForSigned: IsSigned, SQ: SQ.getWithInstruction(I: II));
2362	if (!LHS_CR.isFullSet()) {
2363	if (LHS_CR.icmp(Pred, Other: *RHSC))
2364	return replaceInstUsesWith(I&: *II, V: I0);
2365	if (LHS_CR.icmp(Pred: ICmpInst::getSwappedPredicate(pred: Pred), Other: *RHSC))
2366	return replaceInstUsesWith(I&: *II,
2367	V: ConstantInt::get(Ty: II->getType(), V: *RHSC));
2368	}
2369	}
2370
2371	if (Value *V = foldIntrinsicUsingDistributiveLaws(II, Builder))
2372	return replaceInstUsesWith(I&: *II, V);
2373
2374	break;
2375	}
2376	case Intrinsic::scmp: {
2377	Value I0 = II->getArgOperand(i: `0`), I1 = II->getArgOperand(i: `1`);
2378	Value LHS, RHS;
2379	if (match(V: I0, P: m_NSWSub(L: m_Value(V&: LHS), R: m_Value(V&: RHS))) && match(V: I1, P: m_Zero()))
2380	return replaceInstUsesWith(
2381	I&: CI,
2382	V: Builder.CreateIntrinsic(RetTy: II->getType(), ID: Intrinsic::scmp, Args: {LHS, RHS}));
2383	break;
2384	}
2385	case Intrinsic::bitreverse: {
2386	Value *IIOperand = II->getArgOperand(i: `0`);
2387	// bitrev (zext i1 X to ?) --> X ? SignBitC : 0
2388	Value *X;
2389	if (match(V: IIOperand, P: m_ZExt(Op: m_Value(V&: X))) &&
2390	X->getType()->isIntOrIntVectorTy(BitWidth: `1`)) {
2391	Type *Ty = II->getType();
2392	APInt SignBit = APInt::getSignMask(BitWidth: Ty->getScalarSizeInBits());
2393	return SelectInst::Create(C: X, S1: ConstantInt::get(Ty, V: SignBit),
2394	S2: ConstantInt::getNullValue(Ty));
2395	}
2396
2397	if (Instruction *crossLogicOpFold =
2398	foldBitOrderCrossLogicOp<Intrinsic::bitreverse>(V: IIOperand, Builder))
2399	return crossLogicOpFold;
2400
2401	break;
2402	}
2403	case Intrinsic::bswap: {
2404	Value *IIOperand = II->getArgOperand(i: `0`);
2405
2406	// Try to canonicalize bswap-of-logical-shift-by-8-bit-multiple as
2407	// inverse-shift-of-bswap:
2408	// bswap (shl X, Y) --> lshr (bswap X), Y
2409	// bswap (lshr X, Y) --> shl (bswap X), Y
2410	Value X, Y;
2411	if (match(V: IIOperand, P: m_OneUse(SubPattern: m_LogicalShift(L: m_Value(V&: X), R: m_Value(V&: Y))))) {
2412	unsigned BitWidth = IIOperand->getType()->getScalarSizeInBits();
2413	if (MaskedValueIsZero(V: Y, Mask: APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: `3`))) {
2414	Value *NewSwap = Builder.CreateUnaryIntrinsic(ID: Intrinsic::bswap, V: X);
2415	BinaryOperator::BinaryOps InverseShift =
2416	cast<BinaryOperator>(Val: IIOperand)->getOpcode() == Instruction::Shl
2417	? Instruction::LShr
2418	: Instruction::Shl;
2419	return BinaryOperator::Create(Op: InverseShift, S1: NewSwap, S2: Y);
2420	}
2421	}
2422
2423	KnownBits Known = computeKnownBits(V: IIOperand, CxtI: II);
2424	uint64_t LZ = alignDown(Value: Known.countMinLeadingZeros(), Align: `8`);
2425	uint64_t TZ = alignDown(Value: Known.countMinTrailingZeros(), Align: `8`);
2426	unsigned BW = Known.getBitWidth();
2427
2428	// bswap(x) -> shift(x) if x has exactly one "active byte"
2429	if (BW - LZ - TZ == `8`) {
2430	assert(LZ != TZ && "active byte cannot be in the middle");
2431	if (LZ > TZ) // -> shl(x) if the "active byte" is in the low part of x
2432	return BinaryOperator::CreateNUWShl(
2433	V1: IIOperand, V2: ConstantInt::get(Ty: IIOperand->getType(), V: LZ - TZ));
2434	// -> lshr(x) if the "active byte" is in the high part of x
2435	return BinaryOperator::CreateExactLShr(
2436	V1: IIOperand, V2: ConstantInt::get(Ty: IIOperand->getType(), V: TZ - LZ));
2437	}
2438
2439	// bswap(trunc(bswap(x))) -> trunc(lshr(x, c))
2440	if (match(V: IIOperand, P: m_Trunc(Op: m_BSwap(Op0: m_Value(V&: X))))) {
2441	unsigned C = X->getType()->getScalarSizeInBits() - BW;
2442	Value *CV = ConstantInt::get(Ty: X->getType(), V: C);
2443	Value *V = Builder.CreateLShr(LHS: X, RHS: CV);
2444	return new TruncInst (V, IIOperand->getType());
2445	}
2446
2447	if (Instruction *crossLogicOpFold =
2448	foldBitOrderCrossLogicOp<Intrinsic::bswap>(V: IIOperand, Builder)) {
2449	return crossLogicOpFold;
2450	}
2451
2452	// Try to fold into bitreverse if bswap is the root of the expression tree.
2453	if (Instruction BitOp = matchBSwapOrBitReverse(I&: II, /MatchBSwaps/ false,
2454	/MatchBitReversals/ true))
2455	return BitOp;
2456	break;
2457	}
2458	case Intrinsic::masked_load:
2459	if (Value SimplifiedMaskedOp = simplifyMaskedLoad(II&: II))
2460	return replaceInstUsesWith(I&: CI, V: SimplifiedMaskedOp);
2461	break;
2462	case Intrinsic::masked_store:
2463	return simplifyMaskedStore(II&: *II);
2464	case Intrinsic::masked_gather:
2465	return simplifyMaskedGather(II&: *II);
2466	case Intrinsic::masked_scatter:
2467	return simplifyMaskedScatter(II&: *II);
2468	case Intrinsic::launder_invariant_group:
2469	case Intrinsic::strip_invariant_group:
2470	if (auto SkippedBarrier = simplifyInvariantGroupIntrinsic(II&: II, IC&: *this))
2471	return replaceInstUsesWith(I&: *II, V: SkippedBarrier);
2472	break;
2473	case Intrinsic::powi:
2474	if (ConstantInt *Power = dyn_cast<ConstantInt>(Val: II->getArgOperand(i: `1`))) {
2475	// 0 and 1 are handled in instsimplify
2476	// powi(x, -1) -> 1/x
2477	if (Power->isMinusOne())
2478	return BinaryOperator::CreateFDivFMF(V1: ConstantFP::get(Ty: CI.getType(), V: `1.0`),
2479	V2: II->getArgOperand(i: `0`), FMFSource: II);
2480	// powi(x, 2) -> x*x
2481	if (Power->equalsInt(V: `2`))
2482	return BinaryOperator::CreateFMulFMF(V1: II->getArgOperand(i: `0`),
2483	V2: II->getArgOperand(i: `0`), FMFSource: II);
2484
2485	if (!Power->getValue()[`0`]) {
2486	Value *X;
2487	// If power is even:
2488	// powi(-x, p) -> powi(x, p)
2489	// powi(fabs(x), p) -> powi(x, p)
2490	// powi(copysign(x, y), p) -> powi(x, p)
2491	if (match(V: II->getArgOperand(i: `0`), P: m_FNeg(X: m_Value(V&: X))) \|\|
2492	match(V: II->getArgOperand(i: `0`), P: m_FAbs(Op0: m_Value(V&: X))) \|\|
2493	match(V: II->getArgOperand(i: `0`),
2494	P: m_Intrinsic<Intrinsic::copysign>(Op0: m_Value(V&: X), Op1: m_Value())))
2495	return replaceOperand(I&: *II, OpNum: `0`, V: X);
2496	}
2497	}
2498	break;
2499
2500	case Intrinsic::cttz:
2501	case Intrinsic::ctlz:
2502	if (auto I = foldCttzCtlz(II&: II, IC&: *this))
2503	return I;
2504	break;
2505
2506	case Intrinsic::ctpop:
2507	if (auto I = foldCtpop(II&: II, IC&: *this))
2508	return I;
2509	break;
2510
2511	case Intrinsic::fshl:
2512	case Intrinsic::fshr: {
2513	Value Op0 = II->getArgOperand(i: `0`), Op1 = II->getArgOperand(i: `1`);
2514	Type *Ty = II->getType();
2515	unsigned BitWidth = Ty->getScalarSizeInBits();
2516	Constant *ShAmtC;
2517	if (match(V: II->getArgOperand(i: `2`), P: m_ImmConstant(C&: ShAmtC))) {
2518	// Canonicalize a shift amount constant operand to modulo the bit-width.
2519	Constant *WidthC = ConstantInt::get(Ty, V: BitWidth);
2520	Constant *ModuloC =
2521	ConstantFoldBinaryOpOperands(Opcode: Instruction::URem, LHS: ShAmtC, RHS: WidthC, DL);
2522	if (!ModuloC)
2523	return nullptr;
2524	if (ModuloC != ShAmtC)
2525	return replaceOperand(I&: *II, OpNum: `2`, V: ModuloC);
2526
2527	assert(match(ConstantFoldCompareInstOperands(ICmpInst::ICMP_UGT, WidthC,
2528	ShAmtC, DL),
2529	m_One()) &&
2530	"Shift amount expected to be modulo bitwidth");
2531
2532	// Canonicalize funnel shift right by constant to funnel shift left. This
2533	// is not entirely arbitrary. For historical reasons, the backend may
2534	// recognize rotate left patterns but miss rotate right patterns.
2535	if (IID == Intrinsic::fshr) {
2536	// fshr X, Y, C --> fshl X, Y, (BitWidth - C) if C is not zero.
2537	if (!isKnownNonZero(V: ShAmtC, Q: SQ.getWithInstruction(I: II)))
2538	return nullptr;
2539
2540	Constant *LeftShiftC = ConstantExpr::getSub(C1: WidthC, C2: ShAmtC);
2541	Module *Mod = II->getModule();
2542	Function *Fshl =
2543	Intrinsic::getOrInsertDeclaration(M: Mod, id: Intrinsic::fshl, Tys: Ty);
2544	return CallInst::Create(Func: Fshl, Args: { Op0, Op1, LeftShiftC });
2545	}
2546	assert(IID == Intrinsic::fshl &&
2547	"All funnel shifts by simple constants should go left");
2548
2549	// fshl(X, 0, C) --> shl X, C
2550	// fshl(X, undef, C) --> shl X, C
2551	if (match(V: Op1, P: m_ZeroInt()) \|\| match(V: Op1, P: m_Undef()))
2552	return BinaryOperator::CreateShl(V1: Op0, V2: ShAmtC);
2553
2554	// fshl(0, X, C) --> lshr X, (BW-C)
2555	// fshl(undef, X, C) --> lshr X, (BW-C)
2556	if (match(V: Op0, P: m_ZeroInt()) \|\| match(V: Op0, P: m_Undef()))
2557	return BinaryOperator::CreateLShr(V1: Op1,
2558	V2: ConstantExpr::getSub(C1: WidthC, C2: ShAmtC));
2559
2560	// fshl i16 X, X, 8 --> bswap i16 X (reduce to more-specific form)
2561	if (Op0 == Op1 && BitWidth == `16` && match(V: ShAmtC, P: m_SpecificInt(V: `8`))) {
2562	Module *Mod = II->getModule();
2563	Function *Bswap =
2564	Intrinsic::getOrInsertDeclaration(M: Mod, id: Intrinsic::bswap, Tys: Ty);
2565	return CallInst::Create(Func: Bswap, Args: { Op0 });
2566	}
2567	if (Instruction *BitOp =
2568	matchBSwapOrBitReverse(I&: II, /MatchBSwaps/* true,
2569	/MatchBitReversals/ true))
2570	return BitOp;
2571
2572	// R = fshl(X, X, C2)
2573	// fshl(R, R, C1) --> fshl(X, X, (C1 + C2) % bitsize)
2574	Value *InnerOp;
2575	const APInt ShAmtInnerC, ShAmtOuterC;
2576	if (match(V: Op0, P: m_FShl(Op0: m_Value(V&: InnerOp), Op1: m_Deferred(V: InnerOp),
2577	Op2: m_APInt(Res&: ShAmtInnerC))) &&
2578	match(V: ShAmtC, P: m_APInt(Res&: ShAmtOuterC)) && Op0 == Op1) {
2579	APInt Sum = ShAmtOuterC + ShAmtInnerC;
2580	APInt Modulo = Sum.urem(RHS: APInt (Sum.getBitWidth(), BitWidth));
2581	if (Modulo.isZero())
2582	return replaceInstUsesWith(I&: *II, V: InnerOp);
2583	Constant *ModuloC = ConstantInt::get(Ty, V: Modulo);
2584	return CallInst::Create(Func: cast<IntrinsicInst>(Val: Op0)->getCalledFunction(),
2585	Args: {InnerOp, InnerOp, ModuloC});
2586	}
2587	}
2588
2589	// fshl(X, X, Neg(Y)) --> fshr(X, X, Y)
2590	// fshr(X, X, Neg(Y)) --> fshl(X, X, Y)
2591	// if BitWidth is a power-of-2
2592	Value *Y;
2593	if (Op0 == Op1 && isPowerOf2_32(Value: BitWidth) &&
2594	match(V: II->getArgOperand(i: `2`), P: m_Neg(V: m_Value(V&: Y)))) {
2595	Module *Mod = II->getModule();
2596	Function *OppositeShift = Intrinsic::getOrInsertDeclaration(
2597	M: Mod, id: IID == Intrinsic::fshl ? Intrinsic::fshr : Intrinsic::fshl, Tys: Ty);
2598	return CallInst::Create(Func: OppositeShift, Args: {Op0, Op1, Y});
2599	}
2600
2601	// fshl(X, 0, Y) --> shl(X, and(Y, BitWidth - 1)) if bitwidth is a
2602	// power-of-2
2603	if (IID == Intrinsic::fshl && isPowerOf2_32(Value: BitWidth) &&
2604	match(V: Op1, P: m_ZeroInt())) {
2605	Value *Op2 = II->getArgOperand(i: `2`);
2606	Value *And = Builder.CreateAnd(LHS: Op2, RHS: ConstantInt::get(Ty, V: BitWidth - `1`));
2607	return BinaryOperator::CreateShl(V1: Op0, V2: And);
2608	}
2609
2610	// Left or right might be masked.
2611	if (SimplifyDemandedInstructionBits(Inst&: *II))
2612	return &CI;
2613
2614	// The shift amount (operand 2) of a funnel shift is modulo the bitwidth,
2615	// so only the low bits of the shift amount are demanded if the bitwidth is
2616	// a power-of-2.
2617	if (!isPowerOf2_32(Value: BitWidth))
2618	break;
2619	APInt Op2Demanded = APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: Log2_32_Ceil(Value: BitWidth));
2620	KnownBits Op2Known(BitWidth);
2621	if (SimplifyDemandedBits(I: II, OpNo: `2`, DemandedMask: Op2Demanded, Known&: Op2Known))
2622	return &CI;
2623	break;
2624	}
2625	case Intrinsic::ptrmask: {
2626	unsigned BitWidth = DL.getPointerTypeSizeInBits(II->getType());
2627	KnownBits Known(BitWidth);
2628	if (SimplifyDemandedInstructionBits(Inst&: *II, Known))
2629	return II;
2630
2631	Value InnerPtr, InnerMask;
2632	bool Changed = false;
2633	// Combine:
2634	// (ptrmask (ptrmask p, A), B)
2635	// -> (ptrmask p, (and A, B))
2636	if (match(V: II->getArgOperand(i: `0`),
2637	P: m_OneUse(SubPattern: m_Intrinsic<Intrinsic::ptrmask>(Op0: m_Value(V&: InnerPtr),
2638	Op1: m_Value(V&: InnerMask))))) {
2639	assert(II->getArgOperand(`1`)->getType() == InnerMask->getType() &&
2640	"Mask types must match");
2641	// TODO: If InnerMask == Op1, we could copy attributes from inner
2642	// callsite -> outer callsite.
2643	Value *NewMask = Builder.CreateAnd(LHS: II->getArgOperand(i: `1`), RHS: InnerMask);
2644	replaceOperand(I&: CI, OpNum: `0`, V: InnerPtr);
2645	replaceOperand(I&: CI, OpNum: `1`, V: NewMask);
2646	Changed = true;
2647	}
2648
2649	// See if we can deduce non-null.
2650	if (!CI.hasRetAttr(Kind: Attribute::NonNull) &&
2651	(Known.isNonZero() \|\|
2652	isKnownNonZero(V: II, Q: getSimplifyQuery().getWithInstruction(I: II)))) {
2653	CI.addRetAttr(Kind: Attribute::NonNull);
2654	Changed = true;
2655	}
2656
2657	unsigned NewAlignmentLog =
2658	std::min(a: Value::MaxAlignmentExponent,
2659	b: std::min(a: BitWidth - `1`, b: Known.countMinTrailingZeros()));
2660	// Known bits will capture if we had alignment information associated with
2661	// the pointer argument.
2662	if (NewAlignmentLog > Log2(A: CI.getRetAlign().valueOrOne())) {
2663	CI.addRetAttr(Attr: Attribute::getWithAlignment(
2664	Context&: CI.getContext(), Alignment: Align (uint64_t(`1`) << NewAlignmentLog)));
2665	Changed = true;
2666	}
2667	if (Changed)
2668	return &CI;
2669	break;
2670	}
2671	case Intrinsic::uadd_with_overflow:
2672	case Intrinsic::sadd_with_overflow: {
2673	if (Instruction *I = foldIntrinsicWithOverflowCommon(II))
2674	return I;
2675
2676	// Given 2 constant operands whose sum does not overflow:
2677	// uaddo (X +nuw C0), C1 -> uaddo X, C0 + C1
2678	// saddo (X +nsw C0), C1 -> saddo X, C0 + C1
2679	Value *X;
2680	const APInt C0, C1;
2681	Value *Arg0 = II->getArgOperand(i: `0`);
2682	Value *Arg1 = II->getArgOperand(i: `1`);
2683	bool IsSigned = IID == Intrinsic::sadd_with_overflow;
2684	bool HasNWAdd = IsSigned
2685	? match(V: Arg0, P: m_NSWAddLike(L: m_Value(V&: X), R: m_APInt(Res&: C0)))
2686	: match(V: Arg0, P: m_NUWAddLike(L: m_Value(V&: X), R: m_APInt(Res&: C0)));
2687	if (HasNWAdd && match(V: Arg1, P: m_APInt(Res&: C1))) {
2688	bool Overflow;
2689	APInt NewC =
2690	IsSigned ? C1->sadd_ov(RHS: C0, Overflow) : C1->uadd_ov(RHS: C0, Overflow);
2691	if (!Overflow)
2692	return replaceInstUsesWith(
2693	I&: *II, V: Builder.CreateBinaryIntrinsic(
2694	ID: IID, LHS: X, RHS: ConstantInt::get(Ty: Arg1->getType(), V: NewC)));
2695	}
2696	break;
2697	}
2698
2699	case Intrinsic::umul_with_overflow:
2700	case Intrinsic::smul_with_overflow:
2701	case Intrinsic::usub_with_overflow:
2702	if (Instruction *I = foldIntrinsicWithOverflowCommon(II))
2703	return I;
2704	break;
2705
2706	case Intrinsic::ssub_with_overflow: {
2707	if (Instruction *I = foldIntrinsicWithOverflowCommon(II))
2708	return I;
2709
2710	Constant *C;
2711	Value *Arg0 = II->getArgOperand(i: `0`);
2712	Value *Arg1 = II->getArgOperand(i: `1`);
2713	// Given a constant C that is not the minimum signed value
2714	// for an integer of a given bit width:
2715	//
2716	// ssubo X, C -> saddo X, -C
2717	if (match(V: Arg1, P: m_Constant(C)) && C->isNotMinSignedValue()) {
2718	Value *NegVal = ConstantExpr::getNeg(C);
2719	// Build a saddo call that is equivalent to the discovered
2720	// ssubo call.
2721	return replaceInstUsesWith(
2722	I&: *II, V: Builder.CreateBinaryIntrinsic(ID: Intrinsic::sadd_with_overflow,
2723	LHS: Arg0, RHS: NegVal));
2724	}
2725
2726	break;
2727	}
2728
2729	case Intrinsic::uadd_sat:
2730	case Intrinsic::sadd_sat:
2731	case Intrinsic::usub_sat:
2732	case Intrinsic::ssub_sat: {
2733	SaturatingInst *SI = cast<SaturatingInst>(Val: II);
2734	Type *Ty = SI->getType();
2735	Value *Arg0 = SI->getLHS();
2736	Value *Arg1 = SI->getRHS();
2737
2738	// Make use of known overflow information.
2739	OverflowResult OR = computeOverflow(BinaryOp: SI->getBinaryOp(), IsSigned: SI->isSigned(),
2740	LHS: Arg0, RHS: Arg1, CxtI: SI);
2741	switch (OR) {
2742	case OverflowResult::MayOverflow:
2743	break;
2744	case OverflowResult::NeverOverflows:
2745	if (SI->isSigned())
2746	return BinaryOperator::CreateNSW(Opc: SI->getBinaryOp(), V1: Arg0, V2: Arg1);
2747	else
2748	return BinaryOperator::CreateNUW(Opc: SI->getBinaryOp(), V1: Arg0, V2: Arg1);
2749	case OverflowResult::AlwaysOverflowsLow: {
2750	unsigned BitWidth = Ty->getScalarSizeInBits();
2751	APInt Min = APSInt::getMinValue(numBits: BitWidth, Unsigned: !SI->isSigned());
2752	return replaceInstUsesWith(I&: *SI, V: ConstantInt::get(Ty, V: Min));
2753	}
2754	case OverflowResult::AlwaysOverflowsHigh: {
2755	unsigned BitWidth = Ty->getScalarSizeInBits();
2756	APInt Max = APSInt::getMaxValue(numBits: BitWidth, Unsigned: !SI->isSigned());
2757	return replaceInstUsesWith(I&: *SI, V: ConstantInt::get(Ty, V: Max));
2758	}
2759	}
2760
2761	// usub_sat((sub nuw C, A), C1) -> usub_sat(usub_sat(C, C1), A)
2762	// which after that:
2763	// usub_sat((sub nuw C, A), C1) -> usub_sat(C - C1, A) if C1 u< C
2764	// usub_sat((sub nuw C, A), C1) -> 0 otherwise
2765	Constant C, C1;
2766	Value *A;
2767	if (IID == Intrinsic::usub_sat &&
2768	match(V: Arg0, P: m_NUWSub(L: m_ImmConstant(C), R: m_Value(V&: A))) &&
2769	match(V: Arg1, P: m_ImmConstant(C&: C1))) {
2770	auto *NewC = Builder.CreateBinaryIntrinsic(ID: Intrinsic::usub_sat, LHS: C, RHS: C1);
2771	auto *NewSub =
2772	Builder.CreateBinaryIntrinsic(ID: Intrinsic::usub_sat, LHS: NewC, RHS: A);
2773	return replaceInstUsesWith(I&: *SI, V: NewSub);
2774	}
2775
2776	// ssub.sat(X, C) -> sadd.sat(X, -C) if C != MIN
2777	if (IID == Intrinsic::ssub_sat && match(V: Arg1, P: m_Constant(C)) &&
2778	C->isNotMinSignedValue()) {
2779	Value *NegVal = ConstantExpr::getNeg(C);
2780	return replaceInstUsesWith(
2781	I&: *II, V: Builder.CreateBinaryIntrinsic(
2782	ID: Intrinsic::sadd_sat, LHS: Arg0, RHS: NegVal));
2783	}
2784
2785	// sat(sat(X + Val2) + Val) -> sat(X + (Val+Val2))
2786	// sat(sat(X - Val2) - Val) -> sat(X - (Val+Val2))
2787	// if Val and Val2 have the same sign
2788	if (auto *Other = dyn_cast<IntrinsicInst>(Val: Arg0)) {
2789	Value *X;
2790	const APInt Val, Val2;
2791	APInt NewVal;
2792	bool IsUnsigned =
2793	IID == Intrinsic::uadd_sat \|\| IID == Intrinsic::usub_sat;
2794	if (Other->getIntrinsicID() == IID &&
2795	match(V: Arg1, P: m_APInt(Res&: Val)) &&
2796	match(V: Other->getArgOperand(i: `0`), P: m_Value(V&: X)) &&
2797	match(V: Other->getArgOperand(i: `1`), P: m_APInt(Res&: Val2))) {
2798	if (IsUnsigned)
2799	NewVal = Val->uadd_sat(RHS: *Val2);
2800	else if (Val->isNonNegative() == Val2->isNonNegative()) {
2801	bool Overflow;
2802	NewVal = Val->sadd_ov(RHS: *Val2, Overflow);
2803	if (Overflow) {
2804	// Both adds together may add more than SignedMaxValue
2805	// without saturating the final result.
2806	break;
2807	}
2808	} else {
2809	// Cannot fold saturated addition with different signs.
2810	break;
2811	}
2812
2813	return replaceInstUsesWith(
2814	I&: *II, V: Builder.CreateBinaryIntrinsic(
2815	ID: IID, LHS: X, RHS: ConstantInt::get(Ty: II->getType(), V: NewVal)));
2816	}
2817	}
2818	break;
2819	}
2820
2821	case Intrinsic::minnum:
2822	case Intrinsic::maxnum:
2823	case Intrinsic::minimumnum:
2824	case Intrinsic::maximumnum:
2825	case Intrinsic::minimum:
2826	case Intrinsic::maximum: {
2827	Value *Arg0 = II->getArgOperand(i: `0`);
2828	Value *Arg1 = II->getArgOperand(i: `1`);
2829	Value X, Y;
2830	if (match(V: Arg0, P: m_FNeg(X: m_Value(V&: X))) && match(V: Arg1, P: m_FNeg(X: m_Value(V&: Y))) &&
2831	(Arg0->hasOneUse() \|\| Arg1->hasOneUse())) {
2832	// If both operands are negated, invert the call and negate the result:
2833	// min(-X, -Y) --> -(max(X, Y))
2834	// max(-X, -Y) --> -(min(X, Y))
2835	Intrinsic::ID NewIID;
2836	switch (IID) {
2837	case Intrinsic::maxnum:
2838	NewIID = Intrinsic::minnum;
2839	break;
2840	case Intrinsic::minnum:
2841	NewIID = Intrinsic::maxnum;
2842	break;
2843	case Intrinsic::maximumnum:
2844	NewIID = Intrinsic::minimumnum;
2845	break;
2846	case Intrinsic::minimumnum:
2847	NewIID = Intrinsic::maximumnum;
2848	break;
2849	case Intrinsic::maximum:
2850	NewIID = Intrinsic::minimum;
2851	break;
2852	case Intrinsic::minimum:
2853	NewIID = Intrinsic::maximum;
2854	break;
2855	default:
2856	llvm_unreachable("unexpected intrinsic ID");
2857	}
2858	Value *NewCall = Builder.CreateBinaryIntrinsic(ID: NewIID, LHS: X, RHS: Y, FMFSource: II);
2859	Instruction *FNeg = UnaryOperator::CreateFNeg(V: NewCall);
2860	FNeg->copyIRFlags(V: II);
2861	return FNeg;
2862	}
2863
2864	// m(m(X, C2), C1) -> m(X, C)
2865	const APFloat C1, C2;
2866	if (auto *M = dyn_cast<IntrinsicInst>(Val: Arg0)) {
2867	if (M->getIntrinsicID() == IID && match(V: Arg1, P: m_APFloat(Res&: C1)) &&
2868	((match(V: M->getArgOperand(i: `0`), P: m_Value(V&: X)) &&
2869	match(V: M->getArgOperand(i: `1`), P: m_APFloat(Res&: C2))) \|\|
2870	(match(V: M->getArgOperand(i: `1`), P: m_Value(V&: X)) &&
2871	match(V: M->getArgOperand(i: `0`), P: m_APFloat(Res&: C2))))) {
2872	APFloat Res(`0.0`);
2873	switch (IID) {
2874	case Intrinsic::maxnum:
2875	Res = maxnum(A: C1, B: C2);
2876	break;
2877	case Intrinsic::minnum:
2878	Res = minnum(A: C1, B: C2);
2879	break;
2880	case Intrinsic::maximumnum:
2881	Res = maximumnum(A: C1, B: C2);
2882	break;
2883	case Intrinsic::minimumnum:
2884	Res = minimumnum(A: C1, B: C2);
2885	break;
2886	case Intrinsic::maximum:
2887	Res = maximum(A: C1, B: C2);
2888	break;
2889	case Intrinsic::minimum:
2890	Res = minimum(A: C1, B: C2);
2891	break;
2892	default:
2893	llvm_unreachable("unexpected intrinsic ID");
2894	}
2895	// TODO: Conservatively intersecting FMF. If Res == C2, the transform
2896	// was a simplification (so Arg0 and its original flags could
2897	// propagate?)
2898	Value *V = Builder.CreateBinaryIntrinsic(
2899	ID: IID, LHS: X, RHS: ConstantFP::get(Ty: Arg0->getType(), V: Res),
2900	FMFSource: FMFSource::intersect(A: II, B: M));
2901	return replaceInstUsesWith(I&: *II, V);
2902	}
2903	}
2904
2905	// m((fpext X), (fpext Y)) -> fpext (m(X, Y))
2906	if (match(V: Arg0, P: m_FPExt(Op: m_Value(V&: X))) && match(V: Arg1, P: m_FPExt(Op: m_Value(V&: Y))) &&
2907	(Arg0->hasOneUse() \|\| Arg1->hasOneUse()) &&
2908	X->getType() == Y->getType()) {
2909	Value *NewCall =
2910	Builder.CreateBinaryIntrinsic(ID: IID, LHS: X, RHS: Y, FMFSource: II, Name: II->getName());
2911	return new FPExtInst (NewCall, II->getType());
2912	}
2913
2914	// m(fpext X, C) -> fpext m(X, TruncC) if C can be losslessly truncated.
2915	Constant *C;
2916	if (match(V: Arg0, P: m_OneUse(SubPattern: m_FPExt(Op: m_Value(V&: X)))) &&
2917	match(V: Arg1, P: m_ImmConstant(C))) {
2918	if (Constant *TruncC =
2919	getLosslessInvCast(C, InvCastTo: X->getType(), CastOp: Instruction::FPExt, DL)) {
2920	Value *NewCall =
2921	Builder.CreateBinaryIntrinsic(ID: IID, LHS: X, RHS: TruncC, FMFSource: II, Name: II->getName());
2922	return new FPExtInst (NewCall, II->getType());
2923	}
2924	}
2925
2926	// max X, -X --> fabs X
2927	// min X, -X --> -(fabs X)
2928	// TODO: Remove one-use limitation? That is obviously better for max,
2929	// hence why we don't check for one-use for that. However,
2930	// it would be an extra instruction for min (fnabs), but
2931	// that is still likely better for analysis and codegen.
2932	auto IsMinMaxOrXNegX = [IID, &X](Value Op0, Value Op1) {
2933	if (match(V: Op0, P: m_FNeg(X: m_Value(V&: X))) && match(V: Op1, P: m_Specific(V: X)))
2934	return Op0->hasOneUse() \|\|
2935	(IID != Intrinsic::minimum && IID != Intrinsic::minnum &&
2936	IID != Intrinsic::minimumnum);
2937	return false;
2938	};
2939
2940	if (IsMinMaxOrXNegX (Arg0, Arg1) \|\| IsMinMaxOrXNegX (Arg1, Arg0)) {
2941	Value *R = Builder.CreateUnaryIntrinsic(ID: Intrinsic::fabs, V: X, FMFSource: II);
2942	if (IID == Intrinsic::minimum \|\| IID == Intrinsic::minnum \|\|
2943	IID == Intrinsic::minimumnum)
2944	R = Builder.CreateFNegFMF(V: R, FMFSource: II);
2945	return replaceInstUsesWith(I&: *II, V: R);
2946	}
2947
2948	break;
2949	}
2950	case Intrinsic::matrix_multiply: {
2951	// Optimize negation in matrix multiplication.
2952
2953	// -A * -B -> A * B
2954	Value A, B;
2955	if (match(V: II->getArgOperand(i: `0`), P: m_FNeg(X: m_Value(V&: A))) &&
2956	match(V: II->getArgOperand(i: `1`), P: m_FNeg(X: m_Value(V&: B)))) {
2957	replaceOperand(I&: *II, OpNum: `0`, V: A);
2958	replaceOperand(I&: *II, OpNum: `1`, V: B);
2959	return II;
2960	}
2961
2962	Value *Op0 = II->getOperand(i_nocapture: `0`);
2963	Value *Op1 = II->getOperand(i_nocapture: `1`);
2964	Value OpNotNeg, NegatedOp;
2965	unsigned NegatedOpArg, OtherOpArg;
2966	if (match(V: Op0, P: m_FNeg(X: m_Value(V&: OpNotNeg)))) {
2967	NegatedOp = Op0;
2968	NegatedOpArg = `0`;
2969	OtherOpArg = `1`;
2970	} else if (match(V: Op1, P: m_FNeg(X: m_Value(V&: OpNotNeg)))) {
2971	NegatedOp = Op1;
2972	NegatedOpArg = `1`;
2973	OtherOpArg = `0`;
2974	} else
2975	// Multiplication doesn't have a negated operand.
2976	break;
2977
2978	// Only optimize if the negated operand has only one use.
2979	if (!NegatedOp->hasOneUse())
2980	break;
2981
2982	Value *OtherOp = II->getOperand(i_nocapture: OtherOpArg);
2983	VectorType *RetTy = cast<VectorType>(Val: II->getType());
2984	VectorType *NegatedOpTy = cast<VectorType>(Val: NegatedOp->getType());
2985	VectorType *OtherOpTy = cast<VectorType>(Val: OtherOp->getType());
2986	ElementCount NegatedCount = NegatedOpTy->getElementCount();
2987	ElementCount OtherCount = OtherOpTy->getElementCount();
2988	ElementCount RetCount = RetTy->getElementCount();
2989	// (-A) * B -> A * (-B), if it is cheaper to negate B and vice versa.
2990	if (ElementCount::isKnownGT(LHS: NegatedCount, RHS: OtherCount) &&
2991	ElementCount::isKnownLT(LHS: OtherCount, RHS: RetCount)) {
2992	Value *InverseOtherOp = Builder.CreateFNeg(V: OtherOp);
2993	replaceOperand(I&: *II, OpNum: NegatedOpArg, V: OpNotNeg);
2994	replaceOperand(I&: *II, OpNum: OtherOpArg, V: InverseOtherOp);
2995	return II;
2996	}
2997	// (-A) * B -> -(A * B), if it is cheaper to negate the result
2998	if (ElementCount::isKnownGT(LHS: NegatedCount, RHS: RetCount)) {
2999	SmallVector<Value *, `5`> NewArgs(II->args());
3000	NewArgs [NegatedOpArg] = OpNotNeg;
3001	Instruction *NewMul =
3002	Builder.CreateIntrinsic(RetTy: II->getType(), ID: IID, Args: NewArgs, FMFSource: II);
3003	return replaceInstUsesWith(I&: *II, V: Builder.CreateFNegFMF(V: NewMul, FMFSource: II));
3004	}
3005	break;
3006	}
3007	case Intrinsic::fmuladd: {
3008	// Try to simplify the underlying FMul.
3009	if (Value *V =
3010	simplifyFMulInst(LHS: II->getArgOperand(i: `0`), RHS: II->getArgOperand(i: `1`),
3011	FMF: II->getFastMathFlags(), Q: SQ.getWithInstruction(I: II)))
3012	return BinaryOperator::CreateFAddFMF(V1: V, V2: II->getArgOperand(i: `2`),
3013	FMF: II->getFastMathFlags());
3014
3015	[[fallthrough]];
3016	}
3017	case Intrinsic::fma: {
3018	// fma fneg(x), fneg(y), z -> fma x, y, z
3019	Value *Src0 = II->getArgOperand(i: `0`);
3020	Value *Src1 = II->getArgOperand(i: `1`);
3021	Value *Src2 = II->getArgOperand(i: `2`);
3022	Value X, Y;
3023	if (match(V: Src0, P: m_FNeg(X: m_Value(V&: X))) && match(V: Src1, P: m_FNeg(X: m_Value(V&: Y)))) {
3024	replaceOperand(I&: *II, OpNum: `0`, V: X);
3025	replaceOperand(I&: *II, OpNum: `1`, V: Y);
3026	return II;
3027	}
3028
3029	// fma fabs(x), fabs(x), z -> fma x, x, z
3030	if (match(V: Src0, P: m_FAbs(Op0: m_Value(V&: X))) &&
3031	match(V: Src1, P: m_FAbs(Op0: m_Specific(V: X)))) {
3032	replaceOperand(I&: *II, OpNum: `0`, V: X);
3033	replaceOperand(I&: *II, OpNum: `1`, V: X);
3034	return II;
3035	}
3036
3037	// Try to simplify the underlying FMul. We can only apply simplifications
3038	// that do not require rounding.
3039	if (Value *V = simplifyFMAFMul(LHS: Src0, RHS: Src1, FMF: II->getFastMathFlags(),
3040	Q: SQ.getWithInstruction(I: II)))
3041	return BinaryOperator::CreateFAddFMF(V1: V, V2: Src2, FMF: II->getFastMathFlags());
3042
3043	// fma x, y, 0 -> fmul x, y
3044	// This is always valid for -0.0, but requires nsz for +0.0 as
3045	// -0.0 + 0.0 = 0.0, which would not be the same as the fmul on its own.
3046	if (match(V: Src2, P: m_NegZeroFP()) \|\|
3047	(match(V: Src2, P: m_PosZeroFP()) && II->getFastMathFlags().noSignedZeros()))
3048	return BinaryOperator::CreateFMulFMF(V1: Src0, V2: Src1, FMFSource: II);
3049
3050	// fma x, -1.0, y -> fsub y, x
3051	if (match(V: Src1, P: m_SpecificFP(V: -`1.0`)))
3052	return BinaryOperator::CreateFSubFMF(V1: Src2, V2: Src0, FMFSource: II);
3053
3054	break;
3055	}
3056	case Intrinsic::copysign: {
3057	Value Mag = II->getArgOperand(i: `0`), Sign = II->getArgOperand(i: `1`);
3058	if (std::optional<bool> KnownSignBit = computeKnownFPSignBit(
3059	V: Sign, SQ: getSimplifyQuery().getWithInstruction(I: II))) {
3060	if (*KnownSignBit) {
3061	// If we know that the sign argument is negative, reduce to FNABS:
3062	// copysign Mag, -Sign --> fneg (fabs Mag)
3063	Value *Fabs = Builder.CreateUnaryIntrinsic(ID: Intrinsic::fabs, V: Mag, FMFSource: II);
3064	return replaceInstUsesWith(I&: *II, V: Builder.CreateFNegFMF(V: Fabs, FMFSource: II));
3065	}
3066
3067	// If we know that the sign argument is positive, reduce to FABS:
3068	// copysign Mag, +Sign --> fabs Mag
3069	Value *Fabs = Builder.CreateUnaryIntrinsic(ID: Intrinsic::fabs, V: Mag, FMFSource: II);
3070	return replaceInstUsesWith(I&: *II, V: Fabs);
3071	}
3072
3073	// Propagate sign argument through nested calls:
3074	// copysign Mag, (copysign ?, X) --> copysign Mag, X
3075	Value *X;
3076	if (match(V: Sign, P: m_Intrinsic<Intrinsic::copysign>(Op0: m_Value(), Op1: m_Value(V&: X)))) {
3077	Value *CopySign =
3078	Builder.CreateCopySign(LHS: Mag, RHS: X, FMFSource: FMFSource::intersect(A: II, B: Sign));
3079	return replaceInstUsesWith(I&: *II, V: CopySign);
3080	}
3081
3082	// Clear sign-bit of constant magnitude:
3083	// copysign -MagC, X --> copysign MagC, X
3084	// TODO: Support constant folding for fabs
3085	const APFloat *MagC;
3086	if (match(V: Mag, P: m_APFloat(Res&: MagC)) && MagC->isNegative()) {
3087	APFloat PosMagC = *MagC;
3088	PosMagC.clearSign();
3089	return replaceOperand(I&: *II, OpNum: `0`, V: ConstantFP::get(Ty: Mag->getType(), V: PosMagC));
3090	}
3091
3092	// Peek through changes of magnitude's sign-bit. This call rewrites those:
3093	// copysign (fabs X), Sign --> copysign X, Sign
3094	// copysign (fneg X), Sign --> copysign X, Sign
3095	if (match(V: Mag, P: m_FAbs(Op0: m_Value(V&: X))) \|\| match(V: Mag, P: m_FNeg(X: m_Value(V&: X))))
3096	return replaceOperand(I&: *II, OpNum: `0`, V: X);
3097
3098	Type *SignEltTy = Sign->getType()->getScalarType();
3099
3100	Value *CastSrc;
3101	if (match(V: Sign,
3102	P: m_OneUse(SubPattern: m_ElementWiseBitCast(Op: m_OneUse(SubPattern: m_Value(V&: CastSrc))))) &&
3103	CastSrc->getType()->isIntOrIntVectorTy() &&
3104	APFloat::hasSignBitInMSB(SignEltTy->getFltSemantics())) {
3105	KnownBits Known(SignEltTy->getPrimitiveSizeInBits());
3106	if (SimplifyDemandedBits(I: cast<Instruction>(Val: Sign), Op: `0`,
3107	DemandedMask: APInt::getSignMask(BitWidth: Known.getBitWidth()), Known,
3108	Q: SQ))
3109	return II;
3110	}
3111
3112	break;
3113	}
3114	case Intrinsic::fabs: {
3115	Value Cond, TVal, *FVal;
3116	Value *Arg = II->getArgOperand(i: `0`);
3117	Value *X;
3118	// fabs (-X) --> fabs (X)
3119	if (match(V: Arg, P: m_FNeg(X: m_Value(V&: X)))) {
3120	CallInst *Fabs = Builder.CreateUnaryIntrinsic(ID: Intrinsic::fabs, V: X, FMFSource: II);
3121	return replaceInstUsesWith(I&: CI, V: Fabs);
3122	}
3123
3124	if (match(V: Arg, P: m_Select(C: m_Value(V&: Cond), L: m_Value(V&: TVal), R: m_Value(V&: FVal)))) {
3125	// fabs (select Cond, TrueC, FalseC) --> select Cond, AbsT, AbsF
3126	if (Arg->hasOneUse() ? (isa<Constant>(Val: TVal) \|\| isa<Constant>(Val: FVal))
3127	: (isa<Constant>(Val: TVal) && isa<Constant>(Val: FVal))) {
3128	CallInst *AbsT = Builder.CreateCall(Callee: II->getCalledFunction(), Args: {TVal});
3129	CallInst *AbsF = Builder.CreateCall(Callee: II->getCalledFunction(), Args: {FVal});
3130	SelectInst *SI = SelectInst::Create(C: Cond, S1: AbsT, S2: AbsF);
3131	SI->setFastMathFlags(II->getFastMathFlags() \|
3132	cast<SelectInst>(Val: Arg)->getFastMathFlags());
3133	// Can't copy nsz to select, as even with the nsz flag the fabs result
3134	// always has the sign bit unset.
3135	SI->setHasNoSignedZeros(false);
3136	return SI;
3137	}
3138	// fabs (select Cond, -FVal, FVal) --> fabs FVal
3139	if (match(V: TVal, P: m_FNeg(X: m_Specific(V: FVal))))
3140	return replaceOperand(I&: *II, OpNum: `0`, V: FVal);
3141	// fabs (select Cond, TVal, -TVal) --> fabs TVal
3142	if (match(V: FVal, P: m_FNeg(X: m_Specific(V: TVal))))
3143	return replaceOperand(I&: *II, OpNum: `0`, V: TVal);
3144	}
3145
3146	Value Magnitude, Sign;
3147	if (match(V: II->getArgOperand(i: `0`),
3148	P: m_CopySign(Op0: m_Value(V&: Magnitude), Op1: m_Value(V&: Sign)))) {
3149	// fabs (copysign x, y) -> (fabs x)
3150	CallInst *AbsSign =
3151	Builder.CreateUnaryIntrinsic(ID: Intrinsic::fabs, V: Magnitude, FMFSource: II);
3152	return replaceInstUsesWith(I&: *II, V: AbsSign);
3153	}
3154
3155	[[fallthrough]];
3156	}
3157	case Intrinsic::ceil:
3158	case Intrinsic::floor:
3159	case Intrinsic::round:
3160	case Intrinsic::roundeven:
3161	case Intrinsic::nearbyint:
3162	case Intrinsic::rint:
3163	case Intrinsic::trunc: {
3164	Value *ExtSrc;
3165	if (match(V: II->getArgOperand(i: `0`), P: m_OneUse(SubPattern: m_FPExt(Op: m_Value(V&: ExtSrc))))) {
3166	// Narrow the call: intrinsic (fpext x) -> fpext (intrinsic x)
3167	Value *NarrowII = Builder.CreateUnaryIntrinsic(ID: IID, V: ExtSrc, FMFSource: II);
3168	return new FPExtInst (NarrowII, II->getType());
3169	}
3170	break;
3171	}
3172	case Intrinsic::cos:
3173	case Intrinsic::amdgcn_cos:
3174	case Intrinsic::cosh: {
3175	Value X, Sign;
3176	Value *Src = II->getArgOperand(i: `0`);
3177	if (match(V: Src, P: m_FNeg(X: m_Value(V&: X))) \|\| match(V: Src, P: m_FAbs(Op0: m_Value(V&: X))) \|\|
3178	match(V: Src, P: m_CopySign(Op0: m_Value(V&: X), Op1: m_Value(V&: Sign)))) {
3179	// f(-x) --> f(x)
3180	// f(fabs(x)) --> f(x)
3181	// f(copysign(x, y)) --> f(x)
3182	// for f in {cos, cosh}
3183	return replaceOperand(I&: *II, OpNum: `0`, V: X);
3184	}
3185	break;
3186	}
3187	case Intrinsic::sin:
3188	case Intrinsic::amdgcn_sin:
3189	case Intrinsic::sinh:
3190	case Intrinsic::tan:
3191	case Intrinsic::tanh: {
3192	Value *X;
3193	if (match(V: II->getArgOperand(i: `0`), P: m_OneUse(SubPattern: m_FNeg(X: m_Value(V&: X))))) {
3194	// f(-x) --> -f(x)
3195	// for f in {sin, sinh, tan, tanh}
3196	Value *NewFunc = Builder.CreateUnaryIntrinsic(ID: IID, V: X, FMFSource: II);
3197	return UnaryOperator::CreateFNegFMF(Op: NewFunc, FMFSource: II);
3198	}
3199	break;
3200	}
3201	case Intrinsic::ldexp: {
3202	// ldexp(ldexp(x, a), b) -> ldexp(x, a + b)
3203	//
3204	// The danger is if the first ldexp would overflow to infinity or underflow
3205	// to zero, but the combined exponent avoids it. We ignore this with
3206	// reassoc.
3207	//
3208	// It's also safe to fold if we know both exponents are >= 0 or <= 0 since
3209	// it would just double down on the overflow/underflow which would occur
3210	// anyway.
3211	//
3212	// TODO: Could do better if we had range tracking for the input value
3213	// exponent. Also could broaden sign check to cover == 0 case.
3214	Value *Src = II->getArgOperand(i: `0`);
3215	Value *Exp = II->getArgOperand(i: `1`);
3216
3217	uint64_t ConstExp;
3218	if (match(V: Exp, P: m_ConstantInt(V&: ConstExp))) {
3219	// ldexp(x, K) -> fmul x, 2^K
3220	const fltSemantics &FPTy =
3221	Src->getType()->getScalarType()->getFltSemantics();
3222
3223	APFloat Scaled = scalbn(X: APFloat::getOne(Sem: FPTy), Exp: static_cast<int>(ConstExp),
3224	RM: APFloat::rmNearestTiesToEven);
3225	if (!Scaled.isZero() && !Scaled.isInfinity()) {
3226	// Skip overflow and underflow cases.
3227	Constant *FPConst = ConstantFP::get(Ty: Src->getType(), V: Scaled);
3228	return BinaryOperator::CreateFMulFMF(V1: Src, V2: FPConst, FMFSource: II);
3229	}
3230	}
3231
3232	Value *InnerSrc;
3233	Value *InnerExp;
3234	if (match(V: Src, P: m_OneUse(SubPattern: m_Intrinsic<Intrinsic::ldexp>(
3235	Op0: m_Value(V&: InnerSrc), Op1: m_Value(V&: InnerExp)))) &&
3236	Exp->getType() == InnerExp->getType()) {
3237	FastMathFlags FMF = II->getFastMathFlags();
3238	FastMathFlags InnerFlags = cast<FPMathOperator>(Val: Src)->getFastMathFlags();
3239
3240	if ((FMF.allowReassoc() && InnerFlags.allowReassoc()) \|\|
3241	signBitMustBeTheSame(Op0: Exp, Op1: InnerExp, SQ: SQ.getWithInstruction(I: II))) {
3242	// TODO: Add nsw/nuw probably safe if integer type exceeds exponent
3243	// width.
3244	Value *NewExp = Builder.CreateAdd(LHS: InnerExp, RHS: Exp);
3245	II->setArgOperand(i: `1`, v: NewExp);
3246	II->setFastMathFlags(InnerFlags); // Or the inner flags.
3247	return replaceOperand(I&: *II, OpNum: `0`, V: InnerSrc);
3248	}
3249	}
3250
3251	// ldexp(x, zext(i1 y)) -> fmul x, (select y, 2.0, 1.0)
3252	// ldexp(x, sext(i1 y)) -> fmul x, (select y, 0.5, 1.0)
3253	Value *ExtSrc;
3254	if (match(V: Exp, P: m_ZExt(Op: m_Value(V&: ExtSrc))) &&
3255	ExtSrc->getType()->getScalarSizeInBits() == `1`) {
3256	Value *Select =
3257	Builder.CreateSelect(C: ExtSrc, True: ConstantFP::get(Ty: II->getType(), V: `2.0`),
3258	False: ConstantFP::get(Ty: II->getType(), V: `1.0`));
3259	return BinaryOperator::CreateFMulFMF(V1: Src, V2: Select, FMFSource: II);
3260	}
3261	if (match(V: Exp, P: m_SExt(Op: m_Value(V&: ExtSrc))) &&
3262	ExtSrc->getType()->getScalarSizeInBits() == `1`) {
3263	Value *Select =
3264	Builder.CreateSelect(C: ExtSrc, True: ConstantFP::get(Ty: II->getType(), V: `0.5`),
3265	False: ConstantFP::get(Ty: II->getType(), V: `1.0`));
3266	return BinaryOperator::CreateFMulFMF(V1: Src, V2: Select, FMFSource: II);
3267	}
3268
3269	// ldexp(x, c ? exp : 0) -> c ? ldexp(x, exp) : x
3270	// ldexp(x, c ? 0 : exp) -> c ? x : ldexp(x, exp)
3271	//
3272	// TODO: If we cared, should insert a canonicalize for x
3273	Value SelectCond, SelectLHS, *SelectRHS;
3274	if (match(V: II->getArgOperand(i: `1`),
3275	P: m_OneUse(SubPattern: m_Select(C: m_Value(V&: SelectCond), L: m_Value(V&: SelectLHS),
3276	R: m_Value(V&: SelectRHS))))) {
3277	Value NewLdexp = nullptr*;
3278	Value Select = nullptr*;
3279	if (match(V: SelectRHS, P: m_ZeroInt())) {
3280	NewLdexp = Builder.CreateLdexp(Src, Exp: SelectLHS, FMFSource: II);
3281	Select = Builder.CreateSelect(C: SelectCond, True: NewLdexp, False: Src);
3282	} else if (match(V: SelectLHS, P: m_ZeroInt())) {
3283	NewLdexp = Builder.CreateLdexp(Src, Exp: SelectRHS, FMFSource: II);
3284	Select = Builder.CreateSelect(C: SelectCond, True: Src, False: NewLdexp);
3285	}
3286
3287	if (NewLdexp) {
3288	Select->takeName(V: II);
3289	return replaceInstUsesWith(I&: *II, V: Select);
3290	}
3291	}
3292
3293	break;
3294	}
3295	case Intrinsic::ptrauth_auth:
3296	case Intrinsic::ptrauth_resign: {
3297	// We don't support this optimization on intrinsic calls with deactivation
3298	// symbols, which are represented using operand bundles.
3299	if (II->hasOperandBundles())
3300	break;
3301
3302	// (sign|resign) + (auth|resign) can be folded by omitting the middle
3303	// sign+auth component if the key and discriminator match.
3304	bool NeedSign = II->getIntrinsicID() == Intrinsic::ptrauth_resign;
3305	Value *Ptr = II->getArgOperand(i: `0`);
3306	Value *Key = II->getArgOperand(i: `1`);
3307	Value *Disc = II->getArgOperand(i: `2`);
3308
3309	// AuthKey will be the key we need to end up authenticating against in
3310	// whatever we replace this sequence with.
3311	Value AuthKey = nullptr, AuthDisc = nullptr, *BasePtr;
3312	if (const auto *CI = dyn_cast<CallBase>(Val: Ptr)) {
3313	// We don't support this optimization on intrinsic calls with deactivation
3314	// symbols, which are represented using operand bundles.
3315	if (CI->hasOperandBundles())
3316	break;
3317
3318	BasePtr = CI->getArgOperand(i: `0`);
3319	if (CI->getIntrinsicID() == Intrinsic::ptrauth_sign) {
3320	if (CI->getArgOperand(i: `1`) != Key \|\| CI->getArgOperand(i: `2`) != Disc)
3321	break;
3322	} else if (CI->getIntrinsicID() == Intrinsic::ptrauth_resign) {
3323	if (CI->getArgOperand(i: `3`) != Key \|\| CI->getArgOperand(i: `4`) != Disc)
3324	break;
3325	AuthKey = CI->getArgOperand(i: `1`);
3326	AuthDisc = CI->getArgOperand(i: `2`);
3327	} else
3328	break;
3329	} else if (const auto *PtrToInt = dyn_cast<PtrToIntOperator>(Val: Ptr)) {
3330	// ptrauth constants are equivalent to a call to @llvm.ptrauth.sign for
3331	// our purposes, so check for that too.
3332	const auto *CPA = dyn_cast<ConstantPtrAuth>(Val: PtrToInt->getOperand(i_nocapture: `0`));
3333	if (!CPA \|\| !CPA->isKnownCompatibleWith(Key, Discriminator: Disc, DL))
3334	break;
3335
3336	// resign(ptrauth(p,ks,ds),ks,ds,kr,dr) -> ptrauth(p,kr,dr)
3337	if (NeedSign && isa<ConstantInt>(Val: II->getArgOperand(i: `4`))) {
3338	auto *SignKey = cast<ConstantInt>(Val: II->getArgOperand(i: `3`));
3339	auto *SignDisc = cast<ConstantInt>(Val: II->getArgOperand(i: `4`));
3340	auto *Null = ConstantPointerNull::get(T: Builder.getPtrTy());
3341	auto *NewCPA = ConstantPtrAuth::get(Ptr: CPA->getPointer(), Key: SignKey,
3342	Disc: SignDisc, /AddrDisc=/Null,
3343	/DeactivationSymbol=/Null);
3344	replaceInstUsesWith(
3345	I&: *II, V: ConstantExpr::getPointerCast(C: NewCPA, Ty: II->getType()));
3346	return eraseInstFromFunction(I&: *II);
3347	}
3348
3349	// auth(ptrauth(p,k,d),k,d) -> p
3350	BasePtr = Builder.CreatePtrToInt(V: CPA->getPointer(), DestTy: II->getType());
3351	} else
3352	break;
3353
3354	unsigned NewIntrin;
3355	if (AuthKey && NeedSign) {
3356	// resign(0,1) + resign(1,2) = resign(0, 2)
3357	NewIntrin = Intrinsic::ptrauth_resign;
3358	} else if (AuthKey) {
3359	// resign(0,1) + auth(1) = auth(0)
3360	NewIntrin = Intrinsic::ptrauth_auth;
3361	} else if (NeedSign) {
3362	// sign(0) + resign(0, 1) = sign(1)
3363	NewIntrin = Intrinsic::ptrauth_sign;
3364	} else {
3365	// sign(0) + auth(0) = nop
3366	replaceInstUsesWith(I&: *II, V: BasePtr);
3367	return eraseInstFromFunction(I&: *II);
3368	}
3369
3370	SmallVector<Value *, `4`> CallArgs;
3371	CallArgs.push_back(Elt: BasePtr);
3372	if (AuthKey) {
3373	CallArgs.push_back(Elt: AuthKey);
3374	CallArgs.push_back(Elt: AuthDisc);
3375	}
3376
3377	if (NeedSign) {
3378	CallArgs.push_back(Elt: II->getArgOperand(i: `3`));
3379	CallArgs.push_back(Elt: II->getArgOperand(i: `4`));
3380	}
3381
3382	Function *NewFn =
3383	Intrinsic::getOrInsertDeclaration(M: II->getModule(), id: NewIntrin);
3384	return CallInst::Create(Func: NewFn, Args: CallArgs);
3385	}
3386	case Intrinsic::arm_neon_vtbl1:
3387	case Intrinsic::arm_neon_vtbl2:
3388	case Intrinsic::arm_neon_vtbl3:
3389	case Intrinsic::arm_neon_vtbl4:
3390	case Intrinsic::aarch64_neon_tbl1:
3391	case Intrinsic::aarch64_neon_tbl2:
3392	case Intrinsic::aarch64_neon_tbl3:
3393	case Intrinsic::aarch64_neon_tbl4:
3394	return simplifyNeonTbl(II&: II, IC&: this, /IsExtension=/false);
3395	case Intrinsic::arm_neon_vtbx1:
3396	case Intrinsic::arm_neon_vtbx2:
3397	case Intrinsic::arm_neon_vtbx3:
3398	case Intrinsic::arm_neon_vtbx4:
3399	case Intrinsic::aarch64_neon_tbx1:
3400	case Intrinsic::aarch64_neon_tbx2:
3401	case Intrinsic::aarch64_neon_tbx3:
3402	case Intrinsic::aarch64_neon_tbx4:
3403	return simplifyNeonTbl(II&: II, IC&: this, /IsExtension=/true);
3404
3405	case Intrinsic::arm_neon_vmulls:
3406	case Intrinsic::arm_neon_vmullu:
3407	case Intrinsic::aarch64_neon_smull:
3408	case Intrinsic::aarch64_neon_umull: {
3409	Value *Arg0 = II->getArgOperand(i: `0`);
3410	Value *Arg1 = II->getArgOperand(i: `1`);
3411
3412	// Handle mul by zero first:
3413	if (isa<ConstantAggregateZero>(Val: Arg0) \|\| isa<ConstantAggregateZero>(Val: Arg1)) {
3414	return replaceInstUsesWith(I&: CI, V: ConstantAggregateZero::get(Ty: II->getType()));
3415	}
3416
3417	// Check for constant LHS & RHS - in this case we just simplify.
3418	bool Zext = (IID == Intrinsic::arm_neon_vmullu \|\|
3419	IID == Intrinsic::aarch64_neon_umull);
3420	VectorType *NewVT = cast<VectorType>(Val: II->getType());
3421	if (Constant *CV0 = dyn_cast<Constant>(Val: Arg0)) {
3422	if (Constant *CV1 = dyn_cast<Constant>(Val: Arg1)) {
3423	Value V0 = Builder.CreateIntCast(V: CV0, DestTy: NewVT, /isSigned=/*!Zext);
3424	Value V1 = Builder.CreateIntCast(V: CV1, DestTy: NewVT, /isSigned=/*!Zext);
3425	return replaceInstUsesWith(I&: CI, V: Builder.CreateMul(LHS: V0, RHS: V1));
3426	}
3427
3428	// Couldn't simplify - canonicalize constant to the RHS.
3429	std::swap(a&: Arg0, b&: Arg1);
3430	}
3431
3432	// Handle mul by one:
3433	if (Constant *CV1 = dyn_cast<Constant>(Val: Arg1))
3434	if (ConstantInt *Splat =
3435	dyn_cast_or_null<ConstantInt>(Val: CV1->getSplatValue()))
3436	if (Splat->isOne())
3437	return CastInst::CreateIntegerCast(S: Arg0, Ty: II->getType(),
3438	/isSigned=/!Zext);
3439
3440	break;
3441	}
3442	case Intrinsic::arm_neon_aesd:
3443	case Intrinsic::arm_neon_aese:
3444	case Intrinsic::aarch64_crypto_aesd:
3445	case Intrinsic::aarch64_crypto_aese:
3446	case Intrinsic::aarch64_sve_aesd:
3447	case Intrinsic::aarch64_sve_aese: {
3448	Value *DataArg = II->getArgOperand(i: `0`);
3449	Value *KeyArg = II->getArgOperand(i: `1`);
3450
3451	// Accept zero on either operand.
3452	if (!match(V: KeyArg, P: m_ZeroInt()))
3453	std::swap(a&: KeyArg, b&: DataArg);
3454
3455	// Try to use the builtin XOR in AESE and AESD to eliminate a prior XOR
3456	Value Data, Key;
3457	if (match(V: KeyArg, P: m_ZeroInt()) &&
3458	match(V: DataArg, P: m_Xor(L: m_Value(V&: Data), R: m_Value(V&: Key)))) {
3459	replaceOperand(I&: *II, OpNum: `0`, V: Data);
3460	replaceOperand(I&: *II, OpNum: `1`, V: Key);
3461	return II;
3462	}
3463	break;
3464	}
3465	case Intrinsic::arm_neon_vshifts:
3466	case Intrinsic::arm_neon_vshiftu:
3467	case Intrinsic::aarch64_neon_sshl:
3468	case Intrinsic::aarch64_neon_ushl:
3469	return foldNeonShift(II, IC&: *this);
3470	case Intrinsic::hexagon_V6_vandvrt:
3471	case Intrinsic::hexagon_V6_vandvrt_128B: {
3472	// Simplify Q -> V -> Q conversion.
3473	if (auto Op0 = dyn_cast<IntrinsicInst>(Val: II->getArgOperand(i: `0`))) {
3474	Intrinsic::ID ID0 = Op0->getIntrinsicID();
3475	if (ID0 != Intrinsic::hexagon_V6_vandqrt &&
3476	ID0 != Intrinsic::hexagon_V6_vandqrt_128B)
3477	break;
3478	Value Bytes = Op0->getArgOperand(i: `1`), Mask = II->getArgOperand(i: `1`);
3479	uint64_t Bytes1 = computeKnownBits(V: Bytes, CxtI: Op0).One.getZExtValue();
3480	uint64_t Mask1 = computeKnownBits(V: Mask, CxtI: II).One.getZExtValue();
3481	// Check if every byte has common bits in Bytes and Mask.
3482	uint64_t C = Bytes1 & Mask1;
3483	if ((C & `0xFF`) && (C & `0xFF00`) && (C & `0xFF0000`) && (C & `0xFF000000`))
3484	return replaceInstUsesWith(I&: *II, V: Op0->getArgOperand(i: `0`));
3485	}
3486	break;
3487	}
3488	case Intrinsic::stackrestore: {
3489	enum class ClassifyResult {
3490	None,
3491	Alloca,
3492	StackRestore,
3493	CallWithSideEffects,
3494	};
3495	auto Classify = [](const Instruction *I) {
3496	if (isa<AllocaInst>(Val: I))
3497	return ClassifyResult::Alloca;
3498
3499	if (auto *CI = dyn_cast<CallInst>(Val: I)) {
3500	if (auto *II = dyn_cast<IntrinsicInst>(Val: CI)) {
3501	if (II->getIntrinsicID() == Intrinsic::stackrestore)
3502	return ClassifyResult::StackRestore;
3503
3504	if (II->mayHaveSideEffects())
3505	return ClassifyResult::CallWithSideEffects;
3506	} else {
3507	// Consider all non-intrinsic calls to be side effects
3508	return ClassifyResult::CallWithSideEffects;
3509	}
3510	}
3511
3512	return ClassifyResult::None;
3513	};
3514
3515	// If the stacksave and the stackrestore are in the same BB, and there is
3516	// no intervening call, alloca, or stackrestore of a different stacksave,
3517	// remove the restore. This can happen when variable allocas are DCE'd.
3518	if (IntrinsicInst *SS = dyn_cast<IntrinsicInst>(Val: II->getArgOperand(i: `0`))) {
3519	if (SS->getIntrinsicID() == Intrinsic::stacksave &&
3520	SS->getParent() == II->getParent()) {
3521	BasicBlock::iterator BI(SS);
3522	bool CannotRemove = false;
3523	for (++BI; &*BI != II; ++BI) {
3524	switch (Classify (&*BI)) {
3525	case ClassifyResult::None:
3526	// So far so good, look at next instructions.
3527	break;
3528
3529	case ClassifyResult::StackRestore:
3530	// If we found an intervening stackrestore for a different
3531	// stacksave, we can't remove the stackrestore. Otherwise, continue.
3532	if (cast<IntrinsicInst>(Val&: *BI).getArgOperand(i: `0`) != SS)
3533	CannotRemove = true;
3534	break;
3535
3536	case ClassifyResult::Alloca:
3537	case ClassifyResult::CallWithSideEffects:
3538	// If we found an alloca, a non-intrinsic call, or an intrinsic
3539	// call with side effects, we can't remove the stackrestore.
3540	CannotRemove = true;
3541	break;
3542	}
3543	if (CannotRemove)
3544	break;
3545	}
3546
3547	if (!CannotRemove)
3548	return eraseInstFromFunction(I&: CI);
3549	}
3550	}
3551
3552	// Scan down this block to see if there is another stack restore in the
3553	// same block without an intervening call/alloca.
3554	BasicBlock::iterator BI(II);
3555	Instruction *TI = II->getParent()->getTerminator();
3556	bool CannotRemove = false;
3557	for (++BI; &*BI != TI; ++BI) {
3558	switch (Classify (&*BI)) {
3559	case ClassifyResult::None:
3560	// So far so good, look at next instructions.
3561	break;
3562
3563	case ClassifyResult::StackRestore:
3564	// If there is a stackrestore below this one, remove this one.
3565	return eraseInstFromFunction(I&: CI);
3566
3567	case ClassifyResult::Alloca:
3568	case ClassifyResult::CallWithSideEffects:
3569	// If we found an alloca, a non-intrinsic call, or an intrinsic call
3570	// with side effects (such as llvm.stacksave and llvm.read_register),
3571	// we can't remove the stack restore.
3572	CannotRemove = true;
3573	break;
3574	}
3575	if (CannotRemove)
3576	break;
3577	}
3578
3579	// If the stack restore is in a return, resume, or unwind block and if there
3580	// are no allocas or calls between the restore and the return, nuke the
3581	// restore.
3582	if (!CannotRemove && (isa<ReturnInst>(Val: TI) \|\| isa<ResumeInst>(Val: TI)))
3583	return eraseInstFromFunction(I&: CI);
3584	break;
3585	}
3586	case Intrinsic::lifetime_end:
3587	// Asan needs to poison memory to detect invalid access which is possible
3588	// even for empty lifetime range.
3589	if (II->getFunction()->hasFnAttribute(Kind: Attribute::SanitizeAddress) \|\|
3590	II->getFunction()->hasFnAttribute(Kind: Attribute::SanitizeMemory) \|\|
3591	II->getFunction()->hasFnAttribute(Kind: Attribute::SanitizeHWAddress) \|\|
3592	II->getFunction()->hasFnAttribute(Kind: Attribute::SanitizeMemTag))
3593	break;
3594
3595	if (removeTriviallyEmptyRange(EndI&: II, IC&: this, IsStart: [](const IntrinsicInst &I) {
3596	return I.getIntrinsicID() == Intrinsic::lifetime_start;
3597	}))
3598	return nullptr;
3599	break;
3600	case Intrinsic::assume: {
3601	Value *IIOperand = II->getArgOperand(i: `0`);
3602	SmallVector<OperandBundleDef, `4`> OpBundles;
3603	II->getOperandBundlesAsDefs(Defs&: OpBundles);
3604
3605	/// This will remove the boolean Condition from the assume given as
3606	/// argument and remove the assume if it becomes useless.
3607	/// Always returns nullptr, for use as a return value.
3608	auto RemoveConditionFromAssume = [&](Instruction Assume) -> Instruction {
3609	assert(isa<AssumeInst>(Assume));
3610	if (isAssumeWithEmptyBundle(Assume: *cast<AssumeInst>(Val: II)))
3611	return eraseInstFromFunction(I&: CI);
3612	replaceUse(U&: II->getOperandUse(i: `0`), NewValue: ConstantInt::getTrue(Context&: II->getContext()));
3613	return nullptr;
3614	};
3615	// Remove an assume if it is followed by an identical assume.
3616	// TODO: Do we need this? Unless there are conflicting assumptions, the
3617	// computeKnownBits(IIOperand) below here eliminates redundant assumes.
3618	Instruction *Next = II->getNextNode();
3619	if (match(V: Next, P: m_Intrinsic<Intrinsic::assume>(Op0: m_Specific(V: IIOperand))))
3620	return RemoveConditionFromAssume (Next);
3621
3622	// Canonicalize assume(a && b) -> assume(a); assume(b);
3623	// Note: New assumption intrinsics created here are registered by
3624	// the InstCombineIRInserter object.
3625	FunctionType *AssumeIntrinsicTy = II->getFunctionType();
3626	Value *AssumeIntrinsic = II->getCalledOperand();
3627	Value A, B;
3628	if (match(V: IIOperand, P: m_LogicalAnd(L: m_Value(V&: A), R: m_Value(V&: B)))) {
3629	Builder.CreateCall(FTy: AssumeIntrinsicTy, Callee: AssumeIntrinsic, Args: A, OpBundles,
3630	Name: II->getName());
3631	Builder.CreateCall(FTy: AssumeIntrinsicTy, Callee: AssumeIntrinsic, Args: B, Name: II->getName());
3632	return eraseInstFromFunction(I&: *II);
3633	}
3634	// assume(!(a || b)) -> assume(!a); assume(!b);
3635	if (match(V: IIOperand, P: m_Not(V: m_LogicalOr(L: m_Value(V&: A), R: m_Value(V&: B))))) {
3636	Builder.CreateCall(FTy: AssumeIntrinsicTy, Callee: AssumeIntrinsic,
3637	Args: Builder.CreateNot(V: A), OpBundles, Name: II->getName());
3638	Builder.CreateCall(FTy: AssumeIntrinsicTy, Callee: AssumeIntrinsic,
3639	Args: Builder.CreateNot(V: B), Name: II->getName());
3640	return eraseInstFromFunction(I&: *II);
3641	}
3642
3643	for (unsigned Idx = `0`; Idx < II->getNumOperandBundles(); Idx++) {
3644	OperandBundleUse OBU = II->getOperandBundleAt(Index: Idx);
3645
3646	// Separate storage assumptions apply to the underlying allocations, not
3647	// any particular pointer within them. When evaluating the hints for AA
3648	// purposes we getUnderlyingObject them; by precomputing the answers here
3649	// we can avoid having to do so repeatedly there.
3650	if (OBU.getTagName() == "separate_storage") {
3651	assert(OBU.Inputs.size() == `2`);
3652	auto MaybeSimplifyHint = [&](const Use &U) {
3653	Value *Hint = U.get();
3654	// Not having a limit is safe because InstCombine removes unreachable
3655	// code.
3656	Value UnderlyingObject = getUnderlyingObject(V: Hint, /MaxLookup/* `0`);
3657	if (Hint != UnderlyingObject)
3658	replaceUse(U&: const_cast<Use &>(U), NewValue: UnderlyingObject);
3659	};
3660	MaybeSimplifyHint (OBU.Inputs [`0`]);
3661	MaybeSimplifyHint (OBU.Inputs [`1`]);
3662	}
3663
3664	// Try to remove redundant alignment assumptions.
3665	if (OBU.getTagName() == "align" && OBU.Inputs.size() == `2`) {
3666	RetainedKnowledge RK = getKnowledgeFromOperandInAssume(
3667	Assume&: *cast<AssumeInst>(Val: II), Idx: II->arg_size() + Idx);
3668	if (!RK \|\| RK.AttrKind != Attribute::Alignment \|\|
3669	!isPowerOf2_64(Value: RK.ArgValue) \|\| !isa<ConstantInt>(Val: RK.IRArgValue))
3670	continue;
3671
3672	// Remove align 1 bundles; they don't add any useful information.
3673	if (RK.ArgValue == `1`)
3674	return CallBase::removeOperandBundle(CB: II, ID: OBU.getTagID());
3675
3676	// Don't try to remove align assumptions for pointers derived from
3677	// arguments. We might lose information if the function gets inlined and
3678	// the align argument attribute disappears.
3679	Value *UO = getUnderlyingObject(V: RK.WasOn);
3680	if (!UO \|\| isa<Argument>(Val: UO))
3681	continue;
3682
3683	// Compute known bits for the pointer, passing nullptr as context to
3684	// avoid computeKnownBits using the assumption we are about to remove
3685	// for reasoning.
3686	KnownBits Known = computeKnownBits(V: RK.WasOn, /CtxI=/CxtI: nullptr);
3687	unsigned TZ = std::min(a: Known.countMinTrailingZeros(),
3688	b: Value::MaxAlignmentExponent);
3689	if ((`1ULL` << TZ) < RK.ArgValue)
3690	continue;
3691	return CallBase::removeOperandBundle(CB: II, ID: OBU.getTagID());
3692	}
3693
3694	if (OBU.getTagName() == "nonnull" && OBU.Inputs.size() == `1`) {
3695	RetainedKnowledge RK = getKnowledgeFromOperandInAssume(
3696	Assume&: *cast<AssumeInst>(Val: II), Idx: II->arg_size() + Idx);
3697	if (!RK \|\| RK.AttrKind != Attribute::NonNull)
3698	continue;
3699
3700	// Drop assume if we can prove nonnull without it
3701	if (isKnownNonZero(V: RK.WasOn, Q: getSimplifyQuery().getWithInstruction(I: II)))
3702	return CallBase::removeOperandBundle(CB: II, ID: OBU.getTagID());
3703
3704	// Fold the assume into metadata if it's valid at the load
3705	if (auto *LI = dyn_cast<LoadInst>(Val: RK.WasOn);
3706	LI &&
3707	isValidAssumeForContext(I: II, CxtI: LI, DT: &DT, /AllowEphemerals=/true)) {
3708	MDNode *MD = MDNode::get(Context&: II->getContext(), MDs: {});
3709	LI->setMetadata(KindID: LLVMContext::MD_nonnull, Node: MD);
3710	LI->setMetadata(KindID: LLVMContext::MD_noundef, Node: MD);
3711	return CallBase::removeOperandBundle(CB: II, ID: OBU.getTagID());
3712	}
3713
3714	// TODO: apply nonnull return attributes to calls and invokes
3715	}
3716	}
3717
3718	// Convert nonnull assume like:
3719	// %A = icmp ne i32* %PTR, null
3720	// call void @llvm.assume(i1 %A)
3721	// into
3722	// call void @llvm.assume(i1 true) [ "nonnull"(i32* %PTR) ]
3723	if (match(V: IIOperand,
3724	P: m_SpecificICmp(MatchPred: ICmpInst::ICMP_NE, L: m_Value(V&: A), R: m_Zero())) &&
3725	A->getType()->isPointerTy()) {
3726	if (auto *Replacement = buildAssumeFromKnowledge(
3727	Knowledge: {RetainedKnowledge {Attribute::NonNull, `0`, A}}, CtxI: Next, AC: &AC, DT: &DT)) {
3728
3729	InsertNewInstBefore(New: Replacement, Old: Next->getIterator());
3730	AC.registerAssumption(CI: Replacement);
3731	return RemoveConditionFromAssume (II);
3732	}
3733	}
3734
3735	// Convert alignment assume like:
3736	// %B = ptrtoint i32* %A to i64
3737	// %C = and i64 %B, Constant
3738	// %D = icmp eq i64 %C, 0
3739	// call void @llvm.assume(i1 %D)
3740	// into
3741	// call void @llvm.assume(i1 true) [ "align"(i32* [[A]], i64 Constant + 1)]
3742	uint64_t AlignMask = `1`;
3743	if ((match(V: IIOperand, P: m_Not(V: m_Trunc(Op: m_Value(V&: A)))) \|\|
3744	match(V: IIOperand,
3745	P: m_SpecificICmp(MatchPred: ICmpInst::ICMP_EQ,
3746	L: m_And(L: m_Value(V&: A), R: m_ConstantInt(V&: AlignMask)),
3747	R: m_Zero())))) {
3748	if (isPowerOf2_64(Value: AlignMask + `1`)) {
3749	uint64_t Offset = `0`;
3750	match(V: A, P: m_Add(L: m_Value(V&: A), R: m_ConstantInt(V&: Offset)));
3751	if (match(V: A, P: m_PtrToIntOrAddr(Op: m_Value(V&: A)))) {
3752	/// Note: this doesn't preserve the offset information but merges
3753	/// offset and alignment.
3754	/// TODO: we can generate a GEP instead of merging the alignment with
3755	/// the offset.
3756	RetainedKnowledge RK{Attribute::Alignment,
3757	MinAlign(A: Offset, B: AlignMask + `1`), A};
3758	if (auto *Replacement =
3759	buildAssumeFromKnowledge(Knowledge: RK, CtxI: Next, AC: &AC, DT: &DT)) {
3760
3761	Replacement->insertAfter(InsertPos: II->getIterator());
3762	AC.registerAssumption(CI: Replacement);
3763	}
3764	return RemoveConditionFromAssume (II);
3765	}
3766	}
3767	}
3768
3769	/// Canonicalize Knowledge in operand bundles.
3770	if (EnableKnowledgeRetention && II->hasOperandBundles()) {
3771	for (unsigned Idx = `0`; Idx < II->getNumOperandBundles(); Idx++) {
3772	auto &BOI = II->bundle_op_info_begin()[Idx];
3773	RetainedKnowledge RK =
3774	llvm::getKnowledgeFromBundle(Assume&: cast<AssumeInst>(Val&: *II), BOI);
3775	if (BOI.End - BOI.Begin > `2`)
3776	continue; // Prevent reducing knowledge in an align with offset since
3777	// extracting a RetainedKnowledge from them loses offset
3778	// information
3779	RetainedKnowledge CanonRK =
3780	llvm::simplifyRetainedKnowledge(Assume: cast<AssumeInst>(Val: II), RK,
3781	AC: &getAssumptionCache(),
3782	DT: &getDominatorTree());
3783	if (CanonRK == RK)
3784	continue;
3785	if (!CanonRK) {
3786	if (BOI.End - BOI.Begin > `0`) {
3787	Worklist.pushValue(V: II->op_begin()[BOI.Begin]);
3788	Value::dropDroppableUse(U&: II->op_begin()[BOI.Begin]);
3789	}
3790	continue;
3791	}
3792	assert(RK.AttrKind == CanonRK.AttrKind);
3793	if (BOI.End - BOI.Begin > `0`)
3794	II->op_begin()[BOI.Begin].set(CanonRK.WasOn);
3795	if (BOI.End - BOI.Begin > `1`)
3796	II->op_begin()[BOI.Begin + `1`].set(ConstantInt::get(
3797	Ty: Type::getInt64Ty(C&: II->getContext()), V: CanonRK.ArgValue));
3798	if (RK.WasOn)
3799	Worklist.pushValue(V: RK.WasOn);
3800	return II;
3801	}
3802	}
3803
3804	// If there is a dominating assume with the same condition as this one,
3805	// then this one is redundant, and should be removed.
3806	KnownBits Known(`1`);
3807	computeKnownBits(V: IIOperand, Known, CxtI: II);
3808	if (Known.isAllOnes() && isAssumeWithEmptyBundle(Assume: cast<AssumeInst>(Val&: *II)))
3809	return eraseInstFromFunction(I&: *II);
3810
3811	// assume(false) is unreachable.
3812	if (match(V: IIOperand, P: m_CombineOr(L: m_Zero(), R: m_Undef()))) {
3813	CreateNonTerminatorUnreachable(InsertAt: II);
3814	return eraseInstFromFunction(I&: *II);
3815	}
3816
3817	// Update the cache of affected values for this assumption (we might be
3818	// here because we just simplified the condition).
3819	AC.updateAffectedValues(CI: cast<AssumeInst>(Val: II));
3820	break;
3821	}
3822	case Intrinsic::experimental_guard: {
3823	// Is this guard followed by another guard? We scan forward over a small
3824	// fixed window of instructions to handle common cases with conditions
3825	// computed between guards.
3826	Instruction *NextInst = II->getNextNode();
3827	for (unsigned i = `0`; i < GuardWideningWindow; i++) {
3828	// Note: Using context-free form to avoid compile time blow up
3829	if (!isSafeToSpeculativelyExecute(I: NextInst))
3830	break;
3831	NextInst = NextInst->getNextNode();
3832	}
3833	Value NextCond = nullptr*;
3834	if (match(V: NextInst,
3835	P: m_Intrinsic<Intrinsic::experimental_guard>(Op0: m_Value(V&: NextCond)))) {
3836	Value *CurrCond = II->getArgOperand(i: `0`);
3837
3838	// Remove a guard that is immediately preceded by an identical guard.
3839	// Otherwise canonicalize guard(a); guard(b) -> guard(a & b).
3840	if (CurrCond != NextCond) {
3841	Instruction *MoveI = II->getNextNode();
3842	while (MoveI != NextInst) {
3843	auto *Temp = MoveI;
3844	MoveI = MoveI->getNextNode();
3845	Temp->moveBefore(InsertPos: II->getIterator());
3846	}
3847	replaceOperand(I&: *II, OpNum: `0`, V: Builder.CreateAnd(LHS: CurrCond, RHS: NextCond));
3848	}
3849	eraseInstFromFunction(I&: *NextInst);
3850	return II;
3851	}
3852	break;
3853	}
3854	case Intrinsic::vector_insert: {
3855	Value *Vec = II->getArgOperand(i: `0`);
3856	Value *SubVec = II->getArgOperand(i: `1`);
3857	Value *Idx = II->getArgOperand(i: `2`);
3858	auto *DstTy = dyn_cast<FixedVectorType>(Val: II->getType());
3859	auto *VecTy = dyn_cast<FixedVectorType>(Val: Vec->getType());
3860	auto *SubVecTy = dyn_cast<FixedVectorType>(Val: SubVec->getType());
3861
3862	// Only canonicalize if the destination vector, Vec, and SubVec are all
3863	// fixed vectors.
3864	if (DstTy && VecTy && SubVecTy) {
3865	unsigned DstNumElts = DstTy->getNumElements();
3866	unsigned VecNumElts = VecTy->getNumElements();
3867	unsigned SubVecNumElts = SubVecTy->getNumElements();
3868	unsigned IdxN = cast<ConstantInt>(Val: Idx)->getZExtValue();
3869
3870	// An insert that entirely overwrites Vec with SubVec is a nop.
3871	if (VecNumElts == SubVecNumElts)
3872	return replaceInstUsesWith(I&: CI, V: SubVec);
3873
3874	// Widen SubVec into a vector of the same width as Vec, since
3875	// shufflevector requires the two input vectors to be the same width.
3876	// Elements beyond the bounds of SubVec within the widened vector are
3877	// undefined.
3878	SmallVector<int, `8`> WidenMask;
3879	unsigned i;
3880	for (i = `0`; i != SubVecNumElts; ++i)
3881	WidenMask.push_back(Elt: i);
3882	for (; i != VecNumElts; ++i)
3883	WidenMask.push_back(Elt: PoisonMaskElem);
3884
3885	Value *WidenShuffle = Builder.CreateShuffleVector(V: SubVec, Mask: WidenMask);
3886
3887	SmallVector<int, `8`> Mask;
3888	for (unsigned i = `0`; i != IdxN; ++i)
3889	Mask.push_back(Elt: i);
3890	for (unsigned i = DstNumElts; i != DstNumElts + SubVecNumElts; ++i)
3891	Mask.push_back(Elt: i);
3892	for (unsigned i = IdxN + SubVecNumElts; i != DstNumElts; ++i)
3893	Mask.push_back(Elt: i);
3894
3895	Value *Shuffle = Builder.CreateShuffleVector(V1: Vec, V2: WidenShuffle, Mask);
3896	return replaceInstUsesWith(I&: CI, V: Shuffle);
3897	}
3898	break;
3899	}
3900	case Intrinsic::vector_extract: {
3901	Value *Vec = II->getArgOperand(i: `0`);
3902	Value *Idx = II->getArgOperand(i: `1`);
3903
3904	Type *ReturnType = II->getType();
3905	// (extract_vector (insert_vector InsertTuple, InsertValue, InsertIdx),
3906	// ExtractIdx)
3907	unsigned ExtractIdx = cast<ConstantInt>(Val: Idx)->getZExtValue();
3908	Value InsertTuple, InsertIdx, *InsertValue;
3909	if (match(V: Vec, P: m_Intrinsic<Intrinsic::vector_insert>(Op0: m_Value(V&: InsertTuple),
3910	Op1: m_Value(V&: InsertValue),
3911	Op2: m_Value(V&: InsertIdx))) &&
3912	InsertValue->getType() == ReturnType) {
3913	unsigned Index = cast<ConstantInt>(Val: InsertIdx)->getZExtValue();
3914	// Case where we get the same index right after setting it.
3915	// extract.vector(insert.vector(InsertTuple, InsertValue, Idx), Idx) -->
3916	// InsertValue
3917	if (ExtractIdx == Index)
3918	return replaceInstUsesWith(I&: CI, V: InsertValue);
3919	// If we are getting a different index than what was set in the
3920	// insert.vector intrinsic. We can just set the input tuple to the one up
3921	// in the chain. extract.vector(insert.vector(InsertTuple, InsertValue,
3922	// InsertIndex), ExtractIndex)
3923	// --> extract.vector(InsertTuple, ExtractIndex)
3924	else
3925	return replaceOperand(I&: CI, OpNum: `0`, V: InsertTuple);
3926	}
3927
3928	ConstantInt *ALMUpperBound;
3929	if (match(V: Vec, P: m_Intrinsic<Intrinsic::get_active_lane_mask>(
3930	Op0: m_Value(), Op1: m_ConstantInt(CI&: ALMUpperBound)))) {
3931	const auto &Attrs = II->getFunction()->getAttributes().getFnAttrs();
3932	unsigned VScaleMin = Attrs.getVScaleRangeMin();
3933	unsigned ScaleFactor =
3934	cast<VectorType>(Val: ReturnType)->isScalableTy() ? VScaleMin : `1`;
3935	if (ExtractIdx * ScaleFactor >= ALMUpperBound->getZExtValue())
3936	return replaceInstUsesWith(I&: CI,
3937	V: ConstantVector::getNullValue(Ty: ReturnType));
3938	}
3939
3940	auto *DstTy = dyn_cast<VectorType>(Val: ReturnType);
3941	auto *VecTy = dyn_cast<VectorType>(Val: Vec->getType());
3942
3943	if (DstTy && VecTy) {
3944	auto DstEltCnt = DstTy->getElementCount();
3945	auto VecEltCnt = VecTy->getElementCount();
3946	unsigned IdxN = cast<ConstantInt>(Val: Idx)->getZExtValue();
3947
3948	// Extracting the entirety of Vec is a nop.
3949	if (DstEltCnt == VecTy->getElementCount()) {
3950	replaceInstUsesWith(I&: CI, V: Vec);
3951	return eraseInstFromFunction(I&: CI);
3952	}
3953
3954	// Only canonicalize to shufflevector if the destination vector and
3955	// Vec are fixed vectors.
3956	if (VecEltCnt.isScalable() \|\| DstEltCnt.isScalable())
3957	break;
3958
3959	SmallVector<int, `8`> Mask;
3960	for (unsigned i = `0`; i != DstEltCnt.getKnownMinValue(); ++i)
3961	Mask.push_back(Elt: IdxN + i);
3962
3963	Value *Shuffle = Builder.CreateShuffleVector(V: Vec, Mask);
3964	return replaceInstUsesWith(I&: CI, V: Shuffle);
3965	}
3966	break;
3967	}
3968	case Intrinsic::experimental_vp_reverse: {
3969	Value *X;
3970	Value *Vec = II->getArgOperand(i: `0`);
3971	Value *Mask = II->getArgOperand(i: `1`);
3972	if (!match(V: Mask, P: m_AllOnes()))
3973	break;
3974	Value *EVL = II->getArgOperand(i: `2`);
3975	// TODO: Canonicalize experimental.vp.reverse after unop/binops?
3976	// rev(unop rev(X)) --> unop X
3977	if (match(V: Vec,
3978	P: m_OneUse(SubPattern: m_UnOp(X: m_Intrinsic<Intrinsic::experimental_vp_reverse>(
3979	Op0: m_Value(V&: X), Op1: m_AllOnes(), Op2: m_Specific(V: EVL)))))) {
3980	auto *OldUnOp = cast<UnaryOperator>(Val: Vec);
3981	auto *NewUnOp = UnaryOperator::CreateWithCopiedFlags(
3982	Opc: OldUnOp->getOpcode(), V: X, CopyO: OldUnOp, Name: OldUnOp->getName(),
3983	InsertBefore: II->getIterator());
3984	return replaceInstUsesWith(I&: CI, V: NewUnOp);
3985	}
3986	break;
3987	}
3988	case Intrinsic::vector_reduce_or:
3989	case Intrinsic::vector_reduce_and: {
3990	// Canonicalize logical or/and reductions:
3991	// Or reduction for i1 is represented as:
3992	// %val = bitcast <ReduxWidth x i1> to iReduxWidth
3993	// %res = cmp ne iReduxWidth %val, 0
3994	// And reduction for i1 is represented as:
3995	// %val = bitcast <ReduxWidth x i1> to iReduxWidth
3996	// %res = cmp eq iReduxWidth %val, 11111
3997	Value *Arg = II->getArgOperand(i: `0`);
3998	Value *Vect;
3999
4000	if (Value *NewOp =
4001	simplifyReductionOperand(Arg, /CanReorderLanes=/true)) {
4002	replaceUse(U&: II->getOperandUse(i: `0`), NewValue: NewOp);
4003	return II;
4004	}
4005
4006	if (match(V: Arg, P: m_ZExtOrSExtOrSelf(Op: m_Value(V&: Vect)))) {
4007	if (auto *FTy = dyn_cast<FixedVectorType>(Val: Vect->getType()))
4008	if (FTy->getElementType() == Builder.getInt1Ty()) {
4009	Value *Res = Builder.CreateBitCast(
4010	V: Vect, DestTy: Builder.getIntNTy(N: FTy->getNumElements()));
4011	if (IID == Intrinsic::vector_reduce_and) {
4012	Res = Builder.CreateICmpEQ(
4013	LHS: Res, RHS: ConstantInt::getAllOnesValue(Ty: Res->getType()));
4014	} else {
4015	assert(IID == Intrinsic::vector_reduce_or &&
4016	"Expected or reduction.");
4017	Res = Builder.CreateIsNotNull(Arg: Res);
4018	}
4019	if (Arg != Vect)
4020	Res = Builder.CreateCast(Op: cast<CastInst>(Val: Arg)->getOpcode(), V: Res,
4021	DestTy: II->getType());
4022	return replaceInstUsesWith(I&: CI, V: Res);
4023	}
4024	}
4025	[[fallthrough]];
4026	}
4027	case Intrinsic::vector_reduce_add: {
4028	if (IID == Intrinsic::vector_reduce_add) {
4029	// Convert vector_reduce_add(ZExt(<n x i1>)) to
4030	// ZExtOrTrunc(ctpop(bitcast <n x i1> to in)).
4031	// Convert vector_reduce_add(SExt(<n x i1>)) to
4032	// -ZExtOrTrunc(ctpop(bitcast <n x i1> to in)).
4033	// Convert vector_reduce_add(<n x i1>) to
4034	// Trunc(ctpop(bitcast <n x i1> to in)).
4035	Value *Arg = II->getArgOperand(i: `0`);
4036	Value *Vect;
4037
4038	if (Value *NewOp =
4039	simplifyReductionOperand(Arg, /CanReorderLanes=/true)) {
4040	replaceUse(U&: II->getOperandUse(i: `0`), NewValue: NewOp);
4041	return II;
4042	}
4043
4044	// vector.reduce.add.vNiM(splat(%x)) -> mul(%x, N)
4045	if (Value *Splat = getSplatValue(V: Arg)) {
4046	ElementCount VecToReduceCount =
4047	cast<VectorType>(Val: Arg->getType())->getElementCount();
4048	if (VecToReduceCount.isFixed()) {
4049	unsigned VectorSize = VecToReduceCount.getFixedValue();
4050	return BinaryOperator::CreateMul(
4051	V1: Splat,
4052	V2: ConstantInt::get(Ty: Splat->getType(), V: VectorSize, /IsSigned=/false,
4053	/ImplicitTrunc=/true));
4054	}
4055	}
4056
4057	if (match(V: Arg, P: m_ZExtOrSExtOrSelf(Op: m_Value(V&: Vect)))) {
4058	if (auto *FTy = dyn_cast<FixedVectorType>(Val: Vect->getType()))
4059	if (FTy->getElementType() == Builder.getInt1Ty()) {
4060	Value *V = Builder.CreateBitCast(
4061	V: Vect, DestTy: Builder.getIntNTy(N: FTy->getNumElements()));
4062	Value *Res = Builder.CreateUnaryIntrinsic(ID: Intrinsic::ctpop, V);
4063	Res = Builder.CreateZExtOrTrunc(V: Res, DestTy: II->getType());
4064	if (Arg != Vect &&
4065	cast<Instruction>(Val: Arg)->getOpcode() == Instruction::SExt)
4066	Res = Builder.CreateNeg(V: Res);
4067	return replaceInstUsesWith(I&: CI, V: Res);
4068	}
4069	}
4070	}
4071	[[fallthrough]];
4072	}
4073	case Intrinsic::vector_reduce_xor: {
4074	if (IID == Intrinsic::vector_reduce_xor) {
4075	// Exclusive disjunction reduction over the vector with
4076	// (potentially-extended) i1 element type is actually a
4077	// (potentially-extended) arithmetic `add` reduction over the original
4078	// non-extended value:
4079	// vector_reduce_xor(?ext(<n x i1>))
4080	// -->
4081	// ?ext(vector_reduce_add(<n x i1>))
4082	Value *Arg = II->getArgOperand(i: `0`);
4083	Value *Vect;
4084
4085	if (Value *NewOp =
4086	simplifyReductionOperand(Arg, /CanReorderLanes=/true)) {
4087	replaceUse(U&: II->getOperandUse(i: `0`), NewValue: NewOp);
4088	return II;
4089	}
4090
4091	if (match(V: Arg, P: m_ZExtOrSExtOrSelf(Op: m_Value(V&: Vect)))) {
4092	if (auto *VTy = dyn_cast<VectorType>(Val: Vect->getType()))
4093	if (VTy->getElementType() == Builder.getInt1Ty()) {
4094	Value *Res = Builder.CreateAddReduce(Src: Vect);
4095	if (Arg != Vect)
4096	Res = Builder.CreateCast(Op: cast<CastInst>(Val: Arg)->getOpcode(), V: Res,
4097	DestTy: II->getType());
4098	return replaceInstUsesWith(I&: CI, V: Res);
4099	}
4100	}
4101	}
4102	[[fallthrough]];
4103	}
4104	case Intrinsic::vector_reduce_mul: {
4105	if (IID == Intrinsic::vector_reduce_mul) {
4106	// Multiplicative reduction over the vector with (potentially-extended)
4107	// i1 element type is actually a (potentially zero-extended)
4108	// logical `and` reduction over the original non-extended value:
4109	// vector_reduce_mul(?ext(<n x i1>))
4110	// -->
4111	// zext(vector_reduce_and(<n x i1>))
4112	Value *Arg = II->getArgOperand(i: `0`);
4113	Value *Vect;
4114
4115	if (Value *NewOp =
4116	simplifyReductionOperand(Arg, /CanReorderLanes=/true)) {
4117	replaceUse(U&: II->getOperandUse(i: `0`), NewValue: NewOp);
4118	return II;
4119	}
4120
4121	if (match(V: Arg, P: m_ZExtOrSExtOrSelf(Op: m_Value(V&: Vect)))) {
4122	if (auto *VTy = dyn_cast<VectorType>(Val: Vect->getType()))
4123	if (VTy->getElementType() == Builder.getInt1Ty()) {
4124	Value *Res = Builder.CreateAndReduce(Src: Vect);
4125	Res = Builder.CreateZExt(V: Res, DestTy: II->getType());
4126	return replaceInstUsesWith(I&: CI, V: Res);
4127	}
4128	}
4129	}
4130	[[fallthrough]];
4131	}
4132	case Intrinsic::vector_reduce_umin:
4133	case Intrinsic::vector_reduce_umax: {
4134	if (IID == Intrinsic::vector_reduce_umin \|\|
4135	IID == Intrinsic::vector_reduce_umax) {
4136	// UMin/UMax reduction over the vector with (potentially-extended)
4137	// i1 element type is actually a (potentially-extended)
4138	// logical `and`/`or` reduction over the original non-extended value:
4139	// vector_reduce_u{min,max}(?ext(<n x i1>))
4140	// -->
4141	// ?ext(vector_reduce_{and,or}(<n x i1>))
4142	Value *Arg = II->getArgOperand(i: `0`);
4143	Value *Vect;
4144
4145	if (Value *NewOp =
4146	simplifyReductionOperand(Arg, /CanReorderLanes=/true)) {
4147	replaceUse(U&: II->getOperandUse(i: `0`), NewValue: NewOp);
4148	return II;
4149	}
4150
4151	if (match(V: Arg, P: m_ZExtOrSExtOrSelf(Op: m_Value(V&: Vect)))) {
4152	if (auto *VTy = dyn_cast<VectorType>(Val: Vect->getType()))
4153	if (VTy->getElementType() == Builder.getInt1Ty()) {
4154	Value *Res = IID == Intrinsic::vector_reduce_umin
4155	? Builder.CreateAndReduce(Src: Vect)
4156	: Builder.CreateOrReduce(Src: Vect);
4157	if (Arg != Vect)
4158	Res = Builder.CreateCast(Op: cast<CastInst>(Val: Arg)->getOpcode(), V: Res,
4159	DestTy: II->getType());
4160	return replaceInstUsesWith(I&: CI, V: Res);
4161	}
4162	}
4163	}
4164	[[fallthrough]];
4165	}
4166	case Intrinsic::vector_reduce_smin:
4167	case Intrinsic::vector_reduce_smax: {
4168	if (IID == Intrinsic::vector_reduce_smin \|\|
4169	IID == Intrinsic::vector_reduce_smax) {
4170	// SMin/SMax reduction over the vector with (potentially-extended)
4171	// i1 element type is actually a (potentially-extended)
4172	// logical `and`/`or` reduction over the original non-extended value:
4173	// vector_reduce_s{min,max}(<n x i1>)
4174	// -->
4175	// vector_reduce_{or,and}(<n x i1>)
4176	// and
4177	// vector_reduce_s{min,max}(sext(<n x i1>))
4178	// -->
4179	// sext(vector_reduce_{or,and}(<n x i1>))
4180	// and
4181	// vector_reduce_s{min,max}(zext(<n x i1>))
4182	// -->
4183	// zext(vector_reduce_{and,or}(<n x i1>))
4184	Value *Arg = II->getArgOperand(i: `0`);
4185	Value *Vect;
4186
4187	if (Value *NewOp =
4188	simplifyReductionOperand(Arg, /CanReorderLanes=/true)) {
4189	replaceUse(U&: II->getOperandUse(i: `0`), NewValue: NewOp);
4190	return II;
4191	}
4192
4193	if (match(V: Arg, P: m_ZExtOrSExtOrSelf(Op: m_Value(V&: Vect)))) {
4194	if (auto *VTy = dyn_cast<VectorType>(Val: Vect->getType()))
4195	if (VTy->getElementType() == Builder.getInt1Ty()) {
4196	Instruction::CastOps ExtOpc = Instruction::CastOps::CastOpsEnd;
4197	if (Arg != Vect)
4198	ExtOpc = cast<CastInst>(Val: Arg)->getOpcode();
4199	Value *Res = ((IID == Intrinsic::vector_reduce_smin) ==
4200	(ExtOpc == Instruction::CastOps::ZExt))
4201	? Builder.CreateAndReduce(Src: Vect)
4202	: Builder.CreateOrReduce(Src: Vect);
4203	if (Arg != Vect)
4204	Res = Builder.CreateCast(Op: ExtOpc, V: Res, DestTy: II->getType());
4205	return replaceInstUsesWith(I&: CI, V: Res);
4206	}
4207	}
4208	}
4209	[[fallthrough]];
4210	}
4211	case Intrinsic::vector_reduce_fmax:
4212	case Intrinsic::vector_reduce_fmin:
4213	case Intrinsic::vector_reduce_fadd:
4214	case Intrinsic::vector_reduce_fmul: {
4215	bool CanReorderLanes = (IID != Intrinsic::vector_reduce_fadd &&
4216	IID != Intrinsic::vector_reduce_fmul) \|\|
4217	II->hasAllowReassoc();
4218	const unsigned ArgIdx = (IID == Intrinsic::vector_reduce_fadd \|\|
4219	IID == Intrinsic::vector_reduce_fmul)
4220	? `1`
4221	: `0`;
4222	Value *Arg = II->getArgOperand(i: ArgIdx);
4223	if (Value *NewOp = simplifyReductionOperand(Arg, CanReorderLanes)) {
4224	replaceUse(U&: II->getOperandUse(i: ArgIdx), NewValue: NewOp);
4225	return nullptr;
4226	}
4227	break;
4228	}
4229	case Intrinsic::is_fpclass: {
4230	if (Instruction I = foldIntrinsicIsFPClass(II&: II))
4231	return I;
4232	break;
4233	}
4234	case Intrinsic::threadlocal_address: {
4235	Align MinAlign = getKnownAlignment(V: II->getArgOperand(i: `0`), DL, CxtI: II, AC: &AC, DT: &DT);
4236	MaybeAlign Align = II->getRetAlign();
4237	if (MinAlign > Align.valueOrOne()) {
4238	II->addRetAttr(Attr: Attribute::getWithAlignment(Context&: II->getContext(), Alignment: MinAlign));
4239	return II;
4240	}
4241	break;
4242	}
4243	case Intrinsic::frexp: {
4244	Value *X;
4245	// The first result is idempotent with the added complication of the struct
4246	// return, and the second result is zero because the value is already
4247	// normalized.
4248	if (match(V: II->getArgOperand(i: `0`), P: m_ExtractValue<`0`>(V: m_Value(V&: X)))) {
4249	if (match(V: X, P: m_Intrinsic<Intrinsic::frexp>(Op0: m_Value()))) {
4250	X = Builder.CreateInsertValue(
4251	Agg: X, Val: Constant::getNullValue(Ty: II->getType()->getStructElementType(N: `1`)),
4252	Idxs: `1`);
4253	return replaceInstUsesWith(I&: *II, V: X);
4254	}
4255	}
4256	break;
4257	}
4258	case Intrinsic::get_active_lane_mask: {
4259	const APInt Op0, Op1;
4260	if (match(V: II->getOperand(i_nocapture: `0`), P: m_StrictlyPositive(V&: Op0)) &&
4261	match(V: II->getOperand(i_nocapture: `1`), P: m_APInt(Res&: Op1))) {
4262	Type *OpTy = II->getOperand(i_nocapture: `0`)->getType();
4263	return replaceInstUsesWith(
4264	I&: *II, V: Builder.CreateIntrinsic(
4265	RetTy: II->getType(), ID: Intrinsic::get_active_lane_mask,
4266	Args: {Constant::getNullValue(Ty: OpTy),
4267	ConstantInt::get(Ty: OpTy, V: Op1->usub_sat(RHS: *Op0))}));
4268	}
4269	break;
4270	}
4271	case Intrinsic::experimental_get_vector_length: {
4272	// get.vector.length(Cnt, MaxLanes) --> Cnt when Cnt <= MaxLanes
4273	unsigned BitWidth =
4274	std::max(a: II->getArgOperand(i: `0`)->getType()->getScalarSizeInBits(),
4275	b: II->getType()->getScalarSizeInBits());
4276	ConstantRange Cnt =
4277	computeConstantRangeIncludingKnownBits(V: II->getArgOperand(i: `0`), ForSigned: false,
4278	SQ: SQ.getWithInstruction(I: II))
4279	.zextOrTrunc(BitWidth);
4280	ConstantRange MaxLanes = cast<ConstantInt>(Val: II->getArgOperand(i: `1`))
4281	->getValue()
4282	.zextOrTrunc(width: Cnt.getBitWidth());
4283	if (cast<ConstantInt>(Val: II->getArgOperand(i: `2`))->isOne())
4284	MaxLanes = MaxLanes.multiply(
4285	Other: getVScaleRange(F: II->getFunction(), BitWidth: Cnt.getBitWidth()));
4286
4287	if (Cnt.icmp(Pred: CmpInst::ICMP_ULE, Other: MaxLanes))
4288	return replaceInstUsesWith(
4289	I&: *II, V: Builder.CreateZExtOrTrunc(V: II->getArgOperand(i: `0`), DestTy: II->getType()));
4290	return nullptr;
4291	}
4292	default: {
4293	// Handle target specific intrinsics
4294	std::optional<Instruction > V = targetInstCombineIntrinsic(II&: II);
4295	if (V)
4296	return *V;
4297	break;
4298	}
4299	}
4300
4301	// Try to fold intrinsic into select/phi operands. This is legal if:
4302	// The intrinsic is speculatable.*
4303	// The operand is one of the following:*
4304	// - a phi.
4305	// - a select with a scalar condition.
4306	// - a select with a vector condition and II is not a cross lane operation.
4307	if (isSafeToSpeculativelyExecuteWithVariableReplaced(I: &CI)) {
4308	for (Value *Op : II->args()) {
4309	if (auto *Sel = dyn_cast<SelectInst>(Val: Op)) {
4310	bool IsVectorCond = Sel->getCondition()->getType()->isVectorTy();
4311	if (IsVectorCond &&
4312	(!isNotCrossLaneOperation(I: II) \|\| !II->getType()->isVectorTy()))
4313	continue;
4314	// Don't replace a scalar select with a more expensive vector select if
4315	// we can't simplify both arms of the select.
4316	bool SimplifyBothArms =
4317	!Op->getType()->isVectorTy() && II->getType()->isVectorTy();
4318	if (Instruction *R = FoldOpIntoSelect(
4319	Op&: II, SI: Sel, /FoldWithMultiUse=/*false, SimplifyBothArms))
4320	return R;
4321	}
4322	if (auto *Phi = dyn_cast<PHINode>(Val: Op))
4323	if (Instruction R = foldOpIntoPhi(I&: II, PN: Phi))
4324	return R;
4325	}
4326	}
4327
4328	if (Instruction *Shuf = foldShuffledIntrinsicOperands(II))
4329	return Shuf;
4330
4331	if (Value *Reverse = foldReversedIntrinsicOperands(II))
4332	return replaceInstUsesWith(I&: *II, V: Reverse);
4333
4334	if (Value Res = foldIdempotentBinaryIntrinsicRecurrence(IC&: this, II))
4335	return replaceInstUsesWith(I&: *II, V: Res);
4336
4337	// Some intrinsics (like experimental_gc_statepoint) can be used in invoke
4338	// context, so it is handled in visitCallBase and we should trigger it.
4339	return visitCallBase(Call&: *II);
4340	}
4341
4342	// Fence instruction simplification
4343	Instruction *InstCombinerImpl::visitFenceInst(FenceInst &FI) {
4344	auto *NFI = dyn_cast<FenceInst>(Val: FI.getNextNode());
4345	// This check is solely here to handle arbitrary target-dependent syncscopes.
4346	// TODO: Can remove if does not matter in practice.
4347	if (NFI && FI.isIdenticalTo(I: NFI))
4348	return eraseInstFromFunction(I&: FI);
4349
4350	// Returns true if FI1 is identical or stronger fence than FI2.
4351	auto isIdenticalOrStrongerFence = [](FenceInst FI1, FenceInst FI2) {
4352	auto FI1SyncScope = FI1->getSyncScopeID();
4353	// Consider same scope, where scope is global or single-thread.
4354	if (FI1SyncScope != FI2->getSyncScopeID() \|\|
4355	(FI1SyncScope != SyncScope::System &&
4356	FI1SyncScope != SyncScope::SingleThread))
4357	return false;
4358
4359	return isAtLeastOrStrongerThan(AO: FI1->getOrdering(), Other: FI2->getOrdering());
4360	};
4361	if (NFI && isIdenticalOrStrongerFence (NFI, &FI))
4362	return eraseInstFromFunction(I&: FI);
4363
4364	if (auto *PFI = dyn_cast_or_null<FenceInst>(Val: FI.getPrevNode()))
4365	if (isIdenticalOrStrongerFence (PFI, &FI))
4366	return eraseInstFromFunction(I&: FI);
4367	return nullptr;
4368	}
4369
4370	// InvokeInst simplification
4371	Instruction *InstCombinerImpl::visitInvokeInst(InvokeInst &II) {
4372	return visitCallBase(Call&: II);
4373	}
4374
4375	// CallBrInst simplification
4376	Instruction *InstCombinerImpl::visitCallBrInst(CallBrInst &CBI) {
4377	return visitCallBase(Call&: CBI);
4378	}
4379
4380	static Value optimizeModularFormat(CallInst CI, IRBuilderBase &B) {
4381	if (!CI->hasFnAttr(Kind: "modular-format"))
4382	return nullptr;
4383
4384	SmallVector<StringRef> Args(
4385	llvm::split(Str: CI->getFnAttr(Kind: "modular-format").getValueAsString(), Separator: `','`));
4386	// TODO: Make use of the first two arguments
4387	unsigned FirstArgIdx;
4388	[[maybe_unused]] bool Error;
4389	Error = Args [`2`].getAsInteger(Radix: `10`, Result&: FirstArgIdx);
4390	assert(!Error && "invalid first arg index");
4391	--FirstArgIdx;
4392	StringRef FnName = Args [`3`];
4393	StringRef ImplName = Args [`4`];
4394	ArrayRef<StringRef> AllAspects = ArrayRef<StringRef>(Args).drop_front(N: `5`);
4395
4396	if (AllAspects.empty())
4397	return nullptr;
4398
4399	SmallVector<StringRef> NeededAspects;
4400	for (StringRef Aspect : AllAspects) {
4401	if (Aspect == "float") {
4402	if (llvm::any_of(
4403	Range: llvm::make_range(x: std::next(x: CI->arg_begin(), n: FirstArgIdx),
4404	y: CI->arg_end()),
4405	P: [](Value V) { return* V->getType()->isFloatingPointTy(); }))
4406	NeededAspects.push_back(Elt: "float");
4407	} else {
4408	// Unknown aspects are always considered to be needed.
4409	NeededAspects.push_back(Elt: Aspect);
4410	}
4411	}
4412
4413	if (NeededAspects.size() == AllAspects.size())
4414	return nullptr;
4415
4416	Module *M = CI->getModule();
4417	LLVMContext &Ctx = M->getContext();
4418	Function *Callee = CI->getCalledFunction();
4419	FunctionCallee ModularFn = M->getOrInsertFunction(
4420	Name: FnName, T: Callee->getFunctionType(),
4421	AttributeList: Callee->getAttributes().removeFnAttribute(C&: Ctx, Kind: "modular-format"));
4422	CallInst *New = cast<CallInst>(Val: CI->clone());
4423	New->setCalledFunction(ModularFn);
4424	New->removeFnAttr(Kind: "modular-format");
4425	B.Insert(I: New);
4426
4427	const auto ReferenceAspect = [&](StringRef Aspect) {
4428	SmallString<`20`> Name = ImplName;
4429	Name += `'_'`;
4430	Name += Aspect;
4431	Function *RelocNoneFn =
4432	Intrinsic::getOrInsertDeclaration(M, id: Intrinsic::reloc_none);
4433	B.CreateCall(Callee: RelocNoneFn,
4434	Args: {MetadataAsValue::get(Context&: Ctx, MD: MDString::get(Context&: Ctx, Str: Name))});
4435	};
4436
4437	llvm::sort(C&: NeededAspects);
4438	for (StringRef Request : NeededAspects)
4439	ReferenceAspect (Request);
4440
4441	return New;
4442	}
4443
4444	Instruction InstCombinerImpl::tryOptimizeCall(CallInst CI) {
4445	if (!CI->getCalledFunction()) return nullptr;
4446
4447	// Skip optimizing notail and musttail calls so
4448	// LibCallSimplifier::optimizeCall doesn't have to preserve those invariants.
4449	// LibCallSimplifier::optimizeCall should try to preserve tail calls though.
4450	if (CI->isMustTailCall() \|\| CI->isNoTailCall())
4451	return nullptr;
4452
4453	auto InstCombineRAUW = [this](Instruction From, Value With) {
4454	replaceInstUsesWith(I&: *From, V: With);
4455	};
4456	auto InstCombineErase = [this](Instruction *I) {
4457	eraseInstFromFunction(I&: *I);
4458	};
4459	LibCallSimplifier Simplifier(DL, &TLI, &DT, &DC, &AC, ORE, BFI, PSI,
4460	InstCombineRAUW, InstCombineErase);
4461	if (Value *With = Simplifier.optimizeCall(CI, B&: Builder)) {
4462	++NumSimplified;
4463	return CI->use_empty() ? CI : replaceInstUsesWith(I&: *CI, V: With);
4464	}
4465	if (Value *With = optimizeModularFormat(CI, B&: Builder)) {
4466	++NumSimplified;
4467	return CI->use_empty() ? CI : replaceInstUsesWith(I&: *CI, V: With);
4468	}
4469
4470	return nullptr;
4471	}
4472
4473	static IntrinsicInst findInitTrampolineFromAlloca(Value TrampMem) {
4474	// Strip off at most one level of pointer casts, looking for an alloca. This
4475	// is good enough in practice and simpler than handling any number of casts.
4476	Value *Underlying = TrampMem->stripPointerCasts();
4477	if (Underlying != TrampMem &&
4478	(!Underlying->hasOneUse() \|\| Underlying->user_back() != TrampMem))
4479	return nullptr;
4480	if (!isa<AllocaInst>(Val: Underlying))
4481	return nullptr;
4482
4483	IntrinsicInst InitTrampoline = nullptr*;
4484	for (User *U : TrampMem->users()) {
4485	IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: U);
4486	if (!II)
4487	return nullptr;
4488	if (II->getIntrinsicID() == Intrinsic::init_trampoline) {
4489	if (InitTrampoline)
4490	// More than one init_trampoline writes to this value. Give up.
4491	return nullptr;
4492	InitTrampoline = II;
4493	continue;
4494	}
4495	if (II->getIntrinsicID() == Intrinsic::adjust_trampoline)
4496	// Allow any number of calls to adjust.trampoline.
4497	continue;
4498	return nullptr;
4499	}
4500
4501	// No call to init.trampoline found.
4502	if (!InitTrampoline)
4503	return nullptr;
4504
4505	// Check that the alloca is being used in the expected way.
4506	if (InitTrampoline->getOperand(i_nocapture: `0`) != TrampMem)
4507	return nullptr;
4508
4509	return InitTrampoline;
4510	}
4511
4512	static IntrinsicInst findInitTrampolineFromBB(IntrinsicInst AdjustTramp,
4513	Value *TrampMem) {
4514	// Visit all the previous instructions in the basic block, and try to find a
4515	// init.trampoline which has a direct path to the adjust.trampoline.
4516	for (BasicBlock::iterator I = AdjustTramp->getIterator(),
4517	E = AdjustTramp->getParent()->begin();
4518	I != E;) {
4519	Instruction Inst = &--I;
4520	if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val&: I))
4521	if (II->getIntrinsicID() == Intrinsic::init_trampoline &&
4522	II->getOperand(i_nocapture: `0`) == TrampMem)
4523	return II;
4524	if (Inst->mayWriteToMemory())
4525	return nullptr;
4526	}
4527	return nullptr;
4528	}
4529
4530	// Given a call to llvm.adjust.trampoline, find and return the corresponding
4531	// call to llvm.init.trampoline if the call to the trampoline can be optimized
4532	// to a direct call to a function. Otherwise return NULL.
4533	static IntrinsicInst findInitTrampoline(Value Callee) {
4534	Callee = Callee->stripPointerCasts();
4535	IntrinsicInst *AdjustTramp = dyn_cast<IntrinsicInst>(Val: Callee);
4536	if (!AdjustTramp \|\|
4537	AdjustTramp->getIntrinsicID() != Intrinsic::adjust_trampoline)
4538	return nullptr;
4539
4540	Value *TrampMem = AdjustTramp->getOperand(i_nocapture: `0`);
4541
4542	if (IntrinsicInst *IT = findInitTrampolineFromAlloca(TrampMem))
4543	return IT;
4544	if (IntrinsicInst *IT = findInitTrampolineFromBB(AdjustTramp, TrampMem))
4545	return IT;
4546	return nullptr;
4547	}
4548
4549	Instruction *InstCombinerImpl::foldPtrAuthIntrinsicCallee(CallBase &Call) {
4550	const Value *Callee = Call.getCalledOperand();
4551	const auto *IPC = dyn_cast<IntToPtrInst>(Val: Callee);
4552	if (!IPC \|\| !IPC->isNoopCast(DL))
4553	return nullptr;
4554
4555	const auto *II = dyn_cast<IntrinsicInst>(Val: IPC->getOperand(i_nocapture: `0`));
4556	if (!II)
4557	return nullptr;
4558
4559	Intrinsic::ID IIID = II->getIntrinsicID();
4560	if (IIID != Intrinsic::ptrauth_resign && IIID != Intrinsic::ptrauth_sign)
4561	return nullptr;
4562
4563	// Isolate the ptrauth bundle from the others.
4564	std::optional<OperandBundleUse> PtrAuthBundleOrNone;
4565	SmallVector<OperandBundleDef, `2`> NewBundles;
4566	for (unsigned BI = `0`, BE = Call.getNumOperandBundles(); BI != BE; ++BI) {
4567	OperandBundleUse Bundle = Call.getOperandBundleAt(Index: BI);
4568	if (Bundle.getTagID() == LLVMContext::OB_ptrauth)
4569	PtrAuthBundleOrNone = Bundle;
4570	else
4571	NewBundles.emplace_back(Args&: Bundle);
4572	}
4573
4574	if (!PtrAuthBundleOrNone)
4575	return nullptr;
4576
4577	Value NewCallee = nullptr*;
4578	switch (IIID) {
4579	// call(ptrauth.resign(p)), ["ptrauth"()] -> call p, ["ptrauth"()]
4580	// assuming the call bundle and the sign operands match.
4581	case Intrinsic::ptrauth_resign: {
4582	// Resign result key should match bundle.
4583	if (II->getOperand(i_nocapture: `3`) != PtrAuthBundleOrNone ->Inputs [`0`])
4584	return nullptr;
4585	// Resign result discriminator should match bundle.
4586	if (II->getOperand(i_nocapture: `4`) != PtrAuthBundleOrNone ->Inputs [`1`])
4587	return nullptr;
4588
4589	// Resign input (auth) key should also match: we can't change the key on
4590	// the new call we're generating, because we don't know what keys are valid.
4591	if (II->getOperand(i_nocapture: `1`) != PtrAuthBundleOrNone ->Inputs [`0`])
4592	return nullptr;
4593
4594	Value *NewBundleOps[] = {II->getOperand(i_nocapture: `1`), II->getOperand(i_nocapture: `2`)};
4595	NewBundles.emplace_back(Args: "ptrauth", Args&: NewBundleOps);
4596	NewCallee = II->getOperand(i_nocapture: `0`);
4597	break;
4598	}
4599
4600	// call(ptrauth.sign(p)), ["ptrauth"()] -> call p
4601	// assuming the call bundle and the sign operands match.
4602	// Non-ptrauth indirect calls are undesirable, but so is ptrauth.sign.
4603	case Intrinsic::ptrauth_sign: {
4604	// Sign key should match bundle.
4605	if (II->getOperand(i_nocapture: `1`) != PtrAuthBundleOrNone ->Inputs [`0`])
4606	return nullptr;
4607	// Sign discriminator should match bundle.
4608	if (II->getOperand(i_nocapture: `2`) != PtrAuthBundleOrNone ->Inputs [`1`])
4609	return nullptr;
4610	NewCallee = II->getOperand(i_nocapture: `0`);
4611	break;
4612	}
4613	default:
4614	llvm_unreachable("unexpected intrinsic ID");
4615	}
4616
4617	if (!NewCallee)
4618	return nullptr;
4619
4620	NewCallee = Builder.CreateBitOrPointerCast(V: NewCallee, DestTy: Callee->getType());
4621	CallBase *NewCall = CallBase::Create(CB: &Call, Bundles: NewBundles);
4622	NewCall->setCalledOperand(NewCallee);
4623	return NewCall;
4624	}
4625
4626	Instruction *InstCombinerImpl::foldPtrAuthConstantCallee(CallBase &Call) {
4627	auto *CPA = dyn_cast<ConstantPtrAuth>(Val: Call.getCalledOperand());
4628	if (!CPA)
4629	return nullptr;
4630
4631	auto *CalleeF = dyn_cast<Function>(Val: CPA->getPointer());
4632	// If the ptrauth constant isn't based on a function pointer, bail out.
4633	if (!CalleeF)
4634	return nullptr;
4635
4636	// Inspect the call ptrauth bundle to check it matches the ptrauth constant.
4637	auto PAB = Call.getOperandBundle(ID: LLVMContext::OB_ptrauth);
4638	if (!PAB)
4639	return nullptr;
4640
4641	auto *Key = cast<ConstantInt>(Val: PAB ->Inputs [`0`]);
4642	Value *Discriminator = PAB ->Inputs [`1`];
4643
4644	// If the bundle doesn't match, this is probably going to fail to auth.
4645	if (!CPA->isKnownCompatibleWith(Key, Discriminator, DL))
4646	return nullptr;
4647
4648	// If the bundle matches the constant, proceed in making this a direct call.
4649	auto *NewCall = CallBase::removeOperandBundle(CB: &Call, ID: LLVMContext::OB_ptrauth);
4650	NewCall->setCalledOperand(CalleeF);
4651	return NewCall;
4652	}
4653
4654	bool InstCombinerImpl::annotateAnyAllocSite(CallBase &Call,
4655	const TargetLibraryInfo *TLI) {
4656	// Note: We only handle cases which can't be driven from generic attributes
4657	// here. So, for example, nonnull and noalias (which are common properties
4658	// of some allocation functions) are expected to be handled via annotation
4659	// of the respective allocator declaration with generic attributes.
4660	bool Changed = false;
4661
4662	if (!Call.getType()->isPointerTy())
4663	return Changed;
4664
4665	std::optional<APInt> Size = getAllocSize(CB: &Call, TLI);
4666	if (Size && *Size != `0`) {
4667	// TODO: We really should just emit deref_or_null here and then
4668	// let the generic inference code combine that with nonnull.
4669	if (Call.hasRetAttr(Kind: Attribute::NonNull)) {
4670	Changed = !Call.hasRetAttr(Kind: Attribute::Dereferenceable);
4671	Call.addRetAttr(Attr: Attribute::getWithDereferenceableBytes(
4672	Context&: Call.getContext(), Bytes: Size ->getLimitedValue()));
4673	} else {
4674	Changed = !Call.hasRetAttr(Kind: Attribute::DereferenceableOrNull);
4675	Call.addRetAttr(Attr: Attribute::getWithDereferenceableOrNullBytes(
4676	Context&: Call.getContext(), Bytes: Size ->getLimitedValue()));
4677	}
4678	}
4679
4680	// Add alignment attribute if alignment is a power of two constant.
4681	Value *Alignment = getAllocAlignment(V: &Call, TLI);
4682	if (!Alignment)
4683	return Changed;
4684
4685	ConstantInt *AlignOpC = dyn_cast<ConstantInt>(Val: Alignment);
4686	if (AlignOpC && AlignOpC->getValue().ult(RHS: llvm::Value::MaximumAlignment)) {
4687	uint64_t AlignmentVal = AlignOpC->getZExtValue();
4688	if (llvm::isPowerOf2_64(Value: AlignmentVal)) {
4689	Align ExistingAlign = Call.getRetAlign().valueOrOne();
4690	Align NewAlign = Align (AlignmentVal);
4691	if (NewAlign > ExistingAlign) {
4692	Call.addRetAttr(
4693	Attr: Attribute::getWithAlignment(Context&: Call.getContext(), Alignment: NewAlign));
4694	Changed = true;
4695	}
4696	}
4697	}
4698	return Changed;
4699	}
4700
4701	/// Improvements for call, callbr and invoke instructions.
4702	Instruction *InstCombinerImpl::visitCallBase(CallBase &Call) {
4703	bool Changed = annotateAnyAllocSite(Call, TLI: &TLI);
4704
4705	// Mark any parameters that are known to be non-null with the nonnull
4706	// attribute. This is helpful for inlining calls to functions with null
4707	// checks on their arguments.
4708	SmallVector<unsigned, `4`> ArgNos;
4709	unsigned ArgNo = `0`;
4710
4711	for (Value *V : Call.args()) {
4712	if (V->getType()->isPointerTy()) {
4713	// Simplify the nonnull operand if the parameter is known to be nonnull.
4714	// Otherwise, try to infer nonnull for it.
4715	bool HasDereferenceable = Call.getParamDereferenceableBytes(i: ArgNo) > `0`;
4716	if (Call.paramHasAttr(ArgNo, Kind: Attribute::NonNull) \|\|
4717	(HasDereferenceable &&
4718	!NullPointerIsDefined(F: Call.getFunction(),
4719	AS: V->getType()->getPointerAddressSpace()))) {
4720	if (Value *Res = simplifyNonNullOperand(V, HasDereferenceable)) {
4721	replaceOperand(I&: Call, OpNum: ArgNo, V: Res);
4722	Changed = true;
4723	}
4724	} else if (isKnownNonZero(V,
4725	Q: getSimplifyQuery().getWithInstruction(I: &Call))) {
4726	ArgNos.push_back(Elt: ArgNo);
4727	}
4728	}
4729	ArgNo++;
4730	}
4731
4732	assert(ArgNo == Call.arg_size() && "Call arguments not processed correctly.");
4733
4734	if (!ArgNos.empty()) {
4735	AttributeList AS = Call.getAttributes();
4736	LLVMContext &Ctx = Call.getContext();
4737	AS = AS.addParamAttribute(C&: Ctx, ArgNos,
4738	A: Attribute::get(Context&: Ctx, Kind: Attribute::NonNull));
4739	Call.setAttributes(AS);
4740	Changed = true;
4741	}
4742
4743	// If the callee is a pointer to a function, attempt to move any casts to the
4744	// arguments of the call/callbr/invoke.
4745	Value *Callee = Call.getCalledOperand();
4746	Function *CalleeF = dyn_cast<Function>(Val: Callee);
4747	if ((!CalleeF \|\| CalleeF->getFunctionType() != Call.getFunctionType()) &&
4748	transformConstExprCastCall(Call))
4749	return nullptr;
4750
4751	if (CalleeF) {
4752	// Remove the convergent attr on calls when the callee is not convergent.
4753	if (Call.isConvergent() && !CalleeF->isConvergent() &&
4754	!CalleeF->isIntrinsic()) {
4755	LLVM_DEBUG(dbgs() << "Removing convergent attr from instr " << Call
4756	<< "\n");
4757	Call.setNotConvergent();
4758	return &Call;
4759	}
4760
4761	// If the call and callee calling conventions don't match, and neither one
4762	// of the calling conventions is compatible with C calling convention
4763	// this call must be unreachable, as the call is undefined.
4764	if ((CalleeF->getCallingConv() != Call.getCallingConv() &&
4765	!(CalleeF->getCallingConv() == llvm::CallingConv::C &&
4766	TargetLibraryInfoImpl::isCallingConvCCompatible(CI: &Call)) &&
4767	!(Call.getCallingConv() == llvm::CallingConv::C &&
4768	TargetLibraryInfoImpl::isCallingConvCCompatible(Callee: CalleeF))) &&
4769	// Only do this for calls to a function with a body. A prototype may
4770	// not actually end up matching the implementation's calling conv for a
4771	// variety of reasons (e.g. it may be written in assembly).
4772	!CalleeF->isDeclaration()) {
4773	Instruction *OldCall = &Call;
4774	CreateNonTerminatorUnreachable(InsertAt: OldCall);
4775	// If OldCall does not return void then replaceInstUsesWith poison.
4776	// This allows ValueHandlers and custom metadata to adjust itself.
4777	if (!OldCall->getType()->isVoidTy())
4778	replaceInstUsesWith(I&: *OldCall, V: PoisonValue::get(T: OldCall->getType()));
4779	if (isa<CallInst>(Val: OldCall))
4780	return eraseInstFromFunction(I&: *OldCall);
4781
4782	// We cannot remove an invoke or a callbr, because it would change thexi
4783	// CFG, just change the callee to a null pointer.
4784	cast<CallBase>(Val: OldCall)->setCalledFunction(
4785	FTy: CalleeF->getFunctionType(),
4786	Fn: Constant::getNullValue(Ty: CalleeF->getType()));
4787	return nullptr;
4788	}
4789	}
4790
4791	// Calling a null function pointer is undefined if a null address isn't
4792	// dereferenceable.
4793	if ((isa<ConstantPointerNull>(Val: Callee) &&
4794	!NullPointerIsDefined(F: Call.getFunction())) \|\|
4795	isa<UndefValue>(Val: Callee)) {
4796	// If Call does not return void then replaceInstUsesWith poison.
4797	// This allows ValueHandlers and custom metadata to adjust itself.
4798	if (!Call.getType()->isVoidTy())
4799	replaceInstUsesWith(I&: Call, V: PoisonValue::get(T: Call.getType()));
4800
4801	if (Call.isTerminator()) {
4802	// Can't remove an invoke or callbr because we cannot change the CFG.
4803	return nullptr;
4804	}
4805
4806	// This instruction is not reachable, just remove it.
4807	CreateNonTerminatorUnreachable(InsertAt: &Call);
4808	return eraseInstFromFunction(I&: Call);
4809	}
4810
4811	if (IntrinsicInst *II = findInitTrampoline(Callee))
4812	return transformCallThroughTrampoline(Call, Tramp&: *II);
4813
4814	// Combine calls involving pointer authentication intrinsics.
4815	if (Instruction *NewCall = foldPtrAuthIntrinsicCallee(Call))
4816	return NewCall;
4817
4818	// Combine calls to ptrauth constants.
4819	if (Instruction *NewCall = foldPtrAuthConstantCallee(Call))
4820	return NewCall;
4821
4822	if (isa<InlineAsm>(Val: Callee) && !Call.doesNotThrow()) {
4823	InlineAsm *IA = cast<InlineAsm>(Val: Callee);
4824	if (!IA->canThrow()) {
4825	// Normal inline asm calls cannot throw - mark them
4826	// 'nounwind'.
4827	Call.setDoesNotThrow();
4828	Changed = true;
4829	}
4830	}
4831
4832	// Try to optimize the call if possible, we require DataLayout for most of
4833	// this. None of these calls are seen as possibly dead so go ahead and
4834	// delete the instruction now.
4835	if (CallInst *CI = dyn_cast<CallInst>(Val: &Call)) {
4836	Instruction *I = tryOptimizeCall(CI);
4837	// If we changed something return the result, etc. Otherwise let
4838	// the fallthrough check.
4839	if (I) return eraseInstFromFunction(I&: *I);
4840	}
4841
4842	if (!Call.use_empty() && !Call.isMustTailCall())
4843	if (Value *ReturnedArg = Call.getReturnedArgOperand()) {
4844	Type *CallTy = Call.getType();
4845	Type *RetArgTy = ReturnedArg->getType();
4846	if (RetArgTy->canLosslesslyBitCastTo(Ty: CallTy))
4847	return replaceInstUsesWith(
4848	I&: Call, V: Builder.CreateBitOrPointerCast(V: ReturnedArg, DestTy: CallTy));
4849	}
4850
4851	// Drop unnecessary callee_type metadata from calls that were converted
4852	// into direct calls.
4853	if (Call.getMetadata(KindID: LLVMContext::MD_callee_type) && !Call.isIndirectCall()) {
4854	Call.setMetadata(KindID: LLVMContext::MD_callee_type, Node: nullptr);
4855	Changed = true;
4856	}
4857
4858	// Drop unnecessary kcfi operand bundles from calls that were converted
4859	// into direct calls.
4860	auto Bundle = Call.getOperandBundle(ID: LLVMContext::OB_kcfi);
4861	if (Bundle && !Call.isIndirectCall()) {
4862	DEBUG_WITH_TYPE(DEBUG_TYPE "-kcfi", {
4863	if (CalleeF) {
4864	ConstantInt FunctionType = nullptr*;
4865	ConstantInt *ExpectedType = cast<ConstantInt>(Bundle->Inputs[`0`]);
4866
4867	if (MDNode *MD = CalleeF->getMetadata(LLVMContext::MD_kcfi_type))
4868	FunctionType = mdconst::extract<ConstantInt>(MD->getOperand(`0`));
4869
4870	if (FunctionType &&
4871	FunctionType->getZExtValue() != ExpectedType->getZExtValue())
4872	dbgs() << Call.getModule()->getName()
4873	<< ": warning: kcfi: " << Call.getCaller()->getName()
4874	<< ": call to " << CalleeF->getName()
4875	<< " using a mismatching function pointer type\n";
4876	}
4877	});
4878
4879	return CallBase::removeOperandBundle(CB: &Call, ID: LLVMContext::OB_kcfi);
4880	}
4881
4882	if (isRemovableAlloc(V: &Call, TLI: &TLI))
4883	return visitAllocSite(FI&: Call);
4884
4885	// Handle intrinsics which can be used in both call and invoke context.
4886	switch (Call.getIntrinsicID()) {
4887	case Intrinsic::experimental_gc_statepoint: {
4888	GCStatepointInst &GCSP = *cast<GCStatepointInst>(Val: &Call);
4889	SmallPtrSet<Value *, `32`> LiveGcValues;
4890	for (const GCRelocateInst *Reloc : GCSP.getGCRelocates()) {
4891	GCRelocateInst &GCR = *const_cast<GCRelocateInst *>(Reloc);
4892
4893	// Remove the relocation if unused.
4894	if (GCR.use_empty()) {
4895	eraseInstFromFunction(I&: GCR);
4896	continue;
4897	}
4898
4899	Value *DerivedPtr = GCR.getDerivedPtr();
4900	Value *BasePtr = GCR.getBasePtr();
4901
4902	// Undef is undef, even after relocation.
4903	if (isa<UndefValue>(Val: DerivedPtr) \|\| isa<UndefValue>(Val: BasePtr)) {
4904	replaceInstUsesWith(I&: GCR, V: UndefValue::get(T: GCR.getType()));
4905	eraseInstFromFunction(I&: GCR);
4906	continue;
4907	}
4908
4909	if (auto *PT = dyn_cast<PointerType>(Val: GCR.getType())) {
4910	// The relocation of null will be null for most any collector.
4911	// TODO: provide a hook for this in GCStrategy. There might be some
4912	// weird collector this property does not hold for.
4913	if (isa<ConstantPointerNull>(Val: DerivedPtr)) {
4914	// Use null-pointer of gc_relocate's type to replace it.
4915	replaceInstUsesWith(I&: GCR, V: ConstantPointerNull::get(T: PT));
4916	eraseInstFromFunction(I&: GCR);
4917	continue;
4918	}
4919
4920	// isKnownNonNull -> nonnull attribute
4921	if (!GCR.hasRetAttr(Kind: Attribute::NonNull) &&
4922	isKnownNonZero(V: DerivedPtr,
4923	Q: getSimplifyQuery().getWithInstruction(I: &Call))) {
4924	GCR.addRetAttr(Kind: Attribute::NonNull);
4925	// We discovered new fact, re-check users.
4926	Worklist.pushUsersToWorkList(I&: GCR);
4927	}
4928	}
4929
4930	// If we have two copies of the same pointer in the statepoint argument
4931	// list, canonicalize to one. This may let us common gc.relocates.
4932	if (GCR.getBasePtr() == GCR.getDerivedPtr() &&
4933	GCR.getBasePtrIndex() != GCR.getDerivedPtrIndex()) {
4934	auto *OpIntTy = GCR.getOperand(i_nocapture: `2`)->getType();
4935	GCR.setOperand(i_nocapture: `2`, Val_nocapture: ConstantInt::get(Ty: OpIntTy, V: GCR.getBasePtrIndex()));
4936	}
4937
4938	// TODO: bitcast(relocate(p)) -> relocate(bitcast(p))
4939	// Canonicalize on the type from the uses to the defs
4940
4941	// TODO: relocate((gep p, C, C2, ...)) -> gep(relocate(p), C, C2, ...)
4942	LiveGcValues.insert(Ptr: BasePtr);
4943	LiveGcValues.insert(Ptr: DerivedPtr);
4944	}
4945	std::optional<OperandBundleUse> Bundle =
4946	GCSP.getOperandBundle(ID: LLVMContext::OB_gc_live);
4947	unsigned NumOfGCLives = LiveGcValues.size();
4948	if (!Bundle \|\| NumOfGCLives == Bundle ->Inputs.size())
4949	break;
4950	// We can reduce the size of gc live bundle.
4951	DenseMap<Value , unsigned*> Val2Idx;
4952	std::vector<Value *> NewLiveGc;
4953	for (Value *V : Bundle ->Inputs) {
4954	auto [It, Inserted] = Val2Idx.try_emplace(Key: V);
4955	if (!Inserted)
4956	continue;
4957	if (LiveGcValues.count(Ptr: V)) {
4958	It ->second = NewLiveGc.size();
4959	NewLiveGc.push_back(x: V);
4960	} else
4961	It ->second = NumOfGCLives;
4962	}
4963	// Update all gc.relocates
4964	for (const GCRelocateInst *Reloc : GCSP.getGCRelocates()) {
4965	GCRelocateInst &GCR = *const_cast<GCRelocateInst *>(Reloc);
4966	Value *BasePtr = GCR.getBasePtr();
4967	assert(Val2Idx.count(BasePtr) && Val2Idx[BasePtr] != NumOfGCLives &&
4968	"Missed live gc for base pointer");
4969	auto *OpIntTy1 = GCR.getOperand(i_nocapture: `1`)->getType();
4970	GCR.setOperand(i_nocapture: `1`, Val_nocapture: ConstantInt::get(Ty: OpIntTy1, V: Val2Idx [BasePtr]));
4971	Value *DerivedPtr = GCR.getDerivedPtr();
4972	assert(Val2Idx.count(DerivedPtr) && Val2Idx[DerivedPtr] != NumOfGCLives &&
4973	"Missed live gc for derived pointer");
4974	auto *OpIntTy2 = GCR.getOperand(i_nocapture: `2`)->getType();
4975	GCR.setOperand(i_nocapture: `2`, Val_nocapture: ConstantInt::get(Ty: OpIntTy2, V: Val2Idx [DerivedPtr]));
4976	}
4977	// Create new statepoint instruction.
4978	OperandBundleDef NewBundle("gc-live", std::move(NewLiveGc));
4979	return CallBase::Create(CB: &Call, Bundle: NewBundle);
4980	}
4981	default: { break; }
4982	}
4983
4984	return Changed ? &Call : nullptr;
4985	}
4986
4987	/// If the callee is a constexpr cast of a function, attempt to move the cast to
4988	/// the arguments of the call/invoke.
4989	/// CallBrInst is not supported.
4990	bool InstCombinerImpl::transformConstExprCastCall(CallBase &Call) {
4991	auto *Callee =
4992	dyn_cast<Function>(Val: Call.getCalledOperand()->stripPointerCasts());
4993	if (!Callee)
4994	return false;
4995
4996	assert(!isa<CallBrInst>(Call) &&
4997	"CallBr's don't have a single point after a def to insert at");
4998
4999	// Don't perform the transform for declarations, which may not be fully
5000	// accurate. For example, void @foo() is commonly used as a placeholder for
5001	// unknown prototypes.
5002	if (Callee->isDeclaration())
5003	return false;
5004
5005	// If this is a call to a thunk function, don't remove the cast. Thunks are
5006	// used to transparently forward all incoming parameters and outgoing return
5007	// values, so it's important to leave the cast in place.
5008	if (Callee->hasFnAttribute(Kind: "thunk"))
5009	return false;
5010
5011	// If this is a call to a naked function, the assembly might be
5012	// using an argument, or otherwise rely on the frame layout,
5013	// the function prototype will mismatch.
5014	if (Callee->hasFnAttribute(Kind: Attribute::Naked))
5015	return false;
5016
5017	// If this is a musttail call, the callee's prototype must match the caller's
5018	// prototype with the exception of pointee types. The code below doesn't
5019	// implement that, so we can't do this transform.
5020	// TODO: Do the transform if it only requires adding pointer casts.
5021	if (Call.isMustTailCall())
5022	return false;
5023
5024	Instruction *Caller = &Call;
5025	const AttributeList &CallerPAL = Call.getAttributes();
5026
5027	// Okay, this is a cast from a function to a different type. Unless doing so
5028	// would cause a type conversion of one of our arguments, change this call to
5029	// be a direct call with arguments casted to the appropriate types.
5030	FunctionType *FT = Callee->getFunctionType();
5031	Type *OldRetTy = Caller->getType();
5032	Type *NewRetTy = FT->getReturnType();
5033
5034	// Check to see if we are changing the return type...
5035	if (OldRetTy != NewRetTy) {
5036
5037	if (NewRetTy->isStructTy())
5038	return false; // TODO: Handle multiple return values.
5039
5040	if (!CastInst::isBitOrNoopPointerCastable(SrcTy: NewRetTy, DestTy: OldRetTy, DL)) {
5041	if (!Caller->use_empty())
5042	return false; // Cannot transform this return value.
5043	}
5044
5045	if (!CallerPAL.isEmpty() && !Caller->use_empty()) {
5046	AttrBuilder RAttrs(FT->getContext(), CallerPAL.getRetAttrs());
5047	if (RAttrs.overlaps(AM: AttributeFuncs::typeIncompatible(
5048	Ty: NewRetTy, AS: CallerPAL.getRetAttrs())))
5049	return false; // Attribute not compatible with transformed value.
5050	}
5051
5052	// If the callbase is an invoke instruction, and the return value is
5053	// used by a PHI node in a successor, we cannot change the return type of
5054	// the call because there is no place to put the cast instruction (without
5055	// breaking the critical edge). Bail out in this case.
5056	if (!Caller->use_empty()) {
5057	BasicBlock PhisNotSupportedBlock = nullptr*;
5058	if (auto *II = dyn_cast<InvokeInst>(Val: Caller))
5059	PhisNotSupportedBlock = II->getNormalDest();
5060	if (PhisNotSupportedBlock)
5061	for (User *U : Caller->users())
5062	if (PHINode *PN = dyn_cast<PHINode>(Val: U))
5063	if (PN->getParent() == PhisNotSupportedBlock)
5064	return false;
5065	}
5066	}
5067
5068	unsigned NumActualArgs = Call.arg_size();
5069	unsigned NumCommonArgs = std::min(a: FT->getNumParams(), b: NumActualArgs);
5070
5071	// Prevent us turning:
5072	// declare void @takes_i32_inalloca(i32 inalloca)*
5073	// call void bitcast (void (i32)* @takes_i32_inalloca to void (i32))(i32 0)
5074	//
5075	// into:
5076	// call void @takes_i32_inalloca(i32 null)*
5077	//
5078	// Similarly, avoid folding away bitcasts of byval calls.
5079	if (Callee->getAttributes().hasAttrSomewhere(Kind: Attribute::InAlloca) \|\|
5080	Callee->getAttributes().hasAttrSomewhere(Kind: Attribute::Preallocated))
5081	return false;
5082
5083	auto AI = Call.arg_begin();
5084	for (unsigned i = `0`, e = NumCommonArgs; i != e; ++i, ++AI) {
5085	Type *ParamTy = FT->getParamType(i);
5086	Type ActTy = (AI)->getType();
5087
5088	if (!CastInst::isBitOrNoopPointerCastable(SrcTy: ActTy, DestTy: ParamTy, DL))
5089	return false; // Cannot transform this parameter value.
5090
5091	// Check if there are any incompatible attributes we cannot drop safely.
5092	if (AttrBuilder (FT->getContext(), CallerPAL.getParamAttrs(ArgNo: i))
5093	.overlaps(AM: AttributeFuncs::typeIncompatible(
5094	Ty: ParamTy, AS: CallerPAL.getParamAttrs(ArgNo: i),
5095	ASK: AttributeFuncs::ASK_UNSAFE_TO_DROP)))
5096	return false; // Attribute not compatible with transformed value.
5097
5098	if (Call.isInAllocaArgument(ArgNo: i) \|\|
5099	CallerPAL.hasParamAttr(ArgNo: i, Kind: Attribute::Preallocated))
5100	return false; // Cannot transform to and from inalloca/preallocated.
5101
5102	if (CallerPAL.hasParamAttr(ArgNo: i, Kind: Attribute::SwiftError))
5103	return false;
5104
5105	if (CallerPAL.hasParamAttr(ArgNo: i, Kind: Attribute::ByVal) !=
5106	Callee->getAttributes().hasParamAttr(ArgNo: i, Kind: Attribute::ByVal))
5107	return false; // Cannot transform to or from byval.
5108	}
5109
5110	if (FT->getNumParams() < NumActualArgs && FT->isVarArg() &&
5111	!CallerPAL.isEmpty()) {
5112	// In this case we have more arguments than the new function type, but we
5113	// won't be dropping them. Check that these extra arguments have attributes
5114	// that are compatible with being a vararg call argument.
5115	unsigned SRetIdx;
5116	if (CallerPAL.hasAttrSomewhere(Kind: Attribute::StructRet, Index: &SRetIdx) &&
5117	SRetIdx - AttributeList::FirstArgIndex >= FT->getNumParams())
5118	return false;
5119	}
5120
5121	// Okay, we decided that this is a safe thing to do: go ahead and start
5122	// inserting cast instructions as necessary.
5123	SmallVector<Value *, `8`> Args;
5124	SmallVector<AttributeSet, `8`> ArgAttrs;
5125	Args.reserve(N: NumActualArgs);
5126	ArgAttrs.reserve(N: NumActualArgs);
5127
5128	// Get any return attributes.
5129	AttrBuilder RAttrs(FT->getContext(), CallerPAL.getRetAttrs());
5130
5131	// If the return value is not being used, the type may not be compatible
5132	// with the existing attributes. Wipe out any problematic attributes.
5133	RAttrs.remove(
5134	AM: AttributeFuncs::typeIncompatible(Ty: NewRetTy, AS: CallerPAL.getRetAttrs()));
5135
5136	LLVMContext &Ctx = Call.getContext();
5137	AI = Call.arg_begin();
5138	for (unsigned i = `0`; i != NumCommonArgs; ++i, ++AI) {
5139	Type *ParamTy = FT->getParamType(i);
5140
5141	Value NewArg = AI;
5142	if ((*AI)->getType() != ParamTy)
5143	NewArg = Builder.CreateBitOrPointerCast(V: *AI, DestTy: ParamTy);
5144	Args.push_back(Elt: NewArg);
5145
5146	// Add any parameter attributes except the ones incompatible with the new
5147	// type. Note that we made sure all incompatible ones are safe to drop.
5148	AttributeMask IncompatibleAttrs = AttributeFuncs::typeIncompatible(
5149	Ty: ParamTy, AS: CallerPAL.getParamAttrs(ArgNo: i), ASK: AttributeFuncs::ASK_SAFE_TO_DROP);
5150	ArgAttrs.push_back(
5151	Elt: CallerPAL.getParamAttrs(ArgNo: i).removeAttributes(C&: Ctx, AttrsToRemove: IncompatibleAttrs));
5152	}
5153
5154	// If the function takes more arguments than the call was taking, add them
5155	// now.
5156	for (unsigned i = NumCommonArgs; i != FT->getNumParams(); ++i) {
5157	Args.push_back(Elt: Constant::getNullValue(Ty: FT->getParamType(i)));
5158	ArgAttrs.push_back(Elt: AttributeSet ());
5159	}
5160
5161	// If we are removing arguments to the function, emit an obnoxious warning.
5162	if (FT->getNumParams() < NumActualArgs) {
5163	// TODO: if (!FT->isVarArg()) this call may be unreachable. PR14722
5164	if (FT->isVarArg()) {
5165	// Add all of the arguments in their promoted form to the arg list.
5166	for (unsigned i = FT->getNumParams(); i != NumActualArgs; ++i, ++AI) {
5167	Type PTy = getPromotedType(Ty: (AI)->getType());
5168	Value NewArg = AI;
5169	if (PTy != (*AI)->getType()) {
5170	// Must promote to pass through va_arg area!
5171	Instruction::CastOps opcode =
5172	CastInst::getCastOpcode(Val: AI, SrcIsSigned: false, Ty: PTy, DstIsSigned: false*);
5173	NewArg = Builder.CreateCast(Op: opcode, V: *AI, DestTy: PTy);
5174	}
5175	Args.push_back(Elt: NewArg);
5176
5177	// Add any parameter attributes.
5178	ArgAttrs.push_back(Elt: CallerPAL.getParamAttrs(ArgNo: i));
5179	}
5180	}
5181	}
5182
5183	AttributeSet FnAttrs = CallerPAL.getFnAttrs();
5184
5185	if (NewRetTy->isVoidTy())
5186	Caller->setName(""); // Void type should not have a name.
5187
5188	assert((ArgAttrs.size() == FT->getNumParams() \|\| FT->isVarArg()) &&
5189	"missing argument attributes");
5190	AttributeList NewCallerPAL = AttributeList::get(
5191	C&: Ctx, FnAttrs, RetAttrs: AttributeSet::get(C&: Ctx, B: RAttrs), ArgAttrs);
5192
5193	SmallVector<OperandBundleDef, `1`> OpBundles;
5194	Call.getOperandBundlesAsDefs(Defs&: OpBundles);
5195
5196	CallBase *NewCall;
5197	if (InvokeInst *II = dyn_cast<InvokeInst>(Val: Caller)) {
5198	NewCall = Builder.CreateInvoke(Callee, NormalDest: II->getNormalDest(),
5199	UnwindDest: II->getUnwindDest(), Args, OpBundles);
5200	} else {
5201	NewCall = Builder.CreateCall(Callee, Args, OpBundles);
5202	cast<CallInst>(Val: NewCall)->setTailCallKind(
5203	cast<CallInst>(Val: Caller)->getTailCallKind());
5204	}
5205	NewCall->takeName(V: Caller);
5206	NewCall->setCallingConv(Call.getCallingConv());
5207	NewCall->setAttributes(NewCallerPAL);
5208
5209	// Preserve prof metadata if any.
5210	NewCall->copyMetadata(SrcInst: *Caller, WL: {LLVMContext::MD_prof});
5211
5212	// Insert a cast of the return type as necessary.
5213	Instruction *NC = NewCall;
5214	Value *NV = NC;
5215	if (OldRetTy != NV->getType() && !Caller->use_empty()) {
5216	assert(!NV->getType()->isVoidTy());
5217	NV = NC = CastInst::CreateBitOrPointerCast(S: NC, Ty: OldRetTy);
5218	NC->setDebugLoc(Caller->getDebugLoc());
5219
5220	auto OptInsertPt = NewCall->getInsertionPointAfterDef();
5221	assert(OptInsertPt && "No place to insert cast");
5222	InsertNewInstBefore(New: NC, Old: *OptInsertPt);
5223	Worklist.pushUsersToWorkList(I&: *Caller);
5224	}
5225
5226	if (!Caller->use_empty())
5227	replaceInstUsesWith(I&: *Caller, V: NV);
5228	else if (Caller->hasValueHandle()) {
5229	if (OldRetTy == NV->getType())
5230	ValueHandleBase::ValueIsRAUWd(Old: Caller, New: NV);
5231	else
5232	// We cannot call ValueIsRAUWd with a different type, and the
5233	// actual tracked value will disappear.
5234	ValueHandleBase::ValueIsDeleted(V: Caller);
5235	}
5236
5237	eraseInstFromFunction(I&: *Caller);
5238	return true;
5239	}
5240
5241	/// Turn a call to a function created by init_trampoline / adjust_trampoline
5242	/// intrinsic pair into a direct call to the underlying function.
5243	Instruction *
5244	InstCombinerImpl::transformCallThroughTrampoline(CallBase &Call,
5245	IntrinsicInst &Tramp) {
5246	FunctionType *FTy = Call.getFunctionType();
5247	AttributeList Attrs = Call.getAttributes();
5248
5249	// If the call already has the 'nest' attribute somewhere then give up -
5250	// otherwise 'nest' would occur twice after splicing in the chain.
5251	if (Attrs.hasAttrSomewhere(Kind: Attribute::Nest))
5252	return nullptr;
5253
5254	Function *NestF = cast<Function>(Val: Tramp.getArgOperand(i: `1`)->stripPointerCasts());
5255	FunctionType *NestFTy = NestF->getFunctionType();
5256
5257	AttributeList NestAttrs = NestF->getAttributes();
5258	if (!NestAttrs.isEmpty()) {
5259	unsigned NestArgNo = `0`;
5260	Type NestTy = nullptr*;
5261	AttributeSet NestAttr;
5262
5263	// Look for a parameter marked with the 'nest' attribute.
5264	for (FunctionType::param_iterator I = NestFTy->param_begin(),
5265	E = NestFTy->param_end();
5266	I != E; ++NestArgNo, ++I) {
5267	AttributeSet AS = NestAttrs.getParamAttrs(ArgNo: NestArgNo);
5268	if (AS.hasAttribute(Kind: Attribute::Nest)) {
5269	// Record the parameter type and any other attributes.
5270	NestTy = *I;
5271	NestAttr = AS;
5272	break;
5273	}
5274	}
5275
5276	if (NestTy) {
5277	std::vector<Value*> NewArgs;
5278	std::vector<AttributeSet> NewArgAttrs;
5279	NewArgs.reserve(n: Call.arg_size() + `1`);
5280	NewArgAttrs.reserve(n: Call.arg_size());
5281
5282	// Insert the nest argument into the call argument list, which may
5283	// mean appending it. Likewise for attributes.
5284
5285	{
5286	unsigned ArgNo = `0`;
5287	auto I = Call.arg_begin(), E = Call.arg_end();
5288	do {
5289	if (ArgNo == NestArgNo) {
5290	// Add the chain argument and attributes.
5291	Value *NestVal = Tramp.getArgOperand(i: `2`);
5292	if (NestVal->getType() != NestTy)
5293	NestVal = Builder.CreateBitCast(V: NestVal, DestTy: NestTy, Name: "nest");
5294	NewArgs.push_back(x: NestVal);
5295	NewArgAttrs.push_back(x: NestAttr);
5296	}
5297
5298	if (I == E)
5299	break;
5300
5301	// Add the original argument and attributes.
5302	NewArgs.push_back(x: *I);
5303	NewArgAttrs.push_back(x: Attrs.getParamAttrs(ArgNo));
5304
5305	++ArgNo;
5306	++I;
5307	} while (true);
5308	}
5309
5310	// The trampoline may have been bitcast to a bogus type (FTy).
5311	// Handle this by synthesizing a new function type, equal to FTy
5312	// with the chain parameter inserted.
5313
5314	std::vector<Type*> NewTypes;
5315	NewTypes.reserve(n: FTy->getNumParams()+`1`);
5316
5317	// Insert the chain's type into the list of parameter types, which may
5318	// mean appending it.
5319	{
5320	unsigned ArgNo = `0`;
5321	FunctionType::param_iterator I = FTy->param_begin(),
5322	E = FTy->param_end();
5323
5324	do {
5325	if (ArgNo == NestArgNo)
5326	// Add the chain's type.
5327	NewTypes.push_back(x: NestTy);
5328
5329	if (I == E)
5330	break;
5331
5332	// Add the original type.
5333	NewTypes.push_back(x: *I);
5334
5335	++ArgNo;
5336	++I;
5337	} while (true);
5338	}
5339
5340	// Replace the trampoline call with a direct call. Let the generic
5341	// code sort out any function type mismatches.
5342	FunctionType *NewFTy =
5343	FunctionType::get(Result: FTy->getReturnType(), Params: NewTypes, isVarArg: FTy->isVarArg());
5344	AttributeList NewPAL =
5345	AttributeList::get(C&: FTy->getContext(), FnAttrs: Attrs.getFnAttrs(),
5346	RetAttrs: Attrs.getRetAttrs(), ArgAttrs: NewArgAttrs);
5347
5348	SmallVector<OperandBundleDef, `1`> OpBundles;
5349	Call.getOperandBundlesAsDefs(Defs&: OpBundles);
5350
5351	Instruction *NewCaller;
5352	if (InvokeInst *II = dyn_cast<InvokeInst>(Val: &Call)) {
5353	NewCaller = InvokeInst::Create(Ty: NewFTy, Func: NestF, IfNormal: II->getNormalDest(),
5354	IfException: II->getUnwindDest(), Args: NewArgs, Bundles: OpBundles);
5355	cast<InvokeInst>(Val: NewCaller)->setCallingConv(II->getCallingConv());
5356	cast<InvokeInst>(Val: NewCaller)->setAttributes(NewPAL);
5357	} else if (CallBrInst *CBI = dyn_cast<CallBrInst>(Val: &Call)) {
5358	NewCaller =
5359	CallBrInst::Create(Ty: NewFTy, Func: NestF, DefaultDest: CBI->getDefaultDest(),
5360	IndirectDests: CBI->getIndirectDests(), Args: NewArgs, Bundles: OpBundles);
5361	cast<CallBrInst>(Val: NewCaller)->setCallingConv(CBI->getCallingConv());
5362	cast<CallBrInst>(Val: NewCaller)->setAttributes(NewPAL);
5363	} else {
5364	NewCaller = CallInst::Create(Ty: NewFTy, Func: NestF, Args: NewArgs, Bundles: OpBundles);
5365	cast<CallInst>(Val: NewCaller)->setTailCallKind(
5366	cast<CallInst>(Val&: Call).getTailCallKind());
5367	cast<CallInst>(Val: NewCaller)->setCallingConv(
5368	cast<CallInst>(Val&: Call).getCallingConv());
5369	cast<CallInst>(Val: NewCaller)->setAttributes(NewPAL);
5370	}
5371	NewCaller->setDebugLoc(Call.getDebugLoc());
5372
5373	return NewCaller;
5374	}
5375	}
5376
5377	// Replace the trampoline call with a direct call. Since there is no 'nest'
5378	// parameter, there is no need to adjust the argument list. Let the generic
5379	// code sort out any function type mismatches.
5380	Call.setCalledFunction(FTy, Fn: NestF);
5381	return &Call;
5382	}
5383

Browse the source code of llvm_projects/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp