| 1 | //===- InstCombineCalls.cpp -----------------------------------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // This file implements the visitCall, visitInvoke, and visitCallBr functions. |
| 10 | // |
| 11 | //===----------------------------------------------------------------------===// |
| 12 | |
| 13 | #include "InstCombineInternal.h" |
| 14 | #include "llvm/ADT/APFloat.h" |
| 15 | #include "llvm/ADT/APInt.h" |
| 16 | #include "llvm/ADT/APSInt.h" |
| 17 | #include "llvm/ADT/ArrayRef.h" |
| 18 | #include "llvm/ADT/STLFunctionalExtras.h" |
| 19 | #include "llvm/ADT/SmallBitVector.h" |
| 20 | #include "llvm/ADT/SmallVector.h" |
| 21 | #include "llvm/ADT/Statistic.h" |
| 22 | #include "llvm/Analysis/AliasAnalysis.h" |
| 23 | #include "llvm/Analysis/AssumeBundleQueries.h" |
| 24 | #include "llvm/Analysis/AssumptionCache.h" |
| 25 | #include "llvm/Analysis/InstructionSimplify.h" |
| 26 | #include "llvm/Analysis/Loads.h" |
| 27 | #include "llvm/Analysis/MemoryBuiltins.h" |
| 28 | #include "llvm/Analysis/ValueTracking.h" |
| 29 | #include "llvm/Analysis/VectorUtils.h" |
| 30 | #include "llvm/IR/AttributeMask.h" |
| 31 | #include "llvm/IR/Attributes.h" |
| 32 | #include "llvm/IR/BasicBlock.h" |
| 33 | #include "llvm/IR/Constant.h" |
| 34 | #include "llvm/IR/Constants.h" |
| 35 | #include "llvm/IR/DataLayout.h" |
| 36 | #include "llvm/IR/DebugInfo.h" |
| 37 | #include "llvm/IR/DerivedTypes.h" |
| 38 | #include "llvm/IR/Function.h" |
| 39 | #include "llvm/IR/GlobalVariable.h" |
| 40 | #include "llvm/IR/InlineAsm.h" |
| 41 | #include "llvm/IR/InstrTypes.h" |
| 42 | #include "llvm/IR/Instruction.h" |
| 43 | #include "llvm/IR/Instructions.h" |
| 44 | #include "llvm/IR/IntrinsicInst.h" |
| 45 | #include "llvm/IR/Intrinsics.h" |
| 46 | #include "llvm/IR/IntrinsicsAArch64.h" |
| 47 | #include "llvm/IR/IntrinsicsAMDGPU.h" |
| 48 | #include "llvm/IR/IntrinsicsARM.h" |
| 49 | #include "llvm/IR/IntrinsicsHexagon.h" |
| 50 | #include "llvm/IR/LLVMContext.h" |
| 51 | #include "llvm/IR/Metadata.h" |
| 52 | #include "llvm/IR/PatternMatch.h" |
| 53 | #include "llvm/IR/Statepoint.h" |
| 54 | #include "llvm/IR/Type.h" |
| 55 | #include "llvm/IR/User.h" |
| 56 | #include "llvm/IR/Value.h" |
| 57 | #include "llvm/IR/ValueHandle.h" |
| 58 | #include "llvm/Support/AtomicOrdering.h" |
| 59 | #include "llvm/Support/Casting.h" |
| 60 | #include "llvm/Support/CommandLine.h" |
| 61 | #include "llvm/Support/Compiler.h" |
| 62 | #include "llvm/Support/Debug.h" |
| 63 | #include "llvm/Support/ErrorHandling.h" |
| 64 | #include "llvm/Support/KnownBits.h" |
| 65 | #include "llvm/Support/KnownFPClass.h" |
| 66 | #include "llvm/Support/MathExtras.h" |
| 67 | #include "llvm/Support/raw_ostream.h" |
| 68 | #include "llvm/Transforms/InstCombine/InstCombiner.h" |
| 69 | #include "llvm/Transforms/Utils/AssumeBundleBuilder.h" |
| 70 | #include "llvm/Transforms/Utils/Local.h" |
| 71 | #include "llvm/Transforms/Utils/SimplifyLibCalls.h" |
| 72 | #include <algorithm> |
| 73 | #include <cassert> |
| 74 | #include <cstdint> |
| 75 | #include <optional> |
| 76 | #include <utility> |
| 77 | #include <vector> |
| 78 | |
| 79 | #define DEBUG_TYPE "instcombine" |
| 80 | #include "llvm/Transforms/Utils/InstructionWorklist.h" |
| 81 | |
| 82 | using namespace llvm; |
| 83 | using namespace PatternMatch; |
| 84 | |
| 85 | STATISTIC(NumSimplified, "Number of library calls simplified"); |
| 86 | |
| 87 | static cl::opt<unsigned> GuardWideningWindow( |
| 88 | "instcombine-guard-widening-window" , |
| 89 | cl::init(Val: 3), |
| 90 | cl::desc("How wide an instruction window to bypass looking for " |
| 91 | "another guard" )); |
| 92 | |
| 93 | /// Return the specified type promoted as it would be to pass through a va_arg |
| 94 | /// area. |
| 95 | static Type *getPromotedType(Type *Ty) { |
| 96 | if (IntegerType* ITy = dyn_cast<IntegerType>(Val: Ty)) { |
| 97 | if (ITy->getBitWidth() < 32) |
| 98 | return Type::getInt32Ty(C&: Ty->getContext()); |
| 99 | } |
| 100 | return Ty; |
| 101 | } |
| 102 | |
| 103 | /// Recognize a memcpy/memmove from an alloca that is otherwise trivially unused. |
| 104 | /// TODO: This should probably be integrated with visitAllocSites, but that |
| 105 | /// requires a deeper change to allow either unread or unwritten objects. |
| 106 | static bool hasUndefSource(AnyMemTransferInst *MI) { |
| 107 | auto *Src = MI->getRawSource(); |
| 108 | while (isa<GetElementPtrInst>(Val: Src)) { |
| 109 | if (!Src->hasOneUse()) |
| 110 | return false; |
| 111 | Src = cast<Instruction>(Val: Src)->getOperand(i: 0); |
| 112 | } |
| 113 | return isa<AllocaInst>(Val: Src) && Src->hasOneUse(); |
| 114 | } |
| 115 | |
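| | /// Simplify llvm.memcpy/llvm.memmove (plain or element-atomic): raise the |
| | /// source/destination alignment to what is provably known, drop transfers whose |
| | /// destination is constant memory or whose source is a provably undef (trivially |
| | /// unused) alloca, and lower constant-length transfers of 1/2/4/8 bytes into a |
| | /// single integer load/store pair that preserves the relevant metadata. |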
| 116 | Instruction *InstCombinerImpl::SimplifyAnyMemTransfer(AnyMemTransferInst *MI) { |
| 117 | Align DstAlign = getKnownAlignment(V: MI->getRawDest(), DL, CxtI: MI, AC: &AC, DT: &DT); |
| 118 | MaybeAlign CopyDstAlign = MI->getDestAlign(); |
| 119 | if (!CopyDstAlign || *CopyDstAlign < DstAlign) { |
| 120 | MI->setDestAlignment(DstAlign); |
| 121 | return MI; |
| 122 | } |
| 123 | |
| 124 | Align SrcAlign = getKnownAlignment(V: MI->getRawSource(), DL, CxtI: MI, AC: &AC, DT: &DT); |
| 125 | MaybeAlign CopySrcAlign = MI->getSourceAlign(); |
| 126 | if (!CopySrcAlign || *CopySrcAlign < SrcAlign) { |
| 127 | MI->setSourceAlignment(SrcAlign); |
| 128 | return MI; |
| 129 | } |
| 130 | |
| 131 | // If we have a store to a location which is known to be constant, we can |
| 132 | // conclude that the store must be storing the constant value (else the |
| 133 | // memory wouldn't be constant), and so the transfer must be a no-op. |
| 134 | if (!isModSet(MRI: AA->getModRefInfoMask(P: MI->getDest()))) { |
| 135 | // Set the size of the copy to 0; it will be deleted on the next iteration. |
| 136 | MI->setLength(Constant::getNullValue(Ty: MI->getLength()->getType())); |
| 137 | return MI; |
| 138 | } |
| 139 | |
| 140 | // If the source is provably undef, the memcpy/memmove doesn't do anything |
| 141 | // (unless the transfer is volatile). |
| 142 | if (hasUndefSource(MI) && !MI->isVolatile()) { |
| 143 | // Set the size of the copy to 0; it will be deleted on the next iteration. |
| 144 | MI->setLength(Constant::getNullValue(Ty: MI->getLength()->getType())); |
| 145 | return MI; |
| 146 | } |
| 147 | |
| 148 | // If the transfer length is a constant 1, 2, 4, or 8 bytes, replace the |
| 149 | // memcpy/memmove with a single load/store pair. |
| 150 | ConstantInt *MemOpLength = dyn_cast<ConstantInt>(Val: MI->getLength()); |
| 151 | if (!MemOpLength) return nullptr; |
| 152 | |
| 153 | // Source and destination pointer types are always "i8*" for intrinsic. See |
| 154 | // if the size is something we can handle with a single primitive load/store. |
| 155 | // A single load+store correctly handles overlapping memory in the memmove |
| 156 | // case. |
| 157 | uint64_t Size = MemOpLength->getLimitedValue(); |
| 158 | assert(Size && "0-sized memory transferring should be removed already."); |
| 159 | |
| 160 | if (Size > 8 || (Size&(Size-1))) |
| 161 | return nullptr; // If not 1/2/4/8 bytes, exit. |
| 162 | |
| 163 | // If the transfer is atomic and the alignment is less than the size, then we |
| 164 | // would introduce an unaligned memory access, which CodeGen would later turn |
| 165 | // into a libcall. That is not an evident performance gain, so disable the |
| 166 | // transform for now. |
| 167 | if (MI->isAtomic()) |
| 168 | if (*CopyDstAlign < Size || *CopySrcAlign < Size) |
| 169 | return nullptr; |
| 170 | |
| 171 | // Use an integer load+store unless we can find something better. |
| 172 | IntegerType* IntType = IntegerType::get(C&: MI->getContext(), NumBits: Size<<3); |
| 173 | |
| 174 | // If the memcpy has metadata describing the members, see if we can get the |
| 175 | // TBAA, scope and noalias tags describing our copy. |
| 176 | AAMDNodes AACopyMD = MI->getAAMetadata().adjustForAccess(AccessSize: Size); |
| 177 | |
| 178 | Value *Src = MI->getArgOperand(i: 1); |
| 179 | Value *Dest = MI->getArgOperand(i: 0); |
| 180 | LoadInst *L = Builder.CreateLoad(Ty: IntType, Ptr: Src); |
| 181 | // Alignment from the mem intrinsic will be better, so use it. |
| 182 | L->setAlignment(*CopySrcAlign); |
| 183 | L->setAAMetadata(AACopyMD); |
| 184 | MDNode *LoopMemParallelMD = |
| 185 | MI->getMetadata(KindID: LLVMContext::MD_mem_parallel_loop_access); |
| 186 | if (LoopMemParallelMD) |
| 187 | L->setMetadata(KindID: LLVMContext::MD_mem_parallel_loop_access, Node: LoopMemParallelMD); |
| 188 | MDNode *AccessGroupMD = MI->getMetadata(KindID: LLVMContext::MD_access_group); |
| 189 | if (AccessGroupMD) |
| 190 | L->setMetadata(KindID: LLVMContext::MD_access_group, Node: AccessGroupMD); |
| 191 | |
| 192 | StoreInst *S = Builder.CreateStore(Val: L, Ptr: Dest); |
| 193 | // Alignment from the mem intrinsic will be better, so use it. |
| 194 | S->setAlignment(*CopyDstAlign); |
| 195 | S->setAAMetadata(AACopyMD); |
| 196 | if (LoopMemParallelMD) |
| 197 | S->setMetadata(KindID: LLVMContext::MD_mem_parallel_loop_access, Node: LoopMemParallelMD); |
| 198 | if (AccessGroupMD) |
| 199 | S->setMetadata(KindID: LLVMContext::MD_access_group, Node: AccessGroupMD); |
| 200 | S->copyMetadata(SrcInst: *MI, WL: LLVMContext::MD_DIAssignID); |
| 201 | |
| 202 | if (auto *MT = dyn_cast<MemTransferInst>(Val: MI)) { |
| 203 | // non-atomics can be volatile |
| 204 | L->setVolatile(MT->isVolatile()); |
| 205 | S->setVolatile(MT->isVolatile()); |
| 206 | } |
| 207 | if (MI->isAtomic()) { |
| 208 | // atomics have to be unordered |
| 209 | L->setOrdering(AtomicOrdering::Unordered); |
| 210 | S->setOrdering(AtomicOrdering::Unordered); |
| 211 | } |
| 212 | |
| 213 | // Set the size of the copy to 0; it will be deleted on the next iteration. |
| 214 | MI->setLength(Constant::getNullValue(Ty: MemOpLength->getType())); |
| 215 | return MI; |
| 216 | } |
| 217 | |
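| | /// Simplify llvm.memset (plain or element-atomic): raise the destination |
| | /// alignment to what is provably known, drop memsets whose destination is |
| | /// constant memory or whose fill value is undef, and lower constant-length |
| | /// memsets of 1/2/4/8 bytes into a single store of a splatted fill byte. |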
| 218 | Instruction *InstCombinerImpl::SimplifyAnyMemSet(AnyMemSetInst *MI) { |
| 219 | const Align KnownAlignment = |
| 220 | getKnownAlignment(V: MI->getDest(), DL, CxtI: MI, AC: &AC, DT: &DT); |
| 221 | MaybeAlign MemSetAlign = MI->getDestAlign(); |
| 222 | if (!MemSetAlign || *MemSetAlign < KnownAlignment) { |
| 223 | MI->setDestAlignment(KnownAlignment); |
| 224 | return MI; |
| 225 | } |
| 226 | |
| 227 | // If we have a store to a location which is known to be constant, we can |
| 228 | // conclude that the store must be storing the constant value (else the |
| 229 | // memory wouldn't be constant), and so the memset must be a no-op. |
| 230 | if (!isModSet(MRI: AA->getModRefInfoMask(P: MI->getDest()))) { |
| 231 | // Set the length of the memset to 0; it will be deleted on the next iteration. |
| 232 | MI->setLength(Constant::getNullValue(Ty: MI->getLength()->getType())); |
| 233 | return MI; |
| 234 | } |
| 235 | |
| 236 | // Remove memset with an undef value. |
| 237 | // FIXME: This is technically incorrect because it might overwrite a poison |
| 238 | // value. Change to PoisonValue once #52930 is resolved. |
| 239 | if (isa<UndefValue>(Val: MI->getValue())) { |
| 240 | // Set the length of the memset to 0; it will be deleted on the next iteration. |
| 241 | MI->setLength(Constant::getNullValue(Ty: MI->getLength()->getType())); |
| 242 | return MI; |
| 243 | } |
| 244 | |
| 245 | // Extract the length and alignment and fill if they are constant. |
| 246 | ConstantInt *LenC = dyn_cast<ConstantInt>(Val: MI->getLength()); |
| 247 | ConstantInt *FillC = dyn_cast<ConstantInt>(Val: MI->getValue()); |
| 248 | if (!LenC || !FillC || !FillC->getType()->isIntegerTy(Bitwidth: 8)) |
| 249 | return nullptr; |
| 250 | const uint64_t Len = LenC->getLimitedValue(); |
| 251 | assert(Len && "0-sized memory setting should be removed already."); |
| 252 | const Align Alignment = MI->getDestAlign().valueOrOne(); |
| 253 | |
| 254 | // If the memset is atomic and the alignment is less than the size, then we |
| 255 | // would introduce an unaligned memory access, which CodeGen would later turn |
| 256 | // into a libcall. That is not an evident performance gain, so disable the |
| 257 | // transform for now. |
| 258 | if (MI->isAtomic() && Alignment < Len) |
| 259 | return nullptr; |
| 260 | |
| 261 | // memset(s,c,n) -> store s, c (for n=1,2,4,8) |
| 262 | if (Len <= 8 && isPowerOf2_32(Value: (uint32_t)Len)) { |
| 263 | Value *Dest = MI->getDest(); |
| 264 | |
| 265 | // Extract the fill value and store. |
| 266 | Constant *FillVal = ConstantInt::get( |
| 267 | Context&: MI->getContext(), V: APInt::getSplat(NewLen: Len * 8, V: FillC->getValue())); |
| 268 | StoreInst *S = Builder.CreateStore(Val: FillVal, Ptr: Dest, isVolatile: MI->isVolatile()); |
| 269 | S->copyMetadata(SrcInst: *MI, WL: LLVMContext::MD_DIAssignID); |
| 270 | auto replaceOpForAssignmentMarkers = [FillC, FillVal](auto *DbgAssign) { |
| 271 | if (llvm::is_contained(DbgAssign->location_ops(), FillC)) |
| 272 | DbgAssign->replaceVariableLocationOp(FillC, FillVal); |
| 273 | }; |
| 274 | for_each(Range: at::getAssignmentMarkers(Inst: S), F: replaceOpForAssignmentMarkers); |
| 275 | for_each(Range: at::getDVRAssignmentMarkers(Inst: S), F: replaceOpForAssignmentMarkers); |
| 276 | |
| 277 | S->setAlignment(Alignment); |
| 278 | if (MI->isAtomic()) |
| 279 | S->setOrdering(AtomicOrdering::Unordered); |
| 280 | |
| 281 | // Set the length of the memset to 0; it will be deleted on the next iteration. |
| 282 | MI->setLength(Constant::getNullValue(Ty: LenC->getType())); |
| 283 | return MI; |
| 284 | } |
| 285 | |
| 286 | return nullptr; |
| 287 | } |
| 288 | |
| 289 | // TODO, Obvious Missing Transforms: |
| 290 | // * Narrow width by halves excluding zero/undef lanes |
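| | /// Fold a masked.load whose mask is all ones/undef into a plain load, or, when |
| | /// the pointer is known dereferenceable, into an unconditional load selected |
| | /// against the passthru operand. |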
| 291 | Value *InstCombinerImpl::simplifyMaskedLoad(IntrinsicInst &II) { |
| 292 | Value *LoadPtr = II.getArgOperand(i: 0); |
| 293 | const Align Alignment = |
| 294 | cast<ConstantInt>(Val: II.getArgOperand(i: 1))->getAlignValue(); |
| 295 | |
| 296 | // If the mask is all ones or undefs, this is a plain vector load of the 1st |
| 297 | // argument. |
| 298 | if (maskIsAllOneOrUndef(Mask: II.getArgOperand(i: 2))) { |
| 299 | LoadInst *L = Builder.CreateAlignedLoad(Ty: II.getType(), Ptr: LoadPtr, Align: Alignment, |
| 300 | Name: "unmaskedload" ); |
| 301 | L->copyMetadata(SrcInst: II); |
| 302 | return L; |
| 303 | } |
| 304 | |
| 305 | // If we can unconditionally load from this address, replace with a |
| 306 | // load/select idiom. TODO: use DT for context sensitive query |
| 307 | if (isDereferenceablePointer(V: LoadPtr, Ty: II.getType(), |
| 308 | DL: II.getDataLayout(), CtxI: &II, AC: &AC)) { |
| 309 | LoadInst *LI = Builder.CreateAlignedLoad(Ty: II.getType(), Ptr: LoadPtr, Align: Alignment, |
| 310 | Name: "unmaskedload" ); |
| 311 | LI->copyMetadata(SrcInst: II); |
| 312 | return Builder.CreateSelect(C: II.getArgOperand(i: 2), True: LI, False: II.getArgOperand(i: 3)); |
| 313 | } |
| 314 | |
| 315 | return nullptr; |
| 316 | } |
| 317 | |
| 318 | // TODO, Obvious Missing Transforms: |
| 319 | // * Single constant active lane -> store |
| 320 | // * Narrow width by halves excluding zero/undef lanes |
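| | /// Fold a masked.store with a constant mask: erase it when the mask is all |
| | /// zeros, lower it to a plain store when the mask is all ones, and otherwise |
| | /// use the inactive lanes to simplify the stored value. |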
| 321 | Instruction *InstCombinerImpl::simplifyMaskedStore(IntrinsicInst &II) { |
| 322 | auto *ConstMask = dyn_cast<Constant>(Val: II.getArgOperand(i: 3)); |
| 323 | if (!ConstMask) |
| 324 | return nullptr; |
| 325 | |
| 326 | // If the mask is all zeros, this instruction does nothing. |
| 327 | if (ConstMask->isNullValue()) |
| 328 | return eraseInstFromFunction(I&: II); |
| 329 | |
| 330 | // If the mask is all ones, this is a plain vector store of the 1st argument. |
| 331 | if (ConstMask->isAllOnesValue()) { |
| 332 | Value *StorePtr = II.getArgOperand(i: 1); |
| 333 | Align Alignment = cast<ConstantInt>(Val: II.getArgOperand(i: 2))->getAlignValue(); |
| 334 | StoreInst *S = |
| 335 | new StoreInst(II.getArgOperand(i: 0), StorePtr, false, Alignment); |
| 336 | S->copyMetadata(SrcInst: II); |
| 337 | return S; |
| 338 | } |
| 339 | |
| 340 | if (isa<ScalableVectorType>(Val: ConstMask->getType())) |
| 341 | return nullptr; |
| 342 | |
| 343 | // Use masked off lanes to simplify operands via SimplifyDemandedVectorElts |
| 344 | APInt DemandedElts = possiblyDemandedEltsInMask(Mask: ConstMask); |
| 345 | APInt PoisonElts(DemandedElts.getBitWidth(), 0); |
| 346 | if (Value *V = SimplifyDemandedVectorElts(V: II.getOperand(i_nocapture: 0), DemandedElts, |
| 347 | PoisonElts)) |
| 348 | return replaceOperand(I&: II, OpNum: 0, V); |
| 349 | |
| 350 | return nullptr; |
| 351 | } |
| 352 | |
| 353 | // TODO, Obvious Missing Transforms: |
| 354 | // * Single constant active lane load -> load |
| 355 | // * Dereferenceable address & few lanes -> scalarize speculative load/selects |
| 356 | // * Adjacent vector addresses -> masked.load |
| 357 | // * Narrow width by halves excluding zero/undef lanes |
| 358 | // * Vector incrementing address -> vector masked load |
| 359 | Instruction *InstCombinerImpl::simplifyMaskedGather(IntrinsicInst &II) { |
| 360 | auto *ConstMask = dyn_cast<Constant>(Val: II.getArgOperand(i: 2)); |
| 361 | if (!ConstMask) |
| 362 | return nullptr; |
| 363 | |
| 364 | // Vector splat address w/ known all-ones mask -> scalar load. |
| 365 | // Every lane of the gather reloads the same value, so fold it to a single |
| 366 | // scalar load of the splatted pointer followed by a vector splat. |
| 367 | if (ConstMask->isAllOnesValue()) |
| 368 | if (auto *SplatPtr = getSplatValue(V: II.getArgOperand(i: 0))) { |
| 369 | auto *VecTy = cast<VectorType>(Val: II.getType()); |
| 370 | const Align Alignment = |
| 371 | cast<ConstantInt>(Val: II.getArgOperand(i: 1))->getAlignValue(); |
| 372 | LoadInst *L = Builder.CreateAlignedLoad(Ty: VecTy->getElementType(), Ptr: SplatPtr, |
| 373 | Align: Alignment, Name: "load.scalar"); |
| 374 | Value *Shuf = |
| 375 | Builder.CreateVectorSplat(EC: VecTy->getElementCount(), V: L, Name: "broadcast"); |
| 376 | return replaceInstUsesWith(I&: II, V: cast<Instruction>(Val: Shuf)); |
| 377 | } |
| 378 | |
| 379 | return nullptr; |
| 380 | } |
| 381 | |
| 382 | // TODO, Obvious Missing Transforms: |
| 383 | // * Single constant active lane -> store |
| 384 | // * Adjacent vector addresses -> masked.store |
| 385 | // * Narrow store width by halves excluding zero/undef lanes |
| 386 | // * Vector incrementing address -> vector masked store |
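| | /// Fold a masked.scatter with a constant mask: erase it when the mask is all |
| | /// zeros, and fold scatters to a splatted pointer into a single scalar store |
| | /// (all lanes write the same location, so only the last active lane matters). |
| | /// Inactive lanes are also used to simplify the value and pointer operands. |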
| 387 | Instruction *InstCombinerImpl::simplifyMaskedScatter(IntrinsicInst &II) { |
| 388 | auto *ConstMask = dyn_cast<Constant>(Val: II.getArgOperand(i: 3)); |
| 389 | if (!ConstMask) |
| 390 | return nullptr; |
| 391 | |
| 392 | // If the mask is all zeros, a scatter does nothing. |
| 393 | if (ConstMask->isNullValue()) |
| 394 | return eraseInstFromFunction(I&: II); |
| 395 | |
| 396 | // Vector splat address -> scalar store |
| 397 | if (auto *SplatPtr = getSplatValue(V: II.getArgOperand(i: 1))) { |
| 398 | // scatter(splat(value), splat(ptr), non-zero-mask) -> store value, ptr |
| 399 | if (auto *SplatValue = getSplatValue(V: II.getArgOperand(i: 0))) { |
| 400 | if (maskContainsAllOneOrUndef(Mask: ConstMask)) { |
| 401 | Align Alignment = |
| 402 | cast<ConstantInt>(Val: II.getArgOperand(i: 2))->getAlignValue(); |
| 403 | StoreInst *S = new StoreInst(SplatValue, SplatPtr, /*IsVolatile=*/false, |
| 404 | Alignment); |
| 405 | S->copyMetadata(SrcInst: II); |
| 406 | return S; |
| 407 | } |
| 408 | } |
| 409 | // scatter(vector, splat(ptr), splat(true)) -> store extract(vector, |
| 410 | // lastlane), ptr |
| 411 | if (ConstMask->isAllOnesValue()) { |
| 412 | Align Alignment = cast<ConstantInt>(Val: II.getArgOperand(i: 2))->getAlignValue(); |
| 413 | VectorType *WideLoadTy = cast<VectorType>(Val: II.getArgOperand(i: 1)->getType()); |
| 414 | ElementCount VF = WideLoadTy->getElementCount(); |
| 415 | Value *RunTimeVF = Builder.CreateElementCount(Ty: Builder.getInt32Ty(), EC: VF); |
| 416 | Value *LastLane = Builder.CreateSub(LHS: RunTimeVF, RHS: Builder.getInt32(C: 1)); |
| 417 | Value *Extract = |
| 418 | Builder.CreateExtractElement(Vec: II.getArgOperand(i: 0), Idx: LastLane); |
| 419 | StoreInst *S = |
| 420 | new StoreInst(Extract, SplatPtr, /*IsVolatile=*/false, Alignment); |
| 421 | S->copyMetadata(SrcInst: II); |
| 422 | return S; |
| 423 | } |
| 424 | } |
| 425 | if (isa<ScalableVectorType>(Val: ConstMask->getType())) |
| 426 | return nullptr; |
| 427 | |
| 428 | // Use masked off lanes to simplify operands via SimplifyDemandedVectorElts |
| 429 | APInt DemandedElts = possiblyDemandedEltsInMask(Mask: ConstMask); |
| 430 | APInt PoisonElts(DemandedElts.getBitWidth(), 0); |
| 431 | if (Value *V = SimplifyDemandedVectorElts(V: II.getOperand(i_nocapture: 0), DemandedElts, |
| 432 | PoisonElts)) |
| 433 | return replaceOperand(I&: II, OpNum: 0, V); |
| 434 | if (Value *V = SimplifyDemandedVectorElts(V: II.getOperand(i_nocapture: 1), DemandedElts, |
| 435 | PoisonElts)) |
| 436 | return replaceOperand(I&: II, OpNum: 1, V); |
| 437 | |
| 438 | return nullptr; |
| 439 | } |
| 440 | |
| 441 | /// This function transforms launder.invariant.group and strip.invariant.group |
| 442 | /// like: |
| 443 | /// launder(launder(%x)) -> launder(%x) (the result is not the argument) |
| 444 | /// launder(strip(%x)) -> launder(%x) |
| 445 | /// strip(strip(%x)) -> strip(%x) (the result is not the argument) |
| 446 | /// strip(launder(%x)) -> strip(%x) |
| 447 | /// This is legal because it preserves the most recent information about |
| 448 | /// the presence or absence of invariant.group. |
| 449 | static Instruction *simplifyInvariantGroupIntrinsic(IntrinsicInst &II, |
| 450 | InstCombinerImpl &IC) { |
| 451 | auto *Arg = II.getArgOperand(i: 0); |
| 452 | auto *StrippedArg = Arg->stripPointerCasts(); |
| 453 | auto *StrippedInvariantGroupsArg = StrippedArg; |
| 454 | while (auto *Intr = dyn_cast<IntrinsicInst>(Val: StrippedInvariantGroupsArg)) { |
| 455 | if (Intr->getIntrinsicID() != Intrinsic::launder_invariant_group && |
| 456 | Intr->getIntrinsicID() != Intrinsic::strip_invariant_group) |
| 457 | break; |
| 458 | StrippedInvariantGroupsArg = Intr->getArgOperand(i: 0)->stripPointerCasts(); |
| 459 | } |
| 460 | if (StrippedArg == StrippedInvariantGroupsArg) |
| 461 | return nullptr; // No launders/strips to remove. |
| 462 | |
| 463 | Value *Result = nullptr; |
| 464 | |
| 465 | if (II.getIntrinsicID() == Intrinsic::launder_invariant_group) |
| 466 | Result = IC.Builder.CreateLaunderInvariantGroup(Ptr: StrippedInvariantGroupsArg); |
| 467 | else if (II.getIntrinsicID() == Intrinsic::strip_invariant_group) |
| 468 | Result = IC.Builder.CreateStripInvariantGroup(Ptr: StrippedInvariantGroupsArg); |
| 469 | else |
| 470 | llvm_unreachable( |
| 471 | "simplifyInvariantGroupIntrinsic only handles launder and strip" ); |
| 472 | if (Result->getType()->getPointerAddressSpace() != |
| 473 | II.getType()->getPointerAddressSpace()) |
| 474 | Result = IC.Builder.CreateAddrSpaceCast(V: Result, DestTy: II.getType()); |
| 475 | |
| 476 | return cast<Instruction>(Val: Result); |
| 477 | } |
| 478 | |
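| | /// Fold calls to llvm.cttz/llvm.ctlz: swap the intrinsic across a bitreverse, |
| | /// drop operand operations that cannot change the result (e.g. negation or abs |
| | /// for cttz), narrow through zext, fold power-of-two and known-bits cases to |
| | /// simpler arithmetic or constants, and otherwise tighten the is_zero_poison |
| | /// flag or attach a range attribute. |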
| 479 | static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombinerImpl &IC) { |
| 480 | assert((II.getIntrinsicID() == Intrinsic::cttz || |
| 481 | II.getIntrinsicID() == Intrinsic::ctlz) && |
| 482 | "Expected cttz or ctlz intrinsic" ); |
| 483 | bool IsTZ = II.getIntrinsicID() == Intrinsic::cttz; |
| 484 | Value *Op0 = II.getArgOperand(i: 0); |
| 485 | Value *Op1 = II.getArgOperand(i: 1); |
| 486 | Value *X; |
| 487 | // ctlz(bitreverse(x)) -> cttz(x) |
| 488 | // cttz(bitreverse(x)) -> ctlz(x) |
| 489 | if (match(V: Op0, P: m_BitReverse(Op0: m_Value(V&: X)))) { |
| 490 | Intrinsic::ID ID = IsTZ ? Intrinsic::ctlz : Intrinsic::cttz; |
| 491 | Function *F = |
| 492 | Intrinsic::getOrInsertDeclaration(M: II.getModule(), id: ID, Tys: II.getType()); |
| 493 | return CallInst::Create(Func: F, Args: {X, II.getArgOperand(i: 1)}); |
| 494 | } |
| 495 | |
| 496 | if (II.getType()->isIntOrIntVectorTy(BitWidth: 1)) { |
| 497 | // ctlz/cttz i1 Op0 --> not Op0 |
| 498 | if (match(V: Op1, P: m_Zero())) |
| 499 | return BinaryOperator::CreateNot(Op: Op0); |
| 500 | // If zero is poison, then the input can be assumed to be "true", so the |
| 501 | // instruction simplifies to "false". |
| 502 | assert(match(Op1, m_One()) && "Expected ctlz/cttz operand to be 0 or 1"); |
| 503 | return IC.replaceInstUsesWith(I&: II, V: ConstantInt::getNullValue(Ty: II.getType())); |
| 504 | } |
| 505 | |
| 506 | // If ctlz/cttz is only used as a shift amount, set is_zero_poison to true. |
| 507 | if (II.hasOneUse() && match(V: Op1, P: m_Zero()) && |
| 508 | match(V: II.user_back(), P: m_Shift(L: m_Value(), R: m_Specific(V: &II)))) { |
| 509 | II.dropUBImplyingAttrsAndMetadata(); |
| 510 | return IC.replaceOperand(I&: II, OpNum: 1, V: IC.Builder.getTrue()); |
| 511 | } |
| 512 | |
| 513 | Constant *C; |
| 514 | |
| 515 | if (IsTZ) { |
| 516 | // cttz(-x) -> cttz(x) |
| 517 | if (match(V: Op0, P: m_Neg(V: m_Value(V&: X)))) |
| 518 | return IC.replaceOperand(I&: II, OpNum: 0, V: X); |
| 519 | |
| 520 | // cttz(-x & x) -> cttz(x) |
| 521 | if (match(V: Op0, P: m_c_And(L: m_Neg(V: m_Value(V&: X)), R: m_Deferred(V: X)))) |
| 522 | return IC.replaceOperand(I&: II, OpNum: 0, V: X); |
| 523 | |
| 524 | // cttz(sext(x)) -> cttz(zext(x)) |
| 525 | if (match(V: Op0, P: m_OneUse(SubPattern: m_SExt(Op: m_Value(V&: X))))) { |
| 526 | auto *Zext = IC.Builder.CreateZExt(V: X, DestTy: II.getType()); |
| 527 | auto *CttzZext = |
| 528 | IC.Builder.CreateBinaryIntrinsic(ID: Intrinsic::cttz, LHS: Zext, RHS: Op1); |
| 529 | return IC.replaceInstUsesWith(I&: II, V: CttzZext); |
| 530 | } |
| 531 | |
| 532 | // Zext doesn't change the number of trailing zeros, so narrow: |
| 533 | // cttz(zext(x)) -> zext(cttz(x)) if the 'ZeroIsPoison' parameter is 'true'. |
| 534 | if (match(V: Op0, P: m_OneUse(SubPattern: m_ZExt(Op: m_Value(V&: X)))) && match(V: Op1, P: m_One())) { |
| 535 | auto *Cttz = IC.Builder.CreateBinaryIntrinsic(ID: Intrinsic::cttz, LHS: X, |
| 536 | RHS: IC.Builder.getTrue()); |
| 537 | auto *ZextCttz = IC.Builder.CreateZExt(V: Cttz, DestTy: II.getType()); |
| 538 | return IC.replaceInstUsesWith(I&: II, V: ZextCttz); |
| 539 | } |
| 540 | |
| 541 | // cttz(abs(x)) -> cttz(x) |
| 542 | // cttz(nabs(x)) -> cttz(x) |
| 543 | Value *Y; |
| 544 | SelectPatternFlavor SPF = matchSelectPattern(V: Op0, LHS&: X, RHS&: Y).Flavor; |
| 545 | if (SPF == SPF_ABS || SPF == SPF_NABS) |
| 546 | return IC.replaceOperand(I&: II, OpNum: 0, V: X); |
| 547 | |
| 548 | if (match(V: Op0, P: m_Intrinsic<Intrinsic::abs>(Op0: m_Value(V&: X)))) |
| 549 | return IC.replaceOperand(I&: II, OpNum: 0, V: X); |
| 550 | |
| 551 | // cttz(shl(%const, %val), 1) --> add(cttz(%const, 1), %val) |
| 552 | if (match(V: Op0, P: m_Shl(L: m_ImmConstant(C), R: m_Value(V&: X))) && |
| 553 | match(V: Op1, P: m_One())) { |
| 554 | Value *ConstCttz = |
| 555 | IC.Builder.CreateBinaryIntrinsic(ID: Intrinsic::cttz, LHS: C, RHS: Op1); |
| 556 | return BinaryOperator::CreateAdd(V1: ConstCttz, V2: X); |
| 557 | } |
| 558 | |
| 559 | // cttz(lshr exact (%const, %val), 1) --> sub(cttz(%const, 1), %val) |
| 560 | if (match(V: Op0, P: m_Exact(SubPattern: m_LShr(L: m_ImmConstant(C), R: m_Value(V&: X)))) && |
| 561 | match(V: Op1, P: m_One())) { |
| 562 | Value *ConstCttz = |
| 563 | IC.Builder.CreateBinaryIntrinsic(ID: Intrinsic::cttz, LHS: C, RHS: Op1); |
| 564 | return BinaryOperator::CreateSub(V1: ConstCttz, V2: X); |
| 565 | } |
| 566 | |
| 567 | // cttz(add(lshr(UINT_MAX, %val), 1)) --> sub(width, %val) |
| 568 | if (match(V: Op0, P: m_Add(L: m_LShr(L: m_AllOnes(), R: m_Value(V&: X)), R: m_One()))) { |
| 569 | Value *Width = |
| 570 | ConstantInt::get(Ty: II.getType(), V: II.getType()->getScalarSizeInBits()); |
| 571 | return BinaryOperator::CreateSub(V1: Width, V2: X); |
| 572 | } |
| 573 | } else { |
| 574 | // ctlz(lshr(%const, %val), 1) --> add(ctlz(%const, 1), %val) |
| 575 | if (match(V: Op0, P: m_LShr(L: m_ImmConstant(C), R: m_Value(V&: X))) && |
| 576 | match(V: Op1, P: m_One())) { |
| 577 | Value *ConstCtlz = |
| 578 | IC.Builder.CreateBinaryIntrinsic(ID: Intrinsic::ctlz, LHS: C, RHS: Op1); |
| 579 | return BinaryOperator::CreateAdd(V1: ConstCtlz, V2: X); |
| 580 | } |
| 581 | |
| 582 | // ctlz(shl nuw (%const, %val), 1) --> sub(ctlz(%const, 1), %val) |
| 583 | if (match(V: Op0, P: m_NUWShl(L: m_ImmConstant(C), R: m_Value(V&: X))) && |
| 584 | match(V: Op1, P: m_One())) { |
| 585 | Value *ConstCtlz = |
| 586 | IC.Builder.CreateBinaryIntrinsic(ID: Intrinsic::ctlz, LHS: C, RHS: Op1); |
| 587 | return BinaryOperator::CreateSub(V1: ConstCtlz, V2: X); |
| 588 | } |
| 589 | } |
| 590 | |
| 591 | // cttz(Pow2) -> Log2(Pow2) |
| 592 | // ctlz(Pow2) -> BitWidth - 1 - Log2(Pow2) |
| 593 | if (auto *R = IC.tryGetLog2(Op: Op0, AssumeNonZero: match(V: Op1, P: m_One()))) { |
| 594 | if (IsTZ) |
| 595 | return IC.replaceInstUsesWith(I&: II, V: R); |
| 596 | BinaryOperator *BO = BinaryOperator::CreateSub( |
| 597 | V1: ConstantInt::get(Ty: R->getType(), V: R->getType()->getScalarSizeInBits() - 1), |
| 598 | V2: R); |
| 599 | BO->setHasNoSignedWrap(); |
| 600 | BO->setHasNoUnsignedWrap(); |
| 601 | return BO; |
| 602 | } |
| 603 | |
| 604 | KnownBits Known = IC.computeKnownBits(V: Op0, CxtI: &II); |
| 605 | |
| 606 | // Create a mask for bits above (ctlz) or below (cttz) the first known one. |
| 607 | unsigned PossibleZeros = IsTZ ? Known.countMaxTrailingZeros() |
| 608 | : Known.countMaxLeadingZeros(); |
| 609 | unsigned DefiniteZeros = IsTZ ? Known.countMinTrailingZeros() |
| 610 | : Known.countMinLeadingZeros(); |
| 611 | |
| 612 | // If all bits above (ctlz) or below (cttz) the first known one are known |
| 613 | // zero, this value is constant. |
| 614 | // FIXME: This should be in InstSimplify because we're replacing an |
| 615 | // instruction with a constant. |
| 616 | if (PossibleZeros == DefiniteZeros) { |
| 617 | auto *C = ConstantInt::get(Ty: Op0->getType(), V: DefiniteZeros); |
| 618 | return IC.replaceInstUsesWith(I&: II, V: C); |
| 619 | } |
| 620 | |
| 621 | // If the input to cttz/ctlz is known to be non-zero, |
| 622 | // then change the 'ZeroIsPoison' parameter to 'true' |
| 623 | // because we know the zero behavior can't affect the result. |
| 624 | if (!Known.One.isZero() || |
| 625 | isKnownNonZero(V: Op0, Q: IC.getSimplifyQuery().getWithInstruction(I: &II))) { |
| 626 | if (!match(V: II.getArgOperand(i: 1), P: m_One())) |
| 627 | return IC.replaceOperand(I&: II, OpNum: 1, V: IC.Builder.getTrue()); |
| 628 | } |
| 629 | |
| 630 | // Add range attribute since known bits can't completely reflect what we know. |
| 631 | unsigned BitWidth = Op0->getType()->getScalarSizeInBits(); |
| 632 | if (BitWidth != 1 && !II.hasRetAttr(Kind: Attribute::Range) && |
| 633 | !II.getMetadata(KindID: LLVMContext::MD_range)) { |
| 634 | ConstantRange Range(APInt(BitWidth, DefiniteZeros), |
| 635 | APInt(BitWidth, PossibleZeros + 1)); |
| 636 | II.addRangeRetAttr(CR: Range); |
| 637 | return &II; |
| 638 | } |
| 639 | |
| 640 | return nullptr; |
| 641 | } |
| 642 | |
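| | /// Fold calls to llvm.ctpop: look through bit-permuting operations (bswap, |
| | /// bitreverse, rotates), recognize cttz-style idioms such as ctpop(x | -x) and |
| | /// ctpop(~x & (x - 1)), narrow through zext, fold power-of-two operands to an |
| | /// icmp or shift, and otherwise attach a range attribute from the known bits. |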
| 643 | static Instruction *foldCtpop(IntrinsicInst &II, InstCombinerImpl &IC) { |
| 644 | assert(II.getIntrinsicID() == Intrinsic::ctpop && |
| 645 | "Expected ctpop intrinsic" ); |
| 646 | Type *Ty = II.getType(); |
| 647 | unsigned BitWidth = Ty->getScalarSizeInBits(); |
| 648 | Value *Op0 = II.getArgOperand(i: 0); |
| 649 | Value *X, *Y; |
| 650 | |
| 651 | // ctpop(bitreverse(x)) -> ctpop(x) |
| 652 | // ctpop(bswap(x)) -> ctpop(x) |
| 653 | if (match(V: Op0, P: m_BitReverse(Op0: m_Value(V&: X))) || match(V: Op0, P: m_BSwap(Op0: m_Value(V&: X)))) |
| 654 | return IC.replaceOperand(I&: II, OpNum: 0, V: X); |
| 655 | |
| 656 | // ctpop(rot(x)) -> ctpop(x) |
| 657 | if ((match(V: Op0, P: m_FShl(Op0: m_Value(V&: X), Op1: m_Value(V&: Y), Op2: m_Value())) || |
| 658 | match(V: Op0, P: m_FShr(Op0: m_Value(V&: X), Op1: m_Value(V&: Y), Op2: m_Value()))) && |
| 659 | X == Y) |
| 660 | return IC.replaceOperand(I&: II, OpNum: 0, V: X); |
| 661 | |
| 662 | // ctpop(x | -x) -> bitwidth - cttz(x, false) |
| 663 | if (Op0->hasOneUse() && |
| 664 | match(V: Op0, P: m_c_Or(L: m_Value(V&: X), R: m_Neg(V: m_Deferred(V: X))))) { |
| 665 | auto *Cttz = IC.Builder.CreateIntrinsic(ID: Intrinsic::cttz, Types: Ty, |
| 666 | Args: {X, IC.Builder.getFalse()}); |
| 667 | auto *Bw = ConstantInt::get(Ty, V: APInt(BitWidth, BitWidth)); |
| 668 | return IC.replaceInstUsesWith(I&: II, V: IC.Builder.CreateSub(LHS: Bw, RHS: Cttz)); |
| 669 | } |
| 670 | |
| 671 | // ctpop(~x & (x - 1)) -> cttz(x, false) |
| 672 | if (match(V: Op0, |
| 673 | P: m_c_And(L: m_Not(V: m_Value(V&: X)), R: m_Add(L: m_Deferred(V: X), R: m_AllOnes())))) { |
| 674 | Function *F = |
| 675 | Intrinsic::getOrInsertDeclaration(M: II.getModule(), id: Intrinsic::cttz, Tys: Ty); |
| 676 | return CallInst::Create(Func: F, Args: {X, IC.Builder.getFalse()}); |
| 677 | } |
| 678 | |
| 679 | // Zext doesn't change the number of set bits, so narrow: |
| 680 | // ctpop (zext X) --> zext (ctpop X) |
| 681 | if (match(V: Op0, P: m_OneUse(SubPattern: m_ZExt(Op: m_Value(V&: X))))) { |
| 682 | Value *NarrowPop = IC.Builder.CreateUnaryIntrinsic(ID: Intrinsic::ctpop, V: X); |
| 683 | return CastInst::Create(Instruction::ZExt, S: NarrowPop, Ty); |
| 684 | } |
| 685 | |
| 686 | KnownBits Known(BitWidth); |
| 687 | IC.computeKnownBits(V: Op0, Known, CxtI: &II); |
| 688 | |
| 689 | // If all bits are zero except for exactly one fixed bit, then the result |
| 690 | // must be 0 or 1, and we can get that answer by shifting to LSB: |
| 691 | // ctpop (X & 32) --> (X & 32) >> 5 |
| 692 | // TODO: Investigate removing this, as it's likely unnecessary given the |
| 693 | // `isKnownToBeAPowerOfTwo` check below. |
| 694 | if ((~Known.Zero).isPowerOf2()) |
| 695 | return BinaryOperator::CreateLShr( |
| 696 | V1: Op0, V2: ConstantInt::get(Ty, V: (~Known.Zero).exactLogBase2())); |
| 697 | |
| 698 | // More generally we can also handle non-constant power of 2 patterns such as |
| 699 | // shl/shr(Pow2, X), (X & -X), etc... by transforming: |
| 700 | // ctpop(Pow2OrZero) --> icmp ne X, 0 |
| 701 | if (IC.isKnownToBeAPowerOfTwo(V: Op0, /* OrZero */ true)) |
| 702 | return CastInst::Create(Instruction::ZExt, |
| 703 | S: IC.Builder.CreateICmp(P: ICmpInst::ICMP_NE, LHS: Op0, |
| 704 | RHS: Constant::getNullValue(Ty)), |
| 705 | Ty); |
| 706 | |
| 707 | // Add range attribute since known bits can't completely reflect what we know. |
| 708 | if (BitWidth != 1) { |
| 709 | ConstantRange OldRange = |
| 710 | II.getRange().value_or(u: ConstantRange::getFull(BitWidth)); |
| 711 | |
| 712 | unsigned Lower = Known.countMinPopulation(); |
| 713 | unsigned Upper = Known.countMaxPopulation() + 1; |
| 714 | |
| 715 | if (Lower == 0 && OldRange.contains(Val: APInt::getZero(numBits: BitWidth)) && |
| 716 | isKnownNonZero(V: Op0, Q: IC.getSimplifyQuery().getWithInstruction(I: &II))) |
| 717 | Lower = 1; |
| 718 | |
| 719 | ConstantRange Range(APInt(BitWidth, Lower), APInt(BitWidth, Upper)); |
| 720 | Range = Range.intersectWith(CR: OldRange, Type: ConstantRange::Unsigned); |
| 721 | |
| 722 | if (Range != OldRange) { |
| 723 | II.addRangeRetAttr(CR: Range); |
| 724 | return &II; |
| 725 | } |
| 726 | } |
| 727 | |
| 728 | return nullptr; |
| 729 | } |
| 730 | |
| 731 | /// Convert a table lookup to shufflevector if the mask is constant. |
| 732 | /// This could benefit tbl1 if the mask is { 7,6,5,4,3,2,1,0 }, in |
| 733 | /// which case we could lower the shufflevector with rev64 instructions |
| 734 | /// as it's actually a byte reverse. |
| 735 | static Value *simplifyNeonTbl1(const IntrinsicInst &II, |
| 736 | InstCombiner::BuilderTy &Builder) { |
| 737 | // Bail out if the mask is not a constant. |
| 738 | auto *C = dyn_cast<Constant>(Val: II.getArgOperand(i: 1)); |
| 739 | if (!C) |
| 740 | return nullptr; |
| 741 | |
| 742 | auto *VecTy = cast<FixedVectorType>(Val: II.getType()); |
| 743 | unsigned NumElts = VecTy->getNumElements(); |
| 744 | |
| 745 | // Only perform this transformation for <8 x i8> vector types. |
| 746 | if (!VecTy->getElementType()->isIntegerTy(Bitwidth: 8) || NumElts != 8) |
| 747 | return nullptr; |
| 748 | |
| 749 | int Indexes[8]; |
| 750 | |
| 751 | for (unsigned I = 0; I < NumElts; ++I) { |
| 752 | Constant *COp = C->getAggregateElement(Elt: I); |
| 753 | |
| 754 | if (!COp || !isa<ConstantInt>(Val: COp)) |
| 755 | return nullptr; |
| 756 | |
| 757 | Indexes[I] = cast<ConstantInt>(Val: COp)->getLimitedValue(); |
| 758 | |
| 759 | // Make sure the mask indices are in range. |
| 760 | if ((unsigned)Indexes[I] >= NumElts) |
| 761 | return nullptr; |
| 762 | } |
| 763 | |
| 764 | auto *V1 = II.getArgOperand(i: 0); |
| 765 | auto *V2 = Constant::getNullValue(Ty: V1->getType()); |
| 766 | return Builder.CreateShuffleVector(V1, V2, Mask: ArrayRef(Indexes)); |
| 767 | } |
| 768 | |
| 769 | // Returns true iff the 2 intrinsics have the same operands, limiting the |
| 770 | // comparison to the first NumOperands. |
| 771 | static bool haveSameOperands(const IntrinsicInst &I, const IntrinsicInst &E, |
| 772 | unsigned NumOperands) { |
| 773 | assert(I.arg_size() >= NumOperands && "Not enough operands"); |
| 774 | assert(E.arg_size() >= NumOperands && "Not enough operands"); |
| 775 | for (unsigned i = 0; i < NumOperands; i++) |
| 776 | if (I.getArgOperand(i) != E.getArgOperand(i)) |
| 777 | return false; |
| 778 | return true; |
| 779 | } |
| 780 | |
| 781 | // Remove trivially empty start/end intrinsic ranges, i.e. a start |
| 782 | // immediately followed by an end (ignoring debuginfo or other |
| 783 | // start/end intrinsics in between). As this handles only the most trivial |
| 784 | // cases, tracking the nesting level is not needed: |
| 785 | // |
| 786 | // call @llvm.foo.start(i1 0) |
| 787 | // call @llvm.foo.start(i1 0) ; This one won't be skipped: it will be removed |
| 788 | // call @llvm.foo.end(i1 0) |
| 789 | // call @llvm.foo.end(i1 0) ; &I |
| 790 | static bool |
| 791 | removeTriviallyEmptyRange(IntrinsicInst &EndI, InstCombinerImpl &IC, |
| 792 | std::function<bool(const IntrinsicInst &)> IsStart) { |
| 793 | // We start from the end intrinsic and scan backwards, so that InstCombine |
| 794 | // has already processed (and potentially removed) all the instructions |
| 795 | // before the end intrinsic. |
| 796 | BasicBlock::reverse_iterator BI(EndI), BE(EndI.getParent()->rend()); |
| 797 | for (; BI != BE; ++BI) { |
| 798 | if (auto *I = dyn_cast<IntrinsicInst>(Val: &*BI)) { |
| 799 | if (I->isDebugOrPseudoInst() || |
| 800 | I->getIntrinsicID() == EndI.getIntrinsicID()) |
| 801 | continue; |
| 802 | if (IsStart(*I)) { |
| 803 | if (haveSameOperands(I: EndI, E: *I, NumOperands: EndI.arg_size())) { |
| 804 | IC.eraseInstFromFunction(I&: *I); |
| 805 | IC.eraseInstFromFunction(I&: EndI); |
| 806 | return true; |
| 807 | } |
| 808 | // Skip start intrinsics that don't pair with this end intrinsic. |
| 809 | continue; |
| 810 | } |
| 811 | } |
| 812 | break; |
| 813 | } |
| 814 | |
| 815 | return false; |
| 816 | } |
| 817 | |
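| | /// Erase a va_end together with the va_start/va_copy of the same va_list that |
| | /// immediately precedes it, since such a range is trivially empty. |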
| 818 | Instruction *InstCombinerImpl::visitVAEndInst(VAEndInst &I) { |
| 819 | removeTriviallyEmptyRange(EndI&: I, IC&: *this, IsStart: [&I](const IntrinsicInst &II) { |
| 820 | // Bail out on the case where the source va_list of a va_copy is destroyed |
| 821 | // immediately by a follow-up va_end. |
| 822 | return II.getIntrinsicID() == Intrinsic::vastart || |
| 823 | (II.getIntrinsicID() == Intrinsic::vacopy && |
| 824 | I.getArgOperand(i: 0) != II.getArgOperand(i: 1)); |
| 825 | }); |
| 826 | return nullptr; |
| 827 | } |
| 828 | |
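| | /// If the call's first argument is a constant and its second is not, swap them. |
| | /// This canonicalizes commutative intrinsics so that constants sit on the RHS. |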
| 829 | static CallInst *canonicalizeConstantArg0ToArg1(CallInst &Call) { |
| 830 | assert(Call.arg_size() > 1 && "Need at least 2 args to swap"); |
| 831 | Value *Arg0 = Call.getArgOperand(i: 0), *Arg1 = Call.getArgOperand(i: 1); |
| 832 | if (isa<Constant>(Val: Arg0) && !isa<Constant>(Val: Arg1)) { |
| 833 | Call.setArgOperand(i: 0, v: Arg1); |
| 834 | Call.setArgOperand(i: 1, v: Arg0); |
| 835 | return &Call; |
| 836 | } |
| 837 | return nullptr; |
| 838 | } |
| 839 | |
| 840 | /// Creates a result tuple for an overflow intrinsic \p II with a given |
| 841 | /// \p Result and a constant \p Overflow value. |
| 842 | static Instruction *createOverflowTuple(IntrinsicInst *II, Value *Result, |
| 843 | Constant *Overflow) { |
| 844 | Constant *V[] = {PoisonValue::get(T: Result->getType()), Overflow}; |
| 845 | StructType *ST = cast<StructType>(Val: II->getType()); |
| 846 | Constant *Struct = ConstantStruct::get(T: ST, V); |
| 847 | return InsertValueInst::Create(Agg: Struct, Val: Result, Idxs: 0); |
| 848 | } |
| 849 | |
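| | /// Common folding for the *.with.overflow intrinsics: build the result tuple |
| | /// directly when the overflow bit can be determined statically, or, when an |
| | /// assume proves the overflow bit is false, replace the checked arithmetic with |
| | /// a plain nsw/nuw binary operator. |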
| 850 | Instruction * |
| 851 | InstCombinerImpl::foldIntrinsicWithOverflowCommon(IntrinsicInst *II) { |
| 852 | WithOverflowInst *WO = cast<WithOverflowInst>(Val: II); |
| 853 | Value *OperationResult = nullptr; |
| 854 | Constant *OverflowResult = nullptr; |
| 855 | if (OptimizeOverflowCheck(BinaryOp: WO->getBinaryOp(), IsSigned: WO->isSigned(), LHS: WO->getLHS(), |
| 856 | RHS: WO->getRHS(), CtxI&: *WO, OperationResult, OverflowResult)) |
| 857 | return createOverflowTuple(II: WO, Result: OperationResult, Overflow: OverflowResult); |
| 858 | |
| 859 | // See whether we can optimize the overflow check with assumption information. |
| 860 | for (User *U : WO->users()) { |
| 861 | if (!match(V: U, P: m_ExtractValue<1>(V: m_Value()))) |
| 862 | continue; |
| 863 | |
| 864 | for (auto &AssumeVH : AC.assumptionsFor(V: U)) { |
| 865 | if (!AssumeVH) |
| 866 | continue; |
| 867 | CallInst *I = cast<CallInst>(Val&: AssumeVH); |
| 868 | if (!match(V: I->getArgOperand(i: 0), P: m_Not(V: m_Specific(V: U)))) |
| 869 | continue; |
| 870 | if (!isValidAssumeForContext(I, CxtI: II, /*DT=*/nullptr, |
| 871 | /*AllowEphemerals=*/true)) |
| 872 | continue; |
| 873 | Value *Result = |
| 874 | Builder.CreateBinOp(Opc: WO->getBinaryOp(), LHS: WO->getLHS(), RHS: WO->getRHS()); |
| 875 | Result->takeName(V: WO); |
| 876 | if (auto *Inst = dyn_cast<Instruction>(Val: Result)) { |
| 877 | if (WO->isSigned()) |
| 878 | Inst->setHasNoSignedWrap(); |
| 879 | else |
| 880 | Inst->setHasNoUnsignedWrap(); |
| 881 | } |
| 882 | return createOverflowTuple(II: WO, Result, |
| 883 | Overflow: ConstantInt::getFalse(Ty: U->getType())); |
| 884 | } |
| 885 | } |
| 886 | |
| 887 | return nullptr; |
| 888 | } |
| 889 | |
| 890 | static bool inputDenormalIsIEEE(const Function &F, const Type *Ty) { |
| 891 | Ty = Ty->getScalarType(); |
| 892 | return F.getDenormalMode(FPType: Ty->getFltSemantics()).Input == DenormalMode::IEEE; |
| 893 | } |
| 894 | |
| 895 | static bool inputDenormalIsDAZ(const Function &F, const Type *Ty) { |
| 896 | Ty = Ty->getScalarType(); |
| 897 | return F.getDenormalMode(FPType: Ty->getFltSemantics()).inputsAreZero(); |
| 898 | } |
| 899 | |
| 900 | /// \returns the compare predicate type if the test performed by |
| 901 | /// llvm.is.fpclass(x, \p Mask) is equivalent to fcmp o__ x, 0.0 with the |
| 902 | /// floating-point environment assumed for \p F for type \p Ty |
| 903 | static FCmpInst::Predicate fpclassTestIsFCmp0(FPClassTest Mask, |
| 904 | const Function &F, Type *Ty) { |
| 905 | switch (static_cast<unsigned>(Mask)) { |
| 906 | case fcZero: |
| 907 | if (inputDenormalIsIEEE(F, Ty)) |
| 908 | return FCmpInst::FCMP_OEQ; |
| 909 | break; |
| 910 | case fcZero | fcSubnormal: |
| 911 | if (inputDenormalIsDAZ(F, Ty)) |
| 912 | return FCmpInst::FCMP_OEQ; |
| 913 | break; |
| 914 | case fcPositive | fcNegZero: |
| 915 | if (inputDenormalIsIEEE(F, Ty)) |
| 916 | return FCmpInst::FCMP_OGE; |
| 917 | break; |
| 918 | case fcPositive | fcNegZero | fcNegSubnormal: |
| 919 | if (inputDenormalIsDAZ(F, Ty)) |
| 920 | return FCmpInst::FCMP_OGE; |
| 921 | break; |
| 922 | case fcPosSubnormal | fcPosNormal | fcPosInf: |
| 923 | if (inputDenormalIsIEEE(F, Ty)) |
| 924 | return FCmpInst::FCMP_OGT; |
| 925 | break; |
| 926 | case fcNegative | fcPosZero: |
| 927 | if (inputDenormalIsIEEE(F, Ty)) |
| 928 | return FCmpInst::FCMP_OLE; |
| 929 | break; |
| 930 | case fcNegative | fcPosZero | fcPosSubnormal: |
| 931 | if (inputDenormalIsDAZ(F, Ty)) |
| 932 | return FCmpInst::FCMP_OLE; |
| 933 | break; |
| 934 | case fcNegSubnormal | fcNegNormal | fcNegInf: |
| 935 | if (inputDenormalIsIEEE(F, Ty)) |
| 936 | return FCmpInst::FCMP_OLT; |
| 937 | break; |
| 938 | case fcPosNormal | fcPosInf: |
| 939 | if (inputDenormalIsDAZ(F, Ty)) |
| 940 | return FCmpInst::FCMP_OGT; |
| 941 | break; |
| 942 | case fcNegNormal | fcNegInf: |
| 943 | if (inputDenormalIsDAZ(F, Ty)) |
| 944 | return FCmpInst::FCMP_OLT; |
| 945 | break; |
| 946 | case ~fcZero & ~fcNan: |
| 947 | if (inputDenormalIsIEEE(F, Ty)) |
| 948 | return FCmpInst::FCMP_ONE; |
| 949 | break; |
| 950 | case ~(fcZero | fcSubnormal) & ~fcNan: |
| 951 | if (inputDenormalIsDAZ(F, Ty)) |
| 952 | return FCmpInst::FCMP_ONE; |
| 953 | break; |
| 954 | default: |
| 955 | break; |
| 956 | } |
| 957 | |
| 958 | return FCmpInst::BAD_FCMP_PREDICATE; |
| 959 | } |
| 960 | |
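| | /// Fold calls to llvm.is.fpclass: push the test through fneg/fabs, lower tests |
| | /// that are equivalent to an fcmp against +/-inf or 0.0 when FP exceptions do |
| | /// not matter, and shrink the class mask (possibly folding to a constant) using |
| | /// the known floating-point classes of the operand. |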
| 961 | Instruction *InstCombinerImpl::foldIntrinsicIsFPClass(IntrinsicInst &II) { |
| 962 | Value *Src0 = II.getArgOperand(i: 0); |
| 963 | Value *Src1 = II.getArgOperand(i: 1); |
| 964 | const ConstantInt *CMask = cast<ConstantInt>(Val: Src1); |
| 965 | FPClassTest Mask = static_cast<FPClassTest>(CMask->getZExtValue()); |
| 966 | const bool IsUnordered = (Mask & fcNan) == fcNan; |
| 967 | const bool IsOrdered = (Mask & fcNan) == fcNone; |
| 968 | const FPClassTest OrderedMask = Mask & ~fcNan; |
| 969 | const FPClassTest OrderedInvertedMask = ~OrderedMask & ~fcNan; |
| 970 | |
| 971 | const bool IsStrict = |
| 972 | II.getFunction()->getAttributes().hasFnAttr(Kind: Attribute::StrictFP); |
| 973 | |
| 974 | Value *FNegSrc; |
| 975 | if (match(V: Src0, P: m_FNeg(X: m_Value(V&: FNegSrc)))) { |
| 976 | // is.fpclass (fneg x), mask -> is.fpclass x, (fneg mask) |
| 977 | |
| 978 | II.setArgOperand(i: 1, v: ConstantInt::get(Ty: Src1->getType(), V: fneg(Mask))); |
| 979 | return replaceOperand(I&: II, OpNum: 0, V: FNegSrc); |
| 980 | } |
| 981 | |
| 982 | Value *FAbsSrc; |
| 983 | if (match(V: Src0, P: m_FAbs(Op0: m_Value(V&: FAbsSrc)))) { |
| 984 | II.setArgOperand(i: 1, v: ConstantInt::get(Ty: Src1->getType(), V: inverse_fabs(Mask))); |
| 985 | return replaceOperand(I&: II, OpNum: 0, V: FAbsSrc); |
| 986 | } |
| 987 | |
| 988 | if ((OrderedMask == fcInf || OrderedInvertedMask == fcInf) && |
| 989 | (IsOrdered || IsUnordered) && !IsStrict) { |
| 990 | // is.fpclass(x, fcInf) -> fcmp oeq fabs(x), +inf |
| 991 | // is.fpclass(x, ~fcInf) -> fcmp one fabs(x), +inf |
| 992 | // is.fpclass(x, fcInf|fcNan) -> fcmp ueq fabs(x), +inf |
| 993 | // is.fpclass(x, ~(fcInf|fcNan)) -> fcmp une fabs(x), +inf |
| 994 | Constant *Inf = ConstantFP::getInfinity(Ty: Src0->getType()); |
| 995 | FCmpInst::Predicate Pred = |
| 996 | IsUnordered ? FCmpInst::FCMP_UEQ : FCmpInst::FCMP_OEQ; |
| 997 | if (OrderedInvertedMask == fcInf) |
| 998 | Pred = IsUnordered ? FCmpInst::FCMP_UNE : FCmpInst::FCMP_ONE; |
| 999 | |
| 1000 | Value *Fabs = Builder.CreateUnaryIntrinsic(ID: Intrinsic::fabs, V: Src0); |
| 1001 | Value *CmpInf = Builder.CreateFCmp(P: Pred, LHS: Fabs, RHS: Inf); |
| 1002 | CmpInf->takeName(V: &II); |
| 1003 | return replaceInstUsesWith(I&: II, V: CmpInf); |
| 1004 | } |
| 1005 | |
| 1006 | if ((OrderedMask == fcPosInf || OrderedMask == fcNegInf) && |
| 1007 | (IsOrdered || IsUnordered) && !IsStrict) { |
| 1008 | // is.fpclass(x, fcPosInf) -> fcmp oeq x, +inf |
| 1009 | // is.fpclass(x, fcNegInf) -> fcmp oeq x, -inf |
| 1010 | // is.fpclass(x, fcPosInf|fcNan) -> fcmp ueq x, +inf |
| 1011 | // is.fpclass(x, fcNegInf|fcNan) -> fcmp ueq x, -inf |
| 1012 | Constant *Inf = |
| 1013 | ConstantFP::getInfinity(Ty: Src0->getType(), Negative: OrderedMask == fcNegInf); |
| 1014 | Value *EqInf = IsUnordered ? Builder.CreateFCmpUEQ(LHS: Src0, RHS: Inf) |
| 1015 | : Builder.CreateFCmpOEQ(LHS: Src0, RHS: Inf); |
| 1016 | |
| 1017 | EqInf->takeName(V: &II); |
| 1018 | return replaceInstUsesWith(I&: II, V: EqInf); |
| 1019 | } |
| 1020 | |
| 1021 | if ((OrderedInvertedMask == fcPosInf || OrderedInvertedMask == fcNegInf) && |
| 1022 | (IsOrdered || IsUnordered) && !IsStrict) { |
| 1023 | // is.fpclass(x, ~fcPosInf) -> fcmp one x, +inf |
| 1024 | // is.fpclass(x, ~fcNegInf) -> fcmp one x, -inf |
| 1025 | // is.fpclass(x, ~fcPosInf|fcNan) -> fcmp une x, +inf |
| 1026 | // is.fpclass(x, ~fcNegInf|fcNan) -> fcmp une x, -inf |
| 1027 | Constant *Inf = ConstantFP::getInfinity(Ty: Src0->getType(), |
| 1028 | Negative: OrderedInvertedMask == fcNegInf); |
| 1029 | Value *NeInf = IsUnordered ? Builder.CreateFCmpUNE(LHS: Src0, RHS: Inf) |
| 1030 | : Builder.CreateFCmpONE(LHS: Src0, RHS: Inf); |
| 1031 | NeInf->takeName(V: &II); |
| 1032 | return replaceInstUsesWith(I&: II, V: NeInf); |
| 1033 | } |
| 1034 | |
| 1035 | if (Mask == fcNan && !IsStrict) { |
| 1036 | // Equivalent of isnan. Replace with standard fcmp if we don't care about FP |
| 1037 | // exceptions. |
| 1038 | Value *IsNan = |
| 1039 | Builder.CreateFCmpUNO(LHS: Src0, RHS: ConstantFP::getZero(Ty: Src0->getType())); |
| 1040 | IsNan->takeName(V: &II); |
| 1041 | return replaceInstUsesWith(I&: II, V: IsNan); |
| 1042 | } |
| 1043 | |
| 1044 | if (Mask == (~fcNan & fcAllFlags) && !IsStrict) { |
| 1045 | // Equivalent of !isnan. Replace with standard fcmp. |
| 1046 | Value *FCmp = |
| 1047 | Builder.CreateFCmpORD(LHS: Src0, RHS: ConstantFP::getZero(Ty: Src0->getType())); |
| 1048 | FCmp->takeName(V: &II); |
| 1049 | return replaceInstUsesWith(I&: II, V: FCmp); |
| 1050 | } |
| 1051 | |
| 1052 | FCmpInst::Predicate PredType = FCmpInst::BAD_FCMP_PREDICATE; |
| 1053 | |
| 1054 | // Try to replace with an fcmp with 0 |
| 1055 | // |
| 1056 | // is.fpclass(x, fcZero) -> fcmp oeq x, 0.0 |
| 1057 | // is.fpclass(x, fcZero | fcNan) -> fcmp ueq x, 0.0 |
| 1058 | // is.fpclass(x, ~fcZero & ~fcNan) -> fcmp one x, 0.0 |
| 1059 | // is.fpclass(x, ~fcZero) -> fcmp une x, 0.0 |
| 1060 | // |
| 1061 | // is.fpclass(x, fcPosSubnormal | fcPosNormal | fcPosInf) -> fcmp ogt x, 0.0 |
| 1062 | // is.fpclass(x, fcPositive | fcNegZero) -> fcmp oge x, 0.0 |
| 1063 | // |
| 1064 | // is.fpclass(x, fcNegSubnormal | fcNegNormal | fcNegInf) -> fcmp olt x, 0.0 |
| 1065 | // is.fpclass(x, fcNegative | fcPosZero) -> fcmp ole x, 0.0 |
| 1066 | // |
| 1067 | if (!IsStrict && (IsOrdered || IsUnordered) && |
| 1068 | (PredType = fpclassTestIsFCmp0(Mask: OrderedMask, F: *II.getFunction(), |
| 1069 | Ty: Src0->getType())) != |
| 1070 | FCmpInst::BAD_FCMP_PREDICATE) { |
| 1071 | Constant *Zero = ConstantFP::getZero(Ty: Src0->getType()); |
| 1072 | // Equivalent of == 0. |
| 1073 | Value *FCmp = Builder.CreateFCmp( |
| 1074 | P: IsUnordered ? FCmpInst::getUnorderedPredicate(Pred: PredType) : PredType, |
| 1075 | LHS: Src0, RHS: Zero); |
| 1076 | |
| 1077 | FCmp->takeName(V: &II); |
| 1078 | return replaceInstUsesWith(I&: II, V: FCmp); |
| 1079 | } |
| 1080 | |
| 1081 | KnownFPClass Known = computeKnownFPClass(Val: Src0, Interested: Mask, CtxI: &II); |
| 1082 | |
| 1083 | // Clear test bits we know must be false from the source value. |
| 1084 | // fp_class (nnan x), qnan|snan|other -> fp_class (nnan x), other |
| 1085 | // fp_class (ninf x), ninf|pinf|other -> fp_class (ninf x), other |
| 1086 | if ((Mask & Known.KnownFPClasses) != Mask) { |
| 1087 | II.setArgOperand( |
| 1088 | i: 1, v: ConstantInt::get(Ty: Src1->getType(), V: Mask & Known.KnownFPClasses)); |
| 1089 | return &II; |
| 1090 | } |
| 1091 | |
| 1092 | // If none of the tests which can return false are possible, fold to true. |
| 1093 | // fp_class (nnan x), ~(qnan|snan) -> true |
| 1094 | // fp_class (ninf x), ~(ninf|pinf) -> true |
| 1095 | if (Mask == Known.KnownFPClasses) |
| 1096 | return replaceInstUsesWith(I&: II, V: ConstantInt::get(Ty: II.getType(), V: true)); |
| 1097 | |
| 1098 | return nullptr; |
| 1099 | } |
| 1100 | |
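| | /// Return true if \p Op is known negative, false if it is known non-negative, |
| | /// and std::nullopt if the sign cannot be determined. |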
| 1101 | static std::optional<bool> getKnownSign(Value *Op, const SimplifyQuery &SQ) { |
| 1102 | KnownBits Known = computeKnownBits(V: Op, Q: SQ); |
| 1103 | if (Known.isNonNegative()) |
| 1104 | return false; |
| 1105 | if (Known.isNegative()) |
| 1106 | return true; |
| 1107 | |
| 1108 | Value *X, *Y; |
| 1109 | if (match(V: Op, P: m_NSWSub(L: m_Value(V&: X), R: m_Value(V&: Y)))) |
| 1110 | return isImpliedByDomCondition(Pred: ICmpInst::ICMP_SLT, LHS: X, RHS: Y, ContextI: SQ.CxtI, DL: SQ.DL); |
| 1111 | |
| 1112 | return std::nullopt; |
| 1113 | } |
| 1114 | |
| 1115 | static std::optional<bool> getKnownSignOrZero(Value *Op, |
| 1116 | const SimplifyQuery &SQ) { |
| 1117 | if (std::optional<bool> Sign = getKnownSign(Op, SQ)) |
| 1118 | return Sign; |
| 1119 | |
| 1120 | Value *X, *Y; |
| 1121 | if (match(V: Op, P: m_NSWSub(L: m_Value(V&: X), R: m_Value(V&: Y)))) |
| 1122 | return isImpliedByDomCondition(Pred: ICmpInst::ICMP_SLE, LHS: X, RHS: Y, ContextI: SQ.CxtI, DL: SQ.DL); |
| 1123 | |
| 1124 | return std::nullopt; |
| 1125 | } |
| 1126 | |
| 1127 | /// Return true if two values \p Op0 and \p Op1 are known to have the same sign. |
| 1128 | static bool signBitMustBeTheSame(Value *Op0, Value *Op1, |
| 1129 | const SimplifyQuery &SQ) { |
| 1130 | std::optional<bool> Known1 = getKnownSign(Op: Op1, SQ); |
| 1131 | if (!Known1) |
| 1132 | return false; |
| 1133 | std::optional<bool> Known0 = getKnownSign(Op: Op0, SQ); |
| 1134 | if (!Known0) |
| 1135 | return false; |
| 1136 | return *Known0 == *Known1; |
| 1137 | } |
| 1138 | |
| 1139 | /// Try to canonicalize min/max(X + C0, C1) as min/max(X, C1 - C0) + C0. This |
| 1140 | /// can trigger other combines. |
| 1141 | static Instruction *moveAddAfterMinMax(IntrinsicInst *II, |
| 1142 | InstCombiner::BuilderTy &Builder) { |
| 1143 | Intrinsic::ID MinMaxID = II->getIntrinsicID(); |
| 1144 | assert((MinMaxID == Intrinsic::smax || MinMaxID == Intrinsic::smin || |
| 1145 | MinMaxID == Intrinsic::umax || MinMaxID == Intrinsic::umin) && |
| 1146 | "Expected a min or max intrinsic" ); |
| 1147 | |
| 1148 | // TODO: Match vectors with undef elements, but undef may not propagate. |
| 1149 | Value *Op0 = II->getArgOperand(i: 0), *Op1 = II->getArgOperand(i: 1); |
| 1150 | Value *X; |
| 1151 | const APInt *C0, *C1; |
| 1152 | if (!match(V: Op0, P: m_OneUse(SubPattern: m_Add(L: m_Value(V&: X), R: m_APInt(Res&: C0)))) || |
| 1153 | !match(V: Op1, P: m_APInt(Res&: C1))) |
| 1154 | return nullptr; |
| 1155 | |
| 1156 | // Check for necessary no-wrap and overflow constraints. |
| 1157 | bool IsSigned = MinMaxID == Intrinsic::smax || MinMaxID == Intrinsic::smin; |
| 1158 | auto *Add = cast<BinaryOperator>(Val: Op0); |
| 1159 | if ((IsSigned && !Add->hasNoSignedWrap()) || |
| 1160 | (!IsSigned && !Add->hasNoUnsignedWrap())) |
| 1161 | return nullptr; |
| 1162 | |
| 1163 | // If the constant difference overflows, then instsimplify should reduce the |
| 1164 | // min/max to the add or C1. |
| 1165 | bool Overflow; |
| 1166 | APInt CDiff = |
| 1167 | IsSigned ? C1->ssub_ov(RHS: *C0, Overflow) : C1->usub_ov(RHS: *C0, Overflow); |
| 1168 | assert(!Overflow && "Expected simplify of min/max"); |
| 1169 | |
| 1170 | // min/max (add X, C0), C1 --> add (min/max X, C1 - C0), C0 |
| 1171 | // Note: the "mismatched" no-overflow setting does not propagate. |
| 1172 | Constant *NewMinMaxC = ConstantInt::get(Ty: II->getType(), V: CDiff); |
| 1173 | Value *NewMinMax = Builder.CreateBinaryIntrinsic(ID: MinMaxID, LHS: X, RHS: NewMinMaxC); |
| 1174 | return IsSigned ? BinaryOperator::CreateNSWAdd(V1: NewMinMax, V2: Add->getOperand(i_nocapture: 1)) |
| 1175 | : BinaryOperator::CreateNUWAdd(V1: NewMinMax, V2: Add->getOperand(i_nocapture: 1)); |
| 1176 | } |
| 1177 | /// Match a sadd_sat or ssub_sat which is using min/max to clamp the value. |
| 1178 | Instruction *InstCombinerImpl::matchSAddSubSat(IntrinsicInst &MinMax1) { |
| 1179 | Type *Ty = MinMax1.getType(); |
| 1180 | |
| 1181 | // We are looking for a tree of: |
| 1182 | // max(INT_MIN, min(INT_MAX, add(sext(A), sext(B)))) |
| 1183 | // Where the min and max could be reversed |
| 1184 | Instruction *MinMax2; |
| 1185 | BinaryOperator *AddSub; |
| 1186 | const APInt *MinValue, *MaxValue; |
| 1187 | if (match(V: &MinMax1, P: m_SMin(L: m_Instruction(I&: MinMax2), R: m_APInt(Res&: MaxValue)))) { |
| 1188 | if (!match(V: MinMax2, P: m_SMax(L: m_BinOp(I&: AddSub), R: m_APInt(Res&: MinValue)))) |
| 1189 | return nullptr; |
| 1190 | } else if (match(V: &MinMax1, |
| 1191 | P: m_SMax(L: m_Instruction(I&: MinMax2), R: m_APInt(Res&: MinValue)))) { |
| 1192 | if (!match(V: MinMax2, P: m_SMin(L: m_BinOp(I&: AddSub), R: m_APInt(Res&: MaxValue)))) |
| 1193 | return nullptr; |
| 1194 | } else |
| 1195 | return nullptr; |
| 1196 | |
| 1197 | // Check that the constants clamp a saturate, and that the new type would be |
| 1198 | // sensible to convert to. |
| 1199 | if (!(*MaxValue + 1).isPowerOf2() || -*MinValue != *MaxValue + 1) |
| 1200 | return nullptr; |
| 1201 | // In what bitwidth can this be treated as saturating arithmetic?
| 1202 | unsigned NewBitWidth = (*MaxValue + 1).logBase2() + 1; |
| 1203 | // FIXME: This isn't quite right for vectors, but using the scalar type is a |
| 1204 | // good first approximation for what should be done there. |
| 1205 | if (!shouldChangeType(FromBitWidth: Ty->getScalarType()->getIntegerBitWidth(), ToBitWidth: NewBitWidth)) |
| 1206 | return nullptr; |
| 1207 | |
| 1208 | // Also make sure that the inner min/max and the add/sub have one use. |
| 1209 | if (!MinMax2->hasOneUse() || !AddSub->hasOneUse()) |
| 1210 | return nullptr; |
| 1211 | |
| 1212 | // Create the new type (which can be a vector type) |
| 1213 | Type *NewTy = Ty->getWithNewBitWidth(NewBitWidth); |
| 1214 | |
| 1215 | Intrinsic::ID IntrinsicID; |
| 1216 | if (AddSub->getOpcode() == Instruction::Add) |
| 1217 | IntrinsicID = Intrinsic::sadd_sat; |
| 1218 | else if (AddSub->getOpcode() == Instruction::Sub) |
| 1219 | IntrinsicID = Intrinsic::ssub_sat; |
| 1220 | else |
| 1221 | return nullptr; |
| 1222 | |
| 1223 | // The two operands of the add/sub must be nsw-truncatable to the NewTy. This |
| 1224 | // is usually achieved via a sext from a smaller type. |
| 1225 | if (ComputeMaxSignificantBits(Op: AddSub->getOperand(i_nocapture: 0), CxtI: AddSub) > NewBitWidth || |
| 1226 | ComputeMaxSignificantBits(Op: AddSub->getOperand(i_nocapture: 1), CxtI: AddSub) > NewBitWidth) |
| 1227 | return nullptr; |
| 1228 | |
| 1229 | // Finally create and return the sat intrinsic, truncated to the new type |
| 1230 | Value *AT = Builder.CreateTrunc(V: AddSub->getOperand(i_nocapture: 0), DestTy: NewTy); |
| 1231 | Value *BT = Builder.CreateTrunc(V: AddSub->getOperand(i_nocapture: 1), DestTy: NewTy); |
| 1232 | Value *Sat = Builder.CreateIntrinsic(ID: IntrinsicID, Types: NewTy, Args: {AT, BT}); |
| 1233 | return CastInst::Create(Instruction::SExt, S: Sat, Ty); |
| 1234 | } |
| 1235 | |
| 1236 | |
| 1237 | /// If we have a clamp pattern like max (min X, 42), 41 -- where the output |
| 1238 | /// can only be one of two possible constant values -- turn that into a select |
| 1239 | /// of constants. |
| 1240 | static Instruction *foldClampRangeOfTwo(IntrinsicInst *II, |
| 1241 | InstCombiner::BuilderTy &Builder) { |
| 1242 | Value *I0 = II->getArgOperand(i: 0), *I1 = II->getArgOperand(i: 1); |
| 1243 | Value *X; |
| 1244 | const APInt *C0, *C1; |
| 1245 | if (!match(V: I1, P: m_APInt(Res&: C1)) || !I0->hasOneUse()) |
| 1246 | return nullptr; |
| 1247 | |
| 1248 | CmpInst::Predicate Pred = CmpInst::BAD_ICMP_PREDICATE; |
| 1249 | switch (II->getIntrinsicID()) { |
| 1250 | case Intrinsic::smax: |
| 1251 | if (match(V: I0, P: m_SMin(L: m_Value(V&: X), R: m_APInt(Res&: C0))) && *C0 == *C1 + 1) |
| 1252 | Pred = ICmpInst::ICMP_SGT; |
| 1253 | break; |
| 1254 | case Intrinsic::smin: |
| 1255 | if (match(V: I0, P: m_SMax(L: m_Value(V&: X), R: m_APInt(Res&: C0))) && *C1 == *C0 + 1) |
| 1256 | Pred = ICmpInst::ICMP_SLT; |
| 1257 | break; |
| 1258 | case Intrinsic::umax: |
| 1259 | if (match(V: I0, P: m_UMin(L: m_Value(V&: X), R: m_APInt(Res&: C0))) && *C0 == *C1 + 1) |
| 1260 | Pred = ICmpInst::ICMP_UGT; |
| 1261 | break; |
| 1262 | case Intrinsic::umin: |
| 1263 | if (match(V: I0, P: m_UMax(L: m_Value(V&: X), R: m_APInt(Res&: C0))) && *C1 == *C0 + 1) |
| 1264 | Pred = ICmpInst::ICMP_ULT; |
| 1265 | break; |
| 1266 | default: |
| 1267 | llvm_unreachable("Expected min/max intrinsic" ); |
| 1268 | } |
| 1269 | if (Pred == CmpInst::BAD_ICMP_PREDICATE) |
| 1270 | return nullptr; |
| 1271 | |
| 1272 | // max (min X, 42), 41 --> X > 41 ? 42 : 41 |
| 1273 | // min (max X, 42), 43 --> X < 43 ? 42 : 43 |
| 1274 | Value *Cmp = Builder.CreateICmp(P: Pred, LHS: X, RHS: I1); |
| 1275 | return SelectInst::Create(C: Cmp, S1: ConstantInt::get(Ty: II->getType(), V: *C0), S2: I1); |
| 1276 | } |
| 1277 | |
| 1278 | /// If this min/max has a constant operand and an operand that is a matching |
| 1279 | /// min/max with a constant operand, constant-fold the 2 constant operands. |
| 1280 | static Value *reassociateMinMaxWithConstants(IntrinsicInst *II, |
| 1281 | IRBuilderBase &Builder, |
| 1282 | const SimplifyQuery &SQ) { |
| 1283 | Intrinsic::ID MinMaxID = II->getIntrinsicID(); |
| 1284 | auto *LHS = dyn_cast<MinMaxIntrinsic>(Val: II->getArgOperand(i: 0)); |
| 1285 | if (!LHS) |
| 1286 | return nullptr; |
| 1287 | |
| 1288 | Constant *C0, *C1; |
| 1289 | if (!match(V: LHS->getArgOperand(i: 1), P: m_ImmConstant(C&: C0)) || |
| 1290 | !match(V: II->getArgOperand(i: 1), P: m_ImmConstant(C&: C1))) |
| 1291 | return nullptr; |
| 1292 | |
| 1293 | // max (max X, C0), C1 --> max X, (max C0, C1) |
| 1294 | // min (min X, C0), C1 --> min X, (min C0, C1) |
| 1295 | // umax (smax X, nneg C0), nneg C1 --> smax X, (umax C0, C1) |
| 1296 | // smin (umin X, nneg C0), nneg C1 --> umin X, (smin C0, C1) |
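|      | // e.g. umax(umax(X, 3), 5) --> umax(X, 5), because umax(3, 5) folds to 5.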
| 1297 | Intrinsic::ID InnerMinMaxID = LHS->getIntrinsicID(); |
| 1298 | if (InnerMinMaxID != MinMaxID && |
| 1299 | !(((MinMaxID == Intrinsic::umax && InnerMinMaxID == Intrinsic::smax) || |
| 1300 | (MinMaxID == Intrinsic::smin && InnerMinMaxID == Intrinsic::umin)) && |
| 1301 | isKnownNonNegative(V: C0, SQ) && isKnownNonNegative(V: C1, SQ))) |
| 1302 | return nullptr; |
| 1303 | |
| 1304 | ICmpInst::Predicate Pred = MinMaxIntrinsic::getPredicate(ID: MinMaxID); |
| 1305 | Value *CondC = Builder.CreateICmp(P: Pred, LHS: C0, RHS: C1); |
| 1306 | Value *NewC = Builder.CreateSelect(C: CondC, True: C0, False: C1); |
| 1307 | return Builder.CreateIntrinsic(ID: InnerMinMaxID, Types: II->getType(), |
| 1308 | Args: {LHS->getArgOperand(i: 0), NewC}); |
| 1309 | } |
| 1310 | |
| 1311 | /// If this min/max has a matching min/max operand with a constant, try to push |
| 1312 | /// the constant operand into this instruction. This can enable more folds. |
| 1313 | static Instruction * |
| 1314 | reassociateMinMaxWithConstantInOperand(IntrinsicInst *II, |
| 1315 | InstCombiner::BuilderTy &Builder) { |
| 1316 | // Match and capture a min/max operand candidate. |
| 1317 | Value *X, *Y; |
| 1318 | Constant *C; |
| 1319 | Instruction *Inner; |
| 1320 | if (!match(V: II, P: m_c_MaxOrMin(L: m_OneUse(SubPattern: m_CombineAnd( |
| 1321 | L: m_Instruction(I&: Inner), |
| 1322 | R: m_MaxOrMin(L: m_Value(V&: X), R: m_ImmConstant(C)))), |
| 1323 | R: m_Value(V&: Y)))) |
| 1324 | return nullptr; |
| 1325 | |
| 1326 | // The inner op must match. Check for constants to avoid infinite loops. |
| 1327 | Intrinsic::ID MinMaxID = II->getIntrinsicID(); |
| 1328 | auto *InnerMM = dyn_cast<IntrinsicInst>(Val: Inner); |
| 1329 | if (!InnerMM || InnerMM->getIntrinsicID() != MinMaxID || |
| 1330 | match(V: X, P: m_ImmConstant()) || match(V: Y, P: m_ImmConstant())) |
| 1331 | return nullptr; |
| 1332 | |
| 1333 | // max (max X, C), Y --> max (max X, Y), C |
| 1334 | Function *MinMax = Intrinsic::getOrInsertDeclaration(M: II->getModule(), |
| 1335 | id: MinMaxID, Tys: II->getType()); |
| 1336 | Value *NewInner = Builder.CreateBinaryIntrinsic(ID: MinMaxID, LHS: X, RHS: Y); |
| 1337 | NewInner->takeName(V: Inner); |
| 1338 | return CallInst::Create(Func: MinMax, Args: {NewInner, C}); |
| 1339 | } |
| 1340 | |
| 1341 | /// Reduce a sequence of min/max intrinsics with a common operand. |
| 1342 | static Instruction *factorizeMinMaxTree(IntrinsicInst *II) { |
| 1343 | // Match 3 of the same min/max ops. Example: umin(umin(), umin()). |
| 1344 | auto *LHS = dyn_cast<IntrinsicInst>(Val: II->getArgOperand(i: 0)); |
| 1345 | auto *RHS = dyn_cast<IntrinsicInst>(Val: II->getArgOperand(i: 1)); |
| 1346 | Intrinsic::ID MinMaxID = II->getIntrinsicID(); |
| 1347 | if (!LHS || !RHS || LHS->getIntrinsicID() != MinMaxID || |
| 1348 | RHS->getIntrinsicID() != MinMaxID || |
| 1349 | (!LHS->hasOneUse() && !RHS->hasOneUse())) |
| 1350 | return nullptr; |
| 1351 | |
| 1352 | Value *A = LHS->getArgOperand(i: 0); |
| 1353 | Value *B = LHS->getArgOperand(i: 1); |
| 1354 | Value *C = RHS->getArgOperand(i: 0); |
| 1355 | Value *D = RHS->getArgOperand(i: 1); |
| 1356 | |
| 1357 | // Look for a common operand. |
| 1358 | Value *MinMaxOp = nullptr; |
| 1359 | Value *ThirdOp = nullptr; |
| 1360 | if (LHS->hasOneUse()) { |
| 1361 | // If the LHS is only used in this chain and the RHS is used outside of it, |
| 1362 | // reuse the RHS min/max because that will eliminate the LHS. |
| 1363 | if (D == A || C == A) { |
| 1364 | // min(min(a, b), min(c, a)) --> min(min(c, a), b) |
| 1365 | // min(min(a, b), min(a, d)) --> min(min(a, d), b) |
| 1366 | MinMaxOp = RHS; |
| 1367 | ThirdOp = B; |
| 1368 | } else if (D == B || C == B) { |
| 1369 | // min(min(a, b), min(c, b)) --> min(min(c, b), a) |
| 1370 | // min(min(a, b), min(b, d)) --> min(min(b, d), a) |
| 1371 | MinMaxOp = RHS; |
| 1372 | ThirdOp = A; |
| 1373 | } |
| 1374 | } else { |
| 1375 | assert(RHS->hasOneUse() && "Expected one-use operand" ); |
| 1376 | // Reuse the LHS. This will eliminate the RHS. |
| 1377 | if (D == A || D == B) { |
| 1378 | // min(min(a, b), min(c, a)) --> min(min(a, b), c) |
| 1379 | // min(min(a, b), min(c, b)) --> min(min(a, b), c) |
| 1380 | MinMaxOp = LHS; |
| 1381 | ThirdOp = C; |
| 1382 | } else if (C == A || C == B) { |
| 1383 | // min(min(a, b), min(a, d)) --> min(min(a, b), d)
| 1384 | // min(min(a, b), min(b, d)) --> min(min(a, b), d)
| 1385 | MinMaxOp = LHS; |
| 1386 | ThirdOp = D; |
| 1387 | } |
| 1388 | } |
| 1389 | |
| 1390 | if (!MinMaxOp || !ThirdOp) |
| 1391 | return nullptr; |
| 1392 | |
| 1393 | Module *Mod = II->getModule(); |
| 1394 | Function *MinMax = |
| 1395 | Intrinsic::getOrInsertDeclaration(M: Mod, id: MinMaxID, Tys: II->getType()); |
| 1396 | return CallInst::Create(Func: MinMax, Args: { MinMaxOp, ThirdOp }); |
| 1397 | } |
| 1398 | |
| 1399 | /// If all arguments of the intrinsic are unary shuffles with the same mask, |
| 1400 | /// try to shuffle after the intrinsic. |
| 1401 | Instruction * |
| 1402 | InstCombinerImpl::foldShuffledIntrinsicOperands(IntrinsicInst *II) { |
| 1403 | if (!isTriviallyVectorizable(ID: II->getIntrinsicID()) || |
| 1404 | !II->getCalledFunction()->isSpeculatable()) |
| 1405 | return nullptr; |
| 1406 | |
| 1407 | Value *X; |
| 1408 | Constant *C; |
| 1409 | ArrayRef<int> Mask; |
| 1410 | auto *NonConstArg = find_if_not(Range: II->args(), P: [&II](Use &Arg) { |
| 1411 | return isa<Constant>(Val: Arg.get()) || |
| 1412 | isVectorIntrinsicWithScalarOpAtArg(ID: II->getIntrinsicID(), |
| 1413 | ScalarOpdIdx: Arg.getOperandNo(), TTI: nullptr); |
| 1414 | }); |
| 1415 | if (!NonConstArg || |
| 1416 | !match(V: NonConstArg, P: m_Shuffle(v1: m_Value(V&: X), v2: m_Poison(), mask: m_Mask(Mask)))) |
| 1417 | return nullptr; |
| 1418 | |
| 1419 | // At least 1 operand must be a shuffle with 1 use because we are creating 2 |
| 1420 | // instructions. |
| 1421 | if (none_of(Range: II->args(), P: [](Value *V) { |
| 1422 | return isa<ShuffleVectorInst>(Val: V) && V->hasOneUse(); |
| 1423 | })) |
| 1424 | return nullptr; |
| 1425 | |
| 1426 | // See if all arguments are shuffled with the same mask. |
| 1427 | SmallVector<Value *, 4> NewArgs; |
| 1428 | Type *SrcTy = X->getType(); |
| 1429 | for (Use &Arg : II->args()) { |
| 1430 | if (isVectorIntrinsicWithScalarOpAtArg(ID: II->getIntrinsicID(), |
| 1431 | ScalarOpdIdx: Arg.getOperandNo(), TTI: nullptr)) |
| 1432 | NewArgs.push_back(Elt: Arg); |
| 1433 | else if (match(V: &Arg, |
| 1434 | P: m_Shuffle(v1: m_Value(V&: X), v2: m_Poison(), mask: m_SpecificMask(Mask))) && |
| 1435 | X->getType() == SrcTy) |
| 1436 | NewArgs.push_back(Elt: X); |
| 1437 | else if (match(V: &Arg, P: m_ImmConstant(C))) { |
| 1438 | // If it's a constant, try to find the constant that would be shuffled to C.
| 1439 | if (Constant *ShuffledC = |
| 1440 | unshuffleConstant(ShMask: Mask, C, NewCTy: cast<VectorType>(Val: SrcTy))) |
| 1441 | NewArgs.push_back(Elt: ShuffledC); |
| 1442 | else |
| 1443 | return nullptr; |
| 1444 | } else |
| 1445 | return nullptr; |
| 1446 | } |
| 1447 | |
| 1448 | // intrinsic (shuf X, M), (shuf Y, M), ... --> shuf (intrinsic X, Y, ...), M |
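|      | // e.g. for a lanewise intrinsic such as umin:
|      | // umin(shuffle(X, poison, M), shuffle(Y, poison, M)) --> shuffle(umin(X, Y), poison, M)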
| 1449 | Instruction *FPI = isa<FPMathOperator>(Val: II) ? II : nullptr; |
| 1450 | // Result type might be a different vector width. |
| 1451 | // TODO: Check that the result type isn't widened? |
| 1452 | VectorType *ResTy = |
| 1453 | VectorType::get(ElementType: II->getType()->getScalarType(), Other: cast<VectorType>(Val: SrcTy)); |
| 1454 | Value *NewIntrinsic = |
| 1455 | Builder.CreateIntrinsic(RetTy: ResTy, ID: II->getIntrinsicID(), Args: NewArgs, FMFSource: FPI); |
| 1456 | return new ShuffleVectorInst(NewIntrinsic, Mask); |
| 1457 | } |
| 1458 | |
| 1459 | /// If all arguments of the intrinsic are reverses, try to pull the reverse |
| 1460 | /// after the intrinsic. |
| 1461 | Value *InstCombinerImpl::foldReversedIntrinsicOperands(IntrinsicInst *II) { |
| 1462 | if (!isTriviallyVectorizable(ID: II->getIntrinsicID())) |
| 1463 | return nullptr; |
| 1464 | |
| 1465 | // At least 1 operand must be a reverse with 1 use because we are creating 2 |
| 1466 | // instructions. |
| 1467 | if (none_of(Range: II->args(), P: [](Value *V) { |
| 1468 | return match(V, P: m_OneUse(SubPattern: m_VecReverse(Op0: m_Value()))); |
| 1469 | })) |
| 1470 | return nullptr; |
| 1471 | |
| 1472 | Value *X; |
| 1473 | Constant *C; |
| 1474 | SmallVector<Value *> NewArgs; |
| 1475 | for (Use &Arg : II->args()) { |
| 1476 | if (isVectorIntrinsicWithScalarOpAtArg(ID: II->getIntrinsicID(), |
| 1477 | ScalarOpdIdx: Arg.getOperandNo(), TTI: nullptr)) |
| 1478 | NewArgs.push_back(Elt: Arg); |
| 1479 | else if (match(V: &Arg, P: m_VecReverse(Op0: m_Value(V&: X)))) |
| 1480 | NewArgs.push_back(Elt: X); |
| 1481 | else if (isSplatValue(V: Arg)) |
| 1482 | NewArgs.push_back(Elt: Arg); |
| 1483 | else if (match(V: &Arg, P: m_ImmConstant(C))) |
| 1484 | NewArgs.push_back(Elt: Builder.CreateVectorReverse(V: C)); |
| 1485 | else |
| 1486 | return nullptr; |
| 1487 | } |
| 1488 | |
| 1489 | // intrinsic (reverse X), (reverse Y), ... --> reverse (intrinsic X, Y, ...) |
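|      | // e.g. smax(vector.reverse(X), vector.reverse(Y)) --> vector.reverse(smax(X, Y))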
| 1490 | Instruction *FPI = isa<FPMathOperator>(Val: II) ? II : nullptr; |
| 1491 | Instruction *NewIntrinsic = Builder.CreateIntrinsic( |
| 1492 | RetTy: II->getType(), ID: II->getIntrinsicID(), Args: NewArgs, FMFSource: FPI); |
| 1493 | return Builder.CreateVectorReverse(V: NewIntrinsic); |
| 1494 | } |
| 1495 | |
| 1496 | /// Fold the following cases (accepts both bswap and bitreverse intrinsics):
| 1497 | /// bswap(logic_op(bswap(x), y)) --> logic_op(x, bswap(y)) |
| 1498 | /// bswap(logic_op(bswap(x), bswap(y))) --> logic_op(x, y) (ignores multiuse) |
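|      | /// For example, on i16 with a constant operand: bswap(and(bswap(X), 0x00FF))
|      | /// --> and(X, bswap(0x00FF)) == and(X, 0xFF00).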
| 1499 | template <Intrinsic::ID IntrID> |
| 1500 | static Instruction *foldBitOrderCrossLogicOp(Value *V, |
| 1501 | InstCombiner::BuilderTy &Builder) { |
| 1502 | static_assert(IntrID == Intrinsic::bswap || IntrID == Intrinsic::bitreverse, |
| 1503 | "This helper only supports BSWAP and BITREVERSE intrinsics" ); |
| 1504 | |
| 1505 | Value *X, *Y; |
| 1506 | // Find bitwise logic op. Check that it is a BinaryOperator explicitly so we |
| 1507 | // don't match ConstantExpr that aren't meaningful for this transform. |
| 1508 | if (match(V, P: m_OneUse(SubPattern: m_BitwiseLogic(L: m_Value(V&: X), R: m_Value(V&: Y)))) && |
| 1509 | isa<BinaryOperator>(Val: V)) { |
| 1510 | Value *OldReorderX, *OldReorderY; |
| 1511 | BinaryOperator::BinaryOps Op = cast<BinaryOperator>(Val: V)->getOpcode(); |
| 1512 | |
| 1513 | // If both X and Y are bswap/bitreverse, the transform reduces the number
| 1514 | // of instructions even if they have other uses.
| 1515 | // If only one operand is bswap/bitreverse, we need to ensure that operand
| 1516 | // has only one use.
| 1517 | if (match(X, m_Intrinsic<IntrID>(m_Value(V&: OldReorderX))) && |
| 1518 | match(Y, m_Intrinsic<IntrID>(m_Value(V&: OldReorderY)))) { |
| 1519 | return BinaryOperator::Create(Op, S1: OldReorderX, S2: OldReorderY); |
| 1520 | } |
| 1521 | |
| 1522 | if (match(X, m_OneUse(m_Intrinsic<IntrID>(m_Value(V&: OldReorderX))))) { |
| 1523 | Value *NewReorder = Builder.CreateUnaryIntrinsic(ID: IntrID, V: Y); |
| 1524 | return BinaryOperator::Create(Op, S1: OldReorderX, S2: NewReorder); |
| 1525 | } |
| 1526 | |
| 1527 | if (match(Y, m_OneUse(m_Intrinsic<IntrID>(m_Value(V&: OldReorderY))))) { |
| 1528 | Value *NewReorder = Builder.CreateUnaryIntrinsic(ID: IntrID, V: X); |
| 1529 | return BinaryOperator::Create(Op, S1: NewReorder, S2: OldReorderY); |
| 1530 | } |
| 1531 | } |
| 1532 | return nullptr; |
| 1533 | } |
| 1534 | |
| 1535 | static Value *simplifyReductionOperand(Value *Arg, bool CanReorderLanes) { |
| 1536 | if (!CanReorderLanes) |
| 1537 | return nullptr; |
| 1538 | |
| 1539 | Value *V; |
| 1540 | if (match(V: Arg, P: m_VecReverse(Op0: m_Value(V)))) |
| 1541 | return V; |
| 1542 | |
| 1543 | ArrayRef<int> Mask; |
| 1544 | if (!isa<FixedVectorType>(Val: Arg->getType()) || |
| 1545 | !match(V: Arg, P: m_Shuffle(v1: m_Value(V), v2: m_Undef(), mask: m_Mask(Mask))) || |
| 1546 | !cast<ShuffleVectorInst>(Val: Arg)->isSingleSource()) |
| 1547 | return nullptr; |
| 1548 | |
| 1549 | int Sz = Mask.size(); |
| 1550 | SmallBitVector UsedIndices(Sz); |
| 1551 | for (int Idx : Mask) { |
| 1552 | if (Idx == PoisonMaskElem || UsedIndices.test(Idx)) |
| 1553 | return nullptr; |
| 1554 | UsedIndices.set(Idx); |
| 1555 | } |
| 1556 | |
| 1557 | // The shuffle can be removed iff it is a pure permutation of the elements:
| 1558 | // no repeats, no poison lanes, and no other changes.
| 1559 | return UsedIndices.all() ? V : nullptr; |
| 1560 | } |
| 1561 | |
| 1562 | /// Fold an unsigned minimum of trailing or leading zero bits counts: |
| 1563 | /// umin(cttz(CtOp, ZeroUndef), ConstOp) --> cttz(CtOp | (1 << ConstOp)) |
| 1564 | /// umin(ctlz(CtOp, ZeroUndef), ConstOp) --> ctlz(CtOp | (SignedMin |
| 1565 | /// >> ConstOp)) |
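|      | /// For example, on i8: umin(cttz(X, false), 3) --> cttz(X | 0x08, true);
|      | /// OR-ing in bit 3 caps the trailing-zero count at 3 and makes the argument
|      | /// provably non-zero, so is_zero_poison can be set.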
| 1566 | template <Intrinsic::ID IntrID> |
| 1567 | static Value * |
| 1568 | foldMinimumOverTrailingOrLeadingZeroCount(Value *I0, Value *I1, |
| 1569 | const DataLayout &DL, |
| 1570 | InstCombiner::BuilderTy &Builder) { |
| 1571 | static_assert(IntrID == Intrinsic::cttz || IntrID == Intrinsic::ctlz, |
| 1572 | "This helper only supports cttz and ctlz intrinsics" ); |
| 1573 | |
| 1574 | Value *CtOp; |
| 1575 | Value *ZeroUndef; |
| 1576 | if (!match(I0, |
| 1577 | m_OneUse(m_Intrinsic<IntrID>(m_Value(V&: CtOp), m_Value(V&: ZeroUndef))))) |
| 1578 | return nullptr; |
| 1579 | |
| 1580 | unsigned BitWidth = I1->getType()->getScalarSizeInBits(); |
| 1581 | auto LessBitWidth = [BitWidth](auto &C) { return C.ult(BitWidth); }; |
| 1582 | if (!match(I1, m_CheckedInt(LessBitWidth))) |
| 1583 | // We have a constant >= BitWidth (which can be handled by CVP) |
| 1584 | // or a non-splat vector with elements < and >= BitWidth |
| 1585 | return nullptr; |
| 1586 | |
| 1587 | Type *Ty = I1->getType(); |
| 1588 | Constant *NewConst = ConstantFoldBinaryOpOperands( |
| 1589 | Opcode: IntrID == Intrinsic::cttz ? Instruction::Shl : Instruction::LShr, |
| 1590 | LHS: IntrID == Intrinsic::cttz |
| 1591 | ? ConstantInt::get(Ty, V: 1) |
| 1592 | : ConstantInt::get(Ty, V: APInt::getSignedMinValue(numBits: BitWidth)), |
| 1593 | RHS: cast<Constant>(Val: I1), DL); |
| 1594 | return Builder.CreateBinaryIntrinsic( |
| 1595 | ID: IntrID, LHS: Builder.CreateOr(LHS: CtOp, RHS: NewConst), |
| 1596 | RHS: ConstantInt::getTrue(Ty: ZeroUndef->getType())); |
| 1597 | } |
| 1598 | |
| 1599 | /// Return whether "X LOp (Y ROp Z)" is always equal to |
| 1600 | /// "(X LOp Y) ROp (X LOp Z)". |
| 1601 | static bool leftDistributesOverRight(Instruction::BinaryOps LOp, bool HasNUW, |
| 1602 | bool HasNSW, Intrinsic::ID ROp) { |
| 1603 | switch (ROp) { |
| 1604 | case Intrinsic::umax: |
| 1605 | case Intrinsic::umin: |
| 1606 | if (HasNUW && LOp == Instruction::Add) |
| 1607 | return true; |
| 1608 | if (HasNUW && LOp == Instruction::Shl) |
| 1609 | return true; |
| 1610 | return false; |
| 1611 | case Intrinsic::smax: |
| 1612 | case Intrinsic::smin: |
| 1613 | return HasNSW && LOp == Instruction::Add; |
| 1614 | default: |
| 1615 | return false; |
| 1616 | } |
| 1617 | } |
| 1618 | |
| 1619 | // Attempts to factorise a common term from an expression of the form
| 1620 | // "(A op' B) op (C op' D)", where op is an intrinsic and op' is a binary
| 1621 | // operator.
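|      | // e.g. umax((A +nuw B), (A +nuw D)) --> A +nuw umax(B, D), when the binop
|      | // distributes over the intrinsic (see leftDistributesOverRight above).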
| 1622 | static Value * |
| 1623 | foldIntrinsicUsingDistributiveLaws(IntrinsicInst *II, |
| 1624 | InstCombiner::BuilderTy &Builder) { |
| 1625 | Value *LHS = II->getOperand(i_nocapture: 0), *RHS = II->getOperand(i_nocapture: 1); |
| 1626 | Intrinsic::ID TopLevelOpcode = II->getIntrinsicID(); |
| 1627 | |
| 1628 | OverflowingBinaryOperator *Op0 = dyn_cast<OverflowingBinaryOperator>(Val: LHS); |
| 1629 | OverflowingBinaryOperator *Op1 = dyn_cast<OverflowingBinaryOperator>(Val: RHS); |
| 1630 | |
| 1631 | if (!Op0 || !Op1) |
| 1632 | return nullptr; |
| 1633 | |
| 1634 | if (Op0->getOpcode() != Op1->getOpcode()) |
| 1635 | return nullptr; |
| 1636 | |
| 1637 | if (!Op0->hasOneUse() || !Op1->hasOneUse()) |
| 1638 | return nullptr; |
| 1639 | |
| 1640 | Instruction::BinaryOps InnerOpcode = |
| 1641 | static_cast<Instruction::BinaryOps>(Op0->getOpcode()); |
| 1642 | bool HasNUW = Op0->hasNoUnsignedWrap() && Op1->hasNoUnsignedWrap(); |
| 1643 | bool HasNSW = Op0->hasNoSignedWrap() && Op1->hasNoSignedWrap(); |
| 1644 | |
| 1645 | if (!leftDistributesOverRight(LOp: InnerOpcode, HasNUW, HasNSW, ROp: TopLevelOpcode)) |
| 1646 | return nullptr; |
| 1647 | |
| 1648 | Value *A = Op0->getOperand(i_nocapture: 0); |
| 1649 | Value *B = Op0->getOperand(i_nocapture: 1); |
| 1650 | Value *C = Op1->getOperand(i_nocapture: 0); |
| 1651 | Value *D = Op1->getOperand(i_nocapture: 1); |
| 1652 | |
| 1653 | // Attempts to swap variables such that A equals C or B equals D, |
| 1654 | // if the inner operation is commutative. |
| 1655 | if (Op0->isCommutative() && A != C && B != D) { |
| 1656 | if (A == D || B == C) |
| 1657 | std::swap(a&: C, b&: D); |
| 1658 | else |
| 1659 | return nullptr; |
| 1660 | } |
| 1661 | |
| 1662 | BinaryOperator *NewBinop; |
| 1663 | if (A == C) { |
| 1664 | Value *NewIntrinsic = Builder.CreateBinaryIntrinsic(ID: TopLevelOpcode, LHS: B, RHS: D); |
| 1665 | NewBinop = |
| 1666 | cast<BinaryOperator>(Val: Builder.CreateBinOp(Opc: InnerOpcode, LHS: A, RHS: NewIntrinsic)); |
| 1667 | } else if (B == D) { |
| 1668 | Value *NewIntrinsic = Builder.CreateBinaryIntrinsic(ID: TopLevelOpcode, LHS: A, RHS: C); |
| 1669 | NewBinop = |
| 1670 | cast<BinaryOperator>(Val: Builder.CreateBinOp(Opc: InnerOpcode, LHS: NewIntrinsic, RHS: B)); |
| 1671 | } else { |
| 1672 | return nullptr; |
| 1673 | } |
| 1674 | |
| 1675 | NewBinop->setHasNoUnsignedWrap(HasNUW); |
| 1676 | NewBinop->setHasNoSignedWrap(HasNSW); |
| 1677 | |
| 1678 | return NewBinop; |
| 1679 | } |
| 1680 | |
| 1681 | /// CallInst simplification. This mostly only handles folding of intrinsic |
| 1682 | /// instructions. For normal calls, it allows visitCallBase to do the heavy |
| 1683 | /// lifting. |
| 1684 | Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { |
| 1685 | // Don't try to simplify calls without uses. It will not do anything useful, |
| 1686 | // but will result in the following folds being skipped. |
| 1687 | if (!CI.use_empty()) { |
| 1688 | SmallVector<Value *, 8> Args(CI.args()); |
| 1689 | if (Value *V = simplifyCall(Call: &CI, Callee: CI.getCalledOperand(), Args, |
| 1690 | Q: SQ.getWithInstruction(I: &CI))) |
| 1691 | return replaceInstUsesWith(I&: CI, V); |
| 1692 | } |
| 1693 | |
| 1694 | if (Value *FreedOp = getFreedOperand(CB: &CI, TLI: &TLI)) |
| 1695 | return visitFree(FI&: CI, FreedOp); |
| 1696 | |
| 1697 | // If the caller function (i.e. us, the function that contains this CallInst) |
| 1698 | // is nounwind, mark the call as nounwind, even if the callee isn't. |
| 1699 | if (CI.getFunction()->doesNotThrow() && !CI.doesNotThrow()) { |
| 1700 | CI.setDoesNotThrow(); |
| 1701 | return &CI; |
| 1702 | } |
| 1703 | |
| 1704 | IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: &CI); |
| 1705 | if (!II) |
| 1706 | return visitCallBase(Call&: CI); |
| 1707 | |
| 1708 | // Intrinsics cannot occur in an invoke or a callbr, so handle them here |
| 1709 | // instead of in visitCallBase. |
| 1710 | if (auto *MI = dyn_cast<AnyMemIntrinsic>(Val: II)) { |
| 1711 | if (ConstantInt *NumBytes = dyn_cast<ConstantInt>(Val: MI->getLength())) { |
| 1712 | // memmove/cpy/set of zero bytes is a noop. |
| 1713 | if (NumBytes->isNullValue()) |
| 1714 | return eraseInstFromFunction(I&: CI); |
| 1715 | |
| 1716 | // For atomic unordered mem intrinsics, if the length is negative or not a
| 1717 | // multiple of the element size, then the behavior is undefined.
| 1718 | if (MI->isAtomic() && |
| 1719 | (NumBytes->isNegative() || |
| 1720 | (NumBytes->getZExtValue() % MI->getElementSizeInBytes() != 0))) { |
| 1721 | CreateNonTerminatorUnreachable(InsertAt: MI); |
| 1722 | assert(MI->getType()->isVoidTy() && |
| 1723 | "non void atomic unordered mem intrinsic" ); |
| 1724 | return eraseInstFromFunction(I&: *MI); |
| 1725 | } |
| 1726 | } |
| 1727 | |
| 1728 | // No other transformations apply to volatile transfers. |
| 1729 | if (MI->isVolatile()) |
| 1730 | return nullptr; |
| 1731 | |
| 1732 | if (AnyMemTransferInst *MTI = dyn_cast<AnyMemTransferInst>(Val: MI)) { |
| 1733 | // memmove(x,x,size) -> noop. |
| 1734 | if (MTI->getSource() == MTI->getDest()) |
| 1735 | return eraseInstFromFunction(I&: CI); |
| 1736 | } |
| 1737 | |
| 1738 | auto IsPointerUndefined = [MI](Value *Ptr) { |
| 1739 | return isa<ConstantPointerNull>(Val: Ptr) && |
| 1740 | !NullPointerIsDefined( |
| 1741 | F: MI->getFunction(), |
| 1742 | AS: cast<PointerType>(Val: Ptr->getType())->getAddressSpace()); |
| 1743 | }; |
| 1744 | bool SrcIsUndefined = false; |
| 1745 | // If we can determine a pointer alignment that is bigger than currently |
| 1746 | // set, update the alignment. |
| 1747 | if (auto *MTI = dyn_cast<AnyMemTransferInst>(Val: MI)) { |
| 1748 | if (Instruction *I = SimplifyAnyMemTransfer(MI: MTI)) |
| 1749 | return I; |
| 1750 | SrcIsUndefined = IsPointerUndefined(MTI->getRawSource()); |
| 1751 | } else if (auto *MSI = dyn_cast<AnyMemSetInst>(Val: MI)) { |
| 1752 | if (Instruction *I = SimplifyAnyMemSet(MI: MSI)) |
| 1753 | return I; |
| 1754 | } |
| 1755 | |
| 1756 | // If src/dest is null, this memory intrinsic must be a noop. |
| 1757 | if (SrcIsUndefined || IsPointerUndefined(MI->getRawDest())) { |
| 1758 | Builder.CreateAssumption(Cond: Builder.CreateIsNull(Arg: MI->getLength())); |
| 1759 | return eraseInstFromFunction(I&: CI); |
| 1760 | } |
| 1761 | |
| 1762 | // If we have a memmove and the source operand is a constant global,
| 1763 | // then the source and dest pointers can't alias, so we can change this |
| 1764 | // into a call to memcpy. |
| 1765 | if (auto *MMI = dyn_cast<AnyMemMoveInst>(Val: MI)) { |
| 1766 | if (GlobalVariable *GVSrc = dyn_cast<GlobalVariable>(Val: MMI->getSource())) |
| 1767 | if (GVSrc->isConstant()) { |
| 1768 | Module *M = CI.getModule(); |
| 1769 | Intrinsic::ID MemCpyID = |
| 1770 | MMI->isAtomic() |
| 1771 | ? Intrinsic::memcpy_element_unordered_atomic |
| 1772 | : Intrinsic::memcpy; |
| 1773 | Type *Tys[3] = { CI.getArgOperand(i: 0)->getType(), |
| 1774 | CI.getArgOperand(i: 1)->getType(), |
| 1775 | CI.getArgOperand(i: 2)->getType() }; |
| 1776 | CI.setCalledFunction( |
| 1777 | Intrinsic::getOrInsertDeclaration(M, id: MemCpyID, Tys)); |
| 1778 | return II; |
| 1779 | } |
| 1780 | } |
| 1781 | } |
| 1782 | |
| 1783 | // For fixed width vector result intrinsics, use the generic demanded vector |
| 1784 | // support. |
| 1785 | if (auto *IIFVTy = dyn_cast<FixedVectorType>(Val: II->getType())) { |
| 1786 | auto VWidth = IIFVTy->getNumElements(); |
| 1787 | APInt PoisonElts(VWidth, 0); |
| 1788 | APInt AllOnesEltMask(APInt::getAllOnes(numBits: VWidth)); |
| 1789 | if (Value *V = SimplifyDemandedVectorElts(V: II, DemandedElts: AllOnesEltMask, PoisonElts)) { |
| 1790 | if (V != II) |
| 1791 | return replaceInstUsesWith(I&: *II, V); |
| 1792 | return II; |
| 1793 | } |
| 1794 | } |
| 1795 | |
| 1796 | if (II->isCommutative()) { |
| 1797 | if (auto Pair = matchSymmetricPair(LHS: II->getOperand(i_nocapture: 0), RHS: II->getOperand(i_nocapture: 1))) { |
| 1798 | replaceOperand(I&: *II, OpNum: 0, V: Pair->first); |
| 1799 | replaceOperand(I&: *II, OpNum: 1, V: Pair->second); |
| 1800 | return II; |
| 1801 | } |
| 1802 | |
| 1803 | if (CallInst *NewCall = canonicalizeConstantArg0ToArg1(Call&: CI)) |
| 1804 | return NewCall; |
| 1805 | } |
| 1806 | |
| 1807 | // Unused constrained FP intrinsic calls may have a declared side effect, which
| 1808 | // prevents them from being removed. In some cases, however, the side effect is
| 1809 | // actually absent. To detect this case, call simplifyConstrainedFPCall. If it
| 1810 | // returns a replacement, the call may be removed.
| 1811 | if (CI.use_empty() && isa<ConstrainedFPIntrinsic>(Val: CI)) { |
| 1812 | if (simplifyConstrainedFPCall(Call: &CI, Q: SQ.getWithInstruction(I: &CI))) |
| 1813 | return eraseInstFromFunction(I&: CI); |
| 1814 | } |
| 1815 | |
| 1816 | Intrinsic::ID IID = II->getIntrinsicID(); |
| 1817 | switch (IID) { |
| 1818 | case Intrinsic::objectsize: { |
| 1819 | SmallVector<Instruction *> InsertedInstructions; |
| 1820 | if (Value *V = lowerObjectSizeCall(ObjectSize: II, DL, TLI: &TLI, AA, /*MustSucceed=*/false, |
| 1821 | InsertedInstructions: &InsertedInstructions)) { |
| 1822 | for (Instruction *Inserted : InsertedInstructions) |
| 1823 | Worklist.add(I: Inserted); |
| 1824 | return replaceInstUsesWith(I&: CI, V); |
| 1825 | } |
| 1826 | return nullptr; |
| 1827 | } |
| 1828 | case Intrinsic::abs: { |
| 1829 | Value *IIOperand = II->getArgOperand(i: 0); |
| 1830 | bool IntMinIsPoison = cast<Constant>(Val: II->getArgOperand(i: 1))->isOneValue(); |
| 1831 | |
| 1832 | // abs(-x) -> abs(x) |
| 1833 | // TODO: Copy nsw if it was present on the neg? |
| 1834 | Value *X; |
| 1835 | if (match(V: IIOperand, P: m_Neg(V: m_Value(V&: X)))) |
| 1836 | return replaceOperand(I&: *II, OpNum: 0, V: X); |
| 1837 | if (match(V: IIOperand, P: m_c_Select(L: m_Neg(V: m_Value(V&: X)), R: m_Deferred(V: X)))) |
| 1838 | return replaceOperand(I&: *II, OpNum: 0, V: X); |
| 1839 | |
| 1840 | Value *Y; |
| 1841 | // abs(a * abs(b)) -> abs(a * b) |
| 1842 | if (match(V: IIOperand, |
| 1843 | P: m_OneUse(SubPattern: m_c_Mul(L: m_Value(V&: X), |
| 1844 | R: m_Intrinsic<Intrinsic::abs>(Op0: m_Value(V&: Y)))))) { |
| 1845 | bool NSW = |
| 1846 | cast<Instruction>(Val: IIOperand)->hasNoSignedWrap() && IntMinIsPoison; |
| 1847 | auto *XY = NSW ? Builder.CreateNSWMul(LHS: X, RHS: Y) : Builder.CreateMul(LHS: X, RHS: Y); |
| 1848 | return replaceOperand(I&: *II, OpNum: 0, V: XY); |
| 1849 | } |
| 1850 | |
| 1851 | if (std::optional<bool> Known = |
| 1852 | getKnownSignOrZero(Op: IIOperand, SQ: SQ.getWithInstruction(I: II))) { |
| 1853 | // abs(x) -> x if x >= 0 (includes abs(x-y) --> x - y where x >= y)
| 1854 | // abs(x) -> x if x > 0 (includes abs(x-y) --> x - y where x > y)
| 1855 | if (!*Known) |
| 1856 | return replaceInstUsesWith(I&: *II, V: IIOperand); |
| 1857 | |
| 1858 | // abs(x) -> -x if x < 0 |
| 1859 | // abs(x) -> -x if x <= 0 (includes abs(x-y) --> y - x where x <= y)
| 1860 | if (IntMinIsPoison) |
| 1861 | return BinaryOperator::CreateNSWNeg(Op: IIOperand); |
| 1862 | return BinaryOperator::CreateNeg(Op: IIOperand); |
| 1863 | } |
| 1864 | |
| 1865 | // abs (sext X) --> zext (abs X*) |
| 1866 | // Clear the IsIntMin (nsw) bit on the abs to allow narrowing. |
| 1867 | if (match(V: IIOperand, P: m_OneUse(SubPattern: m_SExt(Op: m_Value(V&: X))))) { |
| 1868 | Value *NarrowAbs = |
| 1869 | Builder.CreateBinaryIntrinsic(ID: Intrinsic::abs, LHS: X, RHS: Builder.getFalse()); |
| 1870 | return CastInst::Create(Instruction::ZExt, S: NarrowAbs, Ty: II->getType()); |
| 1871 | } |
| 1872 | |
| 1873 | // Match a complicated way to check if a number is odd/even: |
| 1874 | // abs (srem X, 2) --> and X, 1 |
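|      | // (srem X, 2 is -1, 0, or 1 with the sign of X; its absolute value is
|      | // exactly the low bit of X.)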
| 1875 | const APInt *C; |
| 1876 | if (match(V: IIOperand, P: m_SRem(L: m_Value(V&: X), R: m_APInt(Res&: C))) && *C == 2) |
| 1877 | return BinaryOperator::CreateAnd(V1: X, V2: ConstantInt::get(Ty: II->getType(), V: 1)); |
| 1878 | |
| 1879 | break; |
| 1880 | } |
| 1881 | case Intrinsic::umin: { |
| 1882 | Value *I0 = II->getArgOperand(i: 0), *I1 = II->getArgOperand(i: 1); |
| 1883 | // umin(x, 1) == zext(x != 0) |
| 1884 | if (match(V: I1, P: m_One())) { |
| 1885 | assert(II->getType()->getScalarSizeInBits() != 1 && |
| 1886 | "Expected simplify of umin with max constant" ); |
| 1887 | Value *Zero = Constant::getNullValue(Ty: I0->getType()); |
| 1888 | Value *Cmp = Builder.CreateICmpNE(LHS: I0, RHS: Zero); |
| 1889 | return CastInst::Create(Instruction::ZExt, S: Cmp, Ty: II->getType()); |
| 1890 | } |
| 1891 | // umin(cttz(x), const) --> cttz(x | (1 << const)) |
| 1892 | if (Value *FoldedCttz = |
| 1893 | foldMinimumOverTrailingOrLeadingZeroCount<Intrinsic::cttz>( |
| 1894 | I0, I1, DL, Builder)) |
| 1895 | return replaceInstUsesWith(I&: *II, V: FoldedCttz); |
| 1896 | // umin(ctlz(x), const) --> ctlz(x | (SignedMin >> const)) |
| 1897 | if (Value *FoldedCtlz = |
| 1898 | foldMinimumOverTrailingOrLeadingZeroCount<Intrinsic::ctlz>( |
| 1899 | I0, I1, DL, Builder)) |
| 1900 | return replaceInstUsesWith(I&: *II, V: FoldedCtlz); |
| 1901 | [[fallthrough]]; |
| 1902 | } |
| 1903 | case Intrinsic::umax: { |
| 1904 | Value *I0 = II->getArgOperand(i: 0), *I1 = II->getArgOperand(i: 1); |
| 1905 | Value *X, *Y; |
| 1906 | if (match(V: I0, P: m_ZExt(Op: m_Value(V&: X))) && match(V: I1, P: m_ZExt(Op: m_Value(V&: Y))) && |
| 1907 | (I0->hasOneUse() || I1->hasOneUse()) && X->getType() == Y->getType()) { |
| 1908 | Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(ID: IID, LHS: X, RHS: Y); |
| 1909 | return CastInst::Create(Instruction::ZExt, S: NarrowMaxMin, Ty: II->getType()); |
| 1910 | } |
| 1911 | Constant *C; |
| 1912 | if (match(V: I0, P: m_ZExt(Op: m_Value(V&: X))) && match(V: I1, P: m_Constant(C)) && |
| 1913 | I0->hasOneUse()) { |
| 1914 | if (Constant *NarrowC = getLosslessUnsignedTrunc(C, TruncTy: X->getType())) { |
| 1915 | Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(ID: IID, LHS: X, RHS: NarrowC); |
| 1916 | return CastInst::Create(Instruction::ZExt, S: NarrowMaxMin, Ty: II->getType()); |
| 1917 | } |
| 1918 | } |
| 1919 | // If C is not 0: |
| 1920 | // umax(nuw_shl(x, C), x + 1) -> x == 0 ? 1 : nuw_shl(x, C) |
| 1921 | // If C is not 0 or 1: |
| 1922 | // umax(nuw_mul(x, C), x + 1) -> x == 0 ? 1 : nuw_mul(x, C) |
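|      | // Rationale: for x != 0 the no-wrap shl/mul is at least 2*x >= x + 1, so the
|      | // umax picks it; for x == 0 it is 0 and the umax picks x + 1 == 1.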
| 1923 | auto foldMaxMulShift = [&](Value *A, Value *B) -> Instruction * { |
| 1924 | const APInt *C; |
| 1925 | Value *X; |
| 1926 | if (!match(V: A, P: m_NUWShl(L: m_Value(V&: X), R: m_APInt(Res&: C))) && |
| 1927 | !(match(V: A, P: m_NUWMul(L: m_Value(V&: X), R: m_APInt(Res&: C))) && !C->isOne())) |
| 1928 | return nullptr; |
| 1929 | if (C->isZero()) |
| 1930 | return nullptr; |
| 1931 | if (!match(V: B, P: m_OneUse(SubPattern: m_Add(L: m_Specific(V: X), R: m_One())))) |
| 1932 | return nullptr; |
| 1933 | |
| 1934 | Value *Cmp = Builder.CreateICmpEQ(LHS: X, RHS: ConstantInt::get(Ty: X->getType(), V: 0)); |
| 1935 | Value *NewSelect = |
| 1936 | Builder.CreateSelect(C: Cmp, True: ConstantInt::get(Ty: X->getType(), V: 1), False: A); |
| 1937 | return replaceInstUsesWith(I&: *II, V: NewSelect); |
| 1938 | }; |
| 1939 | |
| 1940 | if (IID == Intrinsic::umax) { |
| 1941 | if (Instruction *I = foldMaxMulShift(I0, I1)) |
| 1942 | return I; |
| 1943 | if (Instruction *I = foldMaxMulShift(I1, I0)) |
| 1944 | return I; |
| 1945 | } |
| 1946 | |
| 1947 | // If both operands of unsigned min/max are sign-extended, it is still ok |
| 1948 | // to narrow the operation. |
| 1949 | [[fallthrough]]; |
| 1950 | } |
| 1951 | case Intrinsic::smax: |
| 1952 | case Intrinsic::smin: { |
| 1953 | Value *I0 = II->getArgOperand(i: 0), *I1 = II->getArgOperand(i: 1); |
| 1954 | Value *X, *Y; |
| 1955 | if (match(V: I0, P: m_SExt(Op: m_Value(V&: X))) && match(V: I1, P: m_SExt(Op: m_Value(V&: Y))) && |
| 1956 | (I0->hasOneUse() || I1->hasOneUse()) && X->getType() == Y->getType()) { |
| 1957 | Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(ID: IID, LHS: X, RHS: Y); |
| 1958 | return CastInst::Create(Instruction::SExt, S: NarrowMaxMin, Ty: II->getType()); |
| 1959 | } |
| 1960 | |
| 1961 | Constant *C; |
| 1962 | if (match(V: I0, P: m_SExt(Op: m_Value(V&: X))) && match(V: I1, P: m_Constant(C)) && |
| 1963 | I0->hasOneUse()) { |
| 1964 | if (Constant *NarrowC = getLosslessSignedTrunc(C, TruncTy: X->getType())) { |
| 1965 | Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(ID: IID, LHS: X, RHS: NarrowC); |
| 1966 | return CastInst::Create(Instruction::SExt, S: NarrowMaxMin, Ty: II->getType()); |
| 1967 | } |
| 1968 | } |
| 1969 | |
| 1970 | // smax(smin(X, MinC), MaxC) -> smin(smax(X, MaxC), MinC) if MinC s>= MaxC |
| 1971 | // umax(umin(X, MinC), MaxC) -> umin(umax(X, MaxC), MinC) if MinC u>= MaxC |
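|      | // Both forms clamp X to [MaxC, MinC] when MinC >= MaxC; the min-of-max form
|      | // is the one treated as canonical below.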
| 1972 | const APInt *MinC, *MaxC; |
| 1973 | auto CreateCanonicalClampForm = [&](bool IsSigned) { |
| 1974 | auto MaxIID = IsSigned ? Intrinsic::smax : Intrinsic::umax; |
| 1975 | auto MinIID = IsSigned ? Intrinsic::smin : Intrinsic::umin; |
| 1976 | Value *NewMax = Builder.CreateBinaryIntrinsic( |
| 1977 | ID: MaxIID, LHS: X, RHS: ConstantInt::get(Ty: X->getType(), V: *MaxC)); |
| 1978 | return replaceInstUsesWith( |
| 1979 | I&: *II, V: Builder.CreateBinaryIntrinsic( |
| 1980 | ID: MinIID, LHS: NewMax, RHS: ConstantInt::get(Ty: X->getType(), V: *MinC))); |
| 1981 | }; |
| 1982 | if (IID == Intrinsic::smax && |
| 1983 | match(V: I0, P: m_OneUse(SubPattern: m_Intrinsic<Intrinsic::smin>(Op0: m_Value(V&: X), |
| 1984 | Op1: m_APInt(Res&: MinC)))) && |
| 1985 | match(V: I1, P: m_APInt(Res&: MaxC)) && MinC->sgt(RHS: *MaxC)) |
| 1986 | return CreateCanonicalClampForm(true); |
| 1987 | if (IID == Intrinsic::umax && |
| 1988 | match(V: I0, P: m_OneUse(SubPattern: m_Intrinsic<Intrinsic::umin>(Op0: m_Value(V&: X), |
| 1989 | Op1: m_APInt(Res&: MinC)))) && |
| 1990 | match(V: I1, P: m_APInt(Res&: MaxC)) && MinC->ugt(RHS: *MaxC)) |
| 1991 | return CreateCanonicalClampForm(false); |
| 1992 | |
| 1993 | // umin(i1 X, i1 Y) -> and i1 X, Y |
| 1994 | // smax(i1 X, i1 Y) -> and i1 X, Y |
| 1995 | if ((IID == Intrinsic::umin || IID == Intrinsic::smax) && |
| 1996 | II->getType()->isIntOrIntVectorTy(BitWidth: 1)) { |
| 1997 | return BinaryOperator::CreateAnd(V1: I0, V2: I1); |
| 1998 | } |
| 1999 | |
| 2000 | // umax(i1 X, i1 Y) -> or i1 X, Y |
| 2001 | // smin(i1 X, i1 Y) -> or i1 X, Y |
| 2002 | if ((IID == Intrinsic::umax || IID == Intrinsic::smin) && |
| 2003 | II->getType()->isIntOrIntVectorTy(BitWidth: 1)) { |
| 2004 | return BinaryOperator::CreateOr(V1: I0, V2: I1); |
| 2005 | } |
| 2006 | |
| 2007 | // smin(smax(X, -1), 1) -> scmp(X, 0) |
| 2008 | // smax(smin(X, 1), -1) -> scmp(X, 0) |
| 2009 | // At this point smax(smin(X, 1), -1) has already been canonicalized to
| 2010 | // smin(smax(X, -1), 1), and the i1 cases have been turned into and/or above,
| 2011 | // so we only need to check for smin.
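|      | // Clamping X to [-1, 1] yields exactly the sign of X, which is what
|      | // scmp(X, 0) computes.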
| 2012 | if (IID == Intrinsic::smin) { |
| 2013 | if (match(V: I0, P: m_OneUse(SubPattern: m_SMax(L: m_Value(V&: X), R: m_AllOnes()))) && |
| 2014 | match(V: I1, P: m_One())) { |
| 2015 | Value *Zero = ConstantInt::get(Ty: X->getType(), V: 0); |
| 2016 | return replaceInstUsesWith( |
| 2017 | I&: CI, |
| 2018 | V: Builder.CreateIntrinsic(RetTy: II->getType(), ID: Intrinsic::scmp, Args: {X, Zero})); |
| 2019 | } |
| 2020 | } |
| 2021 | |
| 2022 | if (IID == Intrinsic::smax || IID == Intrinsic::smin) { |
| 2023 | // smax (neg nsw X), (neg nsw Y) --> neg nsw (smin X, Y) |
| 2024 | // smin (neg nsw X), (neg nsw Y) --> neg nsw (smax X, Y) |
| 2025 | // TODO: Canonicalize neg after min/max if I1 is constant. |
| 2026 | if (match(V: I0, P: m_NSWNeg(V: m_Value(V&: X))) && match(V: I1, P: m_NSWNeg(V: m_Value(V&: Y))) && |
| 2027 | (I0->hasOneUse() || I1->hasOneUse())) { |
| 2028 | Intrinsic::ID InvID = getInverseMinMaxIntrinsic(MinMaxID: IID); |
| 2029 | Value *InvMaxMin = Builder.CreateBinaryIntrinsic(ID: InvID, LHS: X, RHS: Y); |
| 2030 | return BinaryOperator::CreateNSWNeg(Op: InvMaxMin); |
| 2031 | } |
| 2032 | } |
| 2033 | |
| 2034 | // (umax X, (xor X, Pow2)) |
| 2035 | // -> (or X, Pow2) |
| 2036 | // (umin X, (xor X, Pow2)) |
| 2037 | // -> (and X, ~Pow2) |
| 2038 | // (smax X, (xor X, Pos_Pow2)) |
| 2039 | // -> (or X, Pos_Pow2) |
| 2040 | // (smin X, (xor X, Pos_Pow2)) |
| 2041 | // -> (and X, ~Pos_Pow2) |
| 2042 | // (smax X, (xor X, Neg_Pow2)) |
| 2043 | // -> (and X, ~Neg_Pow2) |
| 2044 | // (smin X, (xor X, Neg_Pow2)) |
| 2045 | // -> (or X, Neg_Pow2) |
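|      | // Rationale: X ^ Pow2 differs from X in exactly one bit, so one of the pair
|      | // has that bit set (X | Pow2) and the other has it clear (X & ~Pow2); which
|      | // of those is the max/min depends on the signedness and the sign of Pow2.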
| 2046 | if ((match(V: I0, P: m_c_Xor(L: m_Specific(V: I1), R: m_Value(V&: X))) || |
| 2047 | match(V: I1, P: m_c_Xor(L: m_Specific(V: I0), R: m_Value(V&: X)))) && |
| 2048 | isKnownToBeAPowerOfTwo(V: X, /* OrZero */ true)) { |
| 2049 | bool UseOr = IID == Intrinsic::smax || IID == Intrinsic::umax; |
| 2050 | bool UseAndN = IID == Intrinsic::smin || IID == Intrinsic::umin; |
| 2051 | |
| 2052 | if (IID == Intrinsic::smax || IID == Intrinsic::smin) { |
| 2053 | auto KnownSign = getKnownSign(Op: X, SQ: SQ.getWithInstruction(I: II)); |
| 2054 | if (KnownSign == std::nullopt) { |
| 2055 | UseOr = false; |
| 2056 | UseAndN = false; |
| 2057 | } else if (*KnownSign /* true is Signed. */) { |
| 2058 | UseOr ^= true; |
| 2059 | UseAndN ^= true; |
| 2060 | Type *Ty = I0->getType(); |
| 2061 | // A negative power of 2 must be the sign bit (IntMin). It's possible to
| 2062 | // prove that a value is negative and a power of 2 without actually having
| 2063 | // its known bits, so just construct the value by hand.
| 2064 | X = Constant::getIntegerValue( |
| 2065 | Ty, V: APInt::getSignedMinValue(numBits: Ty->getScalarSizeInBits())); |
| 2066 | } |
| 2067 | } |
| 2068 | if (UseOr) |
| 2069 | return BinaryOperator::CreateOr(V1: I0, V2: X); |
| 2070 | else if (UseAndN) |
| 2071 | return BinaryOperator::CreateAnd(V1: I0, V2: Builder.CreateNot(V: X)); |
| 2072 | } |
| 2073 | |
| 2074 | // If we can eliminate ~A and Y is free to invert: |
| 2075 | // max ~A, Y --> ~(min A, ~Y) |
| 2076 | // |
| 2077 | // Examples: |
| 2078 | // max ~A, ~Y --> ~(min A, Y) |
| 2079 | // max ~A, C --> ~(min A, ~C) |
| 2080 | // max ~A, (max ~Y, ~Z) --> ~(min A, (min Y, Z))
| 2081 | auto moveNotAfterMinMax = [&](Value *X, Value *Y) -> Instruction * { |
| 2082 | Value *A; |
| 2083 | if (match(V: X, P: m_OneUse(SubPattern: m_Not(V: m_Value(V&: A)))) && |
| 2084 | !isFreeToInvert(V: A, WillInvertAllUses: A->hasOneUse())) { |
| 2085 | if (Value *NotY = getFreelyInverted(V: Y, WillInvertAllUses: Y->hasOneUse(), Builder: &Builder)) { |
| 2086 | Intrinsic::ID InvID = getInverseMinMaxIntrinsic(MinMaxID: IID); |
| 2087 | Value *InvMaxMin = Builder.CreateBinaryIntrinsic(ID: InvID, LHS: A, RHS: NotY); |
| 2088 | return BinaryOperator::CreateNot(Op: InvMaxMin); |
| 2089 | } |
| 2090 | } |
| 2091 | return nullptr; |
| 2092 | }; |
| 2093 | |
| 2094 | if (Instruction *I = moveNotAfterMinMax(I0, I1)) |
| 2095 | return I; |
| 2096 | if (Instruction *I = moveNotAfterMinMax(I1, I0)) |
| 2097 | return I; |
| 2098 | |
| 2099 | if (Instruction *I = moveAddAfterMinMax(II, Builder)) |
| 2100 | return I; |
| 2101 | |
| 2102 | // minmax (X & NegPow2C, Y & NegPow2C) --> minmax(X, Y) & NegPow2C |
| 2103 | const APInt *RHSC; |
| 2104 | if (match(V: I0, P: m_OneUse(SubPattern: m_And(L: m_Value(V&: X), R: m_NegatedPower2(V&: RHSC)))) && |
| 2105 | match(V: I1, P: m_OneUse(SubPattern: m_And(L: m_Value(V&: Y), R: m_SpecificInt(V: *RHSC))))) |
| 2106 | return BinaryOperator::CreateAnd(V1: Builder.CreateBinaryIntrinsic(ID: IID, LHS: X, RHS: Y), |
| 2107 | V2: ConstantInt::get(Ty: II->getType(), V: *RHSC)); |
| 2108 | |
| 2109 | // smax(X, -X) --> abs(X) |
| 2110 | // smin(X, -X) --> -abs(X) |
| 2111 | // umax(X, -X) --> -abs(X) |
| 2112 | // umin(X, -X) --> abs(X) |
| 2113 | if (isKnownNegation(X: I0, Y: I1)) { |
| 2114 | // We can choose either operand as the input to abs(), but if we can |
| 2115 | // eliminate the only use of a value, that's better for subsequent |
| 2116 | // transforms/analysis. |
| 2117 | if (I0->hasOneUse() && !I1->hasOneUse()) |
| 2118 | std::swap(a&: I0, b&: I1); |
| 2119 | |
| 2120 | // This is some variant of abs(). See if we can propagate 'nsw' to the abs |
| 2121 | // operation and potentially its negation. |
| 2122 | bool IntMinIsPoison = isKnownNegation(X: I0, Y: I1, /* NeedNSW */ true); |
| 2123 | Value *Abs = Builder.CreateBinaryIntrinsic( |
| 2124 | ID: Intrinsic::abs, LHS: I0, |
| 2125 | RHS: ConstantInt::getBool(Context&: II->getContext(), V: IntMinIsPoison)); |
| 2126 | |
| 2127 | // We don't have a "nabs" intrinsic, so negate if needed based on the |
| 2128 | // max/min operation. |
| 2129 | if (IID == Intrinsic::smin || IID == Intrinsic::umax) |
| 2130 | Abs = Builder.CreateNeg(V: Abs, Name: "nabs" , HasNSW: IntMinIsPoison); |
| 2131 | return replaceInstUsesWith(I&: CI, V: Abs); |
| 2132 | } |
| 2133 | |
| 2134 | if (Instruction *Sel = foldClampRangeOfTwo(II, Builder)) |
| 2135 | return Sel; |
| 2136 | |
| 2137 | if (Instruction *SAdd = matchSAddSubSat(MinMax1&: *II)) |
| 2138 | return SAdd; |
| 2139 | |
| 2140 | if (Value *NewMinMax = reassociateMinMaxWithConstants(II, Builder, SQ)) |
| 2141 | return replaceInstUsesWith(I&: *II, V: NewMinMax); |
| 2142 | |
| 2143 | if (Instruction *R = reassociateMinMaxWithConstantInOperand(II, Builder)) |
| 2144 | return R; |
| 2145 | |
| 2146 | if (Instruction *NewMinMax = factorizeMinMaxTree(II)) |
| 2147 | return NewMinMax; |
| 2148 | |
| 2149 | // Try to fold minmax with constant RHS based on range information |
| 2150 | if (match(V: I1, P: m_APIntAllowPoison(Res&: RHSC))) { |
| 2151 | ICmpInst::Predicate Pred = |
| 2152 | ICmpInst::getNonStrictPredicate(pred: MinMaxIntrinsic::getPredicate(ID: IID)); |
| 2153 | bool IsSigned = MinMaxIntrinsic::isSigned(ID: IID); |
| 2154 | ConstantRange LHS_CR = computeConstantRangeIncludingKnownBits( |
| 2155 | V: I0, ForSigned: IsSigned, SQ: SQ.getWithInstruction(I: II)); |
| 2156 | if (!LHS_CR.isFullSet()) { |
| 2157 | if (LHS_CR.icmp(Pred, Other: *RHSC)) |
| 2158 | return replaceInstUsesWith(I&: *II, V: I0); |
| 2159 | if (LHS_CR.icmp(Pred: ICmpInst::getSwappedPredicate(pred: Pred), Other: *RHSC)) |
| 2160 | return replaceInstUsesWith(I&: *II, |
| 2161 | V: ConstantInt::get(Ty: II->getType(), V: *RHSC)); |
| 2162 | } |
| 2163 | } |
| 2164 | |
| 2165 | if (Value *V = foldIntrinsicUsingDistributiveLaws(II, Builder)) |
| 2166 | return replaceInstUsesWith(I&: *II, V); |
| 2167 | |
| 2168 | break; |
| 2169 | } |
| 2170 | case Intrinsic::scmp: { |
| 2171 | Value *I0 = II->getArgOperand(i: 0), *I1 = II->getArgOperand(i: 1); |
| 2172 | Value *LHS, *RHS; |
| 2173 | if (match(V: I0, P: m_NSWSub(L: m_Value(V&: LHS), R: m_Value(V&: RHS))) && match(V: I1, P: m_Zero())) |
| 2174 | return replaceInstUsesWith( |
| 2175 | I&: CI, |
| 2176 | V: Builder.CreateIntrinsic(RetTy: II->getType(), ID: Intrinsic::scmp, Args: {LHS, RHS})); |
| 2177 | break; |
| 2178 | } |
| 2179 | case Intrinsic::bitreverse: { |
| 2180 | Value *IIOperand = II->getArgOperand(i: 0); |
| 2181 | // bitrev (zext i1 X to ?) --> X ? SignBitC : 0 |
| 2182 | Value *X; |
| 2183 | if (match(V: IIOperand, P: m_ZExt(Op: m_Value(V&: X))) && |
| 2184 | X->getType()->isIntOrIntVectorTy(BitWidth: 1)) { |
| 2185 | Type *Ty = II->getType(); |
| 2186 | APInt SignBit = APInt::getSignMask(BitWidth: Ty->getScalarSizeInBits()); |
| 2187 | return SelectInst::Create(C: X, S1: ConstantInt::get(Ty, V: SignBit), |
| 2188 | S2: ConstantInt::getNullValue(Ty)); |
| 2189 | } |
| 2190 | |
| 2191 | if (Instruction *crossLogicOpFold = |
| 2192 | foldBitOrderCrossLogicOp<Intrinsic::bitreverse>(V: IIOperand, Builder)) |
| 2193 | return crossLogicOpFold; |
| 2194 | |
| 2195 | break; |
| 2196 | } |
| 2197 | case Intrinsic::bswap: { |
| 2198 | Value *IIOperand = II->getArgOperand(i: 0); |
| 2199 | |
| 2200 | // Try to canonicalize bswap-of-logical-shift-by-8-bit-multiple as |
| 2201 | // inverse-shift-of-bswap: |
| 2202 | // bswap (shl X, Y) --> lshr (bswap X), Y |
| 2203 | // bswap (lshr X, Y) --> shl (bswap X), Y |
| 2204 | Value *X, *Y; |
| 2205 | if (match(V: IIOperand, P: m_OneUse(SubPattern: m_LogicalShift(L: m_Value(V&: X), R: m_Value(V&: Y))))) { |
| 2206 | unsigned BitWidth = IIOperand->getType()->getScalarSizeInBits(); |
| 2207 | if (MaskedValueIsZero(V: Y, Mask: APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: 3))) { |
| 2208 | Value *NewSwap = Builder.CreateUnaryIntrinsic(ID: Intrinsic::bswap, V: X); |
| 2209 | BinaryOperator::BinaryOps InverseShift = |
| 2210 | cast<BinaryOperator>(Val: IIOperand)->getOpcode() == Instruction::Shl |
| 2211 | ? Instruction::LShr |
| 2212 | : Instruction::Shl; |
| 2213 | return BinaryOperator::Create(Op: InverseShift, S1: NewSwap, S2: Y); |
| 2214 | } |
| 2215 | } |
| 2216 | |
| 2217 | KnownBits Known = computeKnownBits(V: IIOperand, CxtI: II); |
| 2218 | uint64_t LZ = alignDown(Value: Known.countMinLeadingZeros(), Align: 8); |
| 2219 | uint64_t TZ = alignDown(Value: Known.countMinTrailingZeros(), Align: 8); |
| 2220 | unsigned BW = Known.getBitWidth(); |
| 2221 | |
| 2222 | // bswap(x) -> shift(x) if x has exactly one "active byte" |
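|      | // e.g. for an i32 where only bits 8..15 can be set (LZ = 16, TZ = 8),
|      | // bswap(x) == shl nuw x, 8.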
| 2223 | if (BW - LZ - TZ == 8) { |
| 2224 | assert(LZ != TZ && "active byte cannot be in the middle" ); |
| 2225 | if (LZ > TZ) // -> shl(x) if the "active byte" is in the low part of x |
| 2226 | return BinaryOperator::CreateNUWShl( |
| 2227 | V1: IIOperand, V2: ConstantInt::get(Ty: IIOperand->getType(), V: LZ - TZ)); |
| 2228 | // -> lshr(x) if the "active byte" is in the high part of x |
| 2229 | return BinaryOperator::CreateExactLShr( |
| 2230 | V1: IIOperand, V2: ConstantInt::get(Ty: IIOperand->getType(), V: TZ - LZ)); |
| 2231 | } |
| 2232 | |
| 2233 | // bswap(trunc(bswap(x))) -> trunc(lshr(x, c)) |
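|      | // e.g. i64 -> i16: bswap(trunc i16 (bswap i64 X)) == trunc i16 (lshr X, 48).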
| 2234 | if (match(V: IIOperand, P: m_Trunc(Op: m_BSwap(Op0: m_Value(V&: X))))) { |
| 2235 | unsigned C = X->getType()->getScalarSizeInBits() - BW; |
| 2236 | Value *CV = ConstantInt::get(Ty: X->getType(), V: C); |
| 2237 | Value *V = Builder.CreateLShr(LHS: X, RHS: CV); |
| 2238 | return new TruncInst(V, IIOperand->getType()); |
| 2239 | } |
| 2240 | |
| 2241 | if (Instruction *crossLogicOpFold = |
| 2242 | foldBitOrderCrossLogicOp<Intrinsic::bswap>(V: IIOperand, Builder)) { |
| 2243 | return crossLogicOpFold; |
| 2244 | } |
| 2245 | |
| 2246 | // Try to fold into bitreverse if bswap is the root of the expression tree. |
| 2247 | if (Instruction *BitOp = matchBSwapOrBitReverse(I&: *II, /*MatchBSwaps*/ false, |
| 2248 | /*MatchBitReversals*/ true)) |
| 2249 | return BitOp; |
| 2250 | break; |
| 2251 | } |
| 2252 | case Intrinsic::masked_load: |
| 2253 | if (Value *SimplifiedMaskedOp = simplifyMaskedLoad(II&: *II)) |
| 2254 | return replaceInstUsesWith(I&: CI, V: SimplifiedMaskedOp); |
| 2255 | break; |
| 2256 | case Intrinsic::masked_store: |
| 2257 | return simplifyMaskedStore(II&: *II); |
| 2258 | case Intrinsic::masked_gather: |
| 2259 | return simplifyMaskedGather(II&: *II); |
| 2260 | case Intrinsic::masked_scatter: |
| 2261 | return simplifyMaskedScatter(II&: *II); |
| 2262 | case Intrinsic::launder_invariant_group: |
| 2263 | case Intrinsic::strip_invariant_group: |
| 2264 | if (auto *SkippedBarrier = simplifyInvariantGroupIntrinsic(II&: *II, IC&: *this)) |
| 2265 | return replaceInstUsesWith(I&: *II, V: SkippedBarrier); |
| 2266 | break; |
| 2267 | case Intrinsic::powi: |
| 2268 | if (ConstantInt *Power = dyn_cast<ConstantInt>(Val: II->getArgOperand(i: 1))) { |
| 2269 | // 0 and 1 are handled in instsimplify |
| 2270 | // powi(x, -1) -> 1/x |
| 2271 | if (Power->isMinusOne()) |
| 2272 | return BinaryOperator::CreateFDivFMF(V1: ConstantFP::get(Ty: CI.getType(), V: 1.0), |
| 2273 | V2: II->getArgOperand(i: 0), FMFSource: II); |
| 2274 | // powi(x, 2) -> x*x |
| 2275 | if (Power->equalsInt(V: 2)) |
| 2276 | return BinaryOperator::CreateFMulFMF(V1: II->getArgOperand(i: 0), |
| 2277 | V2: II->getArgOperand(i: 0), FMFSource: II); |
| 2278 | |
| 2279 | if (!Power->getValue()[0]) { |
| 2280 | Value *X; |
| 2281 | // If power is even: |
| 2282 | // powi(-x, p) -> powi(x, p) |
| 2283 | // powi(fabs(x), p) -> powi(x, p) |
| 2284 | // powi(copysign(x, y), p) -> powi(x, p) |
| 2285 | if (match(V: II->getArgOperand(i: 0), P: m_FNeg(X: m_Value(V&: X))) || |
| 2286 | match(V: II->getArgOperand(i: 0), P: m_FAbs(Op0: m_Value(V&: X))) || |
| 2287 | match(V: II->getArgOperand(i: 0), |
| 2288 | P: m_Intrinsic<Intrinsic::copysign>(Op0: m_Value(V&: X), Op1: m_Value()))) |
| 2289 | return replaceOperand(I&: *II, OpNum: 0, V: X); |
| 2290 | } |
| 2291 | } |
| 2292 | break; |
| 2293 | |
| 2294 | case Intrinsic::cttz: |
| 2295 | case Intrinsic::ctlz: |
| 2296 | if (auto *I = foldCttzCtlz(II&: *II, IC&: *this)) |
| 2297 | return I; |
| 2298 | break; |
| 2299 | |
| 2300 | case Intrinsic::ctpop: |
| 2301 | if (auto *I = foldCtpop(II&: *II, IC&: *this)) |
| 2302 | return I; |
| 2303 | break; |
| 2304 | |
| 2305 | case Intrinsic::fshl: |
| 2306 | case Intrinsic::fshr: { |
| 2307 | Value *Op0 = II->getArgOperand(i: 0), *Op1 = II->getArgOperand(i: 1); |
| 2308 | Type *Ty = II->getType(); |
| 2309 | unsigned BitWidth = Ty->getScalarSizeInBits(); |
| 2310 | Constant *ShAmtC; |
| 2311 | if (match(V: II->getArgOperand(i: 2), P: m_ImmConstant(C&: ShAmtC))) { |
| 2312 | // Canonicalize a shift amount constant operand to modulo the bit-width. |
| 2313 | Constant *WidthC = ConstantInt::get(Ty, V: BitWidth); |
| 2314 | Constant *ModuloC = |
| 2315 | ConstantFoldBinaryOpOperands(Opcode: Instruction::URem, LHS: ShAmtC, RHS: WidthC, DL); |
| 2316 | if (!ModuloC) |
| 2317 | return nullptr; |
| 2318 | if (ModuloC != ShAmtC) |
| 2319 | return replaceOperand(I&: *II, OpNum: 2, V: ModuloC); |
| 2320 | |
| 2321 | assert(match(ConstantFoldCompareInstOperands(ICmpInst::ICMP_UGT, WidthC, |
| 2322 | ShAmtC, DL), |
| 2323 | m_One()) && |
| 2324 | "Shift amount expected to be modulo bitwidth" ); |
| 2325 | |
| 2326 | // Canonicalize funnel shift right by constant to funnel shift left. This |
| 2327 | // is not entirely arbitrary. For historical reasons, the backend may |
| 2328 | // recognize rotate left patterns but miss rotate right patterns. |
| 2329 | if (IID == Intrinsic::fshr) { |
| 2330 | // fshr X, Y, C --> fshl X, Y, (BitWidth - C) if C is not zero. |
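|      | // e.g. for i8: fshr(X, Y, 3) == fshl(X, Y, 5); both select the same 8-bit
|      | // window of the concatenation of X and Y.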
| 2331 | if (!isKnownNonZero(V: ShAmtC, Q: SQ.getWithInstruction(I: II))) |
| 2332 | return nullptr; |
| 2333 | |
| 2334 | Constant *LeftShiftC = ConstantExpr::getSub(C1: WidthC, C2: ShAmtC); |
| 2335 | Module *Mod = II->getModule(); |
| 2336 | Function *Fshl = |
| 2337 | Intrinsic::getOrInsertDeclaration(M: Mod, id: Intrinsic::fshl, Tys: Ty); |
| 2338 | return CallInst::Create(Func: Fshl, Args: { Op0, Op1, LeftShiftC }); |
| 2339 | } |
| 2340 | assert(IID == Intrinsic::fshl && |
| 2341 | "All funnel shifts by simple constants should go left" ); |
| 2342 | |
| 2343 | // fshl(X, 0, C) --> shl X, C |
| 2344 | // fshl(X, undef, C) --> shl X, C |
| 2345 | if (match(V: Op1, P: m_ZeroInt()) || match(V: Op1, P: m_Undef())) |
| 2346 | return BinaryOperator::CreateShl(V1: Op0, V2: ShAmtC); |
| 2347 | |
| 2348 | // fshl(0, X, C) --> lshr X, (BW-C) |
| 2349 | // fshl(undef, X, C) --> lshr X, (BW-C) |
| 2350 | if (match(V: Op0, P: m_ZeroInt()) || match(V: Op0, P: m_Undef())) |
| 2351 | return BinaryOperator::CreateLShr(V1: Op1, |
| 2352 | V2: ConstantExpr::getSub(C1: WidthC, C2: ShAmtC)); |
| 2353 | |
| 2354 | // fshl i16 X, X, 8 --> bswap i16 X (reduce to more-specific form) |
| 2355 | if (Op0 == Op1 && BitWidth == 16 && match(V: ShAmtC, P: m_SpecificInt(V: 8))) { |
| 2356 | Module *Mod = II->getModule(); |
| 2357 | Function *Bswap = |
| 2358 | Intrinsic::getOrInsertDeclaration(M: Mod, id: Intrinsic::bswap, Tys: Ty); |
| 2359 | return CallInst::Create(Func: Bswap, Args: { Op0 }); |
| 2360 | } |
| 2361 | if (Instruction *BitOp = |
| 2362 | matchBSwapOrBitReverse(I&: *II, /*MatchBSwaps*/ true, |
| 2363 | /*MatchBitReversals*/ true)) |
| 2364 | return BitOp; |
| 2365 | } |
| 2366 | |
| 2367 | // fshl(X, X, Neg(Y)) --> fshr(X, X, Y) |
| 2368 | // fshr(X, X, Neg(Y)) --> fshl(X, X, Y) |
| 2369 | // if BitWidth is a power-of-2 |
| 2370 | Value *Y; |
| 2371 | if (Op0 == Op1 && isPowerOf2_32(Value: BitWidth) && |
| 2372 | match(V: II->getArgOperand(i: 2), P: m_Neg(V: m_Value(V&: Y)))) { |
| 2373 | Module *Mod = II->getModule(); |
| 2374 | Function *OppositeShift = Intrinsic::getOrInsertDeclaration( |
| 2375 | M: Mod, id: IID == Intrinsic::fshl ? Intrinsic::fshr : Intrinsic::fshl, Tys: Ty); |
| 2376 | return CallInst::Create(Func: OppositeShift, Args: {Op0, Op1, Y}); |
| 2377 | } |
| 2378 | |
| 2379 | // fshl(X, 0, Y) --> shl(X, and(Y, BitWidth - 1)) if bitwidth is a |
| 2380 | // power-of-2 |
| 2381 | if (IID == Intrinsic::fshl && isPowerOf2_32(Value: BitWidth) && |
| 2382 | match(V: Op1, P: m_ZeroInt())) { |
| 2383 | Value *Op2 = II->getArgOperand(i: 2); |
| 2384 | Value *And = Builder.CreateAnd(LHS: Op2, RHS: ConstantInt::get(Ty, V: BitWidth - 1)); |
| 2385 | return BinaryOperator::CreateShl(V1: Op0, V2: And); |
| 2386 | } |
| 2387 | |
| 2388 | // Left or right might be masked. |
| 2389 | if (SimplifyDemandedInstructionBits(Inst&: *II)) |
| 2390 | return &CI; |
| 2391 | |
| 2392 | // The shift amount (operand 2) of a funnel shift is modulo the bitwidth, |
| 2393 | // so only the low bits of the shift amount are demanded if the bitwidth is |
| 2394 | // a power-of-2. |
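|      | // For example, for i32 only the low 5 bits (Log2_32_Ceil(32) == 5) of the
|      | // shift amount can change the result.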
| 2395 | if (!isPowerOf2_32(Value: BitWidth)) |
| 2396 | break; |
| 2397 | APInt Op2Demanded = APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: Log2_32_Ceil(Value: BitWidth)); |
| 2398 | KnownBits Op2Known(BitWidth); |
| 2399 | if (SimplifyDemandedBits(I: II, OpNo: 2, DemandedMask: Op2Demanded, Known&: Op2Known)) |
| 2400 | return &CI; |
| 2401 | break; |
| 2402 | } |
| 2403 | case Intrinsic::ptrmask: { |
| 2404 | unsigned BitWidth = DL.getPointerTypeSizeInBits(II->getType()); |
| 2405 | KnownBits Known(BitWidth); |
| 2406 | if (SimplifyDemandedInstructionBits(Inst&: *II, Known)) |
| 2407 | return II; |
| 2408 | |
| 2409 | Value *InnerPtr, *InnerMask; |
| 2410 | bool Changed = false; |
| 2411 | // Combine: |
| 2412 | // (ptrmask (ptrmask p, A), B) |
| 2413 | // -> (ptrmask p, (and A, B)) |
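|      | // For example: (ptrmask (ptrmask p, -16), -64) --> (ptrmask p, -64),
|      | // since -16 & -64 == -64.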
| 2414 | if (match(V: II->getArgOperand(i: 0), |
| 2415 | P: m_OneUse(SubPattern: m_Intrinsic<Intrinsic::ptrmask>(Op0: m_Value(V&: InnerPtr), |
| 2416 | Op1: m_Value(V&: InnerMask))))) { |
| 2417 | assert(II->getArgOperand(1)->getType() == InnerMask->getType() && |
| 2418 | "Mask types must match" ); |
| 2419 | // TODO: If InnerMask == Op1, we could copy attributes from inner |
| 2420 | // callsite -> outer callsite. |
| 2421 | Value *NewMask = Builder.CreateAnd(LHS: II->getArgOperand(i: 1), RHS: InnerMask); |
| 2422 | replaceOperand(I&: CI, OpNum: 0, V: InnerPtr); |
| 2423 | replaceOperand(I&: CI, OpNum: 1, V: NewMask); |
| 2424 | Changed = true; |
| 2425 | } |
| 2426 | |
| 2427 | // See if we can deduce non-null. |
| 2428 | if (!CI.hasRetAttr(Kind: Attribute::NonNull) && |
| 2429 | (Known.isNonZero() || |
| 2430 | isKnownNonZero(V: II, Q: getSimplifyQuery().getWithInstruction(I: II)))) { |
| 2431 | CI.addRetAttr(Kind: Attribute::NonNull); |
| 2432 | Changed = true; |
| 2433 | } |
| 2434 | |
| 2435 | unsigned NewAlignmentLog = |
| 2436 | std::min(a: Value::MaxAlignmentExponent, |
| 2437 | b: std::min(a: BitWidth - 1, b: Known.countMinTrailingZeros())); |
| 2438 | // Known bits will capture any alignment information associated with the
| 2439 | // pointer argument.
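|      | // For example, a mask of -16 clears the low 4 bits, so the result is known
|      | // to have at least 4 trailing zeros and an align(16) return attribute is
|      | // justified.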
| 2440 | if (NewAlignmentLog > Log2(A: CI.getRetAlign().valueOrOne())) { |
| 2441 | CI.addRetAttr(Attr: Attribute::getWithAlignment( |
| 2442 | Context&: CI.getContext(), Alignment: Align(uint64_t(1) << NewAlignmentLog))); |
| 2443 | Changed = true; |
| 2444 | } |
| 2445 | if (Changed) |
| 2446 | return &CI; |
| 2447 | break; |
| 2448 | } |
| 2449 | case Intrinsic::uadd_with_overflow: |
| 2450 | case Intrinsic::sadd_with_overflow: { |
| 2451 | if (Instruction *I = foldIntrinsicWithOverflowCommon(II)) |
| 2452 | return I; |
| 2453 | |
| 2454 | // Given 2 constant operands whose sum does not overflow: |
| 2455 | // uaddo (X +nuw C0), C1 -> uaddo X, C0 + C1 |
| 2456 | // saddo (X +nsw C0), C1 -> saddo X, C0 + C1 |
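|      | // For example: uaddo (X +nuw 3), 5 --> uaddo X, 8, since 3 + 5 does not
|      | // overflow.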
| 2457 | Value *X; |
| 2458 | const APInt *C0, *C1; |
| 2459 | Value *Arg0 = II->getArgOperand(i: 0); |
| 2460 | Value *Arg1 = II->getArgOperand(i: 1); |
| 2461 | bool IsSigned = IID == Intrinsic::sadd_with_overflow; |
| 2462 | bool HasNWAdd = IsSigned |
| 2463 | ? match(V: Arg0, P: m_NSWAddLike(L: m_Value(V&: X), R: m_APInt(Res&: C0))) |
| 2464 | : match(V: Arg0, P: m_NUWAddLike(L: m_Value(V&: X), R: m_APInt(Res&: C0))); |
| 2465 | if (HasNWAdd && match(V: Arg1, P: m_APInt(Res&: C1))) { |
| 2466 | bool Overflow; |
| 2467 | APInt NewC = |
| 2468 | IsSigned ? C1->sadd_ov(RHS: *C0, Overflow) : C1->uadd_ov(RHS: *C0, Overflow); |
| 2469 | if (!Overflow) |
| 2470 | return replaceInstUsesWith( |
| 2471 | I&: *II, V: Builder.CreateBinaryIntrinsic( |
| 2472 | ID: IID, LHS: X, RHS: ConstantInt::get(Ty: Arg1->getType(), V: NewC))); |
| 2473 | } |
| 2474 | break; |
| 2475 | } |
| 2476 | |
| 2477 | case Intrinsic::umul_with_overflow: |
| 2478 | case Intrinsic::smul_with_overflow: |
| 2479 | case Intrinsic::usub_with_overflow: |
| 2480 | if (Instruction *I = foldIntrinsicWithOverflowCommon(II)) |
| 2481 | return I; |
| 2482 | break; |
| 2483 | |
| 2484 | case Intrinsic::ssub_with_overflow: { |
| 2485 | if (Instruction *I = foldIntrinsicWithOverflowCommon(II)) |
| 2486 | return I; |
| 2487 | |
| 2488 | Constant *C; |
| 2489 | Value *Arg0 = II->getArgOperand(i: 0); |
| 2490 | Value *Arg1 = II->getArgOperand(i: 1); |
| 2491 | // Given a constant C that is not the minimum signed value |
| 2492 | // for an integer of a given bit width: |
| 2493 | // |
| 2494 | // ssubo X, C -> saddo X, -C |
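|      | // For example, for i8: ssubo X, 100 --> saddo X, -100. C == INT8_MIN is
|      | // excluded because -(-128) is not representable in i8.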
| 2495 | if (match(V: Arg1, P: m_Constant(C)) && C->isNotMinSignedValue()) { |
| 2496 | Value *NegVal = ConstantExpr::getNeg(C); |
| 2497 | // Build a saddo call that is equivalent to the discovered |
| 2498 | // ssubo call. |
| 2499 | return replaceInstUsesWith( |
| 2500 | I&: *II, V: Builder.CreateBinaryIntrinsic(ID: Intrinsic::sadd_with_overflow, |
| 2501 | LHS: Arg0, RHS: NegVal)); |
| 2502 | } |
| 2503 | |
| 2504 | break; |
| 2505 | } |
| 2506 | |
| 2507 | case Intrinsic::uadd_sat: |
| 2508 | case Intrinsic::sadd_sat: |
| 2509 | case Intrinsic::usub_sat: |
| 2510 | case Intrinsic::ssub_sat: { |
| 2511 | SaturatingInst *SI = cast<SaturatingInst>(Val: II); |
| 2512 | Type *Ty = SI->getType(); |
| 2513 | Value *Arg0 = SI->getLHS(); |
| 2514 | Value *Arg1 = SI->getRHS(); |
| 2515 | |
| 2516 | // Make use of known overflow information. |
| 2517 | OverflowResult OR = computeOverflow(BinaryOp: SI->getBinaryOp(), IsSigned: SI->isSigned(), |
| 2518 | LHS: Arg0, RHS: Arg1, CxtI: SI); |
| 2519 | switch (OR) { |
| 2520 | case OverflowResult::MayOverflow: |
| 2521 | break; |
| 2522 | case OverflowResult::NeverOverflows: |
| 2523 | if (SI->isSigned()) |
| 2524 | return BinaryOperator::CreateNSW(Opc: SI->getBinaryOp(), V1: Arg0, V2: Arg1); |
| 2525 | else |
| 2526 | return BinaryOperator::CreateNUW(Opc: SI->getBinaryOp(), V1: Arg0, V2: Arg1); |
| 2527 | case OverflowResult::AlwaysOverflowsLow: { |
| 2528 | unsigned BitWidth = Ty->getScalarSizeInBits(); |
| 2529 | APInt Min = APSInt::getMinValue(numBits: BitWidth, Unsigned: !SI->isSigned()); |
| 2530 | return replaceInstUsesWith(I&: *SI, V: ConstantInt::get(Ty, V: Min)); |
| 2531 | } |
| 2532 | case OverflowResult::AlwaysOverflowsHigh: { |
| 2533 | unsigned BitWidth = Ty->getScalarSizeInBits(); |
| 2534 | APInt Max = APSInt::getMaxValue(numBits: BitWidth, Unsigned: !SI->isSigned()); |
| 2535 | return replaceInstUsesWith(I&: *SI, V: ConstantInt::get(Ty, V: Max)); |
| 2536 | } |
| 2537 | } |
| 2538 | |
| 2539 | // usub_sat((sub nuw C, A), C1) -> usub_sat(usub_sat(C, C1), A) |
| 2540 | // which then simplifies further to:
| 2541 | // usub_sat((sub nuw C, A), C1) -> usub_sat(C - C1, A) if C1 u< C |
| 2542 | // usub_sat((sub nuw C, A), C1) -> 0 otherwise |
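|      | // For example: usub_sat((sub nuw 10, A), 3) --> usub_sat(7, A).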
| 2543 | Constant *C, *C1; |
| 2544 | Value *A; |
| 2545 | if (IID == Intrinsic::usub_sat && |
| 2546 | match(V: Arg0, P: m_NUWSub(L: m_ImmConstant(C), R: m_Value(V&: A))) && |
| 2547 | match(V: Arg1, P: m_ImmConstant(C&: C1))) { |
| 2548 | auto *NewC = Builder.CreateBinaryIntrinsic(ID: Intrinsic::usub_sat, LHS: C, RHS: C1); |
| 2549 | auto *NewSub = |
| 2550 | Builder.CreateBinaryIntrinsic(ID: Intrinsic::usub_sat, LHS: NewC, RHS: A); |
| 2551 | return replaceInstUsesWith(I&: *SI, V: NewSub); |
| 2552 | } |
| 2553 | |
| 2554 | // ssub.sat(X, C) -> sadd.sat(X, -C) if C != MIN |
| 2555 | if (IID == Intrinsic::ssub_sat && match(V: Arg1, P: m_Constant(C)) && |
| 2556 | C->isNotMinSignedValue()) { |
| 2557 | Value *NegVal = ConstantExpr::getNeg(C); |
| 2558 | return replaceInstUsesWith( |
| 2559 | I&: *II, V: Builder.CreateBinaryIntrinsic( |
| 2560 | ID: Intrinsic::sadd_sat, LHS: Arg0, RHS: NegVal)); |
| 2561 | } |
| 2562 | |
| 2563 | // sat(sat(X + Val2) + Val) -> sat(X + (Val+Val2)) |
| 2564 | // sat(sat(X - Val2) - Val) -> sat(X - (Val+Val2)) |
| 2565 | // if Val and Val2 have the same sign |
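|      | // For example: uadd_sat(uadd_sat(X, 3), 5) --> uadd_sat(X, 8).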
| 2566 | if (auto *Other = dyn_cast<IntrinsicInst>(Val: Arg0)) { |
| 2567 | Value *X; |
| 2568 | const APInt *Val, *Val2; |
| 2569 | APInt NewVal; |
| 2570 | bool IsUnsigned = |
| 2571 | IID == Intrinsic::uadd_sat || IID == Intrinsic::usub_sat; |
| 2572 | if (Other->getIntrinsicID() == IID && |
| 2573 | match(V: Arg1, P: m_APInt(Res&: Val)) && |
| 2574 | match(V: Other->getArgOperand(i: 0), P: m_Value(V&: X)) && |
| 2575 | match(V: Other->getArgOperand(i: 1), P: m_APInt(Res&: Val2))) { |
| 2576 | if (IsUnsigned) |
| 2577 | NewVal = Val->uadd_sat(RHS: *Val2); |
| 2578 | else if (Val->isNonNegative() == Val2->isNonNegative()) { |
| 2579 | bool Overflow; |
| 2580 | NewVal = Val->sadd_ov(RHS: *Val2, Overflow); |
| 2581 | if (Overflow) { |
| 2582 | // Both adds together may add more than SignedMaxValue |
| 2583 | // without saturating the final result. |
| 2584 | break; |
| 2585 | } |
| 2586 | } else { |
| 2587 | // Cannot fold saturated addition with different signs. |
| 2588 | break; |
| 2589 | } |
| 2590 | |
| 2591 | return replaceInstUsesWith( |
| 2592 | I&: *II, V: Builder.CreateBinaryIntrinsic( |
| 2593 | ID: IID, LHS: X, RHS: ConstantInt::get(Ty: II->getType(), V: NewVal))); |
| 2594 | } |
| 2595 | } |
| 2596 | break; |
| 2597 | } |
| 2598 | |
| 2599 | case Intrinsic::minnum: |
| 2600 | case Intrinsic::maxnum: |
| 2601 | case Intrinsic::minimum: |
| 2602 | case Intrinsic::maximum: { |
| 2603 | Value *Arg0 = II->getArgOperand(i: 0); |
| 2604 | Value *Arg1 = II->getArgOperand(i: 1); |
| 2605 | Value *X, *Y; |
| 2606 | if (match(V: Arg0, P: m_FNeg(X: m_Value(V&: X))) && match(V: Arg1, P: m_FNeg(X: m_Value(V&: Y))) && |
| 2607 | (Arg0->hasOneUse() || Arg1->hasOneUse())) { |
| 2608 | // If both operands are negated, invert the call and negate the result: |
| 2609 | // min(-X, -Y) --> -(max(X, Y)) |
| 2610 | // max(-X, -Y) --> -(min(X, Y)) |
| 2611 | Intrinsic::ID NewIID; |
| 2612 | switch (IID) { |
| 2613 | case Intrinsic::maxnum: |
| 2614 | NewIID = Intrinsic::minnum; |
| 2615 | break; |
| 2616 | case Intrinsic::minnum: |
| 2617 | NewIID = Intrinsic::maxnum; |
| 2618 | break; |
| 2619 | case Intrinsic::maximum: |
| 2620 | NewIID = Intrinsic::minimum; |
| 2621 | break; |
| 2622 | case Intrinsic::minimum: |
| 2623 | NewIID = Intrinsic::maximum; |
| 2624 | break; |
| 2625 | default: |
| 2626 | llvm_unreachable("unexpected intrinsic ID");
| 2627 | } |
| 2628 | Value *NewCall = Builder.CreateBinaryIntrinsic(ID: NewIID, LHS: X, RHS: Y, FMFSource: II); |
| 2629 | Instruction *FNeg = UnaryOperator::CreateFNeg(V: NewCall); |
| 2630 | FNeg->copyIRFlags(V: II); |
| 2631 | return FNeg; |
| 2632 | } |
| 2633 | |
| 2634 | // m(m(X, C2), C1) -> m(X, C) |
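|      | // For example: maxnum(maxnum(X, 2.0), 4.0) --> maxnum(X, 4.0).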
| 2635 | const APFloat *C1, *C2; |
| 2636 | if (auto *M = dyn_cast<IntrinsicInst>(Val: Arg0)) { |
| 2637 | if (M->getIntrinsicID() == IID && match(V: Arg1, P: m_APFloat(Res&: C1)) && |
| 2638 | ((match(V: M->getArgOperand(i: 0), P: m_Value(V&: X)) && |
| 2639 | match(V: M->getArgOperand(i: 1), P: m_APFloat(Res&: C2))) || |
| 2640 | (match(V: M->getArgOperand(i: 1), P: m_Value(V&: X)) && |
| 2641 | match(V: M->getArgOperand(i: 0), P: m_APFloat(Res&: C2))))) { |
| 2642 | APFloat Res(0.0); |
| 2643 | switch (IID) { |
| 2644 | case Intrinsic::maxnum: |
| 2645 | Res = maxnum(A: *C1, B: *C2); |
| 2646 | break; |
| 2647 | case Intrinsic::minnum: |
| 2648 | Res = minnum(A: *C1, B: *C2); |
| 2649 | break; |
| 2650 | case Intrinsic::maximum: |
| 2651 | Res = maximum(A: *C1, B: *C2); |
| 2652 | break; |
| 2653 | case Intrinsic::minimum: |
| 2654 | Res = minimum(A: *C1, B: *C2); |
| 2655 | break; |
| 2656 | default: |
| 2657 | llvm_unreachable("unexpected intrinsic ID");
| 2658 | } |
| 2659 | // TODO: Conservatively intersecting FMF. If Res == C2, the transform |
| 2660 | // was a simplification (so Arg0 and its original flags could |
| 2661 | // propagate?) |
| 2662 | Value *V = Builder.CreateBinaryIntrinsic( |
| 2663 | ID: IID, LHS: X, RHS: ConstantFP::get(Ty: Arg0->getType(), V: Res), |
| 2664 | FMFSource: FMFSource::intersect(A: II, B: M)); |
| 2665 | return replaceInstUsesWith(I&: *II, V); |
| 2666 | } |
| 2667 | } |
| 2668 | |
| 2669 | // m((fpext X), (fpext Y)) -> fpext (m(X, Y)) |
| 2670 | if (match(V: Arg0, P: m_OneUse(SubPattern: m_FPExt(Op: m_Value(V&: X)))) && |
| 2671 | match(V: Arg1, P: m_OneUse(SubPattern: m_FPExt(Op: m_Value(V&: Y)))) && |
| 2672 | X->getType() == Y->getType()) { |
| 2673 | Value *NewCall = |
| 2674 | Builder.CreateBinaryIntrinsic(ID: IID, LHS: X, RHS: Y, FMFSource: II, Name: II->getName()); |
| 2675 | return new FPExtInst(NewCall, II->getType()); |
| 2676 | } |
| 2677 | |
| 2678 | // max X, -X --> fabs X |
| 2679 | // min X, -X --> -(fabs X) |
| 2680 | // TODO: Remove the one-use limitation? That is clearly better for max,
| 2681 | //       which is why we don't check one-use for it. For min it would cost
| 2682 | //       an extra instruction (fnabs), but that is still likely better for
| 2683 | //       analysis and codegen.
| 2684 | auto IsMinMaxOrXNegX = [IID, &X](Value *Op0, Value *Op1) { |
| 2685 | if (match(V: Op0, P: m_FNeg(X: m_Value(V&: X))) && match(V: Op1, P: m_Specific(V: X))) |
| 2686 | return Op0->hasOneUse() || |
| 2687 | (IID != Intrinsic::minimum && IID != Intrinsic::minnum); |
| 2688 | return false; |
| 2689 | }; |
| 2690 | |
| 2691 | if (IsMinMaxOrXNegX(Arg0, Arg1) || IsMinMaxOrXNegX(Arg1, Arg0)) { |
| 2692 | Value *R = Builder.CreateUnaryIntrinsic(ID: Intrinsic::fabs, V: X, FMFSource: II); |
| 2693 | if (IID == Intrinsic::minimum || IID == Intrinsic::minnum) |
| 2694 | R = Builder.CreateFNegFMF(V: R, FMFSource: II); |
| 2695 | return replaceInstUsesWith(I&: *II, V: R); |
| 2696 | } |
| 2697 | |
| 2698 | break; |
| 2699 | } |
| 2700 | case Intrinsic::matrix_multiply: { |
| 2701 | // Optimize negation in matrix multiplication. |
| 2702 | |
| 2703 | // -A * -B -> A * B |
| 2704 | Value *A, *B; |
| 2705 | if (match(V: II->getArgOperand(i: 0), P: m_FNeg(X: m_Value(V&: A))) && |
| 2706 | match(V: II->getArgOperand(i: 1), P: m_FNeg(X: m_Value(V&: B)))) { |
| 2707 | replaceOperand(I&: *II, OpNum: 0, V: A); |
| 2708 | replaceOperand(I&: *II, OpNum: 1, V: B); |
| 2709 | return II; |
| 2710 | } |
| 2711 | |
| 2712 | Value *Op0 = II->getOperand(i_nocapture: 0); |
| 2713 | Value *Op1 = II->getOperand(i_nocapture: 1); |
| 2714 | Value *OpNotNeg, *NegatedOp; |
| 2715 | unsigned NegatedOpArg, OtherOpArg; |
| 2716 | if (match(V: Op0, P: m_FNeg(X: m_Value(V&: OpNotNeg)))) { |
| 2717 | NegatedOp = Op0; |
| 2718 | NegatedOpArg = 0; |
| 2719 | OtherOpArg = 1; |
| 2720 | } else if (match(V: Op1, P: m_FNeg(X: m_Value(V&: OpNotNeg)))) { |
| 2721 | NegatedOp = Op1; |
| 2722 | NegatedOpArg = 1; |
| 2723 | OtherOpArg = 0; |
| 2724 | } else |
| 2725 | // Multiplication doesn't have a negated operand. |
| 2726 | break; |
| 2727 | |
| 2728 | // Only optimize if the negated operand has only one use. |
| 2729 | if (!NegatedOp->hasOneUse()) |
| 2730 | break; |
| 2731 | |
| 2732 | Value *OtherOp = II->getOperand(i_nocapture: OtherOpArg); |
| 2733 | VectorType *RetTy = cast<VectorType>(Val: II->getType()); |
| 2734 | VectorType *NegatedOpTy = cast<VectorType>(Val: NegatedOp->getType()); |
| 2735 | VectorType *OtherOpTy = cast<VectorType>(Val: OtherOp->getType()); |
| 2736 | ElementCount NegatedCount = NegatedOpTy->getElementCount(); |
| 2737 | ElementCount OtherCount = OtherOpTy->getElementCount(); |
| 2738 | ElementCount RetCount = RetTy->getElementCount(); |
| 2739 | // (-A) * B -> A * (-B), if it is cheaper to negate B and vice versa. |
| 2740 | if (ElementCount::isKnownGT(LHS: NegatedCount, RHS: OtherCount) && |
| 2741 | ElementCount::isKnownLT(LHS: OtherCount, RHS: RetCount)) { |
| 2742 | Value *InverseOtherOp = Builder.CreateFNeg(V: OtherOp); |
| 2743 | replaceOperand(I&: *II, OpNum: NegatedOpArg, V: OpNotNeg); |
| 2744 | replaceOperand(I&: *II, OpNum: OtherOpArg, V: InverseOtherOp); |
| 2745 | return II; |
| 2746 | } |
| 2747 | // (-A) * B -> -(A * B), if it is cheaper to negate the result |
| 2748 | if (ElementCount::isKnownGT(LHS: NegatedCount, RHS: RetCount)) { |
| 2749 | SmallVector<Value *, 5> NewArgs(II->args()); |
| 2750 | NewArgs[NegatedOpArg] = OpNotNeg; |
| 2751 | Instruction *NewMul = |
| 2752 | Builder.CreateIntrinsic(RetTy: II->getType(), ID: IID, Args: NewArgs, FMFSource: II); |
| 2753 | return replaceInstUsesWith(I&: *II, V: Builder.CreateFNegFMF(V: NewMul, FMFSource: II)); |
| 2754 | } |
| 2755 | break; |
| 2756 | } |
| 2757 | case Intrinsic::fmuladd: { |
| 2758 | // Try to simplify the underlying FMul. |
| 2759 | if (Value *V = |
| 2760 | simplifyFMulInst(LHS: II->getArgOperand(i: 0), RHS: II->getArgOperand(i: 1), |
| 2761 | FMF: II->getFastMathFlags(), Q: SQ.getWithInstruction(I: II))) |
| 2762 | return BinaryOperator::CreateFAddFMF(V1: V, V2: II->getArgOperand(i: 2), |
| 2763 | FMF: II->getFastMathFlags()); |
| 2764 | |
| 2765 | [[fallthrough]]; |
| 2766 | } |
| 2767 | case Intrinsic::fma: { |
| 2768 | // fma fneg(x), fneg(y), z -> fma x, y, z |
| 2769 | Value *Src0 = II->getArgOperand(i: 0); |
| 2770 | Value *Src1 = II->getArgOperand(i: 1); |
| 2771 | Value *Src2 = II->getArgOperand(i: 2); |
| 2772 | Value *X, *Y; |
| 2773 | if (match(V: Src0, P: m_FNeg(X: m_Value(V&: X))) && match(V: Src1, P: m_FNeg(X: m_Value(V&: Y)))) { |
| 2774 | replaceOperand(I&: *II, OpNum: 0, V: X); |
| 2775 | replaceOperand(I&: *II, OpNum: 1, V: Y); |
| 2776 | return II; |
| 2777 | } |
| 2778 | |
| 2779 | // fma fabs(x), fabs(x), z -> fma x, x, z |
| 2780 | if (match(V: Src0, P: m_FAbs(Op0: m_Value(V&: X))) && |
| 2781 | match(V: Src1, P: m_FAbs(Op0: m_Specific(V: X)))) { |
| 2782 | replaceOperand(I&: *II, OpNum: 0, V: X); |
| 2783 | replaceOperand(I&: *II, OpNum: 1, V: X); |
| 2784 | return II; |
| 2785 | } |
| 2786 | |
| 2787 | // Try to simplify the underlying FMul. We can only apply simplifications |
| 2788 | // that do not require rounding. |
| 2789 | if (Value *V = simplifyFMAFMul(LHS: Src0, RHS: Src1, FMF: II->getFastMathFlags(), |
| 2790 | Q: SQ.getWithInstruction(I: II))) |
| 2791 | return BinaryOperator::CreateFAddFMF(V1: V, V2: Src2, FMF: II->getFastMathFlags()); |
| 2792 | |
| 2793 | // fma x, y, 0 -> fmul x, y |
| 2794 | // This is always valid for -0.0, but requires nsz for +0.0 as |
| 2795 | // -0.0 + 0.0 = 0.0, which would not be the same as the fmul on its own. |
| 2796 | if (match(V: Src2, P: m_NegZeroFP()) || |
| 2797 | (match(V: Src2, P: m_PosZeroFP()) && II->getFastMathFlags().noSignedZeros())) |
| 2798 | return BinaryOperator::CreateFMulFMF(V1: Src0, V2: Src1, FMFSource: II); |
| 2799 | |
| 2800 | // fma x, -1.0, y -> fsub y, x |
| 2801 | if (match(V: Src1, P: m_SpecificFP(V: -1.0))) |
| 2802 | return BinaryOperator::CreateFSubFMF(V1: Src2, V2: Src0, FMFSource: II); |
| 2803 | |
| 2804 | break; |
| 2805 | } |
| 2806 | case Intrinsic::copysign: { |
| 2807 | Value *Mag = II->getArgOperand(i: 0), *Sign = II->getArgOperand(i: 1); |
| 2808 | if (std::optional<bool> KnownSignBit = computeKnownFPSignBit( |
| 2809 | V: Sign, SQ: getSimplifyQuery().getWithInstruction(I: II))) { |
| 2810 | if (*KnownSignBit) { |
| 2811 | // If we know that the sign argument is negative, reduce to FNABS: |
| 2812 | // copysign Mag, -Sign --> fneg (fabs Mag) |
| 2813 | Value *Fabs = Builder.CreateUnaryIntrinsic(ID: Intrinsic::fabs, V: Mag, FMFSource: II); |
| 2814 | return replaceInstUsesWith(I&: *II, V: Builder.CreateFNegFMF(V: Fabs, FMFSource: II)); |
| 2815 | } |
| 2816 | |
| 2817 | // If we know that the sign argument is positive, reduce to FABS: |
| 2818 | // copysign Mag, +Sign --> fabs Mag |
| 2819 | Value *Fabs = Builder.CreateUnaryIntrinsic(ID: Intrinsic::fabs, V: Mag, FMFSource: II); |
| 2820 | return replaceInstUsesWith(I&: *II, V: Fabs); |
| 2821 | } |
| 2822 | |
| 2823 | // Propagate sign argument through nested calls: |
| 2824 | // copysign Mag, (copysign ?, X) --> copysign Mag, X |
| 2825 | Value *X; |
| 2826 | if (match(V: Sign, P: m_Intrinsic<Intrinsic::copysign>(Op0: m_Value(), Op1: m_Value(V&: X)))) { |
| 2827 | Value *CopySign = |
| 2828 | Builder.CreateCopySign(LHS: Mag, RHS: X, FMFSource: FMFSource::intersect(A: II, B: Sign)); |
| 2829 | return replaceInstUsesWith(I&: *II, V: CopySign); |
| 2830 | } |
| 2831 | |
| 2832 | // Clear sign-bit of constant magnitude: |
| 2833 | // copysign -MagC, X --> copysign MagC, X |
| 2834 | // TODO: Support constant folding for fabs |
| 2835 | const APFloat *MagC; |
| 2836 | if (match(V: Mag, P: m_APFloat(Res&: MagC)) && MagC->isNegative()) { |
| 2837 | APFloat PosMagC = *MagC; |
| 2838 | PosMagC.clearSign(); |
| 2839 | return replaceOperand(I&: *II, OpNum: 0, V: ConstantFP::get(Ty: Mag->getType(), V: PosMagC)); |
| 2840 | } |
| 2841 | |
| 2842 | // Peek through changes of magnitude's sign-bit. This call rewrites those: |
| 2843 | // copysign (fabs X), Sign --> copysign X, Sign |
| 2844 | // copysign (fneg X), Sign --> copysign X, Sign |
| 2845 | if (match(V: Mag, P: m_FAbs(Op0: m_Value(V&: X))) || match(V: Mag, P: m_FNeg(X: m_Value(V&: X)))) |
| 2846 | return replaceOperand(I&: *II, OpNum: 0, V: X); |
| 2847 | |
| 2848 | break; |
| 2849 | } |
| 2850 | case Intrinsic::fabs: { |
| 2851 | Value *Cond, *TVal, *FVal; |
| 2852 | Value *Arg = II->getArgOperand(i: 0); |
| 2853 | Value *X; |
| 2854 | // fabs (-X) --> fabs (X) |
| 2855 | if (match(V: Arg, P: m_FNeg(X: m_Value(V&: X)))) { |
| 2856 | CallInst *Fabs = Builder.CreateUnaryIntrinsic(ID: Intrinsic::fabs, V: X, FMFSource: II); |
| 2857 | return replaceInstUsesWith(I&: CI, V: Fabs); |
| 2858 | } |
| 2859 | |
| 2860 | if (match(V: Arg, P: m_Select(C: m_Value(V&: Cond), L: m_Value(V&: TVal), R: m_Value(V&: FVal)))) { |
| 2861 | // fabs (select Cond, TrueC, FalseC) --> select Cond, AbsT, AbsF |
| 2862 | if (Arg->hasOneUse() ? (isa<Constant>(Val: TVal) || isa<Constant>(Val: FVal)) |
| 2863 | : (isa<Constant>(Val: TVal) && isa<Constant>(Val: FVal))) { |
| 2864 | CallInst *AbsT = Builder.CreateCall(Callee: II->getCalledFunction(), Args: {TVal}); |
| 2865 | CallInst *AbsF = Builder.CreateCall(Callee: II->getCalledFunction(), Args: {FVal}); |
| 2866 | SelectInst *SI = SelectInst::Create(C: Cond, S1: AbsT, S2: AbsF); |
| 2867 | FastMathFlags FMF1 = II->getFastMathFlags(); |
| 2868 | FastMathFlags FMF2 = cast<SelectInst>(Val: Arg)->getFastMathFlags(); |
| 2869 | FMF2.setNoSignedZeros(false); |
| 2870 | SI->setFastMathFlags(FMF1 | FMF2); |
| 2871 | return SI; |
| 2872 | } |
| 2873 | // fabs (select Cond, -FVal, FVal) --> fabs FVal |
| 2874 | if (match(V: TVal, P: m_FNeg(X: m_Specific(V: FVal)))) |
| 2875 | return replaceOperand(I&: *II, OpNum: 0, V: FVal); |
| 2876 | // fabs (select Cond, TVal, -TVal) --> fabs TVal |
| 2877 | if (match(V: FVal, P: m_FNeg(X: m_Specific(V: TVal)))) |
| 2878 | return replaceOperand(I&: *II, OpNum: 0, V: TVal); |
| 2879 | } |
| 2880 | |
| 2881 | Value *Magnitude, *Sign; |
| 2882 | if (match(V: II->getArgOperand(i: 0), |
| 2883 | P: m_CopySign(Op0: m_Value(V&: Magnitude), Op1: m_Value(V&: Sign)))) { |
| 2884 | // fabs (copysign x, y) -> (fabs x) |
| 2885 | CallInst *AbsSign = |
| 2886 | Builder.CreateUnaryIntrinsic(ID: Intrinsic::fabs, V: Magnitude, FMFSource: II); |
| 2887 | return replaceInstUsesWith(I&: *II, V: AbsSign); |
| 2888 | } |
| 2889 | |
| 2890 | [[fallthrough]]; |
| 2891 | } |
| 2892 | case Intrinsic::ceil: |
| 2893 | case Intrinsic::floor: |
| 2894 | case Intrinsic::round: |
| 2895 | case Intrinsic::roundeven: |
| 2896 | case Intrinsic::nearbyint: |
| 2897 | case Intrinsic::rint: |
| 2898 | case Intrinsic::trunc: { |
| 2899 | Value *ExtSrc; |
| 2900 | if (match(V: II->getArgOperand(i: 0), P: m_OneUse(SubPattern: m_FPExt(Op: m_Value(V&: ExtSrc))))) { |
| 2901 | // Narrow the call: intrinsic (fpext x) -> fpext (intrinsic x) |
| 2902 | Value *NarrowII = Builder.CreateUnaryIntrinsic(ID: IID, V: ExtSrc, FMFSource: II); |
| 2903 | return new FPExtInst(NarrowII, II->getType()); |
| 2904 | } |
| 2905 | break; |
| 2906 | } |
| 2907 | case Intrinsic::cos: |
| 2908 | case Intrinsic::amdgcn_cos: { |
| 2909 | Value *X, *Sign; |
| 2910 | Value *Src = II->getArgOperand(i: 0); |
| 2911 | if (match(V: Src, P: m_FNeg(X: m_Value(V&: X))) || match(V: Src, P: m_FAbs(Op0: m_Value(V&: X))) || |
| 2912 | match(V: Src, P: m_CopySign(Op0: m_Value(V&: X), Op1: m_Value(V&: Sign)))) { |
| 2913 | // cos(-x) --> cos(x) |
| 2914 | // cos(fabs(x)) --> cos(x) |
| 2915 | // cos(copysign(x, y)) --> cos(x) |
| 2916 | return replaceOperand(I&: *II, OpNum: 0, V: X); |
| 2917 | } |
| 2918 | break; |
| 2919 | } |
| 2920 | case Intrinsic::sin: |
| 2921 | case Intrinsic::amdgcn_sin: { |
| 2922 | Value *X; |
| 2923 | if (match(V: II->getArgOperand(i: 0), P: m_OneUse(SubPattern: m_FNeg(X: m_Value(V&: X))))) { |
| 2924 | // sin(-x) --> -sin(x) |
| 2925 | Value *NewSin = Builder.CreateUnaryIntrinsic(ID: IID, V: X, FMFSource: II); |
| 2926 | return UnaryOperator::CreateFNegFMF(Op: NewSin, FMFSource: II); |
| 2927 | } |
| 2928 | break; |
| 2929 | } |
| 2930 | case Intrinsic::ldexp: { |
| 2931 | // ldexp(ldexp(x, a), b) -> ldexp(x, a + b) |
| 2932 | // |
| 2933 | // The danger is if the first ldexp would overflow to infinity or underflow |
| 2934 | // to zero, but the combined exponent avoids it. We ignore this with |
| 2935 | // reassoc. |
| 2936 | // |
| 2937 | // It's also safe to fold if we know both exponents are >= 0 or <= 0 since |
| 2938 | // it would just double down on the overflow/underflow which would occur |
| 2939 | // anyway. |
| 2940 | // |
| 2941 | // TODO: Could do better if we had range tracking for the input value |
| 2942 | // exponent. Also could broaden sign check to cover == 0 case. |
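|      | // For example: ldexp(ldexp(x, 5), 3) --> ldexp(x, 8) (both exponents are
|      | // non-negative, so the fold is safe even without reassoc).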
| 2943 | Value *Src = II->getArgOperand(i: 0); |
| 2944 | Value *Exp = II->getArgOperand(i: 1); |
| 2945 | Value *InnerSrc; |
| 2946 | Value *InnerExp; |
| 2947 | if (match(V: Src, P: m_OneUse(SubPattern: m_Intrinsic<Intrinsic::ldexp>( |
| 2948 | Op0: m_Value(V&: InnerSrc), Op1: m_Value(V&: InnerExp)))) && |
| 2949 | Exp->getType() == InnerExp->getType()) { |
| 2950 | FastMathFlags FMF = II->getFastMathFlags(); |
| 2951 | FastMathFlags InnerFlags = cast<FPMathOperator>(Val: Src)->getFastMathFlags(); |
| 2952 | |
| 2953 | if ((FMF.allowReassoc() && InnerFlags.allowReassoc()) || |
| 2954 | signBitMustBeTheSame(Op0: Exp, Op1: InnerExp, SQ: SQ.getWithInstruction(I: II))) { |
| 2955 | // TODO: Add nsw/nuw probably safe if integer type exceeds exponent |
| 2956 | // width. |
| 2957 | Value *NewExp = Builder.CreateAdd(LHS: InnerExp, RHS: Exp); |
| 2958 | II->setArgOperand(i: 1, v: NewExp); |
| 2959 | II->setFastMathFlags(InnerFlags); // Or the inner flags. |
| 2960 | return replaceOperand(I&: *II, OpNum: 0, V: InnerSrc); |
| 2961 | } |
| 2962 | } |
| 2963 | |
| 2964 | // ldexp(x, zext(i1 y)) -> fmul x, (select y, 2.0, 1.0) |
| 2965 | // ldexp(x, sext(i1 y)) -> fmul x, (select y, 0.5, 1.0) |
| 2966 | Value *ExtSrc; |
| 2967 | if (match(V: Exp, P: m_ZExt(Op: m_Value(V&: ExtSrc))) && |
| 2968 | ExtSrc->getType()->getScalarSizeInBits() == 1) { |
| 2969 | Value *Select = |
| 2970 | Builder.CreateSelect(C: ExtSrc, True: ConstantFP::get(Ty: II->getType(), V: 2.0), |
| 2971 | False: ConstantFP::get(Ty: II->getType(), V: 1.0)); |
| 2972 | return BinaryOperator::CreateFMulFMF(V1: Src, V2: Select, FMFSource: II); |
| 2973 | } |
| 2974 | if (match(V: Exp, P: m_SExt(Op: m_Value(V&: ExtSrc))) && |
| 2975 | ExtSrc->getType()->getScalarSizeInBits() == 1) { |
| 2976 | Value *Select = |
| 2977 | Builder.CreateSelect(C: ExtSrc, True: ConstantFP::get(Ty: II->getType(), V: 0.5), |
| 2978 | False: ConstantFP::get(Ty: II->getType(), V: 1.0)); |
| 2979 | return BinaryOperator::CreateFMulFMF(V1: Src, V2: Select, FMFSource: II); |
| 2980 | } |
| 2981 | |
| 2982 | // ldexp(x, c ? exp : 0) -> c ? ldexp(x, exp) : x |
| 2983 | // ldexp(x, c ? 0 : exp) -> c ? x : ldexp(x, exp) |
| 2984 | //
| 2985 | // TODO: If we cared, should insert a canonicalize for x |
| 2986 | Value *SelectCond, *SelectLHS, *SelectRHS; |
| 2987 | if (match(V: II->getArgOperand(i: 1), |
| 2988 | P: m_OneUse(SubPattern: m_Select(C: m_Value(V&: SelectCond), L: m_Value(V&: SelectLHS), |
| 2989 | R: m_Value(V&: SelectRHS))))) { |
| 2990 | Value *NewLdexp = nullptr; |
| 2991 | Value *Select = nullptr; |
| 2992 | if (match(V: SelectRHS, P: m_ZeroInt())) { |
| 2993 | NewLdexp = Builder.CreateLdexp(Src, Exp: SelectLHS, FMFSource: II); |
| 2994 | Select = Builder.CreateSelect(C: SelectCond, True: NewLdexp, False: Src); |
| 2995 | } else if (match(V: SelectLHS, P: m_ZeroInt())) { |
| 2996 | NewLdexp = Builder.CreateLdexp(Src, Exp: SelectRHS, FMFSource: II); |
| 2997 | Select = Builder.CreateSelect(C: SelectCond, True: Src, False: NewLdexp); |
| 2998 | } |
| 2999 | |
| 3000 | if (NewLdexp) { |
| 3001 | Select->takeName(V: II); |
| 3002 | return replaceInstUsesWith(I&: *II, V: Select); |
| 3003 | } |
| 3004 | } |
| 3005 | |
| 3006 | break; |
| 3007 | } |
| 3008 | case Intrinsic::ptrauth_auth: |
| 3009 | case Intrinsic::ptrauth_resign: { |
| 3010 | // (sign|resign) + (auth|resign) can be folded by omitting the middle |
| 3011 | // sign+auth component if the key and discriminator match. |
| 3012 | bool NeedSign = II->getIntrinsicID() == Intrinsic::ptrauth_resign; |
| 3013 | Value *Ptr = II->getArgOperand(i: 0); |
| 3014 | Value *Key = II->getArgOperand(i: 1); |
| 3015 | Value *Disc = II->getArgOperand(i: 2); |
| 3016 | |
| 3017 | // AuthKey will be the key we need to end up authenticating against in |
| 3018 | // whatever we replace this sequence with. |
| 3019 | Value *AuthKey = nullptr, *AuthDisc = nullptr, *BasePtr; |
| 3020 | if (const auto *CI = dyn_cast<CallBase>(Val: Ptr)) { |
| 3021 | BasePtr = CI->getArgOperand(i: 0); |
| 3022 | if (CI->getIntrinsicID() == Intrinsic::ptrauth_sign) { |
| 3023 | if (CI->getArgOperand(i: 1) != Key || CI->getArgOperand(i: 2) != Disc) |
| 3024 | break; |
| 3025 | } else if (CI->getIntrinsicID() == Intrinsic::ptrauth_resign) { |
| 3026 | if (CI->getArgOperand(i: 3) != Key || CI->getArgOperand(i: 4) != Disc) |
| 3027 | break; |
| 3028 | AuthKey = CI->getArgOperand(i: 1); |
| 3029 | AuthDisc = CI->getArgOperand(i: 2); |
| 3030 | } else |
| 3031 | break; |
| 3032 | } else if (const auto *PtrToInt = dyn_cast<PtrToIntOperator>(Val: Ptr)) { |
| 3033 | // ptrauth constants are equivalent to a call to @llvm.ptrauth.sign for |
| 3034 | // our purposes, so check for that too. |
| 3035 | const auto *CPA = dyn_cast<ConstantPtrAuth>(Val: PtrToInt->getOperand(i_nocapture: 0)); |
| 3036 | if (!CPA || !CPA->isKnownCompatibleWith(Key, Discriminator: Disc, DL)) |
| 3037 | break; |
| 3038 | |
| 3039 | // resign(ptrauth(p,ks,ds),ks,ds,kr,dr) -> ptrauth(p,kr,dr) |
| 3040 | if (NeedSign && isa<ConstantInt>(Val: II->getArgOperand(i: 4))) { |
| 3041 | auto *SignKey = cast<ConstantInt>(Val: II->getArgOperand(i: 3)); |
| 3042 | auto *SignDisc = cast<ConstantInt>(Val: II->getArgOperand(i: 4)); |
| 3043 | auto *SignAddrDisc = ConstantPointerNull::get(T: Builder.getPtrTy()); |
| 3044 | auto *NewCPA = ConstantPtrAuth::get(Ptr: CPA->getPointer(), Key: SignKey, |
| 3045 | Disc: SignDisc, AddrDisc: SignAddrDisc); |
| 3046 | replaceInstUsesWith( |
| 3047 | I&: *II, V: ConstantExpr::getPointerCast(C: NewCPA, Ty: II->getType())); |
| 3048 | return eraseInstFromFunction(I&: *II); |
| 3049 | } |
| 3050 | |
| 3051 | // auth(ptrauth(p,k,d),k,d) -> p |
| 3052 | BasePtr = Builder.CreatePtrToInt(V: CPA->getPointer(), DestTy: II->getType()); |
| 3053 | } else |
| 3054 | break; |
| 3055 | |
| 3056 | unsigned NewIntrin; |
| 3057 | if (AuthKey && NeedSign) { |
| 3058 | // resign(0,1) + resign(1,2) = resign(0, 2) |
| 3059 | NewIntrin = Intrinsic::ptrauth_resign; |
| 3060 | } else if (AuthKey) { |
| 3061 | // resign(0,1) + auth(1) = auth(0) |
| 3062 | NewIntrin = Intrinsic::ptrauth_auth; |
| 3063 | } else if (NeedSign) { |
| 3064 | // sign(0) + resign(0, 1) = sign(1) |
| 3065 | NewIntrin = Intrinsic::ptrauth_sign; |
| 3066 | } else { |
| 3067 | // sign(0) + auth(0) = nop |
| 3068 | replaceInstUsesWith(I&: *II, V: BasePtr); |
| 3069 | return eraseInstFromFunction(I&: *II); |
| 3070 | } |
| 3071 | |
| 3072 | SmallVector<Value *, 4> CallArgs; |
| 3073 | CallArgs.push_back(Elt: BasePtr); |
| 3074 | if (AuthKey) { |
| 3075 | CallArgs.push_back(Elt: AuthKey); |
| 3076 | CallArgs.push_back(Elt: AuthDisc); |
| 3077 | } |
| 3078 | |
| 3079 | if (NeedSign) { |
| 3080 | CallArgs.push_back(Elt: II->getArgOperand(i: 3)); |
| 3081 | CallArgs.push_back(Elt: II->getArgOperand(i: 4)); |
| 3082 | } |
| 3083 | |
| 3084 | Function *NewFn = |
| 3085 | Intrinsic::getOrInsertDeclaration(M: II->getModule(), id: NewIntrin); |
| 3086 | return CallInst::Create(Func: NewFn, Args: CallArgs); |
| 3087 | } |
| 3088 | case Intrinsic::arm_neon_vtbl1: |
| 3089 | case Intrinsic::aarch64_neon_tbl1: |
| 3090 | if (Value *V = simplifyNeonTbl1(II: *II, Builder)) |
| 3091 | return replaceInstUsesWith(I&: *II, V); |
| 3092 | break; |
| 3093 | |
| 3094 | case Intrinsic::arm_neon_vmulls: |
| 3095 | case Intrinsic::arm_neon_vmullu: |
| 3096 | case Intrinsic::aarch64_neon_smull: |
| 3097 | case Intrinsic::aarch64_neon_umull: { |
| 3098 | Value *Arg0 = II->getArgOperand(i: 0); |
| 3099 | Value *Arg1 = II->getArgOperand(i: 1); |
| 3100 | |
| 3101 | // Handle mul by zero first: |
| 3102 | if (isa<ConstantAggregateZero>(Val: Arg0) || isa<ConstantAggregateZero>(Val: Arg1)) { |
| 3103 | return replaceInstUsesWith(I&: CI, V: ConstantAggregateZero::get(Ty: II->getType())); |
| 3104 | } |
| 3105 | |
| 3106 | // Check for constant LHS & RHS - in this case we just simplify. |
| 3107 | bool Zext = (IID == Intrinsic::arm_neon_vmullu || |
| 3108 | IID == Intrinsic::aarch64_neon_umull); |
| 3109 | VectorType *NewVT = cast<VectorType>(Val: II->getType()); |
| 3110 | if (Constant *CV0 = dyn_cast<Constant>(Val: Arg0)) { |
| 3111 | if (Constant *CV1 = dyn_cast<Constant>(Val: Arg1)) { |
| 3112 | Value *V0 = Builder.CreateIntCast(V: CV0, DestTy: NewVT, /*isSigned=*/!Zext); |
| 3113 | Value *V1 = Builder.CreateIntCast(V: CV1, DestTy: NewVT, /*isSigned=*/!Zext); |
| 3114 | return replaceInstUsesWith(I&: CI, V: Builder.CreateMul(LHS: V0, RHS: V1)); |
| 3115 | } |
| 3116 | |
| 3117 | // Couldn't simplify - canonicalize constant to the RHS. |
| 3118 | std::swap(a&: Arg0, b&: Arg1); |
| 3119 | } |
| 3120 | |
| 3121 | // Handle mul by one: |
| 3122 | if (Constant *CV1 = dyn_cast<Constant>(Val: Arg1)) |
| 3123 | if (ConstantInt *Splat = |
| 3124 | dyn_cast_or_null<ConstantInt>(Val: CV1->getSplatValue())) |
| 3125 | if (Splat->isOne()) |
| 3126 | return CastInst::CreateIntegerCast(S: Arg0, Ty: II->getType(), |
| 3127 | /*isSigned=*/!Zext); |
| 3128 | |
| 3129 | break; |
| 3130 | } |
| 3131 | case Intrinsic::arm_neon_aesd: |
| 3132 | case Intrinsic::arm_neon_aese: |
| 3133 | case Intrinsic::aarch64_crypto_aesd: |
| 3134 | case Intrinsic::aarch64_crypto_aese: |
| 3135 | case Intrinsic::aarch64_sve_aesd: |
| 3136 | case Intrinsic::aarch64_sve_aese: { |
| 3137 | Value *DataArg = II->getArgOperand(i: 0); |
| 3138 | Value *KeyArg = II->getArgOperand(i: 1); |
| 3139 | |
| 3140 | // Accept zero on either operand. |
| 3141 | if (!match(V: KeyArg, P: m_ZeroInt())) |
| 3142 | std::swap(a&: KeyArg, b&: DataArg); |
| 3143 | |
| 3144 | // Try to use the builtin XOR in AESE and AESD to eliminate a prior XOR |
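|      | // For example: aese(xor(data, key), zero) --> aese(data, key).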
| 3145 | Value *Data, *Key; |
| 3146 | if (match(V: KeyArg, P: m_ZeroInt()) && |
| 3147 | match(V: DataArg, P: m_Xor(L: m_Value(V&: Data), R: m_Value(V&: Key)))) { |
| 3148 | replaceOperand(I&: *II, OpNum: 0, V: Data); |
| 3149 | replaceOperand(I&: *II, OpNum: 1, V: Key); |
| 3150 | return II; |
| 3151 | } |
| 3152 | break; |
| 3153 | } |
| 3154 | case Intrinsic::hexagon_V6_vandvrt: |
| 3155 | case Intrinsic::hexagon_V6_vandvrt_128B: { |
| 3156 | // Simplify Q -> V -> Q conversion. |
| 3157 | if (auto Op0 = dyn_cast<IntrinsicInst>(Val: II->getArgOperand(i: 0))) { |
| 3158 | Intrinsic::ID ID0 = Op0->getIntrinsicID(); |
| 3159 | if (ID0 != Intrinsic::hexagon_V6_vandqrt && |
| 3160 | ID0 != Intrinsic::hexagon_V6_vandqrt_128B) |
| 3161 | break; |
| 3162 | Value *Bytes = Op0->getArgOperand(i: 1), *Mask = II->getArgOperand(i: 1); |
| 3163 | uint64_t Bytes1 = computeKnownBits(V: Bytes, CxtI: Op0).One.getZExtValue(); |
| 3164 | uint64_t Mask1 = computeKnownBits(V: Mask, CxtI: II).One.getZExtValue(); |
| 3165 | // Check if every byte has common bits in Bytes and Mask. |
| 3166 | uint64_t C = Bytes1 & Mask1; |
| 3167 | if ((C & 0xFF) && (C & 0xFF00) && (C & 0xFF0000) && (C & 0xFF000000)) |
| 3168 | return replaceInstUsesWith(I&: *II, V: Op0->getArgOperand(i: 0)); |
| 3169 | } |
| 3170 | break; |
| 3171 | } |
| 3172 | case Intrinsic::stackrestore: { |
| 3173 | enum class ClassifyResult { |
| 3174 | None, |
| 3175 | Alloca, |
| 3176 | StackRestore, |
| 3177 | CallWithSideEffects, |
| 3178 | }; |
| 3179 | auto Classify = [](const Instruction *I) { |
| 3180 | if (isa<AllocaInst>(Val: I)) |
| 3181 | return ClassifyResult::Alloca; |
| 3182 | |
| 3183 | if (auto *CI = dyn_cast<CallInst>(Val: I)) { |
| 3184 | if (auto *II = dyn_cast<IntrinsicInst>(Val: CI)) { |
| 3185 | if (II->getIntrinsicID() == Intrinsic::stackrestore) |
| 3186 | return ClassifyResult::StackRestore; |
| 3187 | |
| 3188 | if (II->mayHaveSideEffects()) |
| 3189 | return ClassifyResult::CallWithSideEffects; |
| 3190 | } else { |
| 3191 | // Consider all non-intrinsic calls to have side effects
| 3192 | return ClassifyResult::CallWithSideEffects; |
| 3193 | } |
| 3194 | } |
| 3195 | |
| 3196 | return ClassifyResult::None; |
| 3197 | }; |
| 3198 | |
| 3199 | // If the stacksave and the stackrestore are in the same BB, and there is |
| 3200 | // no intervening call, alloca, or stackrestore of a different stacksave, |
| 3201 | // remove the restore. This can happen when variable allocas are DCE'd. |
| 3202 | if (IntrinsicInst *SS = dyn_cast<IntrinsicInst>(Val: II->getArgOperand(i: 0))) { |
| 3203 | if (SS->getIntrinsicID() == Intrinsic::stacksave && |
| 3204 | SS->getParent() == II->getParent()) { |
| 3205 | BasicBlock::iterator BI(SS); |
| 3206 | bool CannotRemove = false; |
| 3207 | for (++BI; &*BI != II; ++BI) { |
| 3208 | switch (Classify(&*BI)) { |
| 3209 | case ClassifyResult::None: |
| 3210 | // So far so good, look at next instructions. |
| 3211 | break; |
| 3212 | |
| 3213 | case ClassifyResult::StackRestore: |
| 3214 | // If we found an intervening stackrestore for a different |
| 3215 | // stacksave, we can't remove the stackrestore. Otherwise, continue. |
| 3216 | if (cast<IntrinsicInst>(Val&: *BI).getArgOperand(i: 0) != SS) |
| 3217 | CannotRemove = true; |
| 3218 | break; |
| 3219 | |
| 3220 | case ClassifyResult::Alloca: |
| 3221 | case ClassifyResult::CallWithSideEffects: |
| 3222 | // If we found an alloca, a non-intrinsic call, or an intrinsic |
| 3223 | // call with side effects, we can't remove the stackrestore. |
| 3224 | CannotRemove = true; |
| 3225 | break; |
| 3226 | } |
| 3227 | if (CannotRemove) |
| 3228 | break; |
| 3229 | } |
| 3230 | |
| 3231 | if (!CannotRemove) |
| 3232 | return eraseInstFromFunction(I&: CI); |
| 3233 | } |
| 3234 | } |
| 3235 | |
| 3236 | // Scan down this block to see if there is another stack restore in the |
| 3237 | // same block without an intervening call/alloca. |
| 3238 | BasicBlock::iterator BI(II); |
| 3239 | Instruction *TI = II->getParent()->getTerminator(); |
| 3240 | bool CannotRemove = false; |
| 3241 | for (++BI; &*BI != TI; ++BI) { |
| 3242 | switch (Classify(&*BI)) { |
| 3243 | case ClassifyResult::None: |
| 3244 | // So far so good, look at next instructions. |
| 3245 | break; |
| 3246 | |
| 3247 | case ClassifyResult::StackRestore: |
| 3248 | // If there is a stackrestore below this one, remove this one. |
| 3249 | return eraseInstFromFunction(I&: CI); |
| 3250 | |
| 3251 | case ClassifyResult::Alloca: |
| 3252 | case ClassifyResult::CallWithSideEffects: |
| 3253 | // If we found an alloca, a non-intrinsic call, or an intrinsic call |
| 3254 | // with side effects (such as llvm.stacksave and llvm.read_register), |
| 3255 | // we can't remove the stack restore. |
| 3256 | CannotRemove = true; |
| 3257 | break; |
| 3258 | } |
| 3259 | if (CannotRemove) |
| 3260 | break; |
| 3261 | } |
| 3262 | |
| 3263 | // If the stack restore is in a return or resume block and there are no
| 3264 | // allocas or calls between the restore and the terminator, remove the
| 3265 | // restore.
| 3266 | if (!CannotRemove && (isa<ReturnInst>(Val: TI) || isa<ResumeInst>(Val: TI))) |
| 3267 | return eraseInstFromFunction(I&: CI); |
| 3268 | break; |
| 3269 | } |
| 3270 | case Intrinsic::lifetime_end: |
| 3271 | // ASan needs to poison memory to detect invalid accesses, which is
| 3272 | // possible even for an empty lifetime range.
| 3273 | if (II->getFunction()->hasFnAttribute(Kind: Attribute::SanitizeAddress) || |
| 3274 | II->getFunction()->hasFnAttribute(Kind: Attribute::SanitizeMemory) || |
| 3275 | II->getFunction()->hasFnAttribute(Kind: Attribute::SanitizeHWAddress)) |
| 3276 | break; |
| 3277 | |
| 3278 | if (removeTriviallyEmptyRange(EndI&: *II, IC&: *this, IsStart: [](const IntrinsicInst &I) { |
| 3279 | return I.getIntrinsicID() == Intrinsic::lifetime_start; |
| 3280 | })) |
| 3281 | return nullptr; |
| 3282 | break; |
| 3283 | case Intrinsic::assume: { |
| 3284 | Value *IIOperand = II->getArgOperand(i: 0); |
| 3285 | SmallVector<OperandBundleDef, 4> OpBundles; |
| 3286 | II->getOperandBundlesAsDefs(Defs&: OpBundles); |
| 3287 | |
| 3288 | /// This will remove the boolean Condition from the assume given as |
| 3289 | /// argument and remove the assume if it becomes useless. |
| 3290 | /// It always returns nullptr so it can be used directly as a return value.
| 3291 | auto RemoveConditionFromAssume = [&](Instruction *Assume) -> Instruction * { |
| 3292 | assert(isa<AssumeInst>(Assume)); |
| 3293 | if (isAssumeWithEmptyBundle(Assume: *cast<AssumeInst>(Val: II))) |
| 3294 | return eraseInstFromFunction(I&: CI); |
| 3295 | replaceUse(U&: II->getOperandUse(i: 0), NewValue: ConstantInt::getTrue(Context&: II->getContext())); |
| 3296 | return nullptr; |
| 3297 | }; |
| 3298 | // Remove an assume if it is followed by an identical assume. |
| 3299 | // TODO: Do we need this? Unless there are conflicting assumptions, the |
| 3300 | // computeKnownBits(IIOperand) below here eliminates redundant assumes. |
| 3301 | Instruction *Next = II->getNextNonDebugInstruction(); |
| 3302 | if (match(V: Next, P: m_Intrinsic<Intrinsic::assume>(Op0: m_Specific(V: IIOperand)))) |
| 3303 | return RemoveConditionFromAssume(Next); |
| 3304 | |
| 3305 | // Canonicalize assume(a && b) -> assume(a); assume(b); |
| 3306 | // Note: New assumption intrinsics created here are registered by |
| 3307 | // the InstCombineIRInserter object. |
| 3308 | FunctionType *AssumeIntrinsicTy = II->getFunctionType(); |
| 3309 | Value *AssumeIntrinsic = II->getCalledOperand(); |
| 3310 | Value *A, *B; |
| 3311 | if (match(V: IIOperand, P: m_LogicalAnd(L: m_Value(V&: A), R: m_Value(V&: B)))) { |
| 3312 | Builder.CreateCall(FTy: AssumeIntrinsicTy, Callee: AssumeIntrinsic, Args: A, OpBundles, |
| 3313 | Name: II->getName()); |
| 3314 | Builder.CreateCall(FTy: AssumeIntrinsicTy, Callee: AssumeIntrinsic, Args: B, Name: II->getName()); |
| 3315 | return eraseInstFromFunction(I&: *II); |
| 3316 | } |
| 3317 | // assume(!(a || b)) -> assume(!a); assume(!b); |
| 3318 | if (match(V: IIOperand, P: m_Not(V: m_LogicalOr(L: m_Value(V&: A), R: m_Value(V&: B))))) { |
| 3319 | Builder.CreateCall(FTy: AssumeIntrinsicTy, Callee: AssumeIntrinsic, |
| 3320 | Args: Builder.CreateNot(V: A), OpBundles, Name: II->getName()); |
| 3321 | Builder.CreateCall(FTy: AssumeIntrinsicTy, Callee: AssumeIntrinsic, |
| 3322 | Args: Builder.CreateNot(V: B), Name: II->getName()); |
| 3323 | return eraseInstFromFunction(I&: *II); |
| 3324 | } |
| 3325 | |
| 3326 | // assume( (load addr) != null ) -> add 'nonnull' metadata to load |
| 3327 | // (if assume is valid at the load) |
| 3328 | Instruction *LHS; |
| 3329 | if (match(V: IIOperand, P: m_SpecificICmp(MatchPred: ICmpInst::ICMP_NE, L: m_Instruction(I&: LHS), |
| 3330 | R: m_Zero())) && |
| 3331 | LHS->getOpcode() == Instruction::Load && |
| 3332 | LHS->getType()->isPointerTy() && |
| 3333 | isValidAssumeForContext(I: II, CxtI: LHS, DT: &DT)) { |
| 3334 | MDNode *MD = MDNode::get(Context&: II->getContext(), MDs: {}); |
| 3335 | LHS->setMetadata(KindID: LLVMContext::MD_nonnull, Node: MD); |
| 3336 | LHS->setMetadata(KindID: LLVMContext::MD_noundef, Node: MD); |
| 3337 | return RemoveConditionFromAssume(II); |
| 3338 | |
| 3339 | // TODO: apply nonnull return attributes to calls and invokes |
| 3340 | // TODO: apply range metadata for range check patterns? |
| 3341 | } |
| 3342 | |
| 3343 | // Separate storage assumptions apply to the underlying allocations, not any |
| 3344 | // particular pointer within them. When evaluating the hints for AA purposes |
| 3345 | // we getUnderlyingObject them; by precomputing the answers here we can |
| 3346 | // avoid having to do so repeatedly there. |
| 3347 | for (unsigned Idx = 0; Idx < II->getNumOperandBundles(); Idx++) { |
| 3348 | OperandBundleUse OBU = II->getOperandBundleAt(Index: Idx); |
| 3349 | if (OBU.getTagName() == "separate_storage") {
| 3350 | assert(OBU.Inputs.size() == 2); |
| 3351 | auto MaybeSimplifyHint = [&](const Use &U) { |
| 3352 | Value *Hint = U.get(); |
| 3353 | // Not having a limit is safe because InstCombine removes unreachable |
| 3354 | // code. |
| 3355 | Value *UnderlyingObject = getUnderlyingObject(V: Hint, /*MaxLookup*/ 0); |
| 3356 | if (Hint != UnderlyingObject) |
| 3357 | replaceUse(U&: const_cast<Use &>(U), NewValue: UnderlyingObject); |
| 3358 | }; |
| 3359 | MaybeSimplifyHint(OBU.Inputs[0]); |
| 3360 | MaybeSimplifyHint(OBU.Inputs[1]); |
| 3361 | } |
| 3362 | } |
| 3363 | |
| 3364 | // Convert nonnull assume like: |
| 3365 | // %A = icmp ne i32* %PTR, null |
| 3366 | // call void @llvm.assume(i1 %A) |
| 3367 | // into |
| 3368 | // call void @llvm.assume(i1 true) [ "nonnull"(i32* %PTR) ] |
| 3369 | if (EnableKnowledgeRetention && |
| 3370 | match(V: IIOperand, |
| 3371 | P: m_SpecificICmp(MatchPred: ICmpInst::ICMP_NE, L: m_Value(V&: A), R: m_Zero())) && |
| 3372 | A->getType()->isPointerTy()) { |
| 3373 | if (auto *Replacement = buildAssumeFromKnowledge( |
| 3374 | Knowledge: {RetainedKnowledge{.AttrKind: Attribute::NonNull, .ArgValue: 0, .WasOn: A}}, CtxI: Next, AC: &AC, DT: &DT)) { |
| 3375 | |
| 3376 | Replacement->insertBefore(InsertPos: Next->getIterator()); |
| 3377 | AC.registerAssumption(CI: Replacement); |
| 3378 | return RemoveConditionFromAssume(II); |
| 3379 | } |
| 3380 | } |
| 3381 | |
| 3382 | // Convert alignment assume like: |
| 3383 | // %B = ptrtoint i32* %A to i64 |
| 3384 | // %C = and i64 %B, Constant |
| 3385 | // %D = icmp eq i64 %C, 0 |
| 3386 | // call void @llvm.assume(i1 %D) |
| 3387 | // into |
| 3388 | // call void @llvm.assume(i1 true) [ "align"(i32* %A, i64 Constant + 1) ]
| 3389 | uint64_t AlignMask = 1; |
| 3390 | if (EnableKnowledgeRetention && |
| 3391 | (match(V: IIOperand, P: m_Not(V: m_Trunc(Op: m_Value(V&: A)))) || |
| 3392 | match(V: IIOperand, |
| 3393 | P: m_SpecificICmp(MatchPred: ICmpInst::ICMP_EQ, |
| 3394 | L: m_And(L: m_Value(V&: A), R: m_ConstantInt(V&: AlignMask)), |
| 3395 | R: m_Zero())))) { |
| 3396 | if (isPowerOf2_64(Value: AlignMask + 1)) { |
| 3397 | uint64_t Offset = 0; |
| 3398 | match(V: A, P: m_Add(L: m_Value(V&: A), R: m_ConstantInt(V&: Offset))); |
| 3399 | if (match(V: A, P: m_PtrToInt(Op: m_Value(V&: A)))) { |
| 3400 | /// Note: this doesn't preserve the offset information but merges |
| 3401 | /// offset and alignment. |
| 3402 | /// TODO: we can generate a GEP instead of merging the alignment with |
| 3403 | /// the offset. |
| 3404 | RetainedKnowledge RK{.AttrKind: Attribute::Alignment, |
| 3405 | .ArgValue: (unsigned)MinAlign(A: Offset, B: AlignMask + 1), .WasOn: A}; |
| 3406 | if (auto *Replacement = |
| 3407 | buildAssumeFromKnowledge(Knowledge: RK, CtxI: Next, AC: &AC, DT: &DT)) { |
| 3408 | |
| 3409 | Replacement->insertAfter(InsertPos: II->getIterator()); |
| 3410 | AC.registerAssumption(CI: Replacement); |
| 3411 | } |
| 3412 | return RemoveConditionFromAssume(II); |
| 3413 | } |
| 3414 | } |
| 3415 | } |
| 3416 | |
| 3417 | /// Canonicalize Knowledge in operand bundles. |
| 3418 | if (EnableKnowledgeRetention && II->hasOperandBundles()) { |
| 3419 | for (unsigned Idx = 0; Idx < II->getNumOperandBundles(); Idx++) { |
| 3420 | auto &BOI = II->bundle_op_info_begin()[Idx]; |
| 3421 | RetainedKnowledge RK = |
| 3422 | llvm::getKnowledgeFromBundle(Assume&: cast<AssumeInst>(Val&: *II), BOI); |
| 3423 | if (BOI.End - BOI.Begin > 2) |
| 3424 | continue; // Prevent reducing knowledge in an align with offset since |
| 3425 | // extracting a RetainedKnowledge from it loses the offset
| 3426 | // information.
| 3427 | RetainedKnowledge CanonRK = |
| 3428 | llvm::simplifyRetainedKnowledge(Assume: cast<AssumeInst>(Val: II), RK, |
| 3429 | AC: &getAssumptionCache(), |
| 3430 | DT: &getDominatorTree()); |
| 3431 | if (CanonRK == RK) |
| 3432 | continue; |
| 3433 | if (!CanonRK) { |
| 3434 | if (BOI.End - BOI.Begin > 0) { |
| 3435 | Worklist.pushValue(V: II->op_begin()[BOI.Begin]); |
| 3436 | Value::dropDroppableUse(U&: II->op_begin()[BOI.Begin]); |
| 3437 | } |
| 3438 | continue; |
| 3439 | } |
| 3440 | assert(RK.AttrKind == CanonRK.AttrKind); |
| 3441 | if (BOI.End - BOI.Begin > 0) |
| 3442 | II->op_begin()[BOI.Begin].set(CanonRK.WasOn); |
| 3443 | if (BOI.End - BOI.Begin > 1) |
| 3444 | II->op_begin()[BOI.Begin + 1].set(ConstantInt::get( |
| 3445 | Ty: Type::getInt64Ty(C&: II->getContext()), V: CanonRK.ArgValue)); |
| 3446 | if (RK.WasOn) |
| 3447 | Worklist.pushValue(V: RK.WasOn); |
| 3448 | return II; |
| 3449 | } |
| 3450 | } |
| 3451 | |
| 3452 | // If there is a dominating assume with the same condition as this one, |
| 3453 | // then this one is redundant, and should be removed. |
| 3454 | KnownBits Known(1); |
| 3455 | computeKnownBits(V: IIOperand, Known, CxtI: II); |
| 3456 | if (Known.isAllOnes() && isAssumeWithEmptyBundle(Assume: cast<AssumeInst>(Val&: *II))) |
| 3457 | return eraseInstFromFunction(I&: *II); |
| 3458 | |
| 3459 | // assume(false) is unreachable. |
| 3460 | if (match(V: IIOperand, P: m_CombineOr(L: m_Zero(), R: m_Undef()))) { |
| 3461 | CreateNonTerminatorUnreachable(InsertAt: II); |
| 3462 | return eraseInstFromFunction(I&: *II); |
| 3463 | } |
| 3464 | |
| 3465 | // Update the cache of affected values for this assumption (we might be |
| 3466 | // here because we just simplified the condition). |
| 3467 | AC.updateAffectedValues(CI: cast<AssumeInst>(Val: II)); |
| 3468 | break; |
| 3469 | } |
| 3470 | case Intrinsic::experimental_guard: { |
| 3471 | // Is this guard followed by another guard? We scan forward over a small |
| 3472 | // fixed window of instructions to handle common cases with conditions |
| 3473 | // computed between guards. |
| 3474 | Instruction *NextInst = II->getNextNonDebugInstruction(); |
| 3475 | for (unsigned i = 0; i < GuardWideningWindow; i++) { |
| 3476 | // Note: Using context-free form to avoid compile time blow up |
| 3477 | if (!isSafeToSpeculativelyExecute(I: NextInst)) |
| 3478 | break; |
| 3479 | NextInst = NextInst->getNextNonDebugInstruction(); |
| 3480 | } |
| 3481 | Value *NextCond = nullptr; |
| 3482 | if (match(V: NextInst, |
| 3483 | P: m_Intrinsic<Intrinsic::experimental_guard>(Op0: m_Value(V&: NextCond)))) { |
| 3484 | Value *CurrCond = II->getArgOperand(i: 0); |
| 3485 | |
| 3486 | // Remove a guard that is immediately preceded by an identical guard.
| 3487 | // Otherwise canonicalize guard(a); guard(b) -> guard(a & b). |
| 3488 | if (CurrCond != NextCond) { |
| 3489 | Instruction *MoveI = II->getNextNonDebugInstruction(); |
| 3490 | while (MoveI != NextInst) { |
| 3491 | auto *Temp = MoveI; |
| 3492 | MoveI = MoveI->getNextNonDebugInstruction(); |
| 3493 | Temp->moveBefore(InsertPos: II->getIterator()); |
| 3494 | } |
| 3495 | replaceOperand(I&: *II, OpNum: 0, V: Builder.CreateAnd(LHS: CurrCond, RHS: NextCond)); |
| 3496 | } |
| 3497 | eraseInstFromFunction(I&: *NextInst); |
| 3498 | return II; |
| 3499 | } |
| 3500 | break; |
| 3501 | } |
| 3502 | case Intrinsic::vector_insert: { |
| 3503 | Value *Vec = II->getArgOperand(i: 0); |
| 3504 | Value *SubVec = II->getArgOperand(i: 1); |
| 3505 | Value *Idx = II->getArgOperand(i: 2); |
| 3506 | auto *DstTy = dyn_cast<FixedVectorType>(Val: II->getType()); |
| 3507 | auto *VecTy = dyn_cast<FixedVectorType>(Val: Vec->getType()); |
| 3508 | auto *SubVecTy = dyn_cast<FixedVectorType>(Val: SubVec->getType()); |
| 3509 | |
| 3510 | // Only canonicalize if the destination vector, Vec, and SubVec are all |
| 3511 | // fixed vectors. |
| 3512 | if (DstTy && VecTy && SubVecTy) { |
| 3513 | unsigned DstNumElts = DstTy->getNumElements(); |
| 3514 | unsigned VecNumElts = VecTy->getNumElements(); |
| 3515 | unsigned SubVecNumElts = SubVecTy->getNumElements(); |
| 3516 | unsigned IdxN = cast<ConstantInt>(Val: Idx)->getZExtValue(); |
| 3517 | |
| 3518 | // An insert that entirely overwrites Vec with SubVec is a nop. |
| 3519 | if (VecNumElts == SubVecNumElts) |
| 3520 | return replaceInstUsesWith(I&: CI, V: SubVec); |
| 3521 | |
| 3522 | // Widen SubVec into a vector of the same width as Vec, since |
| 3523 | // shufflevector requires the two input vectors to be the same width. |
| 3524 | // Elements beyond the bounds of SubVec within the widened vector are |
| 3525 | // undefined. |
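|      | // For example, inserting a <2 x i32> SubVec into a <4 x i32> Vec at index 2
|      | // widens SubVec with mask <0, 1, poison, poison> and then combines it with
|      | // Vec using mask <0, 1, 4, 5>.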
| 3526 | SmallVector<int, 8> WidenMask; |
| 3527 | unsigned i; |
| 3528 | for (i = 0; i != SubVecNumElts; ++i) |
| 3529 | WidenMask.push_back(Elt: i); |
| 3530 | for (; i != VecNumElts; ++i) |
| 3531 | WidenMask.push_back(Elt: PoisonMaskElem); |
| 3532 | |
| 3533 | Value *WidenShuffle = Builder.CreateShuffleVector(V: SubVec, Mask: WidenMask); |
| 3534 | |
| 3535 | SmallVector<int, 8> Mask; |
| 3536 | for (unsigned i = 0; i != IdxN; ++i) |
| 3537 | Mask.push_back(Elt: i); |
| 3538 | for (unsigned i = DstNumElts; i != DstNumElts + SubVecNumElts; ++i) |
| 3539 | Mask.push_back(Elt: i); |
| 3540 | for (unsigned i = IdxN + SubVecNumElts; i != DstNumElts; ++i) |
| 3541 | Mask.push_back(Elt: i); |
| 3542 | |
| 3543 | Value *Shuffle = Builder.CreateShuffleVector(V1: Vec, V2: WidenShuffle, Mask); |
| 3544 | return replaceInstUsesWith(I&: CI, V: Shuffle); |
| 3545 | } |
| 3546 | break; |
| 3547 | } |
| 3548 | case Intrinsic::vector_extract: { |
| 3549 | Value *Vec = II->getArgOperand(i: 0); |
| 3550 | Value *Idx = II->getArgOperand(i: 1); |
| 3551 | |
| 3552 | Type *ReturnType = II->getType(); |
| 3553 | // (extract_vector (insert_vector InsertTuple, InsertValue, InsertIdx), |
| 3554 | // ExtractIdx) |
| 3555 | unsigned ExtractIdx = cast<ConstantInt>(Val: Idx)->getZExtValue();
| 3556 | Value *InsertTuple, *InsertIdx, *InsertValue; |
| 3557 | if (match(V: Vec, P: m_Intrinsic<Intrinsic::vector_insert>(Op0: m_Value(V&: InsertTuple), |
| 3558 | Op1: m_Value(V&: InsertValue), |
| 3559 | Op2: m_Value(V&: InsertIdx))) && |
| 3560 | InsertValue->getType() == ReturnType) { |
| 3561 | unsigned Index = cast<ConstantInt>(Val: InsertIdx)->getZExtValue(); |
| 3562 | // Case where we get the same index right after setting it. |
| 3563 | // extract.vector(insert.vector(InsertTuple, InsertValue, Idx), Idx) --> |
| 3564 | // InsertValue |
| 3565 | if (ExtractIdx == Index) |
| 3566 | return replaceInstUsesWith(I&: CI, V: InsertValue); |
| 3567 | // If we are extracting a different index than the one written by the
| 3568 | // insert.vector intrinsic, we can just take the input tuple from further up
| 3569 | // the chain: extract.vector(insert.vector(InsertTuple, InsertValue,
| 3570 | // InsertIndex), ExtractIndex)
| 3571 | // --> extract.vector(InsertTuple, ExtractIndex)
| 3572 | else |
| 3573 | return replaceOperand(I&: CI, OpNum: 0, V: InsertTuple); |
| 3574 | } |
| 3575 | |
| 3576 | auto *DstTy = dyn_cast<VectorType>(Val: ReturnType); |
| 3577 | auto *VecTy = dyn_cast<VectorType>(Val: Vec->getType()); |
| 3578 | |
| 3579 | if (DstTy && VecTy) { |
| 3580 | auto DstEltCnt = DstTy->getElementCount(); |
| 3581 | auto VecEltCnt = VecTy->getElementCount(); |
| 3582 | unsigned IdxN = cast<ConstantInt>(Val: Idx)->getZExtValue(); |
| 3583 | |
| 3584 | // Extracting the entirety of Vec is a nop. |
| 3585 | if (DstEltCnt == VecEltCnt) {
| 3586 | replaceInstUsesWith(I&: CI, V: Vec); |
| 3587 | return eraseInstFromFunction(I&: CI); |
| 3588 | } |
| 3589 | |
| 3590 | // Only canonicalize to shufflevector if the destination vector and |
| 3591 | // Vec are fixed vectors. |
| 3592 | if (VecEltCnt.isScalable() || DstEltCnt.isScalable()) |
| 3593 | break; |
| 3594 | |
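|      | // For example (illustrative): extracting a <4 x i32> result at index 4
|      | // from an <8 x i32> Vec becomes
|      | //   shufflevector <8 x i32> %Vec, <8 x i32> poison, <4 x i32> <4, 5, 6, 7>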
| 3595 | SmallVector<int, 8> Mask; |
| 3596 | for (unsigned i = 0; i != DstEltCnt.getKnownMinValue(); ++i) |
| 3597 | Mask.push_back(Elt: IdxN + i); |
| 3598 | |
| 3599 | Value *Shuffle = Builder.CreateShuffleVector(V: Vec, Mask); |
| 3600 | return replaceInstUsesWith(I&: CI, V: Shuffle); |
| 3601 | } |
| 3602 | break; |
| 3603 | } |
| 3604 | case Intrinsic::experimental_vp_reverse: { |
| 3605 | Value *X; |
| 3606 | Value *Vec = II->getArgOperand(i: 0); |
| 3607 | Value *Mask = II->getArgOperand(i: 1); |
| 3608 | if (!match(V: Mask, P: m_AllOnes())) |
| 3609 | break; |
| 3610 | Value *EVL = II->getArgOperand(i: 2); |
| 3611 | // TODO: Canonicalize experimental.vp.reverse after unop/binops? |
| 3612 | // rev(unop rev(X)) --> unop X |
| 3613 | if (match(V: Vec, |
| 3614 | P: m_OneUse(SubPattern: m_UnOp(X: m_Intrinsic<Intrinsic::experimental_vp_reverse>( |
| 3615 | Op0: m_Value(V&: X), Op1: m_AllOnes(), Op2: m_Specific(V: EVL)))))) { |
| 3616 | auto *OldUnOp = cast<UnaryOperator>(Val: Vec); |
| 3617 | auto *NewUnOp = UnaryOperator::CreateWithCopiedFlags( |
| 3618 | Opc: OldUnOp->getOpcode(), V: X, CopyO: OldUnOp, Name: OldUnOp->getName(), |
| 3619 | InsertBefore: II->getIterator()); |
| 3620 | return replaceInstUsesWith(I&: CI, V: NewUnOp); |
| 3621 | } |
| 3622 | break; |
| 3623 | } |
| 3624 | case Intrinsic::vector_reduce_or: |
| 3625 | case Intrinsic::vector_reduce_and: { |
| 3626 | // Canonicalize logical or/and reductions: |
| 3627 | // Or reduction for i1 is represented as: |
| 3628 | // %val = bitcast <ReduxWidth x i1> to iReduxWidth |
| 3629 | // %res = cmp ne iReduxWidth %val, 0 |
| 3630 | // And reduction for i1 is represented as: |
| 3631 | // %val = bitcast <ReduxWidth x i1> to iReduxWidth |
| 3632 | // %res = cmp eq iReduxWidth %val, -1 (i.e. all ones)
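|      | // For example (illustrative, n = 4):
|      | //   %val = bitcast <4 x i1> %v to i4
|      | //   vector_reduce_or(<4 x i1> %v)  -->  icmp ne i4 %val, 0
|      | //   vector_reduce_and(<4 x i1> %v) -->  icmp eq i4 %val, -1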
| 3633 | Value *Arg = II->getArgOperand(i: 0); |
| 3634 | Value *Vect; |
| 3635 | |
| 3636 | if (Value *NewOp = |
| 3637 | simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) { |
| 3638 | replaceUse(U&: II->getOperandUse(i: 0), NewValue: NewOp); |
| 3639 | return II; |
| 3640 | } |
| 3641 | |
| 3642 | if (match(V: Arg, P: m_ZExtOrSExtOrSelf(Op: m_Value(V&: Vect)))) { |
| 3643 | if (auto *FTy = dyn_cast<FixedVectorType>(Val: Vect->getType())) |
| 3644 | if (FTy->getElementType() == Builder.getInt1Ty()) { |
| 3645 | Value *Res = Builder.CreateBitCast( |
| 3646 | V: Vect, DestTy: Builder.getIntNTy(N: FTy->getNumElements())); |
| 3647 | if (IID == Intrinsic::vector_reduce_and) { |
| 3648 | Res = Builder.CreateICmpEQ( |
| 3649 | LHS: Res, RHS: ConstantInt::getAllOnesValue(Ty: Res->getType())); |
| 3650 | } else { |
| 3651 | assert(IID == Intrinsic::vector_reduce_or && |
| 3652 | "Expected or reduction." ); |
| 3653 | Res = Builder.CreateIsNotNull(Arg: Res); |
| 3654 | } |
| 3655 | if (Arg != Vect) |
| 3656 | Res = Builder.CreateCast(Op: cast<CastInst>(Val: Arg)->getOpcode(), V: Res, |
| 3657 | DestTy: II->getType()); |
| 3658 | return replaceInstUsesWith(I&: CI, V: Res); |
| 3659 | } |
| 3660 | } |
| 3661 | [[fallthrough]]; |
| 3662 | } |
| 3663 | case Intrinsic::vector_reduce_add: { |
| 3664 | if (IID == Intrinsic::vector_reduce_add) { |
| 3665 | // Convert vector_reduce_add(ZExt(<n x i1>)) to |
| 3666 | // ZExtOrTrunc(ctpop(bitcast <n x i1> to in)). |
| 3667 | // Convert vector_reduce_add(SExt(<n x i1>)) to |
| 3668 | // -ZExtOrTrunc(ctpop(bitcast <n x i1> to in)). |
| 3669 | // Convert vector_reduce_add(<n x i1>) to |
| 3670 | // Trunc(ctpop(bitcast <n x i1> to in)). |
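|      | // Rationale (illustrative): under zext each true lane contributes 1, so
|      | // the sum is the population count; under sext each true lane contributes
|      | // -1, so the sum is the negated population count, hence the extra
|      | // negation below.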
| 3671 | Value *Arg = II->getArgOperand(i: 0); |
| 3672 | Value *Vect; |
| 3673 | |
| 3674 | if (Value *NewOp = |
| 3675 | simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) { |
| 3676 | replaceUse(U&: II->getOperandUse(i: 0), NewValue: NewOp); |
| 3677 | return II; |
| 3678 | } |
| 3679 | |
| 3680 | if (match(V: Arg, P: m_ZExtOrSExtOrSelf(Op: m_Value(V&: Vect)))) { |
| 3681 | if (auto *FTy = dyn_cast<FixedVectorType>(Val: Vect->getType())) |
| 3682 | if (FTy->getElementType() == Builder.getInt1Ty()) { |
| 3683 | Value *V = Builder.CreateBitCast( |
| 3684 | V: Vect, DestTy: Builder.getIntNTy(N: FTy->getNumElements())); |
| 3685 | Value *Res = Builder.CreateUnaryIntrinsic(ID: Intrinsic::ctpop, V); |
| 3686 | if (Res->getType() != II->getType()) |
| 3687 | Res = Builder.CreateZExtOrTrunc(V: Res, DestTy: II->getType()); |
| 3688 | if (Arg != Vect && |
| 3689 | cast<Instruction>(Val: Arg)->getOpcode() == Instruction::SExt) |
| 3690 | Res = Builder.CreateNeg(V: Res); |
| 3691 | return replaceInstUsesWith(I&: CI, V: Res); |
| 3692 | } |
| 3693 | } |
| 3694 | } |
| 3695 | [[fallthrough]]; |
| 3696 | } |
| 3697 | case Intrinsic::vector_reduce_xor: { |
| 3698 | if (IID == Intrinsic::vector_reduce_xor) { |
| 3699 | // Exclusive disjunction reduction over the vector with |
| 3700 | // (potentially-extended) i1 element type is actually a |
| 3701 | // (potentially-extended) arithmetic `add` reduction over the original |
| 3702 | // non-extended value: |
| 3703 | // vector_reduce_xor(?ext(<n x i1>)) |
| 3704 | // --> |
| 3705 | // ?ext(vector_reduce_add(<n x i1>)) |
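|      | // Rationale: xor over i1 lanes computes the parity of the true lanes,
|      | // which is exactly what the wrapping i1 add reduction computes, and the
|      | // extension then commutes with the scalar result.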
| 3706 | Value *Arg = II->getArgOperand(i: 0); |
| 3707 | Value *Vect; |
| 3708 | |
| 3709 | if (Value *NewOp = |
| 3710 | simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) { |
| 3711 | replaceUse(U&: II->getOperandUse(i: 0), NewValue: NewOp); |
| 3712 | return II; |
| 3713 | } |
| 3714 | |
| 3715 | if (match(V: Arg, P: m_ZExtOrSExtOrSelf(Op: m_Value(V&: Vect)))) { |
| 3716 | if (auto *VTy = dyn_cast<VectorType>(Val: Vect->getType())) |
| 3717 | if (VTy->getElementType() == Builder.getInt1Ty()) { |
| 3718 | Value *Res = Builder.CreateAddReduce(Src: Vect); |
| 3719 | if (Arg != Vect) |
| 3720 | Res = Builder.CreateCast(Op: cast<CastInst>(Val: Arg)->getOpcode(), V: Res, |
| 3721 | DestTy: II->getType()); |
| 3722 | return replaceInstUsesWith(I&: CI, V: Res); |
| 3723 | } |
| 3724 | } |
| 3725 | } |
| 3726 | [[fallthrough]]; |
| 3727 | } |
| 3728 | case Intrinsic::vector_reduce_mul: { |
| 3729 | if (IID == Intrinsic::vector_reduce_mul) { |
| 3730 | // Multiplicative reduction over the vector with (potentially-extended) |
| 3731 | // i1 element type is actually a (potentially zero-extended) |
| 3732 | // logical `and` reduction over the original non-extended value: |
| 3733 | // vector_reduce_mul(?ext(<n x i1>)) |
| 3734 | // --> |
| 3735 | // zext(vector_reduce_and(<n x i1>)) |
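|      | // Rationale for the zext/unextended case (illustrative): each lane is 0
|      | // or 1, so the product is 1 exactly when every lane is true, i.e. the
|      | // `and` reduction of the original <n x i1> value.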
| 3736 | Value *Arg = II->getArgOperand(i: 0); |
| 3737 | Value *Vect; |
| 3738 | |
| 3739 | if (Value *NewOp = |
| 3740 | simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) { |
| 3741 | replaceUse(U&: II->getOperandUse(i: 0), NewValue: NewOp); |
| 3742 | return II; |
| 3743 | } |
| 3744 | |
| 3745 | if (match(V: Arg, P: m_ZExtOrSExtOrSelf(Op: m_Value(V&: Vect)))) { |
| 3746 | if (auto *VTy = dyn_cast<VectorType>(Val: Vect->getType())) |
| 3747 | if (VTy->getElementType() == Builder.getInt1Ty()) { |
| 3748 | Value *Res = Builder.CreateAndReduce(Src: Vect); |
| 3749 | if (Res->getType() != II->getType()) |
| 3750 | Res = Builder.CreateZExt(V: Res, DestTy: II->getType()); |
| 3751 | return replaceInstUsesWith(I&: CI, V: Res); |
| 3752 | } |
| 3753 | } |
| 3754 | } |
| 3755 | [[fallthrough]]; |
| 3756 | } |
| 3757 | case Intrinsic::vector_reduce_umin: |
| 3758 | case Intrinsic::vector_reduce_umax: { |
| 3759 | if (IID == Intrinsic::vector_reduce_umin || |
| 3760 | IID == Intrinsic::vector_reduce_umax) { |
| 3761 | // UMin/UMax reduction over the vector with (potentially-extended) |
| 3762 | // i1 element type is actually a (potentially-extended) |
| 3763 | // logical `and`/`or` reduction over the original non-extended value: |
| 3764 | // vector_reduce_u{min,max}(?ext(<n x i1>)) |
| 3765 | // --> |
| 3766 | // ?ext(vector_reduce_{and,or}(<n x i1>)) |
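|      | // Rationale: as unsigned values false (0) is the minimum and true (1
|      | // after zext, all-ones after sext) is the maximum, so umin is true only
|      | // when all lanes are true (`and`) and umax is true when any lane is
|      | // true (`or`).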
| 3767 | Value *Arg = II->getArgOperand(i: 0); |
| 3768 | Value *Vect; |
| 3769 | |
| 3770 | if (Value *NewOp = |
| 3771 | simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) { |
| 3772 | replaceUse(U&: II->getOperandUse(i: 0), NewValue: NewOp); |
| 3773 | return II; |
| 3774 | } |
| 3775 | |
| 3776 | if (match(V: Arg, P: m_ZExtOrSExtOrSelf(Op: m_Value(V&: Vect)))) { |
| 3777 | if (auto *VTy = dyn_cast<VectorType>(Val: Vect->getType())) |
| 3778 | if (VTy->getElementType() == Builder.getInt1Ty()) { |
| 3779 | Value *Res = IID == Intrinsic::vector_reduce_umin |
| 3780 | ? Builder.CreateAndReduce(Src: Vect) |
| 3781 | : Builder.CreateOrReduce(Src: Vect); |
| 3782 | if (Arg != Vect) |
| 3783 | Res = Builder.CreateCast(Op: cast<CastInst>(Val: Arg)->getOpcode(), V: Res, |
| 3784 | DestTy: II->getType()); |
| 3785 | return replaceInstUsesWith(I&: CI, V: Res); |
| 3786 | } |
| 3787 | } |
| 3788 | } |
| 3789 | [[fallthrough]]; |
| 3790 | } |
| 3791 | case Intrinsic::vector_reduce_smin: |
| 3792 | case Intrinsic::vector_reduce_smax: { |
| 3793 | if (IID == Intrinsic::vector_reduce_smin || |
| 3794 | IID == Intrinsic::vector_reduce_smax) { |
| 3795 | // SMin/SMax reduction over the vector with (potentially-extended) |
| 3796 | // i1 element type is actually a (potentially-extended) |
| 3797 | // logical `and`/`or` reduction over the original non-extended value: |
| 3798 | // vector_reduce_s{min,max}(<n x i1>) |
| 3799 | // --> |
| 3800 | // vector_reduce_{or,and}(<n x i1>) |
| 3801 | // and |
| 3802 | // vector_reduce_s{min,max}(sext(<n x i1>)) |
| 3803 | // --> |
| 3804 | // sext(vector_reduce_{or,and}(<n x i1>)) |
| 3805 | // and |
| 3806 | // vector_reduce_s{min,max}(zext(<n x i1>)) |
| 3807 | // --> |
| 3808 | // zext(vector_reduce_{and,or}(<n x i1>)) |
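|      | // Rationale: as signed values true sign-extends to -1, which is smaller
|      | // than false (0), so smin acts as `or` and smax as `and`; after a zero
|      | // extension true (1) is larger than false (0), which flips the choice
|      | // (see the ExtOpc comparison below).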
| 3809 | Value *Arg = II->getArgOperand(i: 0); |
| 3810 | Value *Vect; |
| 3811 | |
| 3812 | if (Value *NewOp = |
| 3813 | simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) { |
| 3814 | replaceUse(U&: II->getOperandUse(i: 0), NewValue: NewOp); |
| 3815 | return II; |
| 3816 | } |
| 3817 | |
| 3818 | if (match(V: Arg, P: m_ZExtOrSExtOrSelf(Op: m_Value(V&: Vect)))) { |
| 3819 | if (auto *VTy = dyn_cast<VectorType>(Val: Vect->getType())) |
| 3820 | if (VTy->getElementType() == Builder.getInt1Ty()) { |
| 3821 | Instruction::CastOps ExtOpc = Instruction::CastOps::CastOpsEnd; |
| 3822 | if (Arg != Vect) |
| 3823 | ExtOpc = cast<CastInst>(Val: Arg)->getOpcode(); |
| 3824 | Value *Res = ((IID == Intrinsic::vector_reduce_smin) == |
| 3825 | (ExtOpc == Instruction::CastOps::ZExt)) |
| 3826 | ? Builder.CreateAndReduce(Src: Vect) |
| 3827 | : Builder.CreateOrReduce(Src: Vect); |
| 3828 | if (Arg != Vect) |
| 3829 | Res = Builder.CreateCast(Op: ExtOpc, V: Res, DestTy: II->getType()); |
| 3830 | return replaceInstUsesWith(I&: CI, V: Res); |
| 3831 | } |
| 3832 | } |
| 3833 | } |
| 3834 | [[fallthrough]]; |
| 3835 | } |
| 3836 | case Intrinsic::vector_reduce_fmax: |
| 3837 | case Intrinsic::vector_reduce_fmin: |
| 3838 | case Intrinsic::vector_reduce_fadd: |
| 3839 | case Intrinsic::vector_reduce_fmul: { |
| 3840 | bool CanReorderLanes = (IID != Intrinsic::vector_reduce_fadd && |
| 3841 | IID != Intrinsic::vector_reduce_fmul) || |
| 3842 | II->hasAllowReassoc(); |
| 3843 | const unsigned ArgIdx = (IID == Intrinsic::vector_reduce_fadd || |
| 3844 | IID == Intrinsic::vector_reduce_fmul) |
| 3845 | ? 1 |
| 3846 | : 0; |
| 3847 | Value *Arg = II->getArgOperand(i: ArgIdx); |
| 3848 | if (Value *NewOp = simplifyReductionOperand(Arg, CanReorderLanes)) { |
| 3849 | replaceUse(U&: II->getOperandUse(i: ArgIdx), NewValue: NewOp); |
| 3850 | return nullptr; |
| 3851 | } |
| 3852 | break; |
| 3853 | } |
| 3854 | case Intrinsic::is_fpclass: { |
| 3855 | if (Instruction *I = foldIntrinsicIsFPClass(II&: *II)) |
| 3856 | return I; |
| 3857 | break; |
| 3858 | } |
| 3859 | case Intrinsic::threadlocal_address: { |
| 3860 | Align MinAlign = getKnownAlignment(V: II->getArgOperand(i: 0), DL, CxtI: II, AC: &AC, DT: &DT); |
| 3861 | MaybeAlign Align = II->getRetAlign(); |
| 3862 | if (MinAlign > Align.valueOrOne()) { |
| 3863 | II->addRetAttr(Attr: Attribute::getWithAlignment(Context&: II->getContext(), Alignment: MinAlign)); |
| 3864 | return II; |
| 3865 | } |
| 3866 | break; |
| 3867 | } |
| 3868 | case Intrinsic::frexp: { |
| 3869 | Value *X; |
| 3870 | // The first result is idempotent with the added complication of the struct |
| 3871 | // return, and the second result is zero because the value is already |
| 3872 | // normalized. |
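|      | // That is (illustrative):
|      | //   frexp(extractvalue(frexp(%x), 0)) --> {extractvalue(frexp(%x), 0), 0}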
| 3873 | if (match(V: II->getArgOperand(i: 0), P: m_ExtractValue<0>(V: m_Value(V&: X)))) { |
| 3874 | if (match(V: X, P: m_Intrinsic<Intrinsic::frexp>(Op0: m_Value()))) { |
| 3875 | X = Builder.CreateInsertValue( |
| 3876 | Agg: X, Val: Constant::getNullValue(Ty: II->getType()->getStructElementType(N: 1)), |
| 3877 | Idxs: 1); |
| 3878 | return replaceInstUsesWith(I&: *II, V: X); |
| 3879 | } |
| 3880 | } |
| 3881 | break; |
| 3882 | } |
| 3883 | default: { |
| 3884 | // Handle target specific intrinsics |
| 3885 | std::optional<Instruction *> V = targetInstCombineIntrinsic(II&: *II); |
| 3886 | if (V) |
| 3887 | return *V; |
| 3888 | break; |
| 3889 | } |
| 3890 | } |
| 3891 | |
| 3892 | // Try to fold intrinsic into select operands. This is legal if: |
| 3893 | // * The intrinsic is speculatable. |
| 3894 | // * The select condition is not a vector, or the intrinsic does not |
| 3895 | // perform cross-lane operations. |
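|      | // For example (illustrative): smax(select(%c, 10, 20), 5) can be folded
|      | // to select(%c, 10, 20), since smax folds to a constant on each arm.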
| 3896 | if (isSafeToSpeculativelyExecuteWithVariableReplaced(I: &CI) && |
| 3897 | isNotCrossLaneOperation(I: II)) |
| 3898 | for (Value *Op : II->args()) |
| 3899 | if (auto *Sel = dyn_cast<SelectInst>(Val: Op)) |
| 3900 | if (Instruction *R = FoldOpIntoSelect(Op&: *II, SI: Sel)) |
| 3901 | return R; |
| 3902 | |
| 3903 | if (Instruction *Shuf = foldShuffledIntrinsicOperands(II)) |
| 3904 | return Shuf; |
| 3905 | |
| 3906 | if (Value *Reverse = foldReversedIntrinsicOperands(II)) |
| 3907 | return replaceInstUsesWith(I&: *II, V: Reverse); |
| 3908 | |
| 3909 | // Some intrinsics (like experimental_gc_statepoint) can be used in invoke |
| 3910 | // context, so it is handled in visitCallBase and we should trigger it. |
| 3911 | return visitCallBase(Call&: *II); |
| 3912 | } |
| 3913 | |
| 3914 | // Fence instruction simplification |
| 3915 | Instruction *InstCombinerImpl::visitFenceInst(FenceInst &FI) { |
| 3916 | auto *NFI = dyn_cast<FenceInst>(Val: FI.getNextNonDebugInstruction()); |
| 3917 | // This check is solely here to handle arbitrary target-dependent syncscopes. |
| 3918 | // TODO: Can be removed if it does not matter in practice.
| 3919 | if (NFI && FI.isIdenticalTo(I: NFI)) |
| 3920 | return eraseInstFromFunction(I&: FI); |
| 3921 | |
| 3922 | // Returns true if FI1 is identical or stronger fence than FI2. |
| 3923 | auto isIdenticalOrStrongerFence = [](FenceInst *FI1, FenceInst *FI2) { |
| 3924 | auto FI1SyncScope = FI1->getSyncScopeID(); |
| 3925 | // Consider same scope, where scope is global or single-thread. |
| 3926 | if (FI1SyncScope != FI2->getSyncScopeID() || |
| 3927 | (FI1SyncScope != SyncScope::System && |
| 3928 | FI1SyncScope != SyncScope::SingleThread)) |
| 3929 | return false; |
| 3930 | |
| 3931 | return isAtLeastOrStrongerThan(AO: FI1->getOrdering(), Other: FI2->getOrdering()); |
| 3932 | }; |
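|      | // For example (illustrative): a `fence acquire` adjacent to a
|      | // `fence seq_cst` in the same scope is redundant and is erased when it
|      | // is visited as FI below.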
| 3933 | if (NFI && isIdenticalOrStrongerFence(NFI, &FI)) |
| 3934 | return eraseInstFromFunction(I&: FI); |
| 3935 | |
| 3936 | if (auto *PFI = dyn_cast_or_null<FenceInst>(Val: FI.getPrevNonDebugInstruction())) |
| 3937 | if (isIdenticalOrStrongerFence(PFI, &FI)) |
| 3938 | return eraseInstFromFunction(I&: FI); |
| 3939 | return nullptr; |
| 3940 | } |
| 3941 | |
| 3942 | // InvokeInst simplification |
| 3943 | Instruction *InstCombinerImpl::visitInvokeInst(InvokeInst &II) { |
| 3944 | return visitCallBase(Call&: II); |
| 3945 | } |
| 3946 | |
| 3947 | // CallBrInst simplification |
| 3948 | Instruction *InstCombinerImpl::visitCallBrInst(CallBrInst &CBI) { |
| 3949 | return visitCallBase(Call&: CBI); |
| 3950 | } |
| 3951 | |
| 3952 | Instruction *InstCombinerImpl::tryOptimizeCall(CallInst *CI) { |
| 3953 | if (!CI->getCalledFunction()) return nullptr; |
| 3954 | |
| 3955 | // Skip optimizing notail and musttail calls so |
| 3956 | // LibCallSimplifier::optimizeCall doesn't have to preserve those invariants. |
| 3957 | // LibCallSimplifier::optimizeCall should try to preserve tail calls though. |
| 3958 | if (CI->isMustTailCall() || CI->isNoTailCall()) |
| 3959 | return nullptr; |
| 3960 | |
| 3961 | auto InstCombineRAUW = [this](Instruction *From, Value *With) { |
| 3962 | replaceInstUsesWith(I&: *From, V: With); |
| 3963 | }; |
| 3964 | auto InstCombineErase = [this](Instruction *I) { |
| 3965 | eraseInstFromFunction(I&: *I); |
| 3966 | }; |
| 3967 | LibCallSimplifier Simplifier(DL, &TLI, &DT, &DC, &AC, ORE, BFI, PSI, |
| 3968 | InstCombineRAUW, InstCombineErase); |
| 3969 | if (Value *With = Simplifier.optimizeCall(CI, B&: Builder)) { |
| 3970 | ++NumSimplified; |
| 3971 | return CI->use_empty() ? CI : replaceInstUsesWith(I&: *CI, V: With); |
| 3972 | } |
| 3973 | |
| 3974 | return nullptr; |
| 3975 | } |
| 3976 | |
| 3977 | static IntrinsicInst *findInitTrampolineFromAlloca(Value *TrampMem) { |
| 3978 | // Strip off at most one level of pointer casts, looking for an alloca. This |
| 3979 | // is good enough in practice and simpler than handling any number of casts. |
| 3980 | Value *Underlying = TrampMem->stripPointerCasts(); |
| 3981 | if (Underlying != TrampMem && |
| 3982 | (!Underlying->hasOneUse() || Underlying->user_back() != TrampMem)) |
| 3983 | return nullptr; |
| 3984 | if (!isa<AllocaInst>(Val: Underlying)) |
| 3985 | return nullptr; |
| 3986 | |
| 3987 | IntrinsicInst *InitTrampoline = nullptr; |
| 3988 | for (User *U : TrampMem->users()) { |
| 3989 | IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: U); |
| 3990 | if (!II) |
| 3991 | return nullptr; |
| 3992 | if (II->getIntrinsicID() == Intrinsic::init_trampoline) { |
| 3993 | if (InitTrampoline) |
| 3994 | // More than one init_trampoline writes to this value. Give up. |
| 3995 | return nullptr; |
| 3996 | InitTrampoline = II; |
| 3997 | continue; |
| 3998 | } |
| 3999 | if (II->getIntrinsicID() == Intrinsic::adjust_trampoline) |
| 4000 | // Allow any number of calls to adjust.trampoline. |
| 4001 | continue; |
| 4002 | return nullptr; |
| 4003 | } |
| 4004 | |
| 4005 | // No call to init.trampoline found. |
| 4006 | if (!InitTrampoline) |
| 4007 | return nullptr; |
| 4008 | |
| 4009 | // Check that the alloca is being used in the expected way. |
| 4010 | if (InitTrampoline->getOperand(i_nocapture: 0) != TrampMem) |
| 4011 | return nullptr; |
| 4012 | |
| 4013 | return InitTrampoline; |
| 4014 | } |
| 4015 | |
| 4016 | static IntrinsicInst *findInitTrampolineFromBB(IntrinsicInst *AdjustTramp, |
| 4017 | Value *TrampMem) { |
| 4018 | // Visit all the previous instructions in the basic block, and try to find an
| 4019 | // init.trampoline which has a direct path to the adjust.trampoline.
| 4020 | for (BasicBlock::iterator I = AdjustTramp->getIterator(), |
| 4021 | E = AdjustTramp->getParent()->begin(); |
| 4022 | I != E;) { |
| 4023 | Instruction *Inst = &*--I; |
| 4024 | if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val&: I)) |
| 4025 | if (II->getIntrinsicID() == Intrinsic::init_trampoline && |
| 4026 | II->getOperand(i_nocapture: 0) == TrampMem) |
| 4027 | return II; |
| 4028 | if (Inst->mayWriteToMemory()) |
| 4029 | return nullptr; |
| 4030 | } |
| 4031 | return nullptr; |
| 4032 | } |
| 4033 | |
| 4034 | // Given a call to llvm.adjust.trampoline, find and return the corresponding |
| 4035 | // call to llvm.init.trampoline if the call to the trampoline can be optimized |
| 4036 | // to a direct call to a function. Otherwise return NULL. |
| 4037 | static IntrinsicInst *findInitTrampoline(Value *Callee) { |
| 4038 | Callee = Callee->stripPointerCasts(); |
| 4039 | IntrinsicInst *AdjustTramp = dyn_cast<IntrinsicInst>(Val: Callee); |
| 4040 | if (!AdjustTramp || |
| 4041 | AdjustTramp->getIntrinsicID() != Intrinsic::adjust_trampoline) |
| 4042 | return nullptr; |
| 4043 | |
| 4044 | Value *TrampMem = AdjustTramp->getOperand(i_nocapture: 0); |
| 4045 | |
| 4046 | if (IntrinsicInst *IT = findInitTrampolineFromAlloca(TrampMem)) |
| 4047 | return IT; |
| 4048 | if (IntrinsicInst *IT = findInitTrampolineFromBB(AdjustTramp, TrampMem)) |
| 4049 | return IT; |
| 4050 | return nullptr; |
| 4051 | } |
| 4052 | |
| 4053 | bool InstCombinerImpl::annotateAnyAllocSite(CallBase &Call, |
| 4054 | const TargetLibraryInfo *TLI) { |
| 4055 | // Note: We only handle cases which can't be driven from generic attributes |
| 4056 | // here. So, for example, nonnull and noalias (which are common properties |
| 4057 | // of some allocation functions) are expected to be handled via annotation |
| 4058 | // of the respective allocator declaration with generic attributes. |
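|      | // For example (illustrative): a call to malloc(64) whose declaration is
|      | // already nonnull gets dereferenceable(64) on its return value; without
|      | // nonnull it gets dereferenceable_or_null(64) instead, plus an align
|      | // attribute when the allocation alignment is a known power of two.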
| 4059 | bool Changed = false; |
| 4060 | |
| 4061 | if (!Call.getType()->isPointerTy()) |
| 4062 | return Changed; |
| 4063 | |
| 4064 | std::optional<APInt> Size = getAllocSize(CB: &Call, TLI); |
| 4065 | if (Size && *Size != 0) { |
| 4066 | // TODO: We really should just emit deref_or_null here and then |
| 4067 | // let the generic inference code combine that with nonnull. |
| 4068 | if (Call.hasRetAttr(Kind: Attribute::NonNull)) { |
| 4069 | Changed = !Call.hasRetAttr(Kind: Attribute::Dereferenceable); |
| 4070 | Call.addRetAttr(Attr: Attribute::getWithDereferenceableBytes( |
| 4071 | Context&: Call.getContext(), Bytes: Size->getLimitedValue())); |
| 4072 | } else { |
| 4073 | Changed = !Call.hasRetAttr(Kind: Attribute::DereferenceableOrNull); |
| 4074 | Call.addRetAttr(Attr: Attribute::getWithDereferenceableOrNullBytes( |
| 4075 | Context&: Call.getContext(), Bytes: Size->getLimitedValue())); |
| 4076 | } |
| 4077 | } |
| 4078 | |
| 4079 | // Add alignment attribute if alignment is a power of two constant. |
| 4080 | Value *Alignment = getAllocAlignment(V: &Call, TLI); |
| 4081 | if (!Alignment) |
| 4082 | return Changed; |
| 4083 | |
| 4084 | ConstantInt *AlignOpC = dyn_cast<ConstantInt>(Val: Alignment); |
| 4085 | if (AlignOpC && AlignOpC->getValue().ult(RHS: llvm::Value::MaximumAlignment)) { |
| 4086 | uint64_t AlignmentVal = AlignOpC->getZExtValue(); |
| 4087 | if (llvm::isPowerOf2_64(Value: AlignmentVal)) { |
| 4088 | Align ExistingAlign = Call.getRetAlign().valueOrOne(); |
| 4089 | Align NewAlign = Align(AlignmentVal); |
| 4090 | if (NewAlign > ExistingAlign) { |
| 4091 | Call.addRetAttr( |
| 4092 | Attr: Attribute::getWithAlignment(Context&: Call.getContext(), Alignment: NewAlign)); |
| 4093 | Changed = true; |
| 4094 | } |
| 4095 | } |
| 4096 | } |
| 4097 | return Changed; |
| 4098 | } |
| 4099 | |
| 4100 | /// Improvements for call, callbr and invoke instructions. |
| 4101 | Instruction *InstCombinerImpl::visitCallBase(CallBase &Call) { |
| 4102 | bool Changed = annotateAnyAllocSite(Call, TLI: &TLI); |
| 4103 | |
| 4104 | // Mark any parameters that are known to be non-null with the nonnull |
| 4105 | // attribute. This is helpful for inlining calls to functions with null |
| 4106 | // checks on their arguments. |
| 4107 | SmallVector<unsigned, 4> ArgNos; |
| 4108 | unsigned ArgNo = 0; |
| 4109 | |
| 4110 | for (Value *V : Call.args()) { |
| 4111 | if (V->getType()->isPointerTy()) { |
| 4112 | // Simplify the nonnull operand if the parameter is known to be nonnull. |
| 4113 | // Otherwise, try to infer nonnull for it. |
| 4114 | bool HasDereferenceable = Call.getParamDereferenceableBytes(i: ArgNo) > 0; |
| 4115 | if (Call.paramHasAttr(ArgNo, Kind: Attribute::NonNull) || |
| 4116 | (HasDereferenceable && |
| 4117 | !NullPointerIsDefined(F: Call.getFunction(), |
| 4118 | AS: V->getType()->getPointerAddressSpace()))) { |
| 4119 | if (Value *Res = simplifyNonNullOperand(V, HasDereferenceable)) { |
| 4120 | replaceOperand(I&: Call, OpNum: ArgNo, V: Res); |
| 4121 | Changed = true; |
| 4122 | } |
| 4123 | } else if (isKnownNonZero(V, |
| 4124 | Q: getSimplifyQuery().getWithInstruction(I: &Call))) { |
| 4125 | ArgNos.push_back(Elt: ArgNo); |
| 4126 | } |
| 4127 | } |
| 4128 | ArgNo++; |
| 4129 | } |
| 4130 | |
| 4131 | assert(ArgNo == Call.arg_size() && "Call arguments not processed correctly.");
| 4132 | |
| 4133 | if (!ArgNos.empty()) { |
| 4134 | AttributeList AS = Call.getAttributes(); |
| 4135 | LLVMContext &Ctx = Call.getContext(); |
| 4136 | AS = AS.addParamAttribute(C&: Ctx, ArgNos, |
| 4137 | A: Attribute::get(Context&: Ctx, Kind: Attribute::NonNull)); |
| 4138 | Call.setAttributes(AS); |
| 4139 | Changed = true; |
| 4140 | } |
| 4141 | |
| 4142 | // If the callee is a pointer to a function, attempt to move any casts to the |
| 4143 | // arguments of the call/callbr/invoke. |
| 4144 | Value *Callee = Call.getCalledOperand(); |
| 4145 | Function *CalleeF = dyn_cast<Function>(Val: Callee); |
| 4146 | if ((!CalleeF || CalleeF->getFunctionType() != Call.getFunctionType()) && |
| 4147 | transformConstExprCastCall(Call)) |
| 4148 | return nullptr; |
| 4149 | |
| 4150 | if (CalleeF) { |
| 4151 | // Remove the convergent attr on calls when the callee is not convergent. |
| 4152 | if (Call.isConvergent() && !CalleeF->isConvergent() && |
| 4153 | !CalleeF->isIntrinsic()) { |
| 4154 | LLVM_DEBUG(dbgs() << "Removing convergent attr from instr " << Call |
| 4155 | << "\n" ); |
| 4156 | Call.setNotConvergent(); |
| 4157 | return &Call; |
| 4158 | } |
| 4159 | |
| 4160 | // If the call and callee calling conventions don't match, and neither one
| 4161 | // of the calling conventions is compatible with the C calling convention,
| 4162 | // this call must be unreachable, as the call is undefined.
| 4163 | if ((CalleeF->getCallingConv() != Call.getCallingConv() && |
| 4164 | !(CalleeF->getCallingConv() == llvm::CallingConv::C && |
| 4165 | TargetLibraryInfoImpl::isCallingConvCCompatible(CI: &Call)) && |
| 4166 | !(Call.getCallingConv() == llvm::CallingConv::C && |
| 4167 | TargetLibraryInfoImpl::isCallingConvCCompatible(Callee: CalleeF))) && |
| 4168 | // Only do this for calls to a function with a body. A prototype may |
| 4169 | // not actually end up matching the implementation's calling conv for a |
| 4170 | // variety of reasons (e.g. it may be written in assembly). |
| 4171 | !CalleeF->isDeclaration()) { |
| 4172 | Instruction *OldCall = &Call; |
| 4173 | CreateNonTerminatorUnreachable(InsertAt: OldCall); |
| 4174 | // If OldCall does not return void, replace its uses with poison.
| 4175 | // This allows ValueHandles and custom metadata to adjust themselves.
| 4176 | if (!OldCall->getType()->isVoidTy()) |
| 4177 | replaceInstUsesWith(I&: *OldCall, V: PoisonValue::get(T: OldCall->getType())); |
| 4178 | if (isa<CallInst>(Val: OldCall)) |
| 4179 | return eraseInstFromFunction(I&: *OldCall); |
| 4180 | |
| 4181 | // We cannot remove an invoke or a callbr, because it would change the
| 4182 | // CFG; just change the callee to a null pointer.
| 4183 | cast<CallBase>(Val: OldCall)->setCalledFunction( |
| 4184 | FTy: CalleeF->getFunctionType(), |
| 4185 | Fn: Constant::getNullValue(Ty: CalleeF->getType())); |
| 4186 | return nullptr; |
| 4187 | } |
| 4188 | } |
| 4189 | |
| 4190 | // Calling a null function pointer is undefined if a null address isn't |
| 4191 | // dereferenceable. |
| 4192 | if ((isa<ConstantPointerNull>(Val: Callee) && |
| 4193 | !NullPointerIsDefined(F: Call.getFunction())) || |
| 4194 | isa<UndefValue>(Val: Callee)) { |
| 4195 | // If Call does not return void, replace its uses with poison.
| 4196 | // This allows ValueHandles and custom metadata to adjust themselves.
| 4197 | if (!Call.getType()->isVoidTy()) |
| 4198 | replaceInstUsesWith(I&: Call, V: PoisonValue::get(T: Call.getType())); |
| 4199 | |
| 4200 | if (Call.isTerminator()) { |
| 4201 | // Can't remove an invoke or callbr because we cannot change the CFG. |
| 4202 | return nullptr; |
| 4203 | } |
| 4204 | |
| 4205 | // This instruction is not reachable, just remove it. |
| 4206 | CreateNonTerminatorUnreachable(InsertAt: &Call); |
| 4207 | return eraseInstFromFunction(I&: Call); |
| 4208 | } |
| 4209 | |
| 4210 | if (IntrinsicInst *II = findInitTrampoline(Callee)) |
| 4211 | return transformCallThroughTrampoline(Call, Tramp&: *II); |
| 4212 | |
| 4213 | if (isa<InlineAsm>(Val: Callee) && !Call.doesNotThrow()) { |
| 4214 | InlineAsm *IA = cast<InlineAsm>(Val: Callee); |
| 4215 | if (!IA->canThrow()) { |
| 4216 | // Normal inline asm calls cannot throw - mark them |
| 4217 | // 'nounwind'. |
| 4218 | Call.setDoesNotThrow(); |
| 4219 | Changed = true; |
| 4220 | } |
| 4221 | } |
| 4222 | |
| 4223 | // Try to optimize the call if possible, we require DataLayout for most of |
| 4224 | // this. None of these calls are seen as possibly dead so go ahead and |
| 4225 | // delete the instruction now. |
| 4226 | if (CallInst *CI = dyn_cast<CallInst>(Val: &Call)) { |
| 4227 | Instruction *I = tryOptimizeCall(CI); |
| 4228 | // If we changed something, return the result; otherwise fall through
| 4229 | // to the remaining checks below.
| 4230 | if (I) return eraseInstFromFunction(I&: *I); |
| 4231 | } |
| 4232 | |
| 4233 | if (!Call.use_empty() && !Call.isMustTailCall()) |
| 4234 | if (Value *ReturnedArg = Call.getReturnedArgOperand()) { |
| 4235 | Type *CallTy = Call.getType(); |
| 4236 | Type *RetArgTy = ReturnedArg->getType(); |
| 4237 | if (RetArgTy->canLosslesslyBitCastTo(Ty: CallTy)) |
| 4238 | return replaceInstUsesWith( |
| 4239 | I&: Call, V: Builder.CreateBitOrPointerCast(V: ReturnedArg, DestTy: CallTy)); |
| 4240 | } |
| 4241 | |
| 4242 | // Drop unnecessary kcfi operand bundles from calls that were converted |
| 4243 | // into direct calls. |
| 4244 | auto Bundle = Call.getOperandBundle(ID: LLVMContext::OB_kcfi); |
| 4245 | if (Bundle && !Call.isIndirectCall()) { |
| 4246 | DEBUG_WITH_TYPE(DEBUG_TYPE "-kcfi" , { |
| 4247 | if (CalleeF) { |
| 4248 | ConstantInt *FunctionType = nullptr; |
| 4249 | ConstantInt *ExpectedType = cast<ConstantInt>(Bundle->Inputs[0]); |
| 4250 | |
| 4251 | if (MDNode *MD = CalleeF->getMetadata(LLVMContext::MD_kcfi_type)) |
| 4252 | FunctionType = mdconst::extract<ConstantInt>(MD->getOperand(0)); |
| 4253 | |
| 4254 | if (FunctionType && |
| 4255 | FunctionType->getZExtValue() != ExpectedType->getZExtValue()) |
| 4256 | dbgs() << Call.getModule()->getName() |
| 4257 | << ": warning: kcfi: " << Call.getCaller()->getName() |
| 4258 | << ": call to " << CalleeF->getName() |
| 4259 | << " using a mismatching function pointer type\n" ; |
| 4260 | } |
| 4261 | }); |
| 4262 | |
| 4263 | return CallBase::removeOperandBundle(CB: &Call, ID: LLVMContext::OB_kcfi); |
| 4264 | } |
| 4265 | |
| 4266 | if (isRemovableAlloc(V: &Call, TLI: &TLI)) |
| 4267 | return visitAllocSite(FI&: Call); |
| 4268 | |
| 4269 | // Handle intrinsics which can be used in both call and invoke context. |
| 4270 | switch (Call.getIntrinsicID()) { |
| 4271 | case Intrinsic::experimental_gc_statepoint: { |
| 4272 | GCStatepointInst &GCSP = *cast<GCStatepointInst>(Val: &Call); |
| 4273 | SmallPtrSet<Value *, 32> LiveGcValues; |
| 4274 | for (const GCRelocateInst *Reloc : GCSP.getGCRelocates()) { |
| 4275 | GCRelocateInst &GCR = *const_cast<GCRelocateInst *>(Reloc); |
| 4276 | |
| 4277 | // Remove the relocation if unused. |
| 4278 | if (GCR.use_empty()) { |
| 4279 | eraseInstFromFunction(I&: GCR); |
| 4280 | continue; |
| 4281 | } |
| 4282 | |
| 4283 | Value *DerivedPtr = GCR.getDerivedPtr(); |
| 4284 | Value *BasePtr = GCR.getBasePtr(); |
| 4285 | |
| 4286 | // Undef is undef, even after relocation. |
| 4287 | if (isa<UndefValue>(Val: DerivedPtr) || isa<UndefValue>(Val: BasePtr)) { |
| 4288 | replaceInstUsesWith(I&: GCR, V: UndefValue::get(T: GCR.getType())); |
| 4289 | eraseInstFromFunction(I&: GCR); |
| 4290 | continue; |
| 4291 | } |
| 4292 | |
| 4293 | if (auto *PT = dyn_cast<PointerType>(Val: GCR.getType())) { |
| 4294 | // The relocation of null will be null for most any collector. |
| 4295 | // TODO: provide a hook for this in GCStrategy. There might be some |
| 4296 | // weird collector this property does not hold for. |
| 4297 | if (isa<ConstantPointerNull>(Val: DerivedPtr)) { |
| 4298 | // Use null-pointer of gc_relocate's type to replace it. |
| 4299 | replaceInstUsesWith(I&: GCR, V: ConstantPointerNull::get(T: PT)); |
| 4300 | eraseInstFromFunction(I&: GCR); |
| 4301 | continue; |
| 4302 | } |
| 4303 | |
| 4304 | // isKnownNonNull -> nonnull attribute |
| 4305 | if (!GCR.hasRetAttr(Kind: Attribute::NonNull) && |
| 4306 | isKnownNonZero(V: DerivedPtr, |
| 4307 | Q: getSimplifyQuery().getWithInstruction(I: &Call))) { |
| 4308 | GCR.addRetAttr(Kind: Attribute::NonNull); |
| 4309 | // We discovered new fact, re-check users. |
| 4310 | Worklist.pushUsersToWorkList(I&: GCR); |
| 4311 | } |
| 4312 | } |
| 4313 | |
| 4314 | // If we have two copies of the same pointer in the statepoint argument |
| 4315 | // list, canonicalize to one. This may let us common gc.relocates. |
| 4316 | if (GCR.getBasePtr() == GCR.getDerivedPtr() && |
| 4317 | GCR.getBasePtrIndex() != GCR.getDerivedPtrIndex()) { |
| 4318 | auto *OpIntTy = GCR.getOperand(i_nocapture: 2)->getType(); |
| 4319 | GCR.setOperand(i_nocapture: 2, Val_nocapture: ConstantInt::get(Ty: OpIntTy, V: GCR.getBasePtrIndex())); |
| 4320 | } |
| 4321 | |
| 4322 | // TODO: bitcast(relocate(p)) -> relocate(bitcast(p)) |
| 4323 | // Canonicalize on the type from the uses to the defs |
| 4324 | |
| 4325 | // TODO: relocate((gep p, C, C2, ...)) -> gep(relocate(p), C, C2, ...) |
| 4326 | LiveGcValues.insert(Ptr: BasePtr); |
| 4327 | LiveGcValues.insert(Ptr: DerivedPtr); |
| 4328 | } |
| 4329 | std::optional<OperandBundleUse> Bundle = |
| 4330 | GCSP.getOperandBundle(ID: LLVMContext::OB_gc_live); |
| 4331 | unsigned NumOfGCLives = LiveGcValues.size(); |
| 4332 | if (!Bundle || NumOfGCLives == Bundle->Inputs.size()) |
| 4333 | break; |
| 4334 | // We can reduce the size of the gc-live bundle.
| 4335 | DenseMap<Value *, unsigned> Val2Idx; |
| 4336 | std::vector<Value *> NewLiveGc; |
| 4337 | for (Value *V : Bundle->Inputs) { |
| 4338 | auto [It, Inserted] = Val2Idx.try_emplace(Key: V); |
| 4339 | if (!Inserted) |
| 4340 | continue; |
| 4341 | if (LiveGcValues.count(Ptr: V)) { |
| 4342 | It->second = NewLiveGc.size(); |
| 4343 | NewLiveGc.push_back(x: V); |
| 4344 | } else |
| 4345 | It->second = NumOfGCLives; |
| 4346 | } |
| 4347 | // Update all gc.relocates |
| 4348 | for (const GCRelocateInst *Reloc : GCSP.getGCRelocates()) { |
| 4349 | GCRelocateInst &GCR = *const_cast<GCRelocateInst *>(Reloc); |
| 4350 | Value *BasePtr = GCR.getBasePtr(); |
| 4351 | assert(Val2Idx.count(BasePtr) && Val2Idx[BasePtr] != NumOfGCLives && |
| 4352 | "Missed live gc for base pointer" ); |
| 4353 | auto *OpIntTy1 = GCR.getOperand(i_nocapture: 1)->getType(); |
| 4354 | GCR.setOperand(i_nocapture: 1, Val_nocapture: ConstantInt::get(Ty: OpIntTy1, V: Val2Idx[BasePtr])); |
| 4355 | Value *DerivedPtr = GCR.getDerivedPtr(); |
| 4356 | assert(Val2Idx.count(DerivedPtr) && Val2Idx[DerivedPtr] != NumOfGCLives && |
| 4357 | "Missed live gc for derived pointer" ); |
| 4358 | auto *OpIntTy2 = GCR.getOperand(i_nocapture: 2)->getType(); |
| 4359 | GCR.setOperand(i_nocapture: 2, Val_nocapture: ConstantInt::get(Ty: OpIntTy2, V: Val2Idx[DerivedPtr])); |
| 4360 | } |
| 4361 | // Create new statepoint instruction. |
| 4362 | OperandBundleDef NewBundle("gc-live" , NewLiveGc); |
| 4363 | return CallBase::Create(CB: &Call, Bundle: NewBundle); |
| 4364 | } |
| 4365 | default: { break; } |
| 4366 | } |
| 4367 | |
| 4368 | return Changed ? &Call : nullptr; |
| 4369 | } |
| 4370 | |
| 4371 | /// If the callee is a constexpr cast of a function, attempt to move the cast to |
| 4372 | /// the arguments of the call/invoke. |
| 4373 | /// CallBrInst is not supported. |
| 4374 | bool InstCombinerImpl::transformConstExprCastCall(CallBase &Call) { |
| 4375 | auto *Callee = |
| 4376 | dyn_cast<Function>(Val: Call.getCalledOperand()->stripPointerCasts()); |
| 4377 | if (!Callee) |
| 4378 | return false; |
| 4379 | |
| 4380 | assert(!isa<CallBrInst>(Call) && |
| 4381 | "CallBr's don't have a single point after a def to insert at" ); |
| 4382 | |
| 4383 | // Don't perform the transform for declarations, which may not be fully |
| 4384 | // accurate. For example, void @foo() is commonly used as a placeholder for |
| 4385 | // unknown prototypes. |
| 4386 | if (Callee->isDeclaration()) |
| 4387 | return false; |
| 4388 | |
| 4389 | // If this is a call to a thunk function, don't remove the cast. Thunks are |
| 4390 | // used to transparently forward all incoming parameters and outgoing return |
| 4391 | // values, so it's important to leave the cast in place. |
| 4392 | if (Callee->hasFnAttribute(Kind: "thunk" )) |
| 4393 | return false; |
| 4394 | |
| 4395 | // If this is a call to a naked function, the assembly might be using an
| 4396 | // argument or otherwise relying on the frame layout, so a mismatched
| 4397 | // function prototype cannot be fixed up here.
| 4398 | if (Callee->hasFnAttribute(Kind: Attribute::Naked)) |
| 4399 | return false; |
| 4400 | |
| 4401 | // If this is a musttail call, the callee's prototype must match the caller's |
| 4402 | // prototype with the exception of pointee types. The code below doesn't |
| 4403 | // implement that, so we can't do this transform. |
| 4404 | // TODO: Do the transform if it only requires adding pointer casts. |
| 4405 | if (Call.isMustTailCall()) |
| 4406 | return false; |
| 4407 | |
| 4408 | Instruction *Caller = &Call; |
| 4409 | const AttributeList &CallerPAL = Call.getAttributes(); |
| 4410 | |
| 4411 | // Okay, this is a cast from a function to a different type. Unless doing so |
| 4412 | // would cause a type conversion of one of our arguments, change this call to |
| 4413 | // be a direct call with arguments casted to the appropriate types. |
| 4414 | FunctionType *FT = Callee->getFunctionType(); |
| 4415 | Type *OldRetTy = Caller->getType(); |
| 4416 | Type *NewRetTy = FT->getReturnType(); |
| 4417 | |
| 4418 | // Check to see if we are changing the return type... |
| 4419 | if (OldRetTy != NewRetTy) { |
| 4420 | |
| 4421 | if (NewRetTy->isStructTy()) |
| 4422 | return false; // TODO: Handle multiple return values. |
| 4423 | |
| 4424 | if (!CastInst::isBitOrNoopPointerCastable(SrcTy: NewRetTy, DestTy: OldRetTy, DL)) { |
| 4425 | if (!Caller->use_empty()) |
| 4426 | return false; // Cannot transform this return value. |
| 4427 | } |
| 4428 | |
| 4429 | if (!CallerPAL.isEmpty() && !Caller->use_empty()) { |
| 4430 | AttrBuilder RAttrs(FT->getContext(), CallerPAL.getRetAttrs()); |
| 4431 | if (RAttrs.overlaps(AM: AttributeFuncs::typeIncompatible( |
| 4432 | Ty: NewRetTy, AS: CallerPAL.getRetAttrs()))) |
| 4433 | return false; // Attribute not compatible with transformed value. |
| 4434 | } |
| 4435 | |
| 4436 | // If the callbase is an invoke instruction, and the return value is |
| 4437 | // used by a PHI node in a successor, we cannot change the return type of |
| 4438 | // the call because there is no place to put the cast instruction (without |
| 4439 | // breaking the critical edge). Bail out in this case. |
| 4440 | if (!Caller->use_empty()) { |
| 4441 | BasicBlock *PhisNotSupportedBlock = nullptr; |
| 4442 | if (auto *II = dyn_cast<InvokeInst>(Val: Caller)) |
| 4443 | PhisNotSupportedBlock = II->getNormalDest(); |
| 4444 | if (PhisNotSupportedBlock) |
| 4445 | for (User *U : Caller->users()) |
| 4446 | if (PHINode *PN = dyn_cast<PHINode>(Val: U)) |
| 4447 | if (PN->getParent() == PhisNotSupportedBlock) |
| 4448 | return false; |
| 4449 | } |
| 4450 | } |
| 4451 | |
| 4452 | unsigned NumActualArgs = Call.arg_size(); |
| 4453 | unsigned NumCommonArgs = std::min(a: FT->getNumParams(), b: NumActualArgs); |
| 4454 | |
| 4455 | // Prevent us turning: |
| 4456 | // declare void @takes_i32_inalloca(i32* inalloca) |
| 4457 | // call void bitcast (void (i32*)* @takes_i32_inalloca to void (i32)*)(i32 0) |
| 4458 | // |
| 4459 | // into: |
| 4460 | // call void @takes_i32_inalloca(i32* null) |
| 4461 | // |
| 4462 | // Similarly, avoid folding away bitcasts of byval calls. |
| 4463 | if (Callee->getAttributes().hasAttrSomewhere(Kind: Attribute::InAlloca) || |
| 4464 | Callee->getAttributes().hasAttrSomewhere(Kind: Attribute::Preallocated)) |
| 4465 | return false; |
| 4466 | |
| 4467 | auto AI = Call.arg_begin(); |
| 4468 | for (unsigned i = 0, e = NumCommonArgs; i != e; ++i, ++AI) { |
| 4469 | Type *ParamTy = FT->getParamType(i); |
| 4470 | Type *ActTy = (*AI)->getType(); |
| 4471 | |
| 4472 | if (!CastInst::isBitOrNoopPointerCastable(SrcTy: ActTy, DestTy: ParamTy, DL)) |
| 4473 | return false; // Cannot transform this parameter value. |
| 4474 | |
| 4475 | // Check if there are any incompatible attributes we cannot drop safely. |
| 4476 | if (AttrBuilder(FT->getContext(), CallerPAL.getParamAttrs(ArgNo: i)) |
| 4477 | .overlaps(AM: AttributeFuncs::typeIncompatible( |
| 4478 | Ty: ParamTy, AS: CallerPAL.getParamAttrs(ArgNo: i), |
| 4479 | ASK: AttributeFuncs::ASK_UNSAFE_TO_DROP))) |
| 4480 | return false; // Attribute not compatible with transformed value. |
| 4481 | |
| 4482 | if (Call.isInAllocaArgument(ArgNo: i) || |
| 4483 | CallerPAL.hasParamAttr(ArgNo: i, Kind: Attribute::Preallocated)) |
| 4484 | return false; // Cannot transform to and from inalloca/preallocated. |
| 4485 | |
| 4486 | if (CallerPAL.hasParamAttr(ArgNo: i, Kind: Attribute::SwiftError)) |
| 4487 | return false; |
| 4488 | |
| 4489 | if (CallerPAL.hasParamAttr(ArgNo: i, Kind: Attribute::ByVal) != |
| 4490 | Callee->getAttributes().hasParamAttr(ArgNo: i, Kind: Attribute::ByVal)) |
| 4491 | return false; // Cannot transform to or from byval. |
| 4492 | } |
| 4493 | |
| 4494 | if (FT->getNumParams() < NumActualArgs && FT->isVarArg() && |
| 4495 | !CallerPAL.isEmpty()) { |
| 4496 | // In this case we have more arguments than the new function type, but we |
| 4497 | // won't be dropping them. Check that these extra arguments have attributes |
| 4498 | // that are compatible with being a vararg call argument. |
| 4499 | unsigned SRetIdx; |
| 4500 | if (CallerPAL.hasAttrSomewhere(Kind: Attribute::StructRet, Index: &SRetIdx) && |
| 4501 | SRetIdx - AttributeList::FirstArgIndex >= FT->getNumParams()) |
| 4502 | return false; |
| 4503 | } |
| 4504 | |
| 4505 | // Okay, we decided that this is a safe thing to do: go ahead and start |
| 4506 | // inserting cast instructions as necessary. |
| 4507 | SmallVector<Value *, 8> Args; |
| 4508 | SmallVector<AttributeSet, 8> ArgAttrs; |
| 4509 | Args.reserve(N: NumActualArgs); |
| 4510 | ArgAttrs.reserve(N: NumActualArgs); |
| 4511 | |
| 4512 | // Get any return attributes. |
| 4513 | AttrBuilder RAttrs(FT->getContext(), CallerPAL.getRetAttrs()); |
| 4514 | |
| 4515 | // If the return value is not being used, the type may not be compatible |
| 4516 | // with the existing attributes. Wipe out any problematic attributes. |
| 4517 | RAttrs.remove( |
| 4518 | AM: AttributeFuncs::typeIncompatible(Ty: NewRetTy, AS: CallerPAL.getRetAttrs())); |
| 4519 | |
| 4520 | LLVMContext &Ctx = Call.getContext(); |
| 4521 | AI = Call.arg_begin(); |
| 4522 | for (unsigned i = 0; i != NumCommonArgs; ++i, ++AI) { |
| 4523 | Type *ParamTy = FT->getParamType(i); |
| 4524 | |
| 4525 | Value *NewArg = *AI; |
| 4526 | if ((*AI)->getType() != ParamTy) |
| 4527 | NewArg = Builder.CreateBitOrPointerCast(V: *AI, DestTy: ParamTy); |
| 4528 | Args.push_back(Elt: NewArg); |
| 4529 | |
| 4530 | // Add any parameter attributes except the ones incompatible with the new |
| 4531 | // type. Note that we made sure all incompatible ones are safe to drop. |
| 4532 | AttributeMask IncompatibleAttrs = AttributeFuncs::typeIncompatible( |
| 4533 | Ty: ParamTy, AS: CallerPAL.getParamAttrs(ArgNo: i), ASK: AttributeFuncs::ASK_SAFE_TO_DROP); |
| 4534 | ArgAttrs.push_back( |
| 4535 | Elt: CallerPAL.getParamAttrs(ArgNo: i).removeAttributes(C&: Ctx, AttrsToRemove: IncompatibleAttrs)); |
| 4536 | } |
| 4537 | |
| 4538 | // If the function takes more arguments than the call was taking, add them |
| 4539 | // now. |
| 4540 | for (unsigned i = NumCommonArgs; i != FT->getNumParams(); ++i) { |
| 4541 | Args.push_back(Elt: Constant::getNullValue(Ty: FT->getParamType(i))); |
| 4542 | ArgAttrs.push_back(Elt: AttributeSet()); |
| 4543 | } |
| 4544 | |
| 4545 | // If the call passes more arguments than the function type declares, forward the extras for vararg callees.
| 4546 | if (FT->getNumParams() < NumActualArgs) { |
| 4547 | // TODO: if (!FT->isVarArg()) this call may be unreachable. PR14722 |
| 4548 | if (FT->isVarArg()) { |
| 4549 | // Add all of the arguments in their promoted form to the arg list. |
| 4550 | for (unsigned i = FT->getNumParams(); i != NumActualArgs; ++i, ++AI) { |
| 4551 | Type *PTy = getPromotedType(Ty: (*AI)->getType()); |
| 4552 | Value *NewArg = *AI; |
| 4553 | if (PTy != (*AI)->getType()) { |
| 4554 | // Must promote to pass through va_arg area! |
| 4555 | Instruction::CastOps opcode = |
| 4556 | CastInst::getCastOpcode(Val: *AI, SrcIsSigned: false, Ty: PTy, DstIsSigned: false); |
| 4557 | NewArg = Builder.CreateCast(Op: opcode, V: *AI, DestTy: PTy); |
| 4558 | } |
| 4559 | Args.push_back(Elt: NewArg); |
| 4560 | |
| 4561 | // Add any parameter attributes. |
| 4562 | ArgAttrs.push_back(Elt: CallerPAL.getParamAttrs(ArgNo: i)); |
| 4563 | } |
| 4564 | } |
| 4565 | } |
| 4566 | |
| 4567 | AttributeSet FnAttrs = CallerPAL.getFnAttrs(); |
| 4568 | |
| 4569 | if (NewRetTy->isVoidTy()) |
| 4570 | Caller->setName("" ); // Void type should not have a name. |
| 4571 | |
| 4572 | assert((ArgAttrs.size() == FT->getNumParams() || FT->isVarArg()) && |
| 4573 | "missing argument attributes" ); |
| 4574 | AttributeList NewCallerPAL = AttributeList::get( |
| 4575 | C&: Ctx, FnAttrs, RetAttrs: AttributeSet::get(C&: Ctx, B: RAttrs), ArgAttrs); |
| 4576 | |
| 4577 | SmallVector<OperandBundleDef, 1> OpBundles; |
| 4578 | Call.getOperandBundlesAsDefs(Defs&: OpBundles); |
| 4579 | |
| 4580 | CallBase *NewCall; |
| 4581 | if (InvokeInst *II = dyn_cast<InvokeInst>(Val: Caller)) { |
| 4582 | NewCall = Builder.CreateInvoke(Callee, NormalDest: II->getNormalDest(), |
| 4583 | UnwindDest: II->getUnwindDest(), Args, OpBundles); |
| 4584 | } else { |
| 4585 | NewCall = Builder.CreateCall(Callee, Args, OpBundles); |
| 4586 | cast<CallInst>(Val: NewCall)->setTailCallKind( |
| 4587 | cast<CallInst>(Val: Caller)->getTailCallKind()); |
| 4588 | } |
| 4589 | NewCall->takeName(V: Caller); |
| 4590 | NewCall->setCallingConv(Call.getCallingConv()); |
| 4591 | NewCall->setAttributes(NewCallerPAL); |
| 4592 | |
| 4593 | // Preserve prof metadata if any. |
| 4594 | NewCall->copyMetadata(SrcInst: *Caller, WL: {LLVMContext::MD_prof}); |
| 4595 | |
| 4596 | // Insert a cast of the return type as necessary. |
| 4597 | Instruction *NC = NewCall; |
| 4598 | Value *NV = NC; |
| 4599 | if (OldRetTy != NV->getType() && !Caller->use_empty()) { |
| 4600 | assert(!NV->getType()->isVoidTy()); |
| 4601 | NV = NC = CastInst::CreateBitOrPointerCast(S: NC, Ty: OldRetTy); |
| 4602 | NC->setDebugLoc(Caller->getDebugLoc()); |
| 4603 | |
| 4604 | auto OptInsertPt = NewCall->getInsertionPointAfterDef(); |
| 4605 | assert(OptInsertPt && "No place to insert cast" ); |
| 4606 | InsertNewInstBefore(New: NC, Old: *OptInsertPt); |
| 4607 | Worklist.pushUsersToWorkList(I&: *Caller); |
| 4608 | } |
| 4609 | |
| 4610 | if (!Caller->use_empty()) |
| 4611 | replaceInstUsesWith(I&: *Caller, V: NV); |
| 4612 | else if (Caller->hasValueHandle()) { |
| 4613 | if (OldRetTy == NV->getType()) |
| 4614 | ValueHandleBase::ValueIsRAUWd(Old: Caller, New: NV); |
| 4615 | else |
| 4616 | // We cannot call ValueIsRAUWd with a different type, and the |
| 4617 | // actual tracked value will disappear. |
| 4618 | ValueHandleBase::ValueIsDeleted(V: Caller); |
| 4619 | } |
| 4620 | |
| 4621 | eraseInstFromFunction(I&: *Caller); |
| 4622 | return true; |
| 4623 | } |
| 4624 | |
| 4625 | /// Turn a call to a function created by init_trampoline / adjust_trampoline |
| 4626 | /// intrinsic pair into a direct call to the underlying function. |
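|      | /// For example (illustrative):
|      | ///   call void @llvm.init.trampoline(ptr %tramp, ptr @f, ptr %nest_val)
|      | ///   %p = call ptr @llvm.adjust.trampoline(ptr %tramp)
|      | ///   call void %p(i32 %x)
|      | /// becomes a direct call to @f, with %nest_val spliced into the position
|      | /// of @f's 'nest' parameter (if it has one).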
| 4627 | Instruction * |
| 4628 | InstCombinerImpl::transformCallThroughTrampoline(CallBase &Call, |
| 4629 | IntrinsicInst &Tramp) { |
| 4630 | FunctionType *FTy = Call.getFunctionType(); |
| 4631 | AttributeList Attrs = Call.getAttributes(); |
| 4632 | |
| 4633 | // If the call already has the 'nest' attribute somewhere then give up - |
| 4634 | // otherwise 'nest' would occur twice after splicing in the chain. |
| 4635 | if (Attrs.hasAttrSomewhere(Kind: Attribute::Nest)) |
| 4636 | return nullptr; |
| 4637 | |
| 4638 | Function *NestF = cast<Function>(Val: Tramp.getArgOperand(i: 1)->stripPointerCasts()); |
| 4639 | FunctionType *NestFTy = NestF->getFunctionType(); |
| 4640 | |
| 4641 | AttributeList NestAttrs = NestF->getAttributes(); |
| 4642 | if (!NestAttrs.isEmpty()) { |
| 4643 | unsigned NestArgNo = 0; |
| 4644 | Type *NestTy = nullptr; |
| 4645 | AttributeSet NestAttr; |
| 4646 | |
| 4647 | // Look for a parameter marked with the 'nest' attribute. |
| 4648 | for (FunctionType::param_iterator I = NestFTy->param_begin(), |
| 4649 | E = NestFTy->param_end(); |
| 4650 | I != E; ++NestArgNo, ++I) { |
| 4651 | AttributeSet AS = NestAttrs.getParamAttrs(ArgNo: NestArgNo); |
| 4652 | if (AS.hasAttribute(Kind: Attribute::Nest)) { |
| 4653 | // Record the parameter type and any other attributes. |
| 4654 | NestTy = *I; |
| 4655 | NestAttr = AS; |
| 4656 | break; |
| 4657 | } |
| 4658 | } |
| 4659 | |
| 4660 | if (NestTy) { |
| 4661 | std::vector<Value*> NewArgs; |
| 4662 | std::vector<AttributeSet> NewArgAttrs; |
| 4663 | NewArgs.reserve(n: Call.arg_size() + 1); |
| 4664 | NewArgAttrs.reserve(n: Call.arg_size()); |
| 4665 | |
| 4666 | // Insert the nest argument into the call argument list, which may |
| 4667 | // mean appending it. Likewise for attributes. |
| 4668 | |
| 4669 | { |
| 4670 | unsigned ArgNo = 0; |
| 4671 | auto I = Call.arg_begin(), E = Call.arg_end(); |
| 4672 | do { |
| 4673 | if (ArgNo == NestArgNo) { |
| 4674 | // Add the chain argument and attributes. |
| 4675 | Value *NestVal = Tramp.getArgOperand(i: 2); |
| 4676 | if (NestVal->getType() != NestTy) |
| 4677 | NestVal = Builder.CreateBitCast(V: NestVal, DestTy: NestTy, Name: "nest" ); |
| 4678 | NewArgs.push_back(x: NestVal); |
| 4679 | NewArgAttrs.push_back(x: NestAttr); |
| 4680 | } |
| 4681 | |
| 4682 | if (I == E) |
| 4683 | break; |
| 4684 | |
| 4685 | // Add the original argument and attributes. |
| 4686 | NewArgs.push_back(x: *I); |
| 4687 | NewArgAttrs.push_back(x: Attrs.getParamAttrs(ArgNo)); |
| 4688 | |
| 4689 | ++ArgNo; |
| 4690 | ++I; |
| 4691 | } while (true); |
| 4692 | } |
| 4693 | |
| 4694 | // The trampoline may have been bitcast to a bogus type (FTy). |
| 4695 | // Handle this by synthesizing a new function type, equal to FTy |
| 4696 | // with the chain parameter inserted. |
| 4697 | |
| 4698 | std::vector<Type*> NewTypes; |
| 4699 | NewTypes.reserve(n: FTy->getNumParams()+1); |
| 4700 | |
| 4701 | // Insert the chain's type into the list of parameter types, which may |
| 4702 | // mean appending it. |
| 4703 | { |
| 4704 | unsigned ArgNo = 0; |
| 4705 | FunctionType::param_iterator I = FTy->param_begin(), |
| 4706 | E = FTy->param_end(); |
| 4707 | |
| 4708 | do { |
| 4709 | if (ArgNo == NestArgNo) |
| 4710 | // Add the chain's type. |
| 4711 | NewTypes.push_back(x: NestTy); |
| 4712 | |
| 4713 | if (I == E) |
| 4714 | break; |
| 4715 | |
| 4716 | // Add the original type. |
| 4717 | NewTypes.push_back(x: *I); |
| 4718 | |
| 4719 | ++ArgNo; |
| 4720 | ++I; |
| 4721 | } while (true); |
| 4722 | } |
| 4723 | |
| 4724 | // Replace the trampoline call with a direct call. Let the generic |
| 4725 | // code sort out any function type mismatches. |
| 4726 | FunctionType *NewFTy = |
| 4727 | FunctionType::get(Result: FTy->getReturnType(), Params: NewTypes, isVarArg: FTy->isVarArg()); |
| 4728 | AttributeList NewPAL = |
| 4729 | AttributeList::get(C&: FTy->getContext(), FnAttrs: Attrs.getFnAttrs(), |
| 4730 | RetAttrs: Attrs.getRetAttrs(), ArgAttrs: NewArgAttrs); |
| 4731 | |
| 4732 | SmallVector<OperandBundleDef, 1> OpBundles; |
| 4733 | Call.getOperandBundlesAsDefs(Defs&: OpBundles); |
| 4734 | |
| 4735 | Instruction *NewCaller; |
| 4736 | if (InvokeInst *II = dyn_cast<InvokeInst>(Val: &Call)) { |
| 4737 | NewCaller = InvokeInst::Create(Ty: NewFTy, Func: NestF, IfNormal: II->getNormalDest(), |
| 4738 | IfException: II->getUnwindDest(), Args: NewArgs, Bundles: OpBundles); |
| 4739 | cast<InvokeInst>(Val: NewCaller)->setCallingConv(II->getCallingConv()); |
| 4740 | cast<InvokeInst>(Val: NewCaller)->setAttributes(NewPAL); |
| 4741 | } else if (CallBrInst *CBI = dyn_cast<CallBrInst>(Val: &Call)) { |
| 4742 | NewCaller = |
| 4743 | CallBrInst::Create(Ty: NewFTy, Func: NestF, DefaultDest: CBI->getDefaultDest(), |
| 4744 | IndirectDests: CBI->getIndirectDests(), Args: NewArgs, Bundles: OpBundles); |
| 4745 | cast<CallBrInst>(Val: NewCaller)->setCallingConv(CBI->getCallingConv()); |
| 4746 | cast<CallBrInst>(Val: NewCaller)->setAttributes(NewPAL); |
| 4747 | } else { |
| 4748 | NewCaller = CallInst::Create(Ty: NewFTy, Func: NestF, Args: NewArgs, Bundles: OpBundles); |
| 4749 | cast<CallInst>(Val: NewCaller)->setTailCallKind( |
| 4750 | cast<CallInst>(Val&: Call).getTailCallKind()); |
| 4751 | cast<CallInst>(Val: NewCaller)->setCallingConv( |
| 4752 | cast<CallInst>(Val&: Call).getCallingConv()); |
| 4753 | cast<CallInst>(Val: NewCaller)->setAttributes(NewPAL); |
| 4754 | } |
| 4755 | NewCaller->setDebugLoc(Call.getDebugLoc()); |
| 4756 | |
| 4757 | return NewCaller; |
| 4758 | } |
| 4759 | } |
| 4760 | |
| 4761 | // Replace the trampoline call with a direct call. Since there is no 'nest' |
| 4762 | // parameter, there is no need to adjust the argument list. Let the generic |
| 4763 | // code sort out any function type mismatches. |
| 4764 | Call.setCalledFunction(FTy, Fn: NestF); |
| 4765 | return &Call; |
| 4766 | } |
| 4767 | |