1//===- InstCombineCalls.cpp -----------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the visitCall, visitInvoke, and visitCallBr functions.
10//
11//===----------------------------------------------------------------------===//
12
13#include "InstCombineInternal.h"
14#include "llvm/ADT/APFloat.h"
15#include "llvm/ADT/APInt.h"
16#include "llvm/ADT/APSInt.h"
17#include "llvm/ADT/ArrayRef.h"
18#include "llvm/ADT/STLFunctionalExtras.h"
19#include "llvm/ADT/SmallBitVector.h"
20#include "llvm/ADT/SmallVector.h"
21#include "llvm/ADT/Statistic.h"
22#include "llvm/ADT/StringExtras.h"
23#include "llvm/Analysis/AliasAnalysis.h"
24#include "llvm/Analysis/AssumeBundleQueries.h"
25#include "llvm/Analysis/AssumptionCache.h"
26#include "llvm/Analysis/InstructionSimplify.h"
27#include "llvm/Analysis/Loads.h"
28#include "llvm/Analysis/MemoryBuiltins.h"
29#include "llvm/Analysis/ValueTracking.h"
30#include "llvm/Analysis/VectorUtils.h"
31#include "llvm/IR/AttributeMask.h"
32#include "llvm/IR/Attributes.h"
33#include "llvm/IR/BasicBlock.h"
34#include "llvm/IR/Constant.h"
35#include "llvm/IR/Constants.h"
36#include "llvm/IR/DataLayout.h"
37#include "llvm/IR/DebugInfo.h"
38#include "llvm/IR/DerivedTypes.h"
39#include "llvm/IR/Function.h"
40#include "llvm/IR/GlobalVariable.h"
41#include "llvm/IR/InlineAsm.h"
42#include "llvm/IR/InstrTypes.h"
43#include "llvm/IR/Instruction.h"
44#include "llvm/IR/Instructions.h"
45#include "llvm/IR/IntrinsicInst.h"
46#include "llvm/IR/Intrinsics.h"
47#include "llvm/IR/IntrinsicsAArch64.h"
48#include "llvm/IR/IntrinsicsAMDGPU.h"
49#include "llvm/IR/IntrinsicsARM.h"
50#include "llvm/IR/IntrinsicsHexagon.h"
51#include "llvm/IR/LLVMContext.h"
52#include "llvm/IR/Metadata.h"
53#include "llvm/IR/PatternMatch.h"
54#include "llvm/IR/Statepoint.h"
55#include "llvm/IR/Type.h"
56#include "llvm/IR/User.h"
57#include "llvm/IR/Value.h"
58#include "llvm/IR/ValueHandle.h"
59#include "llvm/Support/AtomicOrdering.h"
60#include "llvm/Support/Casting.h"
61#include "llvm/Support/CommandLine.h"
62#include "llvm/Support/Compiler.h"
63#include "llvm/Support/Debug.h"
64#include "llvm/Support/ErrorHandling.h"
65#include "llvm/Support/KnownBits.h"
66#include "llvm/Support/KnownFPClass.h"
67#include "llvm/Support/MathExtras.h"
68#include "llvm/Support/TypeSize.h"
69#include "llvm/Support/raw_ostream.h"
70#include "llvm/Transforms/InstCombine/InstCombiner.h"
71#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
72#include "llvm/Transforms/Utils/Local.h"
73#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
74#include <algorithm>
75#include <cassert>
76#include <cstdint>
77#include <optional>
78#include <utility>
79#include <vector>
80
81#define DEBUG_TYPE "instcombine"
82#include "llvm/Transforms/Utils/InstructionWorklist.h"
83
84using namespace llvm;
85using namespace PatternMatch;
86
87STATISTIC(NumSimplified, "Number of library calls simplified");
88
89static cl::opt<unsigned> GuardWideningWindow(
90 "instcombine-guard-widening-window",
91 cl::init(Val: 3),
92 cl::desc("How wide an instruction window to bypass looking for "
93 "another guard"));
94
/// Return the specified type promoted as it would be to pass through a va_arg
/// area.
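/// For example, i8 and i16 are promoted to i32 when passed through a va_arg
/// area, while i32 and wider integer types (and non-integer types) are
/// returned unchanged.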
97static Type *getPromotedType(Type *Ty) {
98 if (IntegerType* ITy = dyn_cast<IntegerType>(Val: Ty)) {
99 if (ITy->getBitWidth() < 32)
100 return Type::getInt32Ty(C&: Ty->getContext());
101 }
102 return Ty;
103}
104
/// Recognize a memcpy/memmove from an alloca that is otherwise trivially
/// unused.
106/// TODO: This should probably be integrated with visitAllocSites, but that
107/// requires a deeper change to allow either unread or unwritten objects.
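/// For example, a memcpy whose source is an alloca used only by that memcpy
/// (possibly through single-use GEPs) copies undef bytes, so the caller may
/// drop the transfer entirely if it is not volatile.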
108static bool hasUndefSource(AnyMemTransferInst *MI) {
109 auto *Src = MI->getRawSource();
110 while (isa<GetElementPtrInst>(Val: Src)) {
111 if (!Src->hasOneUse())
112 return false;
113 Src = cast<Instruction>(Val: Src)->getOperand(i: 0);
114 }
115 return isa<AllocaInst>(Val: Src) && Src->hasOneUse();
116}
117
118Instruction *InstCombinerImpl::SimplifyAnyMemTransfer(AnyMemTransferInst *MI) {
119 Align DstAlign = getKnownAlignment(V: MI->getRawDest(), DL, CxtI: MI, AC: &AC, DT: &DT);
120 MaybeAlign CopyDstAlign = MI->getDestAlign();
121 if (!CopyDstAlign || *CopyDstAlign < DstAlign) {
122 MI->setDestAlignment(DstAlign);
123 return MI;
124 }
125
126 Align SrcAlign = getKnownAlignment(V: MI->getRawSource(), DL, CxtI: MI, AC: &AC, DT: &DT);
127 MaybeAlign CopySrcAlign = MI->getSourceAlign();
128 if (!CopySrcAlign || *CopySrcAlign < SrcAlign) {
129 MI->setSourceAlignment(SrcAlign);
130 return MI;
131 }
132
  // If we have a store to a location that is known to be constant, the store
  // must be storing the value already there (otherwise the memory wouldn't be
  // constant), so the transfer is a no-op.
136 if (!isModSet(MRI: AA->getModRefInfoMask(P: MI->getDest()))) {
137 // Set the size of the copy to 0, it will be deleted on the next iteration.
138 MI->setLength((uint64_t)0);
139 return MI;
140 }
141
142 // If the source is provably undef, the memcpy/memmove doesn't do anything
143 // (unless the transfer is volatile).
144 if (hasUndefSource(MI) && !MI->isVolatile()) {
145 // Set the size of the copy to 0, it will be deleted on the next iteration.
146 MI->setLength((uint64_t)0);
147 return MI;
148 }
149
  // If the transfer length is 1, 2, 4, or 8 bytes, replace the memcpy/memmove
  // with a plain load/store pair.
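  // For example (illustrative IR, not taken from a test), a non-volatile
  //   call void @llvm.memcpy.p0.p0.i64(ptr %dst, ptr %src, i64 4, i1 false)
  // becomes roughly
  //   %v = load i32, ptr %src
  //   store i32 %v, ptr %dst
  // with alignment and AA metadata carried over as computed below.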
152 ConstantInt *MemOpLength = dyn_cast<ConstantInt>(Val: MI->getLength());
153 if (!MemOpLength) return nullptr;
154
  // Source and destination pointer types are always "i8*" for the intrinsic.
  // See if the size is something we can handle with a single primitive
  // load/store. A single load+store correctly handles overlapping memory in
  // the memmove case.
159 uint64_t Size = MemOpLength->getLimitedValue();
160 assert(Size && "0-sized memory transferring should be removed already.");
161
162 if (Size > 8 || (Size&(Size-1)))
163 return nullptr; // If not 1/2/4/8 bytes, exit.
164
  // If the transfer is atomic and the alignment is less than the size, then we
  // would introduce an unaligned memory access, which would later be lowered
  // to a libcall in CodeGen. That is not an evident performance gain, so
  // disable the transform for now.
169 if (MI->isAtomic())
170 if (*CopyDstAlign < Size || *CopySrcAlign < Size)
171 return nullptr;
172
173 // Use an integer load+store unless we can find something better.
174 IntegerType* IntType = IntegerType::get(C&: MI->getContext(), NumBits: Size<<3);
175
176 // If the memcpy has metadata describing the members, see if we can get the
177 // TBAA, scope and noalias tags describing our copy.
178 AAMDNodes AACopyMD = MI->getAAMetadata().adjustForAccess(AccessSize: Size);
179
180 Value *Src = MI->getArgOperand(i: 1);
181 Value *Dest = MI->getArgOperand(i: 0);
182 LoadInst *L = Builder.CreateLoad(Ty: IntType, Ptr: Src);
183 // Alignment from the mem intrinsic will be better, so use it.
184 L->setAlignment(*CopySrcAlign);
185 L->setAAMetadata(AACopyMD);
186 MDNode *LoopMemParallelMD =
187 MI->getMetadata(KindID: LLVMContext::MD_mem_parallel_loop_access);
188 if (LoopMemParallelMD)
189 L->setMetadata(KindID: LLVMContext::MD_mem_parallel_loop_access, Node: LoopMemParallelMD);
190 MDNode *AccessGroupMD = MI->getMetadata(KindID: LLVMContext::MD_access_group);
191 if (AccessGroupMD)
192 L->setMetadata(KindID: LLVMContext::MD_access_group, Node: AccessGroupMD);
193
194 StoreInst *S = Builder.CreateStore(Val: L, Ptr: Dest);
195 // Alignment from the mem intrinsic will be better, so use it.
196 S->setAlignment(*CopyDstAlign);
197 S->setAAMetadata(AACopyMD);
198 if (LoopMemParallelMD)
199 S->setMetadata(KindID: LLVMContext::MD_mem_parallel_loop_access, Node: LoopMemParallelMD);
200 if (AccessGroupMD)
201 S->setMetadata(KindID: LLVMContext::MD_access_group, Node: AccessGroupMD);
202 S->copyMetadata(SrcInst: *MI, WL: LLVMContext::MD_DIAssignID);
203
204 if (auto *MT = dyn_cast<MemTransferInst>(Val: MI)) {
205 // non-atomics can be volatile
206 L->setVolatile(MT->isVolatile());
207 S->setVolatile(MT->isVolatile());
208 }
209 if (MI->isAtomic()) {
210 // atomics have to be unordered
211 L->setOrdering(AtomicOrdering::Unordered);
212 S->setOrdering(AtomicOrdering::Unordered);
213 }
214
215 // Set the size of the copy to 0, it will be deleted on the next iteration.
216 MI->setLength((uint64_t)0);
217 return MI;
218}
219
220Instruction *InstCombinerImpl::SimplifyAnyMemSet(AnyMemSetInst *MI) {
221 const Align KnownAlignment =
222 getKnownAlignment(V: MI->getDest(), DL, CxtI: MI, AC: &AC, DT: &DT);
223 MaybeAlign MemSetAlign = MI->getDestAlign();
224 if (!MemSetAlign || *MemSetAlign < KnownAlignment) {
225 MI->setDestAlignment(KnownAlignment);
226 return MI;
227 }
228
  // If we have a store to a location that is known to be constant, the store
  // must be storing the value already there (otherwise the memory wouldn't be
  // constant), so the memset is a no-op.
232 if (!isModSet(MRI: AA->getModRefInfoMask(P: MI->getDest()))) {
233 // Set the size of the copy to 0, it will be deleted on the next iteration.
234 MI->setLength((uint64_t)0);
235 return MI;
236 }
237
238 // Remove memset with an undef value.
239 // FIXME: This is technically incorrect because it might overwrite a poison
240 // value. Change to PoisonValue once #52930 is resolved.
241 if (isa<UndefValue>(Val: MI->getValue())) {
242 // Set the size of the copy to 0, it will be deleted on the next iteration.
243 MI->setLength((uint64_t)0);
244 return MI;
245 }
246
247 // Extract the length and alignment and fill if they are constant.
248 ConstantInt *LenC = dyn_cast<ConstantInt>(Val: MI->getLength());
249 ConstantInt *FillC = dyn_cast<ConstantInt>(Val: MI->getValue());
250 if (!LenC || !FillC || !FillC->getType()->isIntegerTy(Bitwidth: 8))
251 return nullptr;
252 const uint64_t Len = LenC->getLimitedValue();
253 assert(Len && "0-sized memory setting should be removed already.");
254 const Align Alignment = MI->getDestAlign().valueOrOne();
255
  // If the memset is atomic and the alignment is less than the size, then we
  // would introduce an unaligned memory access, which would later be lowered
  // to a libcall in CodeGen. That is not an evident performance gain, so
  // disable the transform for now.
260 if (MI->isAtomic() && Alignment < Len)
261 return nullptr;
262
263 // memset(s,c,n) -> store s, c (for n=1,2,4,8)
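  // For example (illustrative): memset(%p, 0xAB, 4) becomes
  //   store i32 0xABABABAB, ptr %p
  // i.e. the i8 fill value is splatted across the wider integer type.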
264 if (Len <= 8 && isPowerOf2_32(Value: (uint32_t)Len)) {
265 Value *Dest = MI->getDest();
266
267 // Extract the fill value and store.
268 Constant *FillVal = ConstantInt::get(
269 Context&: MI->getContext(), V: APInt::getSplat(NewLen: Len * 8, V: FillC->getValue()));
270 StoreInst *S = Builder.CreateStore(Val: FillVal, Ptr: Dest, isVolatile: MI->isVolatile());
271 S->copyMetadata(SrcInst: *MI, WL: LLVMContext::MD_DIAssignID);
272 for (DbgVariableRecord *DbgAssign : at::getDVRAssignmentMarkers(Inst: S)) {
273 if (llvm::is_contained(Range: DbgAssign->location_ops(), Element: FillC))
274 DbgAssign->replaceVariableLocationOp(OldValue: FillC, NewValue: FillVal);
275 }
276
277 S->setAlignment(Alignment);
278 if (MI->isAtomic())
279 S->setOrdering(AtomicOrdering::Unordered);
280
281 // Set the size of the copy to 0, it will be deleted on the next iteration.
282 MI->setLength((uint64_t)0);
283 return MI;
284 }
285
286 return nullptr;
287}
288
// TODO, Obvious Missing Transforms:
// * Narrow width by halves excluding zero/undef lanes
291Value *InstCombinerImpl::simplifyMaskedLoad(IntrinsicInst &II) {
292 Value *LoadPtr = II.getArgOperand(i: 0);
293 const Align Alignment = II.getParamAlign(ArgNo: 0).valueOrOne();
294
295 // If the mask is all ones or undefs, this is a plain vector load of the 1st
296 // argument.
297 if (maskIsAllOneOrUndef(Mask: II.getArgOperand(i: 1))) {
298 LoadInst *L = Builder.CreateAlignedLoad(Ty: II.getType(), Ptr: LoadPtr, Align: Alignment,
299 Name: "unmaskedload");
300 L->copyMetadata(SrcInst: II);
301 return L;
302 }
303
304 // If we can unconditionally load from this address, replace with a
305 // load/select idiom. TODO: use DT for context sensitive query
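  // The result is roughly (illustrative):
  //   %v = load <N x T>, ptr %ptr
  //   %r = select <N x i1> %mask, <N x T> %v, <N x T> %passthru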
306 if (isDereferenceablePointer(V: LoadPtr, Ty: II.getType(),
307 DL: II.getDataLayout(), CtxI: &II, AC: &AC)) {
308 LoadInst *LI = Builder.CreateAlignedLoad(Ty: II.getType(), Ptr: LoadPtr, Align: Alignment,
309 Name: "unmaskedload");
310 LI->copyMetadata(SrcInst: II);
311 return Builder.CreateSelect(C: II.getArgOperand(i: 1), True: LI, False: II.getArgOperand(i: 2));
312 }
313
314 return nullptr;
315}
316
// TODO, Obvious Missing Transforms:
// * Single constant active lane -> store
// * Narrow width by halves excluding zero/undef lanes
320Instruction *InstCombinerImpl::simplifyMaskedStore(IntrinsicInst &II) {
321 Value *StorePtr = II.getArgOperand(i: 1);
322 Align Alignment = II.getParamAlign(ArgNo: 1).valueOrOne();
323 auto *ConstMask = dyn_cast<Constant>(Val: II.getArgOperand(i: 2));
324 if (!ConstMask)
325 return nullptr;
326
327 // If the mask is all zeros, this instruction does nothing.
328 if (maskIsAllZeroOrUndef(Mask: ConstMask))
329 return eraseInstFromFunction(I&: II);
330
331 // If the mask is all ones, this is a plain vector store of the 1st argument.
332 if (maskIsAllOneOrUndef(Mask: ConstMask)) {
333 StoreInst *S =
334 new StoreInst(II.getArgOperand(i: 0), StorePtr, false, Alignment);
335 S->copyMetadata(SrcInst: II);
336 return S;
337 }
338
339 if (isa<ScalableVectorType>(Val: ConstMask->getType()))
340 return nullptr;
341
342 // Use masked off lanes to simplify operands via SimplifyDemandedVectorElts
343 APInt DemandedElts = possiblyDemandedEltsInMask(Mask: ConstMask);
344 APInt PoisonElts(DemandedElts.getBitWidth(), 0);
345 if (Value *V = SimplifyDemandedVectorElts(V: II.getOperand(i_nocapture: 0), DemandedElts,
346 PoisonElts))
347 return replaceOperand(I&: II, OpNum: 0, V);
348
349 return nullptr;
350}
351
// TODO, Obvious Missing Transforms:
// * Single constant active lane load -> load
// * Dereferenceable address & few lanes -> scalarize speculative load/selects
// * Adjacent vector addresses -> masked.load
// * Narrow width by halves excluding zero/undef lanes
// * Vector incrementing address -> vector masked load
358Instruction *InstCombinerImpl::simplifyMaskedGather(IntrinsicInst &II) {
359 auto *ConstMask = dyn_cast<Constant>(Val: II.getArgOperand(i: 1));
360 if (!ConstMask)
361 return nullptr;
362
  // Vector splat address w/known mask -> scalar load
  // Fold the gather into a scalar load of the splatted pointer, then broadcast
  // it, because every lane reloads the same value.
366 if (ConstMask->isAllOnesValue())
367 if (auto *SplatPtr = getSplatValue(V: II.getArgOperand(i: 0))) {
368 auto *VecTy = cast<VectorType>(Val: II.getType());
369 const Align Alignment = II.getParamAlign(ArgNo: 0).valueOrOne();
370 LoadInst *L = Builder.CreateAlignedLoad(Ty: VecTy->getElementType(), Ptr: SplatPtr,
371 Align: Alignment, Name: "load.scalar");
372 Value *Shuf =
373 Builder.CreateVectorSplat(EC: VecTy->getElementCount(), V: L, Name: "broadcast");
374 return replaceInstUsesWith(I&: II, V: cast<Instruction>(Val: Shuf));
375 }
376
377 return nullptr;
378}
379
// TODO, Obvious Missing Transforms:
// * Single constant active lane -> store
// * Adjacent vector addresses -> masked.store
// * Narrow store width by halves excluding zero/undef lanes
// * Vector incrementing address -> vector masked store
385Instruction *InstCombinerImpl::simplifyMaskedScatter(IntrinsicInst &II) {
386 auto *ConstMask = dyn_cast<Constant>(Val: II.getArgOperand(i: 2));
387 if (!ConstMask)
388 return nullptr;
389
390 // If the mask is all zeros, a scatter does nothing.
391 if (maskIsAllZeroOrUndef(Mask: ConstMask))
392 return eraseInstFromFunction(I&: II);
393
394 // Vector splat address -> scalar store
395 if (auto *SplatPtr = getSplatValue(V: II.getArgOperand(i: 1))) {
396 // scatter(splat(value), splat(ptr), non-zero-mask) -> store value, ptr
397 if (auto *SplatValue = getSplatValue(V: II.getArgOperand(i: 0))) {
398 if (maskContainsAllOneOrUndef(Mask: ConstMask)) {
399 Align Alignment = II.getParamAlign(ArgNo: 1).valueOrOne();
400 StoreInst *S = new StoreInst(SplatValue, SplatPtr, /*IsVolatile=*/false,
401 Alignment);
402 S->copyMetadata(SrcInst: II);
403 return S;
404 }
405 }
406 // scatter(vector, splat(ptr), splat(true)) -> store extract(vector,
407 // lastlane), ptr
408 if (ConstMask->isAllOnesValue()) {
409 Align Alignment = II.getParamAlign(ArgNo: 1).valueOrOne();
410 VectorType *WideLoadTy = cast<VectorType>(Val: II.getArgOperand(i: 1)->getType());
411 ElementCount VF = WideLoadTy->getElementCount();
412 Value *RunTimeVF = Builder.CreateElementCount(Ty: Builder.getInt32Ty(), EC: VF);
413 Value *LastLane = Builder.CreateSub(LHS: RunTimeVF, RHS: Builder.getInt32(C: 1));
414 Value *Extract =
415 Builder.CreateExtractElement(Vec: II.getArgOperand(i: 0), Idx: LastLane);
416 StoreInst *S =
417 new StoreInst(Extract, SplatPtr, /*IsVolatile=*/false, Alignment);
418 S->copyMetadata(SrcInst: II);
419 return S;
420 }
421 }
422 if (isa<ScalableVectorType>(Val: ConstMask->getType()))
423 return nullptr;
424
425 // Use masked off lanes to simplify operands via SimplifyDemandedVectorElts
426 APInt DemandedElts = possiblyDemandedEltsInMask(Mask: ConstMask);
427 APInt PoisonElts(DemandedElts.getBitWidth(), 0);
428 if (Value *V = SimplifyDemandedVectorElts(V: II.getOperand(i_nocapture: 0), DemandedElts,
429 PoisonElts))
430 return replaceOperand(I&: II, OpNum: 0, V);
431 if (Value *V = SimplifyDemandedVectorElts(V: II.getOperand(i_nocapture: 1), DemandedElts,
432 PoisonElts))
433 return replaceOperand(I&: II, OpNum: 1, V);
434
435 return nullptr;
436}
437
438/// This function transforms launder.invariant.group and strip.invariant.group
439/// like:
440/// launder(launder(%x)) -> launder(%x) (the result is not the argument)
441/// launder(strip(%x)) -> launder(%x)
442/// strip(strip(%x)) -> strip(%x) (the result is not the argument)
443/// strip(launder(%x)) -> strip(%x)
444/// This is legal because it preserves the most recent information about
445/// the presence or absence of invariant.group.
446static Instruction *simplifyInvariantGroupIntrinsic(IntrinsicInst &II,
447 InstCombinerImpl &IC) {
448 auto *Arg = II.getArgOperand(i: 0);
449 auto *StrippedArg = Arg->stripPointerCasts();
450 auto *StrippedInvariantGroupsArg = StrippedArg;
451 while (auto *Intr = dyn_cast<IntrinsicInst>(Val: StrippedInvariantGroupsArg)) {
452 if (Intr->getIntrinsicID() != Intrinsic::launder_invariant_group &&
453 Intr->getIntrinsicID() != Intrinsic::strip_invariant_group)
454 break;
455 StrippedInvariantGroupsArg = Intr->getArgOperand(i: 0)->stripPointerCasts();
456 }
457 if (StrippedArg == StrippedInvariantGroupsArg)
458 return nullptr; // No launders/strips to remove.
459
460 Value *Result = nullptr;
461
462 if (II.getIntrinsicID() == Intrinsic::launder_invariant_group)
463 Result = IC.Builder.CreateLaunderInvariantGroup(Ptr: StrippedInvariantGroupsArg);
464 else if (II.getIntrinsicID() == Intrinsic::strip_invariant_group)
465 Result = IC.Builder.CreateStripInvariantGroup(Ptr: StrippedInvariantGroupsArg);
466 else
467 llvm_unreachable(
468 "simplifyInvariantGroupIntrinsic only handles launder and strip");
469 if (Result->getType()->getPointerAddressSpace() !=
470 II.getType()->getPointerAddressSpace())
471 Result = IC.Builder.CreateAddrSpaceCast(V: Result, DestTy: II.getType());
472
473 return cast<Instruction>(Val: Result);
474}
475
476static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombinerImpl &IC) {
477 assert((II.getIntrinsicID() == Intrinsic::cttz ||
478 II.getIntrinsicID() == Intrinsic::ctlz) &&
479 "Expected cttz or ctlz intrinsic");
480 bool IsTZ = II.getIntrinsicID() == Intrinsic::cttz;
481 Value *Op0 = II.getArgOperand(i: 0);
482 Value *Op1 = II.getArgOperand(i: 1);
483 Value *X;
484 // ctlz(bitreverse(x)) -> cttz(x)
485 // cttz(bitreverse(x)) -> ctlz(x)
486 if (match(V: Op0, P: m_BitReverse(Op0: m_Value(V&: X)))) {
487 Intrinsic::ID ID = IsTZ ? Intrinsic::ctlz : Intrinsic::cttz;
488 Function *F =
489 Intrinsic::getOrInsertDeclaration(M: II.getModule(), id: ID, Tys: II.getType());
490 return CallInst::Create(Func: F, Args: {X, II.getArgOperand(i: 1)});
491 }
492
493 if (II.getType()->isIntOrIntVectorTy(BitWidth: 1)) {
494 // ctlz/cttz i1 Op0 --> not Op0
495 if (match(V: Op1, P: m_Zero()))
496 return BinaryOperator::CreateNot(Op: Op0);
497 // If zero is poison, then the input can be assumed to be "true", so the
498 // instruction simplifies to "false".
499 assert(match(Op1, m_One()) && "Expected ctlz/cttz operand to be 0 or 1");
500 return IC.replaceInstUsesWith(I&: II, V: ConstantInt::getNullValue(Ty: II.getType()));
501 }
502
503 // If ctlz/cttz is only used as a shift amount, set is_zero_poison to true.
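  // (A shift amount equal to the bit width is already poison, so the
  // bit-width result that ctlz/cttz(0) would otherwise produce can only ever
  // feed a poison-producing shift anyway.)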
504 if (II.hasOneUse() && match(V: Op1, P: m_Zero()) &&
505 match(V: II.user_back(), P: m_Shift(L: m_Value(), R: m_Specific(V: &II)))) {
506 II.dropUBImplyingAttrsAndMetadata();
507 return IC.replaceOperand(I&: II, OpNum: 1, V: IC.Builder.getTrue());
508 }
509
510 Constant *C;
511
512 if (IsTZ) {
513 // cttz(-x) -> cttz(x)
514 if (match(V: Op0, P: m_Neg(V: m_Value(V&: X))))
515 return IC.replaceOperand(I&: II, OpNum: 0, V: X);
516
517 // cttz(-x & x) -> cttz(x)
518 if (match(V: Op0, P: m_c_And(L: m_Neg(V: m_Value(V&: X)), R: m_Deferred(V: X))))
519 return IC.replaceOperand(I&: II, OpNum: 0, V: X);
520
521 // cttz(sext(x)) -> cttz(zext(x))
522 if (match(V: Op0, P: m_OneUse(SubPattern: m_SExt(Op: m_Value(V&: X))))) {
523 auto *Zext = IC.Builder.CreateZExt(V: X, DestTy: II.getType());
524 auto *CttzZext =
525 IC.Builder.CreateBinaryIntrinsic(ID: Intrinsic::cttz, LHS: Zext, RHS: Op1);
526 return IC.replaceInstUsesWith(I&: II, V: CttzZext);
527 }
528
529 // Zext doesn't change the number of trailing zeros, so narrow:
530 // cttz(zext(x)) -> zext(cttz(x)) if the 'ZeroIsPoison' parameter is 'true'.
531 if (match(V: Op0, P: m_OneUse(SubPattern: m_ZExt(Op: m_Value(V&: X)))) && match(V: Op1, P: m_One())) {
532 auto *Cttz = IC.Builder.CreateBinaryIntrinsic(ID: Intrinsic::cttz, LHS: X,
533 RHS: IC.Builder.getTrue());
534 auto *ZextCttz = IC.Builder.CreateZExt(V: Cttz, DestTy: II.getType());
535 return IC.replaceInstUsesWith(I&: II, V: ZextCttz);
536 }
537
538 // cttz(abs(x)) -> cttz(x)
539 // cttz(nabs(x)) -> cttz(x)
540 Value *Y;
541 SelectPatternFlavor SPF = matchSelectPattern(V: Op0, LHS&: X, RHS&: Y).Flavor;
542 if (SPF == SPF_ABS || SPF == SPF_NABS)
543 return IC.replaceOperand(I&: II, OpNum: 0, V: X);
544
545 if (match(V: Op0, P: m_Intrinsic<Intrinsic::abs>(Op0: m_Value(V&: X))))
546 return IC.replaceOperand(I&: II, OpNum: 0, V: X);
547
548 // cttz(shl(%const, %val), 1) --> add(cttz(%const, 1), %val)
549 if (match(V: Op0, P: m_Shl(L: m_ImmConstant(C), R: m_Value(V&: X))) &&
550 match(V: Op1, P: m_One())) {
551 Value *ConstCttz =
552 IC.Builder.CreateBinaryIntrinsic(ID: Intrinsic::cttz, LHS: C, RHS: Op1);
553 return BinaryOperator::CreateAdd(V1: ConstCttz, V2: X);
554 }
555
556 // cttz(lshr exact (%const, %val), 1) --> sub(cttz(%const, 1), %val)
557 if (match(V: Op0, P: m_Exact(SubPattern: m_LShr(L: m_ImmConstant(C), R: m_Value(V&: X)))) &&
558 match(V: Op1, P: m_One())) {
559 Value *ConstCttz =
560 IC.Builder.CreateBinaryIntrinsic(ID: Intrinsic::cttz, LHS: C, RHS: Op1);
561 return BinaryOperator::CreateSub(V1: ConstCttz, V2: X);
562 }
563
564 // cttz(add(lshr(UINT_MAX, %val), 1)) --> sub(width, %val)
565 if (match(V: Op0, P: m_Add(L: m_LShr(L: m_AllOnes(), R: m_Value(V&: X)), R: m_One()))) {
566 Value *Width =
567 ConstantInt::get(Ty: II.getType(), V: II.getType()->getScalarSizeInBits());
568 return BinaryOperator::CreateSub(V1: Width, V2: X);
569 }
570 } else {
571 // ctlz(lshr(%const, %val), 1) --> add(ctlz(%const, 1), %val)
572 if (match(V: Op0, P: m_LShr(L: m_ImmConstant(C), R: m_Value(V&: X))) &&
573 match(V: Op1, P: m_One())) {
574 Value *ConstCtlz =
575 IC.Builder.CreateBinaryIntrinsic(ID: Intrinsic::ctlz, LHS: C, RHS: Op1);
576 return BinaryOperator::CreateAdd(V1: ConstCtlz, V2: X);
577 }
578
579 // ctlz(shl nuw (%const, %val), 1) --> sub(ctlz(%const, 1), %val)
580 if (match(V: Op0, P: m_NUWShl(L: m_ImmConstant(C), R: m_Value(V&: X))) &&
581 match(V: Op1, P: m_One())) {
582 Value *ConstCtlz =
583 IC.Builder.CreateBinaryIntrinsic(ID: Intrinsic::ctlz, LHS: C, RHS: Op1);
584 return BinaryOperator::CreateSub(V1: ConstCtlz, V2: X);
585 }
586
587 // ctlz(~x & (x - 1)) -> bitwidth - cttz(x, false)
588 if (Op0->hasOneUse() &&
589 match(V: Op0,
590 P: m_c_And(L: m_Not(V: m_Value(V&: X)), R: m_Add(L: m_Deferred(V: X), R: m_AllOnes())))) {
591 Type *Ty = II.getType();
592 unsigned BitWidth = Ty->getScalarSizeInBits();
593 auto *Cttz = IC.Builder.CreateIntrinsic(ID: Intrinsic::cttz, Types: Ty,
594 Args: {X, IC.Builder.getFalse()});
595 auto *Bw = ConstantInt::get(Ty, V: APInt(BitWidth, BitWidth));
596 return IC.replaceInstUsesWith(I&: II, V: IC.Builder.CreateSub(LHS: Bw, RHS: Cttz));
597 }
598 }
599
600 // cttz(Pow2) -> Log2(Pow2)
601 // ctlz(Pow2) -> BitWidth - 1 - Log2(Pow2)
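  // For example, for i32 %x = 16: cttz(16) == 4 == Log2(16), and
  // ctlz(16) == 27 == 31 - Log2(16).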
602 if (auto *R = IC.tryGetLog2(Op: Op0, AssumeNonZero: match(V: Op1, P: m_One()))) {
603 if (IsTZ)
604 return IC.replaceInstUsesWith(I&: II, V: R);
605 BinaryOperator *BO = BinaryOperator::CreateSub(
606 V1: ConstantInt::get(Ty: R->getType(), V: R->getType()->getScalarSizeInBits() - 1),
607 V2: R);
608 BO->setHasNoSignedWrap();
609 BO->setHasNoUnsignedWrap();
610 return BO;
611 }
612
613 KnownBits Known = IC.computeKnownBits(V: Op0, CxtI: &II);
614
  // Compute the maximum and minimum number of trailing (cttz) or leading
  // (ctlz) zeros implied by the known bits of the operand.
616 unsigned PossibleZeros = IsTZ ? Known.countMaxTrailingZeros()
617 : Known.countMaxLeadingZeros();
618 unsigned DefiniteZeros = IsTZ ? Known.countMinTrailingZeros()
619 : Known.countMinLeadingZeros();
620
621 // If all bits above (ctlz) or below (cttz) the first known one are known
622 // zero, this value is constant.
623 // FIXME: This should be in InstSimplify because we're replacing an
624 // instruction with a constant.
625 if (PossibleZeros == DefiniteZeros) {
626 auto *C = ConstantInt::get(Ty: Op0->getType(), V: DefiniteZeros);
627 return IC.replaceInstUsesWith(I&: II, V: C);
628 }
629
630 // If the input to cttz/ctlz is known to be non-zero,
631 // then change the 'ZeroIsPoison' parameter to 'true'
632 // because we know the zero behavior can't affect the result.
633 if (!Known.One.isZero() ||
634 isKnownNonZero(V: Op0, Q: IC.getSimplifyQuery().getWithInstruction(I: &II))) {
635 if (!match(V: II.getArgOperand(i: 1), P: m_One()))
636 return IC.replaceOperand(I&: II, OpNum: 1, V: IC.Builder.getTrue());
637 }
638
  // Add a range attribute, since known bits can't completely reflect what we
  // know.
640 unsigned BitWidth = Op0->getType()->getScalarSizeInBits();
641 if (BitWidth != 1 && !II.hasRetAttr(Kind: Attribute::Range) &&
642 !II.getMetadata(KindID: LLVMContext::MD_range)) {
643 ConstantRange Range(APInt(BitWidth, DefiniteZeros),
644 APInt(BitWidth, PossibleZeros + 1));
645 II.addRangeRetAttr(CR: Range);
646 return &II;
647 }
648
649 return nullptr;
650}
651
652static Instruction *foldCtpop(IntrinsicInst &II, InstCombinerImpl &IC) {
653 assert(II.getIntrinsicID() == Intrinsic::ctpop &&
654 "Expected ctpop intrinsic");
655 Type *Ty = II.getType();
656 unsigned BitWidth = Ty->getScalarSizeInBits();
657 Value *Op0 = II.getArgOperand(i: 0);
658 Value *X, *Y;
659
660 // ctpop(bitreverse(x)) -> ctpop(x)
661 // ctpop(bswap(x)) -> ctpop(x)
662 if (match(V: Op0, P: m_BitReverse(Op0: m_Value(V&: X))) || match(V: Op0, P: m_BSwap(Op0: m_Value(V&: X))))
663 return IC.replaceOperand(I&: II, OpNum: 0, V: X);
664
665 // ctpop(rot(x)) -> ctpop(x)
666 if ((match(V: Op0, P: m_FShl(Op0: m_Value(V&: X), Op1: m_Value(V&: Y), Op2: m_Value())) ||
667 match(V: Op0, P: m_FShr(Op0: m_Value(V&: X), Op1: m_Value(V&: Y), Op2: m_Value()))) &&
668 X == Y)
669 return IC.replaceOperand(I&: II, OpNum: 0, V: X);
670
671 // ctpop(x | -x) -> bitwidth - cttz(x, false)
672 if (Op0->hasOneUse() &&
673 match(V: Op0, P: m_c_Or(L: m_Value(V&: X), R: m_Neg(V: m_Deferred(V: X))))) {
674 auto *Cttz = IC.Builder.CreateIntrinsic(ID: Intrinsic::cttz, Types: Ty,
675 Args: {X, IC.Builder.getFalse()});
676 auto *Bw = ConstantInt::get(Ty, V: APInt(BitWidth, BitWidth));
677 return IC.replaceInstUsesWith(I&: II, V: IC.Builder.CreateSub(LHS: Bw, RHS: Cttz));
678 }
679
680 // ctpop(~x & (x - 1)) -> cttz(x, false)
681 if (match(V: Op0,
682 P: m_c_And(L: m_Not(V: m_Value(V&: X)), R: m_Add(L: m_Deferred(V: X), R: m_AllOnes())))) {
683 Function *F =
684 Intrinsic::getOrInsertDeclaration(M: II.getModule(), id: Intrinsic::cttz, Tys: Ty);
685 return CallInst::Create(Func: F, Args: {X, IC.Builder.getFalse()});
686 }
687
688 // Zext doesn't change the number of set bits, so narrow:
689 // ctpop (zext X) --> zext (ctpop X)
690 if (match(V: Op0, P: m_OneUse(SubPattern: m_ZExt(Op: m_Value(V&: X))))) {
691 Value *NarrowPop = IC.Builder.CreateUnaryIntrinsic(ID: Intrinsic::ctpop, V: X);
692 return CastInst::Create(Instruction::ZExt, S: NarrowPop, Ty);
693 }
694
695 KnownBits Known(BitWidth);
696 IC.computeKnownBits(V: Op0, Known, CxtI: &II);
697
698 // If all bits are zero except for exactly one fixed bit, then the result
699 // must be 0 or 1, and we can get that answer by shifting to LSB:
700 // ctpop (X & 32) --> (X & 32) >> 5
  // TODO: Investigate removing this, as it's likely unnecessary given the
  // `isKnownToBeAPowerOfTwo` check below.
703 if ((~Known.Zero).isPowerOf2())
704 return BinaryOperator::CreateLShr(
705 V1: Op0, V2: ConstantInt::get(Ty, V: (~Known.Zero).exactLogBase2()));
706
707 // More generally we can also handle non-constant power of 2 patterns such as
708 // shl/shr(Pow2, X), (X & -X), etc... by transforming:
709 // ctpop(Pow2OrZero) --> icmp ne X, 0
710 if (IC.isKnownToBeAPowerOfTwo(V: Op0, /* OrZero */ true))
711 return CastInst::Create(Instruction::ZExt,
712 S: IC.Builder.CreateICmp(P: ICmpInst::ICMP_NE, LHS: Op0,
713 RHS: Constant::getNullValue(Ty)),
714 Ty);
715
  // Add a range attribute, since known bits can't completely reflect what we
  // know.
717 if (BitWidth != 1) {
718 ConstantRange OldRange =
719 II.getRange().value_or(u: ConstantRange::getFull(BitWidth));
720
721 unsigned Lower = Known.countMinPopulation();
722 unsigned Upper = Known.countMaxPopulation() + 1;
723
724 if (Lower == 0 && OldRange.contains(Val: APInt::getZero(numBits: BitWidth)) &&
725 isKnownNonZero(V: Op0, Q: IC.getSimplifyQuery().getWithInstruction(I: &II)))
726 Lower = 1;
727
728 ConstantRange Range(APInt(BitWidth, Lower), APInt(BitWidth, Upper));
729 Range = Range.intersectWith(CR: OldRange, Type: ConstantRange::Unsigned);
730
731 if (Range != OldRange) {
732 II.addRangeRetAttr(CR: Range);
733 return &II;
734 }
735 }
736
737 return nullptr;
738}
739
740/// Convert `tbl`/`tbx` intrinsics to shufflevector if the mask is constant, and
741/// at most two source operands are actually referenced.
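/// For example (illustrative), a tbl call on a single <8 x i8> source whose
/// constant mask is <0, 1, 2, 3, 4, 5, 6, 7> only selects in-range lanes and
/// can be rewritten as a shufflevector of that source with the same indices.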
742static Instruction *simplifyNeonTbl(IntrinsicInst &II, InstCombiner &IC,
743 bool IsExtension) {
744 // Bail out if the mask is not a constant.
745 auto *C = dyn_cast<Constant>(Val: II.getArgOperand(i: II.arg_size() - 1));
746 if (!C)
747 return nullptr;
748
749 auto *RetTy = cast<FixedVectorType>(Val: II.getType());
750 unsigned NumIndexes = RetTy->getNumElements();
751
752 // Only perform this transformation for <8 x i8> and <16 x i8> vector types.
753 if (!RetTy->getElementType()->isIntegerTy(Bitwidth: 8) ||
754 (NumIndexes != 8 && NumIndexes != 16))
755 return nullptr;
756
757 // For tbx instructions, the first argument is the "fallback" vector, which
758 // has the same length as the mask and return type.
759 unsigned int StartIndex = (unsigned)IsExtension;
760 auto *SourceTy =
761 cast<FixedVectorType>(Val: II.getArgOperand(i: StartIndex)->getType());
762 // Note that the element count of each source vector does *not* need to be the
763 // same as the element count of the return type and mask! All source vectors
764 // must have the same element count as each other, though.
765 unsigned NumElementsPerSource = SourceTy->getNumElements();
766
767 // There are no tbl/tbx intrinsics for which the destination size exceeds the
768 // source size. However, our definitions of the intrinsics, at least in
769 // IntrinsicsAArch64.td, allow for arbitrary destination vector sizes, so it
770 // *could* technically happen.
771 if (NumIndexes > NumElementsPerSource)
772 return nullptr;
773
774 // The tbl/tbx intrinsics take several source operands followed by a mask
775 // operand.
776 unsigned int NumSourceOperands = II.arg_size() - 1 - (unsigned)IsExtension;
777
778 // Map input operands to shuffle indices. This also helpfully deduplicates the
779 // input arguments, in case the same value is passed as an argument multiple
780 // times.
781 SmallDenseMap<Value *, unsigned, 2> ValueToShuffleSlot;
782 Value *ShuffleOperands[2] = {PoisonValue::get(T: SourceTy),
783 PoisonValue::get(T: SourceTy)};
784
785 int Indexes[16];
786 for (unsigned I = 0; I < NumIndexes; ++I) {
787 Constant *COp = C->getAggregateElement(Elt: I);
788
789 if (!COp || (!isa<UndefValue>(Val: COp) && !isa<ConstantInt>(Val: COp)))
790 return nullptr;
791
792 if (isa<UndefValue>(Val: COp)) {
793 Indexes[I] = -1;
794 continue;
795 }
796
797 uint64_t Index = cast<ConstantInt>(Val: COp)->getZExtValue();
798 // The index of the input argument that this index references (0 = first
799 // source argument, etc).
800 unsigned SourceOperandIndex = Index / NumElementsPerSource;
801 // The index of the element at that source operand.
802 unsigned SourceOperandElementIndex = Index % NumElementsPerSource;
803
804 Value *SourceOperand;
805 if (SourceOperandIndex >= NumSourceOperands) {
806 // This index is out of bounds. Map it to index into either the fallback
807 // vector (tbx) or vector of zeroes (tbl).
808 SourceOperandIndex = NumSourceOperands;
809 if (IsExtension) {
810 // For out-of-bounds indices in tbx, choose the `I`th element of the
811 // fallback.
812 SourceOperand = II.getArgOperand(i: 0);
813 SourceOperandElementIndex = I;
814 } else {
815 // Otherwise, choose some element from the dummy vector of zeroes (we'll
816 // always choose the first).
817 SourceOperand = Constant::getNullValue(Ty: SourceTy);
818 SourceOperandElementIndex = 0;
819 }
820 } else {
821 SourceOperand = II.getArgOperand(i: SourceOperandIndex + StartIndex);
822 }
823
824 // The source operand may be the fallback vector, which may not have the
825 // same number of elements as the source vector. In that case, we *could*
826 // choose to extend its length with another shufflevector, but it's simpler
827 // to just bail instead.
828 if (cast<FixedVectorType>(Val: SourceOperand->getType())->getNumElements() !=
829 NumElementsPerSource)
830 return nullptr;
831
832 // We now know the source operand referenced by this index. Make it a
833 // shufflevector operand, if it isn't already.
834 unsigned NumSlots = ValueToShuffleSlot.size();
835 // This shuffle references more than two sources, and hence cannot be
836 // represented as a shufflevector.
837 if (NumSlots == 2 && !ValueToShuffleSlot.contains(Val: SourceOperand))
838 return nullptr;
839
840 auto [It, Inserted] =
841 ValueToShuffleSlot.try_emplace(Key: SourceOperand, Args&: NumSlots);
842 if (Inserted)
843 ShuffleOperands[It->getSecond()] = SourceOperand;
844
845 unsigned RemappedIndex =
846 (It->getSecond() * NumElementsPerSource) + SourceOperandElementIndex;
847 Indexes[I] = RemappedIndex;
848 }
849
850 Value *Shuf = IC.Builder.CreateShuffleVector(
851 V1: ShuffleOperands[0], V2: ShuffleOperands[1], Mask: ArrayRef(Indexes, NumIndexes));
852 return IC.replaceInstUsesWith(I&: II, V: Shuf);
853}
854
855// Returns true iff the 2 intrinsics have the same operands, limiting the
856// comparison to the first NumOperands.
857static bool haveSameOperands(const IntrinsicInst &I, const IntrinsicInst &E,
858 unsigned NumOperands) {
859 assert(I.arg_size() >= NumOperands && "Not enough operands");
860 assert(E.arg_size() >= NumOperands && "Not enough operands");
861 for (unsigned i = 0; i < NumOperands; i++)
862 if (I.getArgOperand(i) != E.getArgOperand(i))
863 return false;
864 return true;
865}
866
867// Remove trivially empty start/end intrinsic ranges, i.e. a start
868// immediately followed by an end (ignoring debuginfo or other
869// start/end intrinsics in between). As this handles only the most trivial
870// cases, tracking the nesting level is not needed:
871//
872// call @llvm.foo.start(i1 0)
873// call @llvm.foo.start(i1 0) ; This one won't be skipped: it will be removed
874// call @llvm.foo.end(i1 0)
875// call @llvm.foo.end(i1 0) ; &I
876static bool
877removeTriviallyEmptyRange(IntrinsicInst &EndI, InstCombinerImpl &IC,
878 std::function<bool(const IntrinsicInst &)> IsStart) {
879 // We start from the end intrinsic and scan backwards, so that InstCombine
880 // has already processed (and potentially removed) all the instructions
881 // before the end intrinsic.
882 BasicBlock::reverse_iterator BI(EndI), BE(EndI.getParent()->rend());
883 for (; BI != BE; ++BI) {
884 if (auto *I = dyn_cast<IntrinsicInst>(Val: &*BI)) {
885 if (I->isDebugOrPseudoInst() ||
886 I->getIntrinsicID() == EndI.getIntrinsicID())
887 continue;
888 if (IsStart(*I)) {
889 if (haveSameOperands(I: EndI, E: *I, NumOperands: EndI.arg_size())) {
890 IC.eraseInstFromFunction(I&: *I);
891 IC.eraseInstFromFunction(I&: EndI);
892 return true;
893 }
894 // Skip start intrinsics that don't pair with this end intrinsic.
895 continue;
896 }
897 }
898 break;
899 }
900
901 return false;
902}
903
904Instruction *InstCombinerImpl::visitVAEndInst(VAEndInst &I) {
905 removeTriviallyEmptyRange(EndI&: I, IC&: *this, IsStart: [&I](const IntrinsicInst &II) {
906 // Bail out on the case where the source va_list of a va_copy is destroyed
907 // immediately by a follow-up va_end.
908 return II.getIntrinsicID() == Intrinsic::vastart ||
909 (II.getIntrinsicID() == Intrinsic::vacopy &&
910 I.getArgOperand(i: 0) != II.getArgOperand(i: 1));
911 });
912 return nullptr;
913}
914
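/// If a call has a constant first argument and a non-constant second argument,
/// swap them so that later folds can assume the constant operand is on the
/// right (e.g. smax(42, %x) becomes smax(%x, 42)). This is only a valid
/// canonicalization when the call is commutative in its first two arguments.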
915static CallInst *canonicalizeConstantArg0ToArg1(CallInst &Call) {
916 assert(Call.arg_size() > 1 && "Need at least 2 args to swap");
917 Value *Arg0 = Call.getArgOperand(i: 0), *Arg1 = Call.getArgOperand(i: 1);
918 if (isa<Constant>(Val: Arg0) && !isa<Constant>(Val: Arg1)) {
919 Call.setArgOperand(i: 0, v: Arg1);
920 Call.setArgOperand(i: 1, v: Arg0);
921 return &Call;
922 }
923 return nullptr;
924}
925
926/// Creates a result tuple for an overflow intrinsic \p II with a given
927/// \p Result and a constant \p Overflow value.
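/// For example, for llvm.sadd.with.overflow.i32 this builds the constant
/// struct {i32 poison, i1 Overflow} and then inserts \p Result into element 0.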
928static Instruction *createOverflowTuple(IntrinsicInst *II, Value *Result,
929 Constant *Overflow) {
930 Constant *V[] = {PoisonValue::get(T: Result->getType()), Overflow};
931 StructType *ST = cast<StructType>(Val: II->getType());
932 Constant *Struct = ConstantStruct::get(T: ST, V);
933 return InsertValueInst::Create(Agg: Struct, Val: Result, Idxs: 0);
934}
935
936Instruction *
937InstCombinerImpl::foldIntrinsicWithOverflowCommon(IntrinsicInst *II) {
938 WithOverflowInst *WO = cast<WithOverflowInst>(Val: II);
939 Value *OperationResult = nullptr;
940 Constant *OverflowResult = nullptr;
941 if (OptimizeOverflowCheck(BinaryOp: WO->getBinaryOp(), IsSigned: WO->isSigned(), LHS: WO->getLHS(),
942 RHS: WO->getRHS(), CtxI&: *WO, OperationResult, OverflowResult))
943 return createOverflowTuple(II: WO, Result: OperationResult, Overflow: OverflowResult);
944
945 // See whether we can optimize the overflow check with assumption information.
946 for (User *U : WO->users()) {
947 if (!match(V: U, P: m_ExtractValue<1>(V: m_Value())))
948 continue;
949
950 for (auto &AssumeVH : AC.assumptionsFor(V: U)) {
951 if (!AssumeVH)
952 continue;
953 CallInst *I = cast<CallInst>(Val&: AssumeVH);
954 if (!match(V: I->getArgOperand(i: 0), P: m_Not(V: m_Specific(V: U))))
955 continue;
956 if (!isValidAssumeForContext(I, CxtI: II, /*DT=*/nullptr,
957 /*AllowEphemerals=*/true))
958 continue;
959 Value *Result =
960 Builder.CreateBinOp(Opc: WO->getBinaryOp(), LHS: WO->getLHS(), RHS: WO->getRHS());
961 Result->takeName(V: WO);
962 if (auto *Inst = dyn_cast<Instruction>(Val: Result)) {
963 if (WO->isSigned())
964 Inst->setHasNoSignedWrap();
965 else
966 Inst->setHasNoUnsignedWrap();
967 }
968 return createOverflowTuple(II: WO, Result,
969 Overflow: ConstantInt::getFalse(Ty: U->getType()));
970 }
971 }
972
973 return nullptr;
974}
975
976static bool inputDenormalIsIEEE(const Function &F, const Type *Ty) {
977 Ty = Ty->getScalarType();
978 return F.getDenormalMode(FPType: Ty->getFltSemantics()).Input == DenormalMode::IEEE;
979}
980
981static bool inputDenormalIsDAZ(const Function &F, const Type *Ty) {
982 Ty = Ty->getScalarType();
983 return F.getDenormalMode(FPType: Ty->getFltSemantics()).inputsAreZero();
984}
985
986/// \returns the compare predicate type if the test performed by
987/// llvm.is.fpclass(x, \p Mask) is equivalent to fcmp o__ x, 0.0 with the
988/// floating-point environment assumed for \p F for type \p Ty
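/// For example, when denormal inputs are IEEE for \p Ty in \p F, a mask of
/// fcZero (positive or negative zero) matches exactly the values for which
/// "fcmp oeq x, 0.0" is true, so FCMP_OEQ is returned.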
989static FCmpInst::Predicate fpclassTestIsFCmp0(FPClassTest Mask,
990 const Function &F, Type *Ty) {
991 switch (static_cast<unsigned>(Mask)) {
992 case fcZero:
993 if (inputDenormalIsIEEE(F, Ty))
994 return FCmpInst::FCMP_OEQ;
995 break;
996 case fcZero | fcSubnormal:
997 if (inputDenormalIsDAZ(F, Ty))
998 return FCmpInst::FCMP_OEQ;
999 break;
1000 case fcPositive | fcNegZero:
1001 if (inputDenormalIsIEEE(F, Ty))
1002 return FCmpInst::FCMP_OGE;
1003 break;
1004 case fcPositive | fcNegZero | fcNegSubnormal:
1005 if (inputDenormalIsDAZ(F, Ty))
1006 return FCmpInst::FCMP_OGE;
1007 break;
1008 case fcPosSubnormal | fcPosNormal | fcPosInf:
1009 if (inputDenormalIsIEEE(F, Ty))
1010 return FCmpInst::FCMP_OGT;
1011 break;
1012 case fcNegative | fcPosZero:
1013 if (inputDenormalIsIEEE(F, Ty))
1014 return FCmpInst::FCMP_OLE;
1015 break;
1016 case fcNegative | fcPosZero | fcPosSubnormal:
1017 if (inputDenormalIsDAZ(F, Ty))
1018 return FCmpInst::FCMP_OLE;
1019 break;
1020 case fcNegSubnormal | fcNegNormal | fcNegInf:
1021 if (inputDenormalIsIEEE(F, Ty))
1022 return FCmpInst::FCMP_OLT;
1023 break;
1024 case fcPosNormal | fcPosInf:
1025 if (inputDenormalIsDAZ(F, Ty))
1026 return FCmpInst::FCMP_OGT;
1027 break;
1028 case fcNegNormal | fcNegInf:
1029 if (inputDenormalIsDAZ(F, Ty))
1030 return FCmpInst::FCMP_OLT;
1031 break;
1032 case ~fcZero & ~fcNan:
1033 if (inputDenormalIsIEEE(F, Ty))
1034 return FCmpInst::FCMP_ONE;
1035 break;
1036 case ~(fcZero | fcSubnormal) & ~fcNan:
1037 if (inputDenormalIsDAZ(F, Ty))
1038 return FCmpInst::FCMP_ONE;
1039 break;
1040 default:
1041 break;
1042 }
1043
1044 return FCmpInst::BAD_FCMP_PREDICATE;
1045}
1046
1047Instruction *InstCombinerImpl::foldIntrinsicIsFPClass(IntrinsicInst &II) {
1048 Value *Src0 = II.getArgOperand(i: 0);
1049 Value *Src1 = II.getArgOperand(i: 1);
1050 const ConstantInt *CMask = cast<ConstantInt>(Val: Src1);
1051 FPClassTest Mask = static_cast<FPClassTest>(CMask->getZExtValue());
1052 const bool IsUnordered = (Mask & fcNan) == fcNan;
1053 const bool IsOrdered = (Mask & fcNan) == fcNone;
1054 const FPClassTest OrderedMask = Mask & ~fcNan;
1055 const FPClassTest OrderedInvertedMask = ~OrderedMask & ~fcNan;
1056
1057 const bool IsStrict =
1058 II.getFunction()->getAttributes().hasFnAttr(Kind: Attribute::StrictFP);
1059
1060 Value *FNegSrc;
1061 if (match(V: Src0, P: m_FNeg(X: m_Value(V&: FNegSrc)))) {
1062 // is.fpclass (fneg x), mask -> is.fpclass x, (fneg mask)
1063
1064 II.setArgOperand(i: 1, v: ConstantInt::get(Ty: Src1->getType(), V: fneg(Mask)));
1065 return replaceOperand(I&: II, OpNum: 0, V: FNegSrc);
1066 }
1067
1068 Value *FAbsSrc;
1069 if (match(V: Src0, P: m_FAbs(Op0: m_Value(V&: FAbsSrc)))) {
1070 II.setArgOperand(i: 1, v: ConstantInt::get(Ty: Src1->getType(), V: inverse_fabs(Mask)));
1071 return replaceOperand(I&: II, OpNum: 0, V: FAbsSrc);
1072 }
1073
1074 if ((OrderedMask == fcInf || OrderedInvertedMask == fcInf) &&
1075 (IsOrdered || IsUnordered) && !IsStrict) {
1076 // is.fpclass(x, fcInf) -> fcmp oeq fabs(x), +inf
1077 // is.fpclass(x, ~fcInf) -> fcmp one fabs(x), +inf
1078 // is.fpclass(x, fcInf|fcNan) -> fcmp ueq fabs(x), +inf
1079 // is.fpclass(x, ~(fcInf|fcNan)) -> fcmp une fabs(x), +inf
1080 Constant *Inf = ConstantFP::getInfinity(Ty: Src0->getType());
1081 FCmpInst::Predicate Pred =
1082 IsUnordered ? FCmpInst::FCMP_UEQ : FCmpInst::FCMP_OEQ;
1083 if (OrderedInvertedMask == fcInf)
1084 Pred = IsUnordered ? FCmpInst::FCMP_UNE : FCmpInst::FCMP_ONE;
1085
1086 Value *Fabs = Builder.CreateUnaryIntrinsic(ID: Intrinsic::fabs, V: Src0);
1087 Value *CmpInf = Builder.CreateFCmp(P: Pred, LHS: Fabs, RHS: Inf);
1088 CmpInf->takeName(V: &II);
1089 return replaceInstUsesWith(I&: II, V: CmpInf);
1090 }
1091
1092 if ((OrderedMask == fcPosInf || OrderedMask == fcNegInf) &&
1093 (IsOrdered || IsUnordered) && !IsStrict) {
1094 // is.fpclass(x, fcPosInf) -> fcmp oeq x, +inf
1095 // is.fpclass(x, fcNegInf) -> fcmp oeq x, -inf
1096 // is.fpclass(x, fcPosInf|fcNan) -> fcmp ueq x, +inf
1097 // is.fpclass(x, fcNegInf|fcNan) -> fcmp ueq x, -inf
1098 Constant *Inf =
1099 ConstantFP::getInfinity(Ty: Src0->getType(), Negative: OrderedMask == fcNegInf);
1100 Value *EqInf = IsUnordered ? Builder.CreateFCmpUEQ(LHS: Src0, RHS: Inf)
1101 : Builder.CreateFCmpOEQ(LHS: Src0, RHS: Inf);
1102
1103 EqInf->takeName(V: &II);
1104 return replaceInstUsesWith(I&: II, V: EqInf);
1105 }
1106
1107 if ((OrderedInvertedMask == fcPosInf || OrderedInvertedMask == fcNegInf) &&
1108 (IsOrdered || IsUnordered) && !IsStrict) {
1109 // is.fpclass(x, ~fcPosInf) -> fcmp one x, +inf
1110 // is.fpclass(x, ~fcNegInf) -> fcmp one x, -inf
1111 // is.fpclass(x, ~fcPosInf|fcNan) -> fcmp une x, +inf
1112 // is.fpclass(x, ~fcNegInf|fcNan) -> fcmp une x, -inf
1113 Constant *Inf = ConstantFP::getInfinity(Ty: Src0->getType(),
1114 Negative: OrderedInvertedMask == fcNegInf);
1115 Value *NeInf = IsUnordered ? Builder.CreateFCmpUNE(LHS: Src0, RHS: Inf)
1116 : Builder.CreateFCmpONE(LHS: Src0, RHS: Inf);
1117 NeInf->takeName(V: &II);
1118 return replaceInstUsesWith(I&: II, V: NeInf);
1119 }
1120
1121 if (Mask == fcNan && !IsStrict) {
1122 // Equivalent of isnan. Replace with standard fcmp if we don't care about FP
1123 // exceptions.
1124 Value *IsNan =
1125 Builder.CreateFCmpUNO(LHS: Src0, RHS: ConstantFP::getZero(Ty: Src0->getType()));
1126 IsNan->takeName(V: &II);
1127 return replaceInstUsesWith(I&: II, V: IsNan);
1128 }
1129
1130 if (Mask == (~fcNan & fcAllFlags) && !IsStrict) {
1131 // Equivalent of !isnan. Replace with standard fcmp.
1132 Value *FCmp =
1133 Builder.CreateFCmpORD(LHS: Src0, RHS: ConstantFP::getZero(Ty: Src0->getType()));
1134 FCmp->takeName(V: &II);
1135 return replaceInstUsesWith(I&: II, V: FCmp);
1136 }
1137
1138 FCmpInst::Predicate PredType = FCmpInst::BAD_FCMP_PREDICATE;
1139
1140 // Try to replace with an fcmp with 0
1141 //
1142 // is.fpclass(x, fcZero) -> fcmp oeq x, 0.0
1143 // is.fpclass(x, fcZero | fcNan) -> fcmp ueq x, 0.0
1144 // is.fpclass(x, ~fcZero & ~fcNan) -> fcmp one x, 0.0
1145 // is.fpclass(x, ~fcZero) -> fcmp une x, 0.0
1146 //
1147 // is.fpclass(x, fcPosSubnormal | fcPosNormal | fcPosInf) -> fcmp ogt x, 0.0
1148 // is.fpclass(x, fcPositive | fcNegZero) -> fcmp oge x, 0.0
1149 //
1150 // is.fpclass(x, fcNegSubnormal | fcNegNormal | fcNegInf) -> fcmp olt x, 0.0
1151 // is.fpclass(x, fcNegative | fcPosZero) -> fcmp ole x, 0.0
1152 //
1153 if (!IsStrict && (IsOrdered || IsUnordered) &&
1154 (PredType = fpclassTestIsFCmp0(Mask: OrderedMask, F: *II.getFunction(),
1155 Ty: Src0->getType())) !=
1156 FCmpInst::BAD_FCMP_PREDICATE) {
1157 Constant *Zero = ConstantFP::getZero(Ty: Src0->getType());
1158 // Equivalent of == 0.
1159 Value *FCmp = Builder.CreateFCmp(
1160 P: IsUnordered ? FCmpInst::getUnorderedPredicate(Pred: PredType) : PredType,
1161 LHS: Src0, RHS: Zero);
1162
1163 FCmp->takeName(V: &II);
1164 return replaceInstUsesWith(I&: II, V: FCmp);
1165 }
1166
1167 KnownFPClass Known = computeKnownFPClass(Val: Src0, Interested: Mask, CtxI: &II);
1168
1169 // Clear test bits we know must be false from the source value.
1170 // fp_class (nnan x), qnan|snan|other -> fp_class (nnan x), other
1171 // fp_class (ninf x), ninf|pinf|other -> fp_class (ninf x), other
1172 if ((Mask & Known.KnownFPClasses) != Mask) {
1173 II.setArgOperand(
1174 i: 1, v: ConstantInt::get(Ty: Src1->getType(), V: Mask & Known.KnownFPClasses));
1175 return &II;
1176 }
1177
1178 // If none of the tests which can return false are possible, fold to true.
1179 // fp_class (nnan x), ~(qnan|snan) -> true
1180 // fp_class (ninf x), ~(ninf|pinf) -> true
1181 if (Mask == Known.KnownFPClasses)
1182 return replaceInstUsesWith(I&: II, V: ConstantInt::get(Ty: II.getType(), V: true));
1183
1184 return nullptr;
1185}
1186
1187static std::optional<bool> getKnownSign(Value *Op, const SimplifyQuery &SQ) {
1188 KnownBits Known = computeKnownBits(V: Op, Q: SQ);
1189 if (Known.isNonNegative())
1190 return false;
1191 if (Known.isNegative())
1192 return true;
1193
1194 Value *X, *Y;
1195 if (match(V: Op, P: m_NSWSub(L: m_Value(V&: X), R: m_Value(V&: Y))))
1196 return isImpliedByDomCondition(Pred: ICmpInst::ICMP_SLT, LHS: X, RHS: Y, ContextI: SQ.CxtI, DL: SQ.DL);
1197
1198 return std::nullopt;
1199}
1200
1201static std::optional<bool> getKnownSignOrZero(Value *Op,
1202 const SimplifyQuery &SQ) {
1203 if (std::optional<bool> Sign = getKnownSign(Op, SQ))
1204 return Sign;
1205
1206 Value *X, *Y;
1207 if (match(V: Op, P: m_NSWSub(L: m_Value(V&: X), R: m_Value(V&: Y))))
1208 return isImpliedByDomCondition(Pred: ICmpInst::ICMP_SLE, LHS: X, RHS: Y, ContextI: SQ.CxtI, DL: SQ.DL);
1209
1210 return std::nullopt;
1211}
1212
1213/// Return true if two values \p Op0 and \p Op1 are known to have the same sign.
1214static bool signBitMustBeTheSame(Value *Op0, Value *Op1,
1215 const SimplifyQuery &SQ) {
1216 std::optional<bool> Known1 = getKnownSign(Op: Op1, SQ);
1217 if (!Known1)
1218 return false;
1219 std::optional<bool> Known0 = getKnownSign(Op: Op0, SQ);
1220 if (!Known0)
1221 return false;
1222 return *Known0 == *Known1;
1223}
1224
1225/// Try to canonicalize min/max(X + C0, C1) as min/max(X, C1 - C0) + C0. This
1226/// can trigger other combines.
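/// For example (illustrative):
///   smax(add nsw %x, 10), 13 --> add nsw (smax %x, 3), 10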
1227static Instruction *moveAddAfterMinMax(IntrinsicInst *II,
1228 InstCombiner::BuilderTy &Builder) {
1229 Intrinsic::ID MinMaxID = II->getIntrinsicID();
1230 assert((MinMaxID == Intrinsic::smax || MinMaxID == Intrinsic::smin ||
1231 MinMaxID == Intrinsic::umax || MinMaxID == Intrinsic::umin) &&
1232 "Expected a min or max intrinsic");
1233
1234 // TODO: Match vectors with undef elements, but undef may not propagate.
1235 Value *Op0 = II->getArgOperand(i: 0), *Op1 = II->getArgOperand(i: 1);
1236 Value *X;
1237 const APInt *C0, *C1;
1238 if (!match(V: Op0, P: m_OneUse(SubPattern: m_Add(L: m_Value(V&: X), R: m_APInt(Res&: C0)))) ||
1239 !match(V: Op1, P: m_APInt(Res&: C1)))
1240 return nullptr;
1241
1242 // Check for necessary no-wrap and overflow constraints.
1243 bool IsSigned = MinMaxID == Intrinsic::smax || MinMaxID == Intrinsic::smin;
1244 auto *Add = cast<BinaryOperator>(Val: Op0);
1245 if ((IsSigned && !Add->hasNoSignedWrap()) ||
1246 (!IsSigned && !Add->hasNoUnsignedWrap()))
1247 return nullptr;
1248
1249 // If the constant difference overflows, then instsimplify should reduce the
1250 // min/max to the add or C1.
1251 bool Overflow;
1252 APInt CDiff =
1253 IsSigned ? C1->ssub_ov(RHS: *C0, Overflow) : C1->usub_ov(RHS: *C0, Overflow);
1254 assert(!Overflow && "Expected simplify of min/max");
1255
1256 // min/max (add X, C0), C1 --> add (min/max X, C1 - C0), C0
1257 // Note: the "mismatched" no-overflow setting does not propagate.
1258 Constant *NewMinMaxC = ConstantInt::get(Ty: II->getType(), V: CDiff);
1259 Value *NewMinMax = Builder.CreateBinaryIntrinsic(ID: MinMaxID, LHS: X, RHS: NewMinMaxC);
1260 return IsSigned ? BinaryOperator::CreateNSWAdd(V1: NewMinMax, V2: Add->getOperand(i_nocapture: 1))
1261 : BinaryOperator::CreateNUWAdd(V1: NewMinMax, V2: Add->getOperand(i_nocapture: 1));
}

/// Match a sadd_sat or ssub_sat that uses min/max to clamp the value.
1264Instruction *InstCombinerImpl::matchSAddSubSat(IntrinsicInst &MinMax1) {
1265 Type *Ty = MinMax1.getType();
1266
1267 // We are looking for a tree of:
1268 // max(INT_MIN, min(INT_MAX, add(sext(A), sext(B))))
1269 // Where the min and max could be reversed
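  // For example (illustrative, for an i8 saturate):
  //   smax(smin(add(sext i8 %a to i32, sext i8 %b to i32), 127), -128)
  // can become
  //   sext(sadd.sat(i8 %a, i8 %b)) to i32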
1270 Instruction *MinMax2;
1271 BinaryOperator *AddSub;
1272 const APInt *MinValue, *MaxValue;
1273 if (match(V: &MinMax1, P: m_SMin(L: m_Instruction(I&: MinMax2), R: m_APInt(Res&: MaxValue)))) {
1274 if (!match(V: MinMax2, P: m_SMax(L: m_BinOp(I&: AddSub), R: m_APInt(Res&: MinValue))))
1275 return nullptr;
1276 } else if (match(V: &MinMax1,
1277 P: m_SMax(L: m_Instruction(I&: MinMax2), R: m_APInt(Res&: MinValue)))) {
1278 if (!match(V: MinMax2, P: m_SMin(L: m_BinOp(I&: AddSub), R: m_APInt(Res&: MaxValue))))
1279 return nullptr;
1280 } else
1281 return nullptr;
1282
  // Check that the constants form a saturating clamp, and that the new type
  // would be a sensible one to convert to.
1285 if (!(*MaxValue + 1).isPowerOf2() || -*MinValue != *MaxValue + 1)
1286 return nullptr;
  // In what bit width can this be treated as saturating arithmetic?
1288 unsigned NewBitWidth = (*MaxValue + 1).logBase2() + 1;
1289 // FIXME: This isn't quite right for vectors, but using the scalar type is a
1290 // good first approximation for what should be done there.
1291 if (!shouldChangeType(FromBitWidth: Ty->getScalarType()->getIntegerBitWidth(), ToBitWidth: NewBitWidth))
1292 return nullptr;
1293
1294 // Also make sure that the inner min/max and the add/sub have one use.
1295 if (!MinMax2->hasOneUse() || !AddSub->hasOneUse())
1296 return nullptr;
1297
1298 // Create the new type (which can be a vector type)
1299 Type *NewTy = Ty->getWithNewBitWidth(NewBitWidth);
1300
1301 Intrinsic::ID IntrinsicID;
1302 if (AddSub->getOpcode() == Instruction::Add)
1303 IntrinsicID = Intrinsic::sadd_sat;
1304 else if (AddSub->getOpcode() == Instruction::Sub)
1305 IntrinsicID = Intrinsic::ssub_sat;
1306 else
1307 return nullptr;
1308
1309 // The two operands of the add/sub must be nsw-truncatable to the NewTy. This
1310 // is usually achieved via a sext from a smaller type.
1311 if (ComputeMaxSignificantBits(Op: AddSub->getOperand(i_nocapture: 0), CxtI: AddSub) > NewBitWidth ||
1312 ComputeMaxSignificantBits(Op: AddSub->getOperand(i_nocapture: 1), CxtI: AddSub) > NewBitWidth)
1313 return nullptr;
1314
  // Finally, create the sat intrinsic on the truncated operands and
  // sign-extend the result back to the original type.
1316 Value *AT = Builder.CreateTrunc(V: AddSub->getOperand(i_nocapture: 0), DestTy: NewTy);
1317 Value *BT = Builder.CreateTrunc(V: AddSub->getOperand(i_nocapture: 1), DestTy: NewTy);
1318 Value *Sat = Builder.CreateIntrinsic(ID: IntrinsicID, Types: NewTy, Args: {AT, BT});
1319 return CastInst::Create(Instruction::SExt, S: Sat, Ty);
1320}
1321
1322
1323/// If we have a clamp pattern like max (min X, 42), 41 -- where the output
1324/// can only be one of two possible constant values -- turn that into a select
1325/// of constants.
1326static Instruction *foldClampRangeOfTwo(IntrinsicInst *II,
1327 InstCombiner::BuilderTy &Builder) {
1328 Value *I0 = II->getArgOperand(i: 0), *I1 = II->getArgOperand(i: 1);
1329 Value *X;
1330 const APInt *C0, *C1;
1331 if (!match(V: I1, P: m_APInt(Res&: C1)) || !I0->hasOneUse())
1332 return nullptr;
1333
1334 CmpInst::Predicate Pred = CmpInst::BAD_ICMP_PREDICATE;
1335 switch (II->getIntrinsicID()) {
1336 case Intrinsic::smax:
1337 if (match(V: I0, P: m_SMin(L: m_Value(V&: X), R: m_APInt(Res&: C0))) && *C0 == *C1 + 1)
1338 Pred = ICmpInst::ICMP_SGT;
1339 break;
1340 case Intrinsic::smin:
1341 if (match(V: I0, P: m_SMax(L: m_Value(V&: X), R: m_APInt(Res&: C0))) && *C1 == *C0 + 1)
1342 Pred = ICmpInst::ICMP_SLT;
1343 break;
1344 case Intrinsic::umax:
1345 if (match(V: I0, P: m_UMin(L: m_Value(V&: X), R: m_APInt(Res&: C0))) && *C0 == *C1 + 1)
1346 Pred = ICmpInst::ICMP_UGT;
1347 break;
1348 case Intrinsic::umin:
1349 if (match(V: I0, P: m_UMax(L: m_Value(V&: X), R: m_APInt(Res&: C0))) && *C1 == *C0 + 1)
1350 Pred = ICmpInst::ICMP_ULT;
1351 break;
1352 default:
1353 llvm_unreachable("Expected min/max intrinsic");
1354 }
1355 if (Pred == CmpInst::BAD_ICMP_PREDICATE)
1356 return nullptr;
1357
1358 // max (min X, 42), 41 --> X > 41 ? 42 : 41
1359 // min (max X, 42), 43 --> X < 43 ? 42 : 43
1360 Value *Cmp = Builder.CreateICmp(P: Pred, LHS: X, RHS: I1);
1361 return SelectInst::Create(C: Cmp, S1: ConstantInt::get(Ty: II->getType(), V: *C0), S2: I1);
1362}
1363
1364/// If this min/max has a constant operand and an operand that is a matching
1365/// min/max with a constant operand, constant-fold the 2 constant operands.
1366static Value *reassociateMinMaxWithConstants(IntrinsicInst *II,
1367 IRBuilderBase &Builder,
1368 const SimplifyQuery &SQ) {
1369 Intrinsic::ID MinMaxID = II->getIntrinsicID();
1370 auto *LHS = dyn_cast<MinMaxIntrinsic>(Val: II->getArgOperand(i: 0));
1371 if (!LHS)
1372 return nullptr;
1373
1374 Constant *C0, *C1;
1375 if (!match(V: LHS->getArgOperand(i: 1), P: m_ImmConstant(C&: C0)) ||
1376 !match(V: II->getArgOperand(i: 1), P: m_ImmConstant(C&: C1)))
1377 return nullptr;
1378
1379 // max (max X, C0), C1 --> max X, (max C0, C1)
1380 // min (min X, C0), C1 --> min X, (min C0, C1)
1381 // umax (smax X, nneg C0), nneg C1 --> smax X, (umax C0, C1)
1382 // smin (umin X, nneg C0), nneg C1 --> umin X, (smin C0, C1)
1383 Intrinsic::ID InnerMinMaxID = LHS->getIntrinsicID();
1384 if (InnerMinMaxID != MinMaxID &&
1385 !(((MinMaxID == Intrinsic::umax && InnerMinMaxID == Intrinsic::smax) ||
1386 (MinMaxID == Intrinsic::smin && InnerMinMaxID == Intrinsic::umin)) &&
1387 isKnownNonNegative(V: C0, SQ) && isKnownNonNegative(V: C1, SQ)))
1388 return nullptr;
1389
1390 ICmpInst::Predicate Pred = MinMaxIntrinsic::getPredicate(ID: MinMaxID);
1391 Value *CondC = Builder.CreateICmp(P: Pred, LHS: C0, RHS: C1);
1392 Value *NewC = Builder.CreateSelect(C: CondC, True: C0, False: C1);
1393 return Builder.CreateIntrinsic(ID: InnerMinMaxID, Types: II->getType(),
1394 Args: {LHS->getArgOperand(i: 0), NewC});
1395}
1396
1397/// If this min/max has a matching min/max operand with a constant, try to push
1398/// the constant operand into this instruction. This can enable more folds.
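/// e.g. umax(umax(X, 42), Y) --> umax(umax(X, Y), 42), so a constant operand
/// ends up on the outer call where other constant-based min/max folds can see
/// it.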
1399static Instruction *
1400reassociateMinMaxWithConstantInOperand(IntrinsicInst *II,
1401 InstCombiner::BuilderTy &Builder) {
1402 // Match and capture a min/max operand candidate.
1403 Value *X, *Y;
1404 Constant *C;
1405 Instruction *Inner;
1406 if (!match(V: II, P: m_c_MaxOrMin(L: m_OneUse(SubPattern: m_CombineAnd(
1407 L: m_Instruction(I&: Inner),
1408 R: m_MaxOrMin(L: m_Value(V&: X), R: m_ImmConstant(C)))),
1409 R: m_Value(V&: Y))))
1410 return nullptr;
1411
1412 // The inner op must match. Check for constants to avoid infinite loops.
1413 Intrinsic::ID MinMaxID = II->getIntrinsicID();
1414 auto *InnerMM = dyn_cast<IntrinsicInst>(Val: Inner);
1415 if (!InnerMM || InnerMM->getIntrinsicID() != MinMaxID ||
1416 match(V: X, P: m_ImmConstant()) || match(V: Y, P: m_ImmConstant()))
1417 return nullptr;
1418
1419 // max (max X, C), Y --> max (max X, Y), C
1420 Function *MinMax = Intrinsic::getOrInsertDeclaration(M: II->getModule(),
1421 id: MinMaxID, Tys: II->getType());
1422 Value *NewInner = Builder.CreateBinaryIntrinsic(ID: MinMaxID, LHS: X, RHS: Y);
1423 NewInner->takeName(V: Inner);
1424 return CallInst::Create(Func: MinMax, Args: {NewInner, C});
1425}
1426
1427/// Reduce a sequence of min/max intrinsics with a common operand.
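/// e.g. umin(umin(A, B), umin(C, A)) --> umin(umin(C, A), B) when the inner
/// umin(A, B) has no other users.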
1428static Instruction *factorizeMinMaxTree(IntrinsicInst *II) {
1429 // Match 3 of the same min/max ops. Example: umin(umin(), umin()).
1430 auto *LHS = dyn_cast<IntrinsicInst>(Val: II->getArgOperand(i: 0));
1431 auto *RHS = dyn_cast<IntrinsicInst>(Val: II->getArgOperand(i: 1));
1432 Intrinsic::ID MinMaxID = II->getIntrinsicID();
1433 if (!LHS || !RHS || LHS->getIntrinsicID() != MinMaxID ||
1434 RHS->getIntrinsicID() != MinMaxID ||
1435 (!LHS->hasOneUse() && !RHS->hasOneUse()))
1436 return nullptr;
1437
1438 Value *A = LHS->getArgOperand(i: 0);
1439 Value *B = LHS->getArgOperand(i: 1);
1440 Value *C = RHS->getArgOperand(i: 0);
1441 Value *D = RHS->getArgOperand(i: 1);
1442
1443 // Look for a common operand.
1444 Value *MinMaxOp = nullptr;
1445 Value *ThirdOp = nullptr;
1446 if (LHS->hasOneUse()) {
1447 // If the LHS is only used in this chain and the RHS is used outside of it,
1448 // reuse the RHS min/max because that will eliminate the LHS.
1449 if (D == A || C == A) {
1450 // min(min(a, b), min(c, a)) --> min(min(c, a), b)
1451 // min(min(a, b), min(a, d)) --> min(min(a, d), b)
1452 MinMaxOp = RHS;
1453 ThirdOp = B;
1454 } else if (D == B || C == B) {
1455 // min(min(a, b), min(c, b)) --> min(min(c, b), a)
1456 // min(min(a, b), min(b, d)) --> min(min(b, d), a)
1457 MinMaxOp = RHS;
1458 ThirdOp = A;
1459 }
1460 } else {
1461 assert(RHS->hasOneUse() && "Expected one-use operand");
1462 // Reuse the LHS. This will eliminate the RHS.
1463 if (D == A || D == B) {
1464 // min(min(a, b), min(c, a)) --> min(min(a, b), c)
1465 // min(min(a, b), min(c, b)) --> min(min(a, b), c)
1466 MinMaxOp = LHS;
1467 ThirdOp = C;
1468 } else if (C == A || C == B) {
1469      // min(min(a, b), min(a, d)) --> min(min(a, b), d)
1470      // min(min(a, b), min(b, d)) --> min(min(a, b), d)
1471 MinMaxOp = LHS;
1472 ThirdOp = D;
1473 }
1474 }
1475
1476 if (!MinMaxOp || !ThirdOp)
1477 return nullptr;
1478
1479 Module *Mod = II->getModule();
1480 Function *MinMax =
1481 Intrinsic::getOrInsertDeclaration(M: Mod, id: MinMaxID, Tys: II->getType());
1482 return CallInst::Create(Func: MinMax, Args: { MinMaxOp, ThirdOp });
1483}
1484
1485/// If all arguments of the intrinsic are unary shuffles with the same mask,
1486/// try to shuffle after the intrinsic.
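/// Illustrative example with a common mask M:
///   smax (shufflevector X, poison, M), (shufflevector Y, poison, M)
///     --> shufflevector (smax X, Y), poison, M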
1487Instruction *
1488InstCombinerImpl::foldShuffledIntrinsicOperands(IntrinsicInst *II) {
1489 if (!II->getType()->isVectorTy() ||
1490 !isTriviallyVectorizable(ID: II->getIntrinsicID()) ||
1491 !II->getCalledFunction()->isSpeculatable())
1492 return nullptr;
1493
1494 Value *X;
1495 Constant *C;
1496 ArrayRef<int> Mask;
1497 auto *NonConstArg = find_if_not(Range: II->args(), P: [&II](Use &Arg) {
1498 return isa<Constant>(Val: Arg.get()) ||
1499 isVectorIntrinsicWithScalarOpAtArg(ID: II->getIntrinsicID(),
1500 ScalarOpdIdx: Arg.getOperandNo(), TTI: nullptr);
1501 });
1502 if (!NonConstArg ||
1503 !match(V: NonConstArg, P: m_Shuffle(v1: m_Value(V&: X), v2: m_Poison(), mask: m_Mask(Mask))))
1504 return nullptr;
1505
1506 // At least 1 operand must be a shuffle with 1 use because we are creating 2
1507 // instructions.
1508 if (none_of(Range: II->args(), P: match_fn(P: m_OneUse(SubPattern: m_Shuffle(v1: m_Value(), v2: m_Value())))))
1509 return nullptr;
1510
1511 // See if all arguments are shuffled with the same mask.
1512 SmallVector<Value *, 4> NewArgs;
1513 Type *SrcTy = X->getType();
1514 for (Use &Arg : II->args()) {
1515 if (isVectorIntrinsicWithScalarOpAtArg(ID: II->getIntrinsicID(),
1516 ScalarOpdIdx: Arg.getOperandNo(), TTI: nullptr))
1517 NewArgs.push_back(Elt: Arg);
1518 else if (match(V: &Arg,
1519 P: m_Shuffle(v1: m_Value(V&: X), v2: m_Poison(), mask: m_SpecificMask(Mask))) &&
1520 X->getType() == SrcTy)
1521 NewArgs.push_back(Elt: X);
1522 else if (match(V: &Arg, P: m_ImmConstant(C))) {
1523      // If it's a constant, try to find the constant that would be shuffled to C.
1524 if (Constant *ShuffledC =
1525 unshuffleConstant(ShMask: Mask, C, NewCTy: cast<VectorType>(Val: SrcTy)))
1526 NewArgs.push_back(Elt: ShuffledC);
1527 else
1528 return nullptr;
1529 } else
1530 return nullptr;
1531 }
1532
1533 // intrinsic (shuf X, M), (shuf Y, M), ... --> shuf (intrinsic X, Y, ...), M
1534 Instruction *FPI = isa<FPMathOperator>(Val: II) ? II : nullptr;
1535 // Result type might be a different vector width.
1536 // TODO: Check that the result type isn't widened?
1537 VectorType *ResTy =
1538 VectorType::get(ElementType: II->getType()->getScalarType(), Other: cast<VectorType>(Val: SrcTy));
1539 Value *NewIntrinsic =
1540 Builder.CreateIntrinsic(RetTy: ResTy, ID: II->getIntrinsicID(), Args: NewArgs, FMFSource: FPI);
1541 return new ShuffleVectorInst(NewIntrinsic, Mask);
1542}
1543
1544/// If all arguments of the intrinsic are reverses, try to pull the reverse
1545/// after the intrinsic.
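/// e.g. umax (vector.reverse X), (vector.reverse Y)
///        --> vector.reverse (umax X, Y)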
1546Value *InstCombinerImpl::foldReversedIntrinsicOperands(IntrinsicInst *II) {
1547 if (!isTriviallyVectorizable(ID: II->getIntrinsicID()))
1548 return nullptr;
1549
1550 // At least 1 operand must be a reverse with 1 use because we are creating 2
1551 // instructions.
1552 if (none_of(Range: II->args(), P: [](Value *V) {
1553 return match(V, P: m_OneUse(SubPattern: m_VecReverse(Op0: m_Value())));
1554 }))
1555 return nullptr;
1556
1557 Value *X;
1558 Constant *C;
1559 SmallVector<Value *> NewArgs;
1560 for (Use &Arg : II->args()) {
1561 if (isVectorIntrinsicWithScalarOpAtArg(ID: II->getIntrinsicID(),
1562 ScalarOpdIdx: Arg.getOperandNo(), TTI: nullptr))
1563 NewArgs.push_back(Elt: Arg);
1564 else if (match(V: &Arg, P: m_VecReverse(Op0: m_Value(V&: X))))
1565 NewArgs.push_back(Elt: X);
1566 else if (isSplatValue(V: Arg))
1567 NewArgs.push_back(Elt: Arg);
1568 else if (match(V: &Arg, P: m_ImmConstant(C)))
1569 NewArgs.push_back(Elt: Builder.CreateVectorReverse(V: C));
1570 else
1571 return nullptr;
1572 }
1573
1574 // intrinsic (reverse X), (reverse Y), ... --> reverse (intrinsic X, Y, ...)
1575 Instruction *FPI = isa<FPMathOperator>(Val: II) ? II : nullptr;
1576 Instruction *NewIntrinsic = Builder.CreateIntrinsic(
1577 RetTy: II->getType(), ID: II->getIntrinsicID(), Args: NewArgs, FMFSource: FPI);
1578 return Builder.CreateVectorReverse(V: NewIntrinsic);
1579}
1580
1581/// Fold the following cases for both bswap and bitreverse intrinsics:
1582/// bswap(logic_op(bswap(x), y)) --> logic_op(x, bswap(y))
1583/// bswap(logic_op(bswap(x), bswap(y))) --> logic_op(x, y) (ignores multiuse)
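/// e.g. with IntrID == bswap and logic_op == xor (one reordered operand):
///   bswap(xor(bswap(x), y)) --> xor(x, bswap(y))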
1584template <Intrinsic::ID IntrID>
1585static Instruction *foldBitOrderCrossLogicOp(Value *V,
1586 InstCombiner::BuilderTy &Builder) {
1587 static_assert(IntrID == Intrinsic::bswap || IntrID == Intrinsic::bitreverse,
1588 "This helper only supports BSWAP and BITREVERSE intrinsics");
1589
1590 Value *X, *Y;
1591 // Find bitwise logic op. Check that it is a BinaryOperator explicitly so we
1592  // don't match ConstantExprs that aren't meaningful for this transform.
1593 if (match(V, P: m_OneUse(SubPattern: m_BitwiseLogic(L: m_Value(V&: X), R: m_Value(V&: Y)))) &&
1594 isa<BinaryOperator>(Val: V)) {
1595 Value *OldReorderX, *OldReorderY;
1596 BinaryOperator::BinaryOps Op = cast<BinaryOperator>(Val: V)->getOpcode();
1597
1598 // If both X and Y are bswap/bitreverse, the transform reduces the number
1599 // of instructions even if there's multiuse.
1600 // If only one operand is bswap/bitreverse, we need to ensure the operand
1601    // has only one use.
1602 if (match(X, m_Intrinsic<IntrID>(m_Value(V&: OldReorderX))) &&
1603 match(Y, m_Intrinsic<IntrID>(m_Value(V&: OldReorderY)))) {
1604 return BinaryOperator::Create(Op, S1: OldReorderX, S2: OldReorderY);
1605 }
1606
1607 if (match(X, m_OneUse(m_Intrinsic<IntrID>(m_Value(V&: OldReorderX))))) {
1608 Value *NewReorder = Builder.CreateUnaryIntrinsic(ID: IntrID, V: Y);
1609 return BinaryOperator::Create(Op, S1: OldReorderX, S2: NewReorder);
1610 }
1611
1612 if (match(Y, m_OneUse(m_Intrinsic<IntrID>(m_Value(V&: OldReorderY))))) {
1613 Value *NewReorder = Builder.CreateUnaryIntrinsic(ID: IntrID, V: X);
1614 return BinaryOperator::Create(Op, S1: NewReorder, S2: OldReorderY);
1615 }
1616 }
1617 return nullptr;
1618}
1619
1620/// Helper to match idempotent binary intrinsics, namely, intrinsics where
1621/// `f(f(x, y), y) == f(x, y)` holds.
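/// e.g. umax(umax(x, y), y) == umax(x, y), and likewise for the other integer
/// and FP min/max intrinsics listed below.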
1622static bool isIdempotentBinaryIntrinsic(Intrinsic::ID IID) {
1623 switch (IID) {
1624 case Intrinsic::smax:
1625 case Intrinsic::smin:
1626 case Intrinsic::umax:
1627 case Intrinsic::umin:
1628 case Intrinsic::maximum:
1629 case Intrinsic::minimum:
1630 case Intrinsic::maximumnum:
1631 case Intrinsic::minimumnum:
1632 case Intrinsic::maxnum:
1633 case Intrinsic::minnum:
1634 return true;
1635 default:
1636 return false;
1637 }
1638}
1639
1640/// Attempt to simplify value-accumulating recurrences of the kind:
1641/// %umax.acc = phi i8 [ %umax, %backedge ], [ %a, %entry ]
1642/// %umax = call i8 @llvm.umax.i8(i8 %umax.acc, i8 %b)
1643/// And let the idempotent binary intrinsic be hoisted, when the operands are
1644/// known to be loop-invariant.
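/// In the example above the recurrence reduces to a single loop-invariant
///   %umax = call i8 @llvm.umax.i8(i8 %a, i8 %b)
/// which no longer depends on the phi and can then be hoisted out of the loop.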
1645static Value *foldIdempotentBinaryIntrinsicRecurrence(InstCombinerImpl &IC,
1646 IntrinsicInst *II) {
1647 PHINode *PN;
1648 Value *Init, *OtherOp;
1649
1650 // A binary intrinsic recurrence with loop-invariant operands is equivalent to
1651 // `call @llvm.binary.intrinsic(Init, OtherOp)`.
1652 auto IID = II->getIntrinsicID();
1653 if (!isIdempotentBinaryIntrinsic(IID) ||
1654 !matchSimpleBinaryIntrinsicRecurrence(I: II, P&: PN, Init, OtherOp) ||
1655 !IC.getDominatorTree().dominates(Def: OtherOp, User: PN))
1656 return nullptr;
1657
1658 auto *InvariantBinaryInst =
1659 IC.Builder.CreateBinaryIntrinsic(ID: IID, LHS: Init, RHS: OtherOp);
1660 if (isa<FPMathOperator>(Val: InvariantBinaryInst))
1661 cast<Instruction>(Val: InvariantBinaryInst)->copyFastMathFlags(I: II);
1662 return InvariantBinaryInst;
1663}
1664
1665static Value *simplifyReductionOperand(Value *Arg, bool CanReorderLanes) {
1666 if (!CanReorderLanes)
1667 return nullptr;
1668
1669 Value *V;
1670 if (match(V: Arg, P: m_VecReverse(Op0: m_Value(V))))
1671 return V;
1672
1673 ArrayRef<int> Mask;
1674 if (!isa<FixedVectorType>(Val: Arg->getType()) ||
1675 !match(V: Arg, P: m_Shuffle(v1: m_Value(V), v2: m_Undef(), mask: m_Mask(Mask))) ||
1676 !cast<ShuffleVectorInst>(Val: Arg)->isSingleSource())
1677 return nullptr;
1678
1679 int Sz = Mask.size();
1680 SmallBitVector UsedIndices(Sz);
1681 for (int Idx : Mask) {
1682 if (Idx == PoisonMaskElem || UsedIndices.test(Idx))
1683 return nullptr;
1684 UsedIndices.set(Idx);
1685 }
1686
1687 // Can remove shuffle iff just shuffled elements, no repeats, undefs, or
1688 // other changes.
1689 return UsedIndices.all() ? V : nullptr;
1690}
1691
1692/// Fold an unsigned minimum of trailing or leading zero bits counts:
1693/// umin(cttz(CtOp1, ZeroUndef), ConstOp) --> cttz(CtOp1 | (1 << ConstOp))
1694/// umin(ctlz(CtOp1, ZeroUndef), ConstOp) --> ctlz(CtOp1 | (SignedMin
1695/// >> ConstOp))
1696/// umin(cttz(CtOp1), cttz(CtOp2)) --> cttz(CtOp1 | CtOp2)
1697/// umin(ctlz(CtOp1), ctlz(CtOp2)) --> ctlz(CtOp1 | CtOp2)
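/// e.g. for i32: umin(cttz(X, false), 8) --> cttz(X | 256, true), since
/// setting bit 8 of X caps the trailing-zero count at 8.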
1698template <Intrinsic::ID IntrID>
1699static Value *
1700foldMinimumOverTrailingOrLeadingZeroCount(Value *I0, Value *I1,
1701 const DataLayout &DL,
1702 InstCombiner::BuilderTy &Builder) {
1703 static_assert(IntrID == Intrinsic::cttz || IntrID == Intrinsic::ctlz,
1704 "This helper only supports cttz and ctlz intrinsics");
1705
1706 Value *CtOp1, *CtOp2;
1707 Value *ZeroUndef1, *ZeroUndef2;
1708 if (!match(I0, m_OneUse(
1709 m_Intrinsic<IntrID>(m_Value(V&: CtOp1), m_Value(V&: ZeroUndef1)))))
1710 return nullptr;
1711
1712 if (match(I1,
1713 m_OneUse(m_Intrinsic<IntrID>(m_Value(V&: CtOp2), m_Value(V&: ZeroUndef2)))))
1714 return Builder.CreateBinaryIntrinsic(
1715 ID: IntrID, LHS: Builder.CreateOr(LHS: CtOp1, RHS: CtOp2),
1716 RHS: Builder.CreateOr(LHS: ZeroUndef1, RHS: ZeroUndef2));
1717
1718 unsigned BitWidth = I1->getType()->getScalarSizeInBits();
1719 auto LessBitWidth = [BitWidth](auto &C) { return C.ult(BitWidth); };
1720 if (!match(I1, m_CheckedInt(LessBitWidth)))
1721 // We have a constant >= BitWidth (which can be handled by CVP)
1722 // or a non-splat vector with elements < and >= BitWidth
1723 return nullptr;
1724
1725 Type *Ty = I1->getType();
1726 Constant *NewConst = ConstantFoldBinaryOpOperands(
1727 Opcode: IntrID == Intrinsic::cttz ? Instruction::Shl : Instruction::LShr,
1728 LHS: IntrID == Intrinsic::cttz
1729 ? ConstantInt::get(Ty, V: 1)
1730 : ConstantInt::get(Ty, V: APInt::getSignedMinValue(numBits: BitWidth)),
1731 RHS: cast<Constant>(Val: I1), DL);
1732 return Builder.CreateBinaryIntrinsic(
1733 ID: IntrID, LHS: Builder.CreateOr(LHS: CtOp1, RHS: NewConst),
1734 RHS: ConstantInt::getTrue(Ty: ZeroUndef1->getType()));
1735}
1736
1737/// Return whether "X LOp (Y ROp Z)" is always equal to
1738/// "(X LOp Y) ROp (X LOp Z)".
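/// e.g. with LOp == add nuw and ROp == umax:
///   X + umax(Y, Z) == umax(X + Y, X + Z)
/// because an nuw add preserves the unsigned ordering of its operands.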
1739static bool leftDistributesOverRight(Instruction::BinaryOps LOp, bool HasNUW,
1740 bool HasNSW, Intrinsic::ID ROp) {
1741 switch (ROp) {
1742 case Intrinsic::umax:
1743 case Intrinsic::umin:
1744 if (HasNUW && LOp == Instruction::Add)
1745 return true;
1746 if (HasNUW && LOp == Instruction::Shl)
1747 return true;
1748 return false;
1749 case Intrinsic::smax:
1750 case Intrinsic::smin:
1751 return HasNSW && LOp == Instruction::Add;
1752 default:
1753 return false;
1754 }
1755}
1756
1757// Attempts to factorise a common term
1758// in an instruction that has the form "(A op' B) op (C op' D)",
1759// where op is an intrinsic and op' is a binop.
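// e.g. umax((A +nuw B), (A +nuw D)) --> (A +nuw umax(B, D))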
1760static Value *
1761foldIntrinsicUsingDistributiveLaws(IntrinsicInst *II,
1762 InstCombiner::BuilderTy &Builder) {
1763 Value *LHS = II->getOperand(i_nocapture: 0), *RHS = II->getOperand(i_nocapture: 1);
1764 Intrinsic::ID TopLevelOpcode = II->getIntrinsicID();
1765
1766 OverflowingBinaryOperator *Op0 = dyn_cast<OverflowingBinaryOperator>(Val: LHS);
1767 OverflowingBinaryOperator *Op1 = dyn_cast<OverflowingBinaryOperator>(Val: RHS);
1768
1769 if (!Op0 || !Op1)
1770 return nullptr;
1771
1772 if (Op0->getOpcode() != Op1->getOpcode())
1773 return nullptr;
1774
1775 if (!Op0->hasOneUse() || !Op1->hasOneUse())
1776 return nullptr;
1777
1778 Instruction::BinaryOps InnerOpcode =
1779 static_cast<Instruction::BinaryOps>(Op0->getOpcode());
1780 bool HasNUW = Op0->hasNoUnsignedWrap() && Op1->hasNoUnsignedWrap();
1781 bool HasNSW = Op0->hasNoSignedWrap() && Op1->hasNoSignedWrap();
1782
1783 if (!leftDistributesOverRight(LOp: InnerOpcode, HasNUW, HasNSW, ROp: TopLevelOpcode))
1784 return nullptr;
1785
1786 Value *A = Op0->getOperand(i_nocapture: 0);
1787 Value *B = Op0->getOperand(i_nocapture: 1);
1788 Value *C = Op1->getOperand(i_nocapture: 0);
1789 Value *D = Op1->getOperand(i_nocapture: 1);
1790
1791 // Attempts to swap variables such that A equals C or B equals D,
1792 // if the inner operation is commutative.
1793 if (Op0->isCommutative() && A != C && B != D) {
1794 if (A == D || B == C)
1795 std::swap(a&: C, b&: D);
1796 else
1797 return nullptr;
1798 }
1799
1800 BinaryOperator *NewBinop;
1801 if (A == C) {
1802 Value *NewIntrinsic = Builder.CreateBinaryIntrinsic(ID: TopLevelOpcode, LHS: B, RHS: D);
1803 NewBinop =
1804 cast<BinaryOperator>(Val: Builder.CreateBinOp(Opc: InnerOpcode, LHS: A, RHS: NewIntrinsic));
1805 } else if (B == D) {
1806 Value *NewIntrinsic = Builder.CreateBinaryIntrinsic(ID: TopLevelOpcode, LHS: A, RHS: C);
1807 NewBinop =
1808 cast<BinaryOperator>(Val: Builder.CreateBinOp(Opc: InnerOpcode, LHS: NewIntrinsic, RHS: B));
1809 } else {
1810 return nullptr;
1811 }
1812
1813 NewBinop->setHasNoUnsignedWrap(HasNUW);
1814 NewBinop->setHasNoSignedWrap(HasNSW);
1815
1816 return NewBinop;
1817}
1818
1819static Instruction *foldNeonShift(IntrinsicInst *II, InstCombinerImpl &IC) {
1820 Value *Arg0 = II->getArgOperand(i: 0);
1821 auto *ShiftConst = dyn_cast<Constant>(Val: II->getArgOperand(i: 1));
1822 if (!ShiftConst)
1823 return nullptr;
1824
1825 int ElemBits = Arg0->getType()->getScalarSizeInBits();
1826 bool AllPositive = true;
1827 bool AllNegative = true;
1828
1829 auto Check = [&](Constant *C) -> bool {
1830 if (auto *CI = dyn_cast_or_null<ConstantInt>(Val: C)) {
1831 const APInt &V = CI->getValue();
1832 if (V.isNonNegative()) {
1833 AllNegative = false;
1834 return AllPositive && V.ult(RHS: ElemBits);
1835 }
1836 AllPositive = false;
1837 return AllNegative && V.sgt(RHS: -ElemBits);
1838 }
1839 return false;
1840 };
1841
1842 if (auto *VTy = dyn_cast<FixedVectorType>(Val: Arg0->getType())) {
1843 for (unsigned I = 0, E = VTy->getNumElements(); I < E; ++I) {
1844 if (!Check(ShiftConst->getAggregateElement(Elt: I)))
1845 return nullptr;
1846 }
1847
1848 } else if (!Check(ShiftConst))
1849 return nullptr;
1850
1851 IRBuilderBase &B = IC.Builder;
1852 if (AllPositive)
1853 return IC.replaceInstUsesWith(I&: *II, V: B.CreateShl(LHS: Arg0, RHS: ShiftConst));
1854
1855 Value *NegAmt = B.CreateNeg(V: ShiftConst);
1856 Intrinsic::ID IID = II->getIntrinsicID();
1857 const bool IsSigned =
1858 IID == Intrinsic::arm_neon_vshifts || IID == Intrinsic::aarch64_neon_sshl;
1859 Value *Result =
1860 IsSigned ? B.CreateAShr(LHS: Arg0, RHS: NegAmt) : B.CreateLShr(LHS: Arg0, RHS: NegAmt);
1861 return IC.replaceInstUsesWith(I&: *II, V: Result);
1862}
1863
1864/// CallInst simplification. This mostly only handles folding of intrinsic
1865/// instructions. For normal calls, it allows visitCallBase to do the heavy
1866/// lifting.
1867Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
1868 // Don't try to simplify calls without uses. It will not do anything useful,
1869 // but will result in the following folds being skipped.
1870 if (!CI.use_empty()) {
1871 SmallVector<Value *, 8> Args(CI.args());
1872 if (Value *V = simplifyCall(Call: &CI, Callee: CI.getCalledOperand(), Args,
1873 Q: SQ.getWithInstruction(I: &CI)))
1874 return replaceInstUsesWith(I&: CI, V);
1875 }
1876
1877 if (Value *FreedOp = getFreedOperand(CB: &CI, TLI: &TLI))
1878 return visitFree(FI&: CI, FreedOp);
1879
1880 // If the caller function (i.e. us, the function that contains this CallInst)
1881 // is nounwind, mark the call as nounwind, even if the callee isn't.
1882 if (CI.getFunction()->doesNotThrow() && !CI.doesNotThrow()) {
1883 CI.setDoesNotThrow();
1884 return &CI;
1885 }
1886
1887 IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: &CI);
1888 if (!II)
1889 return visitCallBase(Call&: CI);
1890
1891 // Intrinsics cannot occur in an invoke or a callbr, so handle them here
1892 // instead of in visitCallBase.
1893 if (auto *MI = dyn_cast<AnyMemIntrinsic>(Val: II)) {
1894 if (auto NumBytes = MI->getLengthInBytes()) {
1895 // memmove/cpy/set of zero bytes is a noop.
1896 if (NumBytes->isZero())
1897 return eraseInstFromFunction(I&: CI);
1898
1899      // For atomic unordered mem intrinsics, if the length is not positive
1900      // or not a multiple of the element size, then the behavior is undefined.
1901 if (MI->isAtomic() &&
1902 (NumBytes->isNegative() ||
1903 (NumBytes->getZExtValue() % MI->getElementSizeInBytes() != 0))) {
1904 CreateNonTerminatorUnreachable(InsertAt: MI);
1905 assert(MI->getType()->isVoidTy() &&
1906 "non void atomic unordered mem intrinsic");
1907 return eraseInstFromFunction(I&: *MI);
1908 }
1909 }
1910
1911 // No other transformations apply to volatile transfers.
1912 if (MI->isVolatile())
1913 return nullptr;
1914
1915 if (AnyMemTransferInst *MTI = dyn_cast<AnyMemTransferInst>(Val: MI)) {
1916 // memmove(x,x,size) -> noop.
1917 if (MTI->getSource() == MTI->getDest())
1918 return eraseInstFromFunction(I&: CI);
1919 }
1920
1921 auto IsPointerUndefined = [MI](Value *Ptr) {
1922 return isa<ConstantPointerNull>(Val: Ptr) &&
1923 !NullPointerIsDefined(
1924 F: MI->getFunction(),
1925 AS: cast<PointerType>(Val: Ptr->getType())->getAddressSpace());
1926 };
1927 bool SrcIsUndefined = false;
1928 // If we can determine a pointer alignment that is bigger than currently
1929 // set, update the alignment.
1930 if (auto *MTI = dyn_cast<AnyMemTransferInst>(Val: MI)) {
1931 if (Instruction *I = SimplifyAnyMemTransfer(MI: MTI))
1932 return I;
1933 SrcIsUndefined = IsPointerUndefined(MTI->getRawSource());
1934 } else if (auto *MSI = dyn_cast<AnyMemSetInst>(Val: MI)) {
1935 if (Instruction *I = SimplifyAnyMemSet(MI: MSI))
1936 return I;
1937 }
1938
1939 // If src/dest is null, this memory intrinsic must be a noop.
1940 if (SrcIsUndefined || IsPointerUndefined(MI->getRawDest())) {
1941 Builder.CreateAssumption(Cond: Builder.CreateIsNull(Arg: MI->getLength()));
1942 return eraseInstFromFunction(I&: CI);
1943 }
1944
1945 // If we have a memmove and the source operation is a constant global,
1946 // then the source and dest pointers can't alias, so we can change this
1947 // into a call to memcpy.
1948 if (auto *MMI = dyn_cast<AnyMemMoveInst>(Val: MI)) {
1949 if (GlobalVariable *GVSrc = dyn_cast<GlobalVariable>(Val: MMI->getSource()))
1950 if (GVSrc->isConstant()) {
1951 Module *M = CI.getModule();
1952 Intrinsic::ID MemCpyID =
1953 MMI->isAtomic()
1954 ? Intrinsic::memcpy_element_unordered_atomic
1955 : Intrinsic::memcpy;
1956 Type *Tys[3] = { CI.getArgOperand(i: 0)->getType(),
1957 CI.getArgOperand(i: 1)->getType(),
1958 CI.getArgOperand(i: 2)->getType() };
1959 CI.setCalledFunction(
1960 Intrinsic::getOrInsertDeclaration(M, id: MemCpyID, Tys));
1961 return II;
1962 }
1963 }
1964 }
1965
1966 // For fixed width vector result intrinsics, use the generic demanded vector
1967 // support.
1968 if (auto *IIFVTy = dyn_cast<FixedVectorType>(Val: II->getType())) {
1969 auto VWidth = IIFVTy->getNumElements();
1970 APInt PoisonElts(VWidth, 0);
1971 APInt AllOnesEltMask(APInt::getAllOnes(numBits: VWidth));
1972 if (Value *V = SimplifyDemandedVectorElts(V: II, DemandedElts: AllOnesEltMask, PoisonElts)) {
1973 if (V != II)
1974 return replaceInstUsesWith(I&: *II, V);
1975 return II;
1976 }
1977 }
1978
1979 if (II->isCommutative()) {
1980 if (auto Pair = matchSymmetricPair(LHS: II->getOperand(i_nocapture: 0), RHS: II->getOperand(i_nocapture: 1))) {
1981 replaceOperand(I&: *II, OpNum: 0, V: Pair->first);
1982 replaceOperand(I&: *II, OpNum: 1, V: Pair->second);
1983 return II;
1984 }
1985
1986 if (CallInst *NewCall = canonicalizeConstantArg0ToArg1(Call&: CI))
1987 return NewCall;
1988 }
1989
1990  // Unused constrained FP intrinsic calls may have a declared side effect,
1991  // which prevents them from being removed. In some cases, however, the side
1992  // effect is actually absent. To detect this, call simplifyConstrainedFPCall;
1993  // if it returns a replacement, the call may be removed.
1994 if (CI.use_empty() && isa<ConstrainedFPIntrinsic>(Val: CI)) {
1995 if (simplifyConstrainedFPCall(Call: &CI, Q: SQ.getWithInstruction(I: &CI)))
1996 return eraseInstFromFunction(I&: CI);
1997 }
1998
1999 Intrinsic::ID IID = II->getIntrinsicID();
2000 switch (IID) {
2001 case Intrinsic::objectsize: {
2002 SmallVector<Instruction *> InsertedInstructions;
2003 if (Value *V = lowerObjectSizeCall(ObjectSize: II, DL, TLI: &TLI, AA, /*MustSucceed=*/false,
2004 InsertedInstructions: &InsertedInstructions)) {
2005 for (Instruction *Inserted : InsertedInstructions)
2006 Worklist.add(I: Inserted);
2007 return replaceInstUsesWith(I&: CI, V);
2008 }
2009 return nullptr;
2010 }
2011 case Intrinsic::abs: {
2012 Value *IIOperand = II->getArgOperand(i: 0);
2013 bool IntMinIsPoison = cast<Constant>(Val: II->getArgOperand(i: 1))->isOneValue();
2014
2015 // abs(-x) -> abs(x)
2016 Value *X;
2017 if (match(V: IIOperand, P: m_Neg(V: m_Value(V&: X)))) {
2018 if (cast<Instruction>(Val: IIOperand)->hasNoSignedWrap() || IntMinIsPoison)
2019 replaceOperand(I&: *II, OpNum: 1, V: Builder.getTrue());
2020 return replaceOperand(I&: *II, OpNum: 0, V: X);
2021 }
2022 if (match(V: IIOperand, P: m_c_Select(L: m_Neg(V: m_Value(V&: X)), R: m_Deferred(V: X))))
2023 return replaceOperand(I&: *II, OpNum: 0, V: X);
2024
2025 Value *Y;
2026 // abs(a * abs(b)) -> abs(a * b)
2027 if (match(V: IIOperand,
2028 P: m_OneUse(SubPattern: m_c_Mul(L: m_Value(V&: X),
2029 R: m_Intrinsic<Intrinsic::abs>(Op0: m_Value(V&: Y)))))) {
2030 bool NSW =
2031 cast<Instruction>(Val: IIOperand)->hasNoSignedWrap() && IntMinIsPoison;
2032 auto *XY = NSW ? Builder.CreateNSWMul(LHS: X, RHS: Y) : Builder.CreateMul(LHS: X, RHS: Y);
2033 return replaceOperand(I&: *II, OpNum: 0, V: XY);
2034 }
2035
2036 if (std::optional<bool> Known =
2037 getKnownSignOrZero(Op: IIOperand, SQ: SQ.getWithInstruction(I: II))) {
2038 // abs(x) -> x if x >= 0 (include abs(x-y) --> x - y where x >= y)
2039 // abs(x) -> x if x > 0 (include abs(x-y) --> x - y where x > y)
2040 if (!*Known)
2041 return replaceInstUsesWith(I&: *II, V: IIOperand);
2042
2043 // abs(x) -> -x if x < 0
2044      // abs(x) -> -x if x <= 0 (include abs(x-y) --> y - x where x <= y)
2045 if (IntMinIsPoison)
2046 return BinaryOperator::CreateNSWNeg(Op: IIOperand);
2047 return BinaryOperator::CreateNeg(Op: IIOperand);
2048 }
2049
2050 // abs (sext X) --> zext (abs X*)
2051 // Clear the IsIntMin (nsw) bit on the abs to allow narrowing.
2052 if (match(V: IIOperand, P: m_OneUse(SubPattern: m_SExt(Op: m_Value(V&: X))))) {
2053 Value *NarrowAbs =
2054 Builder.CreateBinaryIntrinsic(ID: Intrinsic::abs, LHS: X, RHS: Builder.getFalse());
2055 return CastInst::Create(Instruction::ZExt, S: NarrowAbs, Ty: II->getType());
2056 }
2057
2058 // Match a complicated way to check if a number is odd/even:
2059 // abs (srem X, 2) --> and X, 1
2060 const APInt *C;
2061 if (match(V: IIOperand, P: m_SRem(L: m_Value(V&: X), R: m_APInt(Res&: C))) && *C == 2)
2062 return BinaryOperator::CreateAnd(V1: X, V2: ConstantInt::get(Ty: II->getType(), V: 1));
2063
2064 break;
2065 }
2066 case Intrinsic::umin: {
2067 Value *I0 = II->getArgOperand(i: 0), *I1 = II->getArgOperand(i: 1);
2068 // umin(x, 1) == zext(x != 0)
2069 if (match(V: I1, P: m_One())) {
2070 assert(II->getType()->getScalarSizeInBits() != 1 &&
2071 "Expected simplify of umin with max constant");
2072 Value *Zero = Constant::getNullValue(Ty: I0->getType());
2073 Value *Cmp = Builder.CreateICmpNE(LHS: I0, RHS: Zero);
2074 return CastInst::Create(Instruction::ZExt, S: Cmp, Ty: II->getType());
2075 }
2076 // umin(cttz(x), const) --> cttz(x | (1 << const))
2077 if (Value *FoldedCttz =
2078 foldMinimumOverTrailingOrLeadingZeroCount<Intrinsic::cttz>(
2079 I0, I1, DL, Builder))
2080 return replaceInstUsesWith(I&: *II, V: FoldedCttz);
2081 // umin(ctlz(x), const) --> ctlz(x | (SignedMin >> const))
2082 if (Value *FoldedCtlz =
2083 foldMinimumOverTrailingOrLeadingZeroCount<Intrinsic::ctlz>(
2084 I0, I1, DL, Builder))
2085 return replaceInstUsesWith(I&: *II, V: FoldedCtlz);
2086 [[fallthrough]];
2087 }
2088 case Intrinsic::umax: {
2089 Value *I0 = II->getArgOperand(i: 0), *I1 = II->getArgOperand(i: 1);
2090 Value *X, *Y;
2091 if (match(V: I0, P: m_ZExt(Op: m_Value(V&: X))) && match(V: I1, P: m_ZExt(Op: m_Value(V&: Y))) &&
2092 (I0->hasOneUse() || I1->hasOneUse()) && X->getType() == Y->getType()) {
2093 Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(ID: IID, LHS: X, RHS: Y);
2094 return CastInst::Create(Instruction::ZExt, S: NarrowMaxMin, Ty: II->getType());
2095 }
2096 Constant *C;
2097 if (match(V: I0, P: m_ZExt(Op: m_Value(V&: X))) && match(V: I1, P: m_Constant(C)) &&
2098 I0->hasOneUse()) {
2099 if (Constant *NarrowC = getLosslessUnsignedTrunc(C, DestTy: X->getType(), DL)) {
2100 Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(ID: IID, LHS: X, RHS: NarrowC);
2101 return CastInst::Create(Instruction::ZExt, S: NarrowMaxMin, Ty: II->getType());
2102 }
2103 }
2104 // If C is not 0:
2105 // umax(nuw_shl(x, C), x + 1) -> x == 0 ? 1 : nuw_shl(x, C)
2106 // If C is not 0 or 1:
2107 // umax(nuw_mul(x, C), x + 1) -> x == 0 ? 1 : nuw_mul(x, C)
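    // e.g. with C == 3:
    //    umax((shl nuw X, 3), X + 1) -> X == 0 ? 1 : (shl nuw X, 3)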
2108 auto foldMaxMulShift = [&](Value *A, Value *B) -> Instruction * {
2109 const APInt *C;
2110 Value *X;
2111 if (!match(V: A, P: m_NUWShl(L: m_Value(V&: X), R: m_APInt(Res&: C))) &&
2112 !(match(V: A, P: m_NUWMul(L: m_Value(V&: X), R: m_APInt(Res&: C))) && !C->isOne()))
2113 return nullptr;
2114 if (C->isZero())
2115 return nullptr;
2116 if (!match(V: B, P: m_OneUse(SubPattern: m_Add(L: m_Specific(V: X), R: m_One()))))
2117 return nullptr;
2118
2119 Value *Cmp = Builder.CreateICmpEQ(LHS: X, RHS: ConstantInt::get(Ty: X->getType(), V: 0));
2120 Value *NewSelect =
2121 Builder.CreateSelect(C: Cmp, True: ConstantInt::get(Ty: X->getType(), V: 1), False: A);
2122 return replaceInstUsesWith(I&: *II, V: NewSelect);
2123 };
2124
2125 if (IID == Intrinsic::umax) {
2126 if (Instruction *I = foldMaxMulShift(I0, I1))
2127 return I;
2128 if (Instruction *I = foldMaxMulShift(I1, I0))
2129 return I;
2130 }
2131
2132 // If both operands of unsigned min/max are sign-extended, it is still ok
2133 // to narrow the operation.
2134 [[fallthrough]];
2135 }
2136 case Intrinsic::smax:
2137 case Intrinsic::smin: {
2138 Value *I0 = II->getArgOperand(i: 0), *I1 = II->getArgOperand(i: 1);
2139 Value *X, *Y;
2140 if (match(V: I0, P: m_SExt(Op: m_Value(V&: X))) && match(V: I1, P: m_SExt(Op: m_Value(V&: Y))) &&
2141 (I0->hasOneUse() || I1->hasOneUse()) && X->getType() == Y->getType()) {
2142 Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(ID: IID, LHS: X, RHS: Y);
2143 return CastInst::Create(Instruction::SExt, S: NarrowMaxMin, Ty: II->getType());
2144 }
2145
2146 Constant *C;
2147 if (match(V: I0, P: m_SExt(Op: m_Value(V&: X))) && match(V: I1, P: m_Constant(C)) &&
2148 I0->hasOneUse()) {
2149 if (Constant *NarrowC = getLosslessSignedTrunc(C, DestTy: X->getType(), DL)) {
2150 Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(ID: IID, LHS: X, RHS: NarrowC);
2151 return CastInst::Create(Instruction::SExt, S: NarrowMaxMin, Ty: II->getType());
2152 }
2153 }
2154
2155 // smax(smin(X, MinC), MaxC) -> smin(smax(X, MaxC), MinC) if MinC s>= MaxC
2156 // umax(umin(X, MinC), MaxC) -> umin(umax(X, MaxC), MinC) if MinC u>= MaxC
2157 const APInt *MinC, *MaxC;
2158 auto CreateCanonicalClampForm = [&](bool IsSigned) {
2159 auto MaxIID = IsSigned ? Intrinsic::smax : Intrinsic::umax;
2160 auto MinIID = IsSigned ? Intrinsic::smin : Intrinsic::umin;
2161 Value *NewMax = Builder.CreateBinaryIntrinsic(
2162 ID: MaxIID, LHS: X, RHS: ConstantInt::get(Ty: X->getType(), V: *MaxC));
2163 return replaceInstUsesWith(
2164 I&: *II, V: Builder.CreateBinaryIntrinsic(
2165 ID: MinIID, LHS: NewMax, RHS: ConstantInt::get(Ty: X->getType(), V: *MinC)));
2166 };
2167 if (IID == Intrinsic::smax &&
2168 match(V: I0, P: m_OneUse(SubPattern: m_Intrinsic<Intrinsic::smin>(Op0: m_Value(V&: X),
2169 Op1: m_APInt(Res&: MinC)))) &&
2170 match(V: I1, P: m_APInt(Res&: MaxC)) && MinC->sgt(RHS: *MaxC))
2171 return CreateCanonicalClampForm(true);
2172 if (IID == Intrinsic::umax &&
2173 match(V: I0, P: m_OneUse(SubPattern: m_Intrinsic<Intrinsic::umin>(Op0: m_Value(V&: X),
2174 Op1: m_APInt(Res&: MinC)))) &&
2175 match(V: I1, P: m_APInt(Res&: MaxC)) && MinC->ugt(RHS: *MaxC))
2176 return CreateCanonicalClampForm(false);
2177
2178 // umin(i1 X, i1 Y) -> and i1 X, Y
2179 // smax(i1 X, i1 Y) -> and i1 X, Y
2180 if ((IID == Intrinsic::umin || IID == Intrinsic::smax) &&
2181 II->getType()->isIntOrIntVectorTy(BitWidth: 1)) {
2182 return BinaryOperator::CreateAnd(V1: I0, V2: I1);
2183 }
2184
2185 // umax(i1 X, i1 Y) -> or i1 X, Y
2186 // smin(i1 X, i1 Y) -> or i1 X, Y
2187 if ((IID == Intrinsic::umax || IID == Intrinsic::smin) &&
2188 II->getType()->isIntOrIntVectorTy(BitWidth: 1)) {
2189 return BinaryOperator::CreateOr(V1: I0, V2: I1);
2190 }
2191
2192 // smin(smax(X, -1), 1) -> scmp(X, 0)
2193 // smax(smin(X, 1), -1) -> scmp(X, 0)
2194    // At this point, smax(smin(X, 1), -1) is changed to smin(smax(X, -1), 1)
2195    // and i1's have been changed to and/ors,
2196    // so we only need to check for smin.
2197 if (IID == Intrinsic::smin) {
2198 if (match(V: I0, P: m_OneUse(SubPattern: m_SMax(L: m_Value(V&: X), R: m_AllOnes()))) &&
2199 match(V: I1, P: m_One())) {
2200 Value *Zero = ConstantInt::get(Ty: X->getType(), V: 0);
2201 return replaceInstUsesWith(
2202 I&: CI,
2203 V: Builder.CreateIntrinsic(RetTy: II->getType(), ID: Intrinsic::scmp, Args: {X, Zero}));
2204 }
2205 }
2206
2207 if (IID == Intrinsic::smax || IID == Intrinsic::smin) {
2208 // smax (neg nsw X), (neg nsw Y) --> neg nsw (smin X, Y)
2209 // smin (neg nsw X), (neg nsw Y) --> neg nsw (smax X, Y)
2210 // TODO: Canonicalize neg after min/max if I1 is constant.
2211 if (match(V: I0, P: m_NSWNeg(V: m_Value(V&: X))) && match(V: I1, P: m_NSWNeg(V: m_Value(V&: Y))) &&
2212 (I0->hasOneUse() || I1->hasOneUse())) {
2213 Intrinsic::ID InvID = getInverseMinMaxIntrinsic(MinMaxID: IID);
2214 Value *InvMaxMin = Builder.CreateBinaryIntrinsic(ID: InvID, LHS: X, RHS: Y);
2215 return BinaryOperator::CreateNSWNeg(Op: InvMaxMin);
2216 }
2217 }
2218
2219 // (umax X, (xor X, Pow2))
2220 // -> (or X, Pow2)
2221 // (umin X, (xor X, Pow2))
2222 // -> (and X, ~Pow2)
2223 // (smax X, (xor X, Pos_Pow2))
2224 // -> (or X, Pos_Pow2)
2225 // (smin X, (xor X, Pos_Pow2))
2226 // -> (and X, ~Pos_Pow2)
2227 // (smax X, (xor X, Neg_Pow2))
2228 // -> (and X, ~Neg_Pow2)
2229 // (smin X, (xor X, Neg_Pow2))
2230 // -> (or X, Neg_Pow2)
2231 if ((match(V: I0, P: m_c_Xor(L: m_Specific(V: I1), R: m_Value(V&: X))) ||
2232 match(V: I1, P: m_c_Xor(L: m_Specific(V: I0), R: m_Value(V&: X)))) &&
2233 isKnownToBeAPowerOfTwo(V: X, /* OrZero */ true)) {
2234 bool UseOr = IID == Intrinsic::smax || IID == Intrinsic::umax;
2235 bool UseAndN = IID == Intrinsic::smin || IID == Intrinsic::umin;
2236
2237 if (IID == Intrinsic::smax || IID == Intrinsic::smin) {
2238 auto KnownSign = getKnownSign(Op: X, SQ: SQ.getWithInstruction(I: II));
2239 if (KnownSign == std::nullopt) {
2240 UseOr = false;
2241 UseAndN = false;
2242 } else if (*KnownSign /* true is Signed. */) {
2243 UseOr ^= true;
2244 UseAndN ^= true;
2245 Type *Ty = I0->getType();
2246 // Negative power of 2 must be IntMin. It's possible to be able to
2247 // prove negative / power of 2 without actually having known bits, so
2248 // just get the value by hand.
2249 X = Constant::getIntegerValue(
2250 Ty, V: APInt::getSignedMinValue(numBits: Ty->getScalarSizeInBits()));
2251 }
2252 }
2253 if (UseOr)
2254 return BinaryOperator::CreateOr(V1: I0, V2: X);
2255 else if (UseAndN)
2256 return BinaryOperator::CreateAnd(V1: I0, V2: Builder.CreateNot(V: X));
2257 }
2258
2259 // If we can eliminate ~A and Y is free to invert:
2260 // max ~A, Y --> ~(min A, ~Y)
2261 //
2262 // Examples:
2263 // max ~A, ~Y --> ~(min A, Y)
2264 // max ~A, C --> ~(min A, ~C)
2265    //   max ~A, (max ~Y, ~Z) --> ~(min A, (min Y, Z))
2266 auto moveNotAfterMinMax = [&](Value *X, Value *Y) -> Instruction * {
2267 Value *A;
2268 if (match(V: X, P: m_OneUse(SubPattern: m_Not(V: m_Value(V&: A)))) &&
2269 !isFreeToInvert(V: A, WillInvertAllUses: A->hasOneUse())) {
2270 if (Value *NotY = getFreelyInverted(V: Y, WillInvertAllUses: Y->hasOneUse(), Builder: &Builder)) {
2271 Intrinsic::ID InvID = getInverseMinMaxIntrinsic(MinMaxID: IID);
2272 Value *InvMaxMin = Builder.CreateBinaryIntrinsic(ID: InvID, LHS: A, RHS: NotY);
2273 return BinaryOperator::CreateNot(Op: InvMaxMin);
2274 }
2275 }
2276 return nullptr;
2277 };
2278
2279 if (Instruction *I = moveNotAfterMinMax(I0, I1))
2280 return I;
2281 if (Instruction *I = moveNotAfterMinMax(I1, I0))
2282 return I;
2283
2284 if (Instruction *I = moveAddAfterMinMax(II, Builder))
2285 return I;
2286
2287 // minmax (X & NegPow2C, Y & NegPow2C) --> minmax(X, Y) & NegPow2C
2288 const APInt *RHSC;
2289 if (match(V: I0, P: m_OneUse(SubPattern: m_And(L: m_Value(V&: X), R: m_NegatedPower2(V&: RHSC)))) &&
2290 match(V: I1, P: m_OneUse(SubPattern: m_And(L: m_Value(V&: Y), R: m_SpecificInt(V: *RHSC)))))
2291 return BinaryOperator::CreateAnd(V1: Builder.CreateBinaryIntrinsic(ID: IID, LHS: X, RHS: Y),
2292 V2: ConstantInt::get(Ty: II->getType(), V: *RHSC));
2293
2294 // smax(X, -X) --> abs(X)
2295 // smin(X, -X) --> -abs(X)
2296 // umax(X, -X) --> -abs(X)
2297 // umin(X, -X) --> abs(X)
2298 if (isKnownNegation(X: I0, Y: I1)) {
2299 // We can choose either operand as the input to abs(), but if we can
2300 // eliminate the only use of a value, that's better for subsequent
2301 // transforms/analysis.
2302 if (I0->hasOneUse() && !I1->hasOneUse())
2303 std::swap(a&: I0, b&: I1);
2304
2305 // This is some variant of abs(). See if we can propagate 'nsw' to the abs
2306 // operation and potentially its negation.
2307 bool IntMinIsPoison = isKnownNegation(X: I0, Y: I1, /* NeedNSW */ true);
2308 Value *Abs = Builder.CreateBinaryIntrinsic(
2309 ID: Intrinsic::abs, LHS: I0,
2310 RHS: ConstantInt::getBool(Context&: II->getContext(), V: IntMinIsPoison));
2311
2312 // We don't have a "nabs" intrinsic, so negate if needed based on the
2313 // max/min operation.
2314 if (IID == Intrinsic::smin || IID == Intrinsic::umax)
2315 Abs = Builder.CreateNeg(V: Abs, Name: "nabs", HasNSW: IntMinIsPoison);
2316 return replaceInstUsesWith(I&: CI, V: Abs);
2317 }
2318
2319 if (Instruction *Sel = foldClampRangeOfTwo(II, Builder))
2320 return Sel;
2321
2322 if (Instruction *SAdd = matchSAddSubSat(MinMax1&: *II))
2323 return SAdd;
2324
2325 if (Value *NewMinMax = reassociateMinMaxWithConstants(II, Builder, SQ))
2326 return replaceInstUsesWith(I&: *II, V: NewMinMax);
2327
2328 if (Instruction *R = reassociateMinMaxWithConstantInOperand(II, Builder))
2329 return R;
2330
2331 if (Instruction *NewMinMax = factorizeMinMaxTree(II))
2332 return NewMinMax;
2333
2334 // Try to fold minmax with constant RHS based on range information
2335 if (match(V: I1, P: m_APIntAllowPoison(Res&: RHSC))) {
2336 ICmpInst::Predicate Pred =
2337 ICmpInst::getNonStrictPredicate(pred: MinMaxIntrinsic::getPredicate(ID: IID));
2338 bool IsSigned = MinMaxIntrinsic::isSigned(ID: IID);
2339 ConstantRange LHS_CR = computeConstantRangeIncludingKnownBits(
2340 V: I0, ForSigned: IsSigned, SQ: SQ.getWithInstruction(I: II));
2341 if (!LHS_CR.isFullSet()) {
2342 if (LHS_CR.icmp(Pred, Other: *RHSC))
2343 return replaceInstUsesWith(I&: *II, V: I0);
2344 if (LHS_CR.icmp(Pred: ICmpInst::getSwappedPredicate(pred: Pred), Other: *RHSC))
2345 return replaceInstUsesWith(I&: *II,
2346 V: ConstantInt::get(Ty: II->getType(), V: *RHSC));
2347 }
2348 }
2349
2350 if (Value *V = foldIntrinsicUsingDistributiveLaws(II, Builder))
2351 return replaceInstUsesWith(I&: *II, V);
2352
2353 break;
2354 }
2355 case Intrinsic::scmp: {
2356 Value *I0 = II->getArgOperand(i: 0), *I1 = II->getArgOperand(i: 1);
2357 Value *LHS, *RHS;
2358 if (match(V: I0, P: m_NSWSub(L: m_Value(V&: LHS), R: m_Value(V&: RHS))) && match(V: I1, P: m_Zero()))
2359 return replaceInstUsesWith(
2360 I&: CI,
2361 V: Builder.CreateIntrinsic(RetTy: II->getType(), ID: Intrinsic::scmp, Args: {LHS, RHS}));
2362 break;
2363 }
2364 case Intrinsic::bitreverse: {
2365 Value *IIOperand = II->getArgOperand(i: 0);
2366 // bitrev (zext i1 X to ?) --> X ? SignBitC : 0
2367 Value *X;
2368 if (match(V: IIOperand, P: m_ZExt(Op: m_Value(V&: X))) &&
2369 X->getType()->isIntOrIntVectorTy(BitWidth: 1)) {
2370 Type *Ty = II->getType();
2371 APInt SignBit = APInt::getSignMask(BitWidth: Ty->getScalarSizeInBits());
2372 return SelectInst::Create(C: X, S1: ConstantInt::get(Ty, V: SignBit),
2373 S2: ConstantInt::getNullValue(Ty));
2374 }
2375
2376 if (Instruction *crossLogicOpFold =
2377 foldBitOrderCrossLogicOp<Intrinsic::bitreverse>(V: IIOperand, Builder))
2378 return crossLogicOpFold;
2379
2380 break;
2381 }
2382 case Intrinsic::bswap: {
2383 Value *IIOperand = II->getArgOperand(i: 0);
2384
2385 // Try to canonicalize bswap-of-logical-shift-by-8-bit-multiple as
2386 // inverse-shift-of-bswap:
2387 // bswap (shl X, Y) --> lshr (bswap X), Y
2388 // bswap (lshr X, Y) --> shl (bswap X), Y
2389 Value *X, *Y;
2390 if (match(V: IIOperand, P: m_OneUse(SubPattern: m_LogicalShift(L: m_Value(V&: X), R: m_Value(V&: Y))))) {
2391 unsigned BitWidth = IIOperand->getType()->getScalarSizeInBits();
2392 if (MaskedValueIsZero(V: Y, Mask: APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: 3))) {
2393 Value *NewSwap = Builder.CreateUnaryIntrinsic(ID: Intrinsic::bswap, V: X);
2394 BinaryOperator::BinaryOps InverseShift =
2395 cast<BinaryOperator>(Val: IIOperand)->getOpcode() == Instruction::Shl
2396 ? Instruction::LShr
2397 : Instruction::Shl;
2398 return BinaryOperator::Create(Op: InverseShift, S1: NewSwap, S2: Y);
2399 }
2400 }
2401
2402 KnownBits Known = computeKnownBits(V: IIOperand, CxtI: II);
2403 uint64_t LZ = alignDown(Value: Known.countMinLeadingZeros(), Align: 8);
2404 uint64_t TZ = alignDown(Value: Known.countMinTrailingZeros(), Align: 8);
2405 unsigned BW = Known.getBitWidth();
2406
2407 // bswap(x) -> shift(x) if x has exactly one "active byte"
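    // e.g. for i32 with LZ == 24 and TZ == 0 the only possible nonzero byte is
    // the lowest one, so bswap(x) == shl(x, 24).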
2408 if (BW - LZ - TZ == 8) {
2409 assert(LZ != TZ && "active byte cannot be in the middle");
2410 if (LZ > TZ) // -> shl(x) if the "active byte" is in the low part of x
2411 return BinaryOperator::CreateNUWShl(
2412 V1: IIOperand, V2: ConstantInt::get(Ty: IIOperand->getType(), V: LZ - TZ));
2413 // -> lshr(x) if the "active byte" is in the high part of x
2414 return BinaryOperator::CreateExactLShr(
2415 V1: IIOperand, V2: ConstantInt::get(Ty: IIOperand->getType(), V: TZ - LZ));
2416 }
2417
2418 // bswap(trunc(bswap(x))) -> trunc(lshr(x, c))
2419 if (match(V: IIOperand, P: m_Trunc(Op: m_BSwap(Op0: m_Value(V&: X))))) {
2420 unsigned C = X->getType()->getScalarSizeInBits() - BW;
2421 Value *CV = ConstantInt::get(Ty: X->getType(), V: C);
2422 Value *V = Builder.CreateLShr(LHS: X, RHS: CV);
2423 return new TruncInst(V, IIOperand->getType());
2424 }
2425
2426 if (Instruction *crossLogicOpFold =
2427 foldBitOrderCrossLogicOp<Intrinsic::bswap>(V: IIOperand, Builder)) {
2428 return crossLogicOpFold;
2429 }
2430
2431 // Try to fold into bitreverse if bswap is the root of the expression tree.
2432 if (Instruction *BitOp = matchBSwapOrBitReverse(I&: *II, /*MatchBSwaps*/ false,
2433 /*MatchBitReversals*/ true))
2434 return BitOp;
2435 break;
2436 }
2437 case Intrinsic::masked_load:
2438 if (Value *SimplifiedMaskedOp = simplifyMaskedLoad(II&: *II))
2439 return replaceInstUsesWith(I&: CI, V: SimplifiedMaskedOp);
2440 break;
2441 case Intrinsic::masked_store:
2442 return simplifyMaskedStore(II&: *II);
2443 case Intrinsic::masked_gather:
2444 return simplifyMaskedGather(II&: *II);
2445 case Intrinsic::masked_scatter:
2446 return simplifyMaskedScatter(II&: *II);
2447 case Intrinsic::launder_invariant_group:
2448 case Intrinsic::strip_invariant_group:
2449 if (auto *SkippedBarrier = simplifyInvariantGroupIntrinsic(II&: *II, IC&: *this))
2450 return replaceInstUsesWith(I&: *II, V: SkippedBarrier);
2451 break;
2452 case Intrinsic::powi:
2453 if (ConstantInt *Power = dyn_cast<ConstantInt>(Val: II->getArgOperand(i: 1))) {
2454 // 0 and 1 are handled in instsimplify
2455 // powi(x, -1) -> 1/x
2456 if (Power->isMinusOne())
2457 return BinaryOperator::CreateFDivFMF(V1: ConstantFP::get(Ty: CI.getType(), V: 1.0),
2458 V2: II->getArgOperand(i: 0), FMFSource: II);
2459 // powi(x, 2) -> x*x
2460 if (Power->equalsInt(V: 2))
2461 return BinaryOperator::CreateFMulFMF(V1: II->getArgOperand(i: 0),
2462 V2: II->getArgOperand(i: 0), FMFSource: II);
2463
2464 if (!Power->getValue()[0]) {
2465 Value *X;
2466 // If power is even:
2467 // powi(-x, p) -> powi(x, p)
2468 // powi(fabs(x), p) -> powi(x, p)
2469 // powi(copysign(x, y), p) -> powi(x, p)
2470 if (match(V: II->getArgOperand(i: 0), P: m_FNeg(X: m_Value(V&: X))) ||
2471 match(V: II->getArgOperand(i: 0), P: m_FAbs(Op0: m_Value(V&: X))) ||
2472 match(V: II->getArgOperand(i: 0),
2473 P: m_Intrinsic<Intrinsic::copysign>(Op0: m_Value(V&: X), Op1: m_Value())))
2474 return replaceOperand(I&: *II, OpNum: 0, V: X);
2475 }
2476 }
2477 break;
2478
2479 case Intrinsic::cttz:
2480 case Intrinsic::ctlz:
2481 if (auto *I = foldCttzCtlz(II&: *II, IC&: *this))
2482 return I;
2483 break;
2484
2485 case Intrinsic::ctpop:
2486 if (auto *I = foldCtpop(II&: *II, IC&: *this))
2487 return I;
2488 break;
2489
2490 case Intrinsic::fshl:
2491 case Intrinsic::fshr: {
2492 Value *Op0 = II->getArgOperand(i: 0), *Op1 = II->getArgOperand(i: 1);
2493 Type *Ty = II->getType();
2494 unsigned BitWidth = Ty->getScalarSizeInBits();
2495 Constant *ShAmtC;
2496 if (match(V: II->getArgOperand(i: 2), P: m_ImmConstant(C&: ShAmtC))) {
2497 // Canonicalize a shift amount constant operand to modulo the bit-width.
2498 Constant *WidthC = ConstantInt::get(Ty, V: BitWidth);
2499 Constant *ModuloC =
2500 ConstantFoldBinaryOpOperands(Opcode: Instruction::URem, LHS: ShAmtC, RHS: WidthC, DL);
2501 if (!ModuloC)
2502 return nullptr;
2503 if (ModuloC != ShAmtC)
2504 return replaceOperand(I&: *II, OpNum: 2, V: ModuloC);
2505
2506 assert(match(ConstantFoldCompareInstOperands(ICmpInst::ICMP_UGT, WidthC,
2507 ShAmtC, DL),
2508 m_One()) &&
2509 "Shift amount expected to be modulo bitwidth");
2510
2511 // Canonicalize funnel shift right by constant to funnel shift left. This
2512 // is not entirely arbitrary. For historical reasons, the backend may
2513 // recognize rotate left patterns but miss rotate right patterns.
2514 if (IID == Intrinsic::fshr) {
2515 // fshr X, Y, C --> fshl X, Y, (BitWidth - C) if C is not zero.
2516 if (!isKnownNonZero(V: ShAmtC, Q: SQ.getWithInstruction(I: II)))
2517 return nullptr;
2518
2519 Constant *LeftShiftC = ConstantExpr::getSub(C1: WidthC, C2: ShAmtC);
2520 Module *Mod = II->getModule();
2521 Function *Fshl =
2522 Intrinsic::getOrInsertDeclaration(M: Mod, id: Intrinsic::fshl, Tys: Ty);
2523 return CallInst::Create(Func: Fshl, Args: { Op0, Op1, LeftShiftC });
2524 }
2525 assert(IID == Intrinsic::fshl &&
2526 "All funnel shifts by simple constants should go left");
2527
2528 // fshl(X, 0, C) --> shl X, C
2529 // fshl(X, undef, C) --> shl X, C
2530 if (match(V: Op1, P: m_ZeroInt()) || match(V: Op1, P: m_Undef()))
2531 return BinaryOperator::CreateShl(V1: Op0, V2: ShAmtC);
2532
2533 // fshl(0, X, C) --> lshr X, (BW-C)
2534 // fshl(undef, X, C) --> lshr X, (BW-C)
2535 if (match(V: Op0, P: m_ZeroInt()) || match(V: Op0, P: m_Undef()))
2536 return BinaryOperator::CreateLShr(V1: Op1,
2537 V2: ConstantExpr::getSub(C1: WidthC, C2: ShAmtC));
2538
2539 // fshl i16 X, X, 8 --> bswap i16 X (reduce to more-specific form)
2540 if (Op0 == Op1 && BitWidth == 16 && match(V: ShAmtC, P: m_SpecificInt(V: 8))) {
2541 Module *Mod = II->getModule();
2542 Function *Bswap =
2543 Intrinsic::getOrInsertDeclaration(M: Mod, id: Intrinsic::bswap, Tys: Ty);
2544 return CallInst::Create(Func: Bswap, Args: { Op0 });
2545 }
2546 if (Instruction *BitOp =
2547 matchBSwapOrBitReverse(I&: *II, /*MatchBSwaps*/ true,
2548 /*MatchBitReversals*/ true))
2549 return BitOp;
2550
2551 // R = fshl(X, X, C2)
2552 // fshl(R, R, C1) --> fshl(X, X, (C1 + C2) % bitsize)
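      // e.g. for i8: fshl(R, R, 3) with R == fshl(X, X, 7)
      //   --> fshl(X, X, (3 + 7) % 8) == fshl(X, X, 2)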
2553 Value *InnerOp;
2554 const APInt *ShAmtInnerC, *ShAmtOuterC;
2555 if (match(V: Op0, P: m_FShl(Op0: m_Value(V&: InnerOp), Op1: m_Deferred(V: InnerOp),
2556 Op2: m_APInt(Res&: ShAmtInnerC))) &&
2557 match(V: ShAmtC, P: m_APInt(Res&: ShAmtOuterC)) && Op0 == Op1) {
2558 APInt Sum = *ShAmtOuterC + *ShAmtInnerC;
2559 APInt Modulo = Sum.urem(RHS: APInt(Sum.getBitWidth(), BitWidth));
2560 if (Modulo.isZero())
2561 return replaceInstUsesWith(I&: *II, V: InnerOp);
2562 Constant *ModuloC = ConstantInt::get(Ty, V: Modulo);
2563 return CallInst::Create(Func: cast<IntrinsicInst>(Val: Op0)->getCalledFunction(),
2564 Args: {InnerOp, InnerOp, ModuloC});
2565 }
2566 }
2567
2568 // fshl(X, X, Neg(Y)) --> fshr(X, X, Y)
2569 // fshr(X, X, Neg(Y)) --> fshl(X, X, Y)
2570 // if BitWidth is a power-of-2
2571 Value *Y;
2572 if (Op0 == Op1 && isPowerOf2_32(Value: BitWidth) &&
2573 match(V: II->getArgOperand(i: 2), P: m_Neg(V: m_Value(V&: Y)))) {
2574 Module *Mod = II->getModule();
2575 Function *OppositeShift = Intrinsic::getOrInsertDeclaration(
2576 M: Mod, id: IID == Intrinsic::fshl ? Intrinsic::fshr : Intrinsic::fshl, Tys: Ty);
2577 return CallInst::Create(Func: OppositeShift, Args: {Op0, Op1, Y});
2578 }
2579
2580 // fshl(X, 0, Y) --> shl(X, and(Y, BitWidth - 1)) if bitwidth is a
2581 // power-of-2
2582 if (IID == Intrinsic::fshl && isPowerOf2_32(Value: BitWidth) &&
2583 match(V: Op1, P: m_ZeroInt())) {
2584 Value *Op2 = II->getArgOperand(i: 2);
2585 Value *And = Builder.CreateAnd(LHS: Op2, RHS: ConstantInt::get(Ty, V: BitWidth - 1));
2586 return BinaryOperator::CreateShl(V1: Op0, V2: And);
2587 }
2588
2589 // Left or right might be masked.
2590 if (SimplifyDemandedInstructionBits(Inst&: *II))
2591 return &CI;
2592
2593 // The shift amount (operand 2) of a funnel shift is modulo the bitwidth,
2594 // so only the low bits of the shift amount are demanded if the bitwidth is
2595 // a power-of-2.
2596 if (!isPowerOf2_32(Value: BitWidth))
2597 break;
2598 APInt Op2Demanded = APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: Log2_32_Ceil(Value: BitWidth));
2599 KnownBits Op2Known(BitWidth);
2600 if (SimplifyDemandedBits(I: II, OpNo: 2, DemandedMask: Op2Demanded, Known&: Op2Known))
2601 return &CI;
2602 break;
2603 }
2604 case Intrinsic::ptrmask: {
2605 unsigned BitWidth = DL.getPointerTypeSizeInBits(II->getType());
2606 KnownBits Known(BitWidth);
2607 if (SimplifyDemandedInstructionBits(Inst&: *II, Known))
2608 return II;
2609
2610 Value *InnerPtr, *InnerMask;
2611 bool Changed = false;
2612 // Combine:
2613 // (ptrmask (ptrmask p, A), B)
2614 // -> (ptrmask p, (and A, B))
2615 if (match(V: II->getArgOperand(i: 0),
2616 P: m_OneUse(SubPattern: m_Intrinsic<Intrinsic::ptrmask>(Op0: m_Value(V&: InnerPtr),
2617 Op1: m_Value(V&: InnerMask))))) {
2618 assert(II->getArgOperand(1)->getType() == InnerMask->getType() &&
2619 "Mask types must match");
2620 // TODO: If InnerMask == Op1, we could copy attributes from inner
2621 // callsite -> outer callsite.
2622 Value *NewMask = Builder.CreateAnd(LHS: II->getArgOperand(i: 1), RHS: InnerMask);
2623 replaceOperand(I&: CI, OpNum: 0, V: InnerPtr);
2624 replaceOperand(I&: CI, OpNum: 1, V: NewMask);
2625 Changed = true;
2626 }
2627
2628 // See if we can deduce non-null.
2629 if (!CI.hasRetAttr(Kind: Attribute::NonNull) &&
2630 (Known.isNonZero() ||
2631 isKnownNonZero(V: II, Q: getSimplifyQuery().getWithInstruction(I: II)))) {
2632 CI.addRetAttr(Kind: Attribute::NonNull);
2633 Changed = true;
2634 }
2635
2636 unsigned NewAlignmentLog =
2637 std::min(a: Value::MaxAlignmentExponent,
2638 b: std::min(a: BitWidth - 1, b: Known.countMinTrailingZeros()));
2639 // Known bits will capture if we had alignment information associated with
2640 // the pointer argument.
2641 if (NewAlignmentLog > Log2(A: CI.getRetAlign().valueOrOne())) {
2642 CI.addRetAttr(Attr: Attribute::getWithAlignment(
2643 Context&: CI.getContext(), Alignment: Align(uint64_t(1) << NewAlignmentLog)));
2644 Changed = true;
2645 }
2646 if (Changed)
2647 return &CI;
2648 break;
2649 }
2650 case Intrinsic::uadd_with_overflow:
2651 case Intrinsic::sadd_with_overflow: {
2652 if (Instruction *I = foldIntrinsicWithOverflowCommon(II))
2653 return I;
2654
2655 // Given 2 constant operands whose sum does not overflow:
2656 // uaddo (X +nuw C0), C1 -> uaddo X, C0 + C1
2657 // saddo (X +nsw C0), C1 -> saddo X, C0 + C1
2658 Value *X;
2659 const APInt *C0, *C1;
2660 Value *Arg0 = II->getArgOperand(i: 0);
2661 Value *Arg1 = II->getArgOperand(i: 1);
2662 bool IsSigned = IID == Intrinsic::sadd_with_overflow;
2663 bool HasNWAdd = IsSigned
2664 ? match(V: Arg0, P: m_NSWAddLike(L: m_Value(V&: X), R: m_APInt(Res&: C0)))
2665 : match(V: Arg0, P: m_NUWAddLike(L: m_Value(V&: X), R: m_APInt(Res&: C0)));
2666 if (HasNWAdd && match(V: Arg1, P: m_APInt(Res&: C1))) {
2667 bool Overflow;
2668 APInt NewC =
2669 IsSigned ? C1->sadd_ov(RHS: *C0, Overflow) : C1->uadd_ov(RHS: *C0, Overflow);
2670 if (!Overflow)
2671 return replaceInstUsesWith(
2672 I&: *II, V: Builder.CreateBinaryIntrinsic(
2673 ID: IID, LHS: X, RHS: ConstantInt::get(Ty: Arg1->getType(), V: NewC)));
2674 }
2675 break;
2676 }
2677
2678 case Intrinsic::umul_with_overflow:
2679 case Intrinsic::smul_with_overflow:
2680 case Intrinsic::usub_with_overflow:
2681 if (Instruction *I = foldIntrinsicWithOverflowCommon(II))
2682 return I;
2683 break;
2684
2685 case Intrinsic::ssub_with_overflow: {
2686 if (Instruction *I = foldIntrinsicWithOverflowCommon(II))
2687 return I;
2688
2689 Constant *C;
2690 Value *Arg0 = II->getArgOperand(i: 0);
2691 Value *Arg1 = II->getArgOperand(i: 1);
2692 // Given a constant C that is not the minimum signed value
2693 // for an integer of a given bit width:
2694 //
2695 // ssubo X, C -> saddo X, -C
2696 if (match(V: Arg1, P: m_Constant(C)) && C->isNotMinSignedValue()) {
2697 Value *NegVal = ConstantExpr::getNeg(C);
2698 // Build a saddo call that is equivalent to the discovered
2699 // ssubo call.
2700 return replaceInstUsesWith(
2701 I&: *II, V: Builder.CreateBinaryIntrinsic(ID: Intrinsic::sadd_with_overflow,
2702 LHS: Arg0, RHS: NegVal));
2703 }
2704
2705 break;
2706 }
2707
2708 case Intrinsic::uadd_sat:
2709 case Intrinsic::sadd_sat:
2710 case Intrinsic::usub_sat:
2711 case Intrinsic::ssub_sat: {
2712 SaturatingInst *SI = cast<SaturatingInst>(Val: II);
2713 Type *Ty = SI->getType();
2714 Value *Arg0 = SI->getLHS();
2715 Value *Arg1 = SI->getRHS();
2716
2717 // Make use of known overflow information.
2718 OverflowResult OR = computeOverflow(BinaryOp: SI->getBinaryOp(), IsSigned: SI->isSigned(),
2719 LHS: Arg0, RHS: Arg1, CxtI: SI);
2720 switch (OR) {
2721 case OverflowResult::MayOverflow:
2722 break;
2723 case OverflowResult::NeverOverflows:
2724 if (SI->isSigned())
2725 return BinaryOperator::CreateNSW(Opc: SI->getBinaryOp(), V1: Arg0, V2: Arg1);
2726 else
2727 return BinaryOperator::CreateNUW(Opc: SI->getBinaryOp(), V1: Arg0, V2: Arg1);
2728 case OverflowResult::AlwaysOverflowsLow: {
2729 unsigned BitWidth = Ty->getScalarSizeInBits();
2730 APInt Min = APSInt::getMinValue(numBits: BitWidth, Unsigned: !SI->isSigned());
2731 return replaceInstUsesWith(I&: *SI, V: ConstantInt::get(Ty, V: Min));
2732 }
2733 case OverflowResult::AlwaysOverflowsHigh: {
2734 unsigned BitWidth = Ty->getScalarSizeInBits();
2735 APInt Max = APSInt::getMaxValue(numBits: BitWidth, Unsigned: !SI->isSigned());
2736 return replaceInstUsesWith(I&: *SI, V: ConstantInt::get(Ty, V: Max));
2737 }
2738 }
2739
2740 // usub_sat((sub nuw C, A), C1) -> usub_sat(usub_sat(C, C1), A)
2741    // which then further simplifies to:
2742 // usub_sat((sub nuw C, A), C1) -> usub_sat(C - C1, A) if C1 u< C
2743 // usub_sat((sub nuw C, A), C1) -> 0 otherwise
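    // e.g. usub_sat((sub nuw 10, A), 3) -> usub_sat(7, A)
    //      usub_sat((sub nuw 3, A), 10) -> 0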
2744 Constant *C, *C1;
2745 Value *A;
2746 if (IID == Intrinsic::usub_sat &&
2747 match(V: Arg0, P: m_NUWSub(L: m_ImmConstant(C), R: m_Value(V&: A))) &&
2748 match(V: Arg1, P: m_ImmConstant(C&: C1))) {
2749 auto *NewC = Builder.CreateBinaryIntrinsic(ID: Intrinsic::usub_sat, LHS: C, RHS: C1);
2750 auto *NewSub =
2751 Builder.CreateBinaryIntrinsic(ID: Intrinsic::usub_sat, LHS: NewC, RHS: A);
2752 return replaceInstUsesWith(I&: *SI, V: NewSub);
2753 }
2754
2755 // ssub.sat(X, C) -> sadd.sat(X, -C) if C != MIN
2756 if (IID == Intrinsic::ssub_sat && match(V: Arg1, P: m_Constant(C)) &&
2757 C->isNotMinSignedValue()) {
2758 Value *NegVal = ConstantExpr::getNeg(C);
2759 return replaceInstUsesWith(
2760 I&: *II, V: Builder.CreateBinaryIntrinsic(
2761 ID: Intrinsic::sadd_sat, LHS: Arg0, RHS: NegVal));
2762 }
2763
2764 // sat(sat(X + Val2) + Val) -> sat(X + (Val+Val2))
2765 // sat(sat(X - Val2) - Val) -> sat(X - (Val+Val2))
2766 // if Val and Val2 have the same sign
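// For example (illustrative IR; %x is a hypothetical value), with both
// constants positive:
//   %t = call i8 @llvm.sadd.sat.i8(i8 %x, i8 10)
//   %r = call i8 @llvm.sadd.sat.i8(i8 %t, i8 20)
// becomes
//   %r = call i8 @llvm.sadd.sat.i8(i8 %x, i8 30)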
2767 if (auto *Other = dyn_cast<IntrinsicInst>(Val: Arg0)) {
2768 Value *X;
2769 const APInt *Val, *Val2;
2770 APInt NewVal;
2771 bool IsUnsigned =
2772 IID == Intrinsic::uadd_sat || IID == Intrinsic::usub_sat;
2773 if (Other->getIntrinsicID() == IID &&
2774 match(V: Arg1, P: m_APInt(Res&: Val)) &&
2775 match(V: Other->getArgOperand(i: 0), P: m_Value(V&: X)) &&
2776 match(V: Other->getArgOperand(i: 1), P: m_APInt(Res&: Val2))) {
2777 if (IsUnsigned)
2778 NewVal = Val->uadd_sat(RHS: *Val2);
2779 else if (Val->isNonNegative() == Val2->isNonNegative()) {
2780 bool Overflow;
2781 NewVal = Val->sadd_ov(RHS: *Val2, Overflow);
2782 if (Overflow) {
2783 // Both adds together may add more than SignedMaxValue
2784 // without saturating the final result.
2785 break;
2786 }
2787 } else {
2788 // Cannot fold saturated addition with different signs.
2789 break;
2790 }
2791
2792 return replaceInstUsesWith(
2793 I&: *II, V: Builder.CreateBinaryIntrinsic(
2794 ID: IID, LHS: X, RHS: ConstantInt::get(Ty: II->getType(), V: NewVal)));
2795 }
2796 }
2797 break;
2798 }
2799
2800 case Intrinsic::minnum:
2801 case Intrinsic::maxnum:
2802 case Intrinsic::minimum:
2803 case Intrinsic::maximum: {
2804 Value *Arg0 = II->getArgOperand(i: 0);
2805 Value *Arg1 = II->getArgOperand(i: 1);
2806 Value *X, *Y;
2807 if (match(V: Arg0, P: m_FNeg(X: m_Value(V&: X))) && match(V: Arg1, P: m_FNeg(X: m_Value(V&: Y))) &&
2808 (Arg0->hasOneUse() || Arg1->hasOneUse())) {
2809 // If both operands are negated, invert the call and negate the result:
2810 // min(-X, -Y) --> -(max(X, Y))
2811 // max(-X, -Y) --> -(min(X, Y))
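// For example (illustrative IR; %x and %y are hypothetical values):
//   %nx = fneg float %x
//   %ny = fneg float %y
//   %r = call float @llvm.maxnum.f32(float %nx, float %ny)
// becomes
//   %m = call float @llvm.minnum.f32(float %x, float %y)
//   %r = fneg float %m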
2812 Intrinsic::ID NewIID;
2813 switch (IID) {
2814 case Intrinsic::maxnum:
2815 NewIID = Intrinsic::minnum;
2816 break;
2817 case Intrinsic::minnum:
2818 NewIID = Intrinsic::maxnum;
2819 break;
2820 case Intrinsic::maximum:
2821 NewIID = Intrinsic::minimum;
2822 break;
2823 case Intrinsic::minimum:
2824 NewIID = Intrinsic::maximum;
2825 break;
2826 default:
2827 llvm_unreachable("unexpected intrinsic ID");
2828 }
2829 Value *NewCall = Builder.CreateBinaryIntrinsic(ID: NewIID, LHS: X, RHS: Y, FMFSource: II);
2830 Instruction *FNeg = UnaryOperator::CreateFNeg(V: NewCall);
2831 FNeg->copyIRFlags(V: II);
2832 return FNeg;
2833 }
2834
2835 // m(m(X, C2), C1) -> m(X, C)
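// For example (illustrative): maxnum(maxnum(%x, 2.0), 4.0) --> maxnum(%x, 4.0)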
2836 const APFloat *C1, *C2;
2837 if (auto *M = dyn_cast<IntrinsicInst>(Val: Arg0)) {
2838 if (M->getIntrinsicID() == IID && match(V: Arg1, P: m_APFloat(Res&: C1)) &&
2839 ((match(V: M->getArgOperand(i: 0), P: m_Value(V&: X)) &&
2840 match(V: M->getArgOperand(i: 1), P: m_APFloat(Res&: C2))) ||
2841 (match(V: M->getArgOperand(i: 1), P: m_Value(V&: X)) &&
2842 match(V: M->getArgOperand(i: 0), P: m_APFloat(Res&: C2))))) {
2843 APFloat Res(0.0);
2844 switch (IID) {
2845 case Intrinsic::maxnum:
2846 Res = maxnum(A: *C1, B: *C2);
2847 break;
2848 case Intrinsic::minnum:
2849 Res = minnum(A: *C1, B: *C2);
2850 break;
2851 case Intrinsic::maximum:
2852 Res = maximum(A: *C1, B: *C2);
2853 break;
2854 case Intrinsic::minimum:
2855 Res = minimum(A: *C1, B: *C2);
2856 break;
2857 default:
2858 llvm_unreachable("unexpected intrinsic ID");
2859 }
2860 // TODO: Conservatively intersecting FMF. If Res == C2, the transform
2861 // was a simplification (so Arg0 and its original flags could
2862 // propagate?)
2863 Value *V = Builder.CreateBinaryIntrinsic(
2864 ID: IID, LHS: X, RHS: ConstantFP::get(Ty: Arg0->getType(), V: Res),
2865 FMFSource: FMFSource::intersect(A: II, B: M));
2866 return replaceInstUsesWith(I&: *II, V);
2867 }
2868 }
2869
2870 // m((fpext X), (fpext Y)) -> fpext (m(X, Y))
2871 if (match(V: Arg0, P: m_OneUse(SubPattern: m_FPExt(Op: m_Value(V&: X)))) &&
2872 match(V: Arg1, P: m_OneUse(SubPattern: m_FPExt(Op: m_Value(V&: Y)))) &&
2873 X->getType() == Y->getType()) {
2874 Value *NewCall =
2875 Builder.CreateBinaryIntrinsic(ID: IID, LHS: X, RHS: Y, FMFSource: II, Name: II->getName());
2876 return new FPExtInst(NewCall, II->getType());
2877 }
2878
2879 // max X, -X --> fabs X
2880 // min X, -X --> -(fabs X)
2881 // TODO: Remove one-use limitation? That is obviously better for max,
2882 // hence why we don't check for one-use for that. However,
2883 // it would be an extra instruction for min (fnabs), but
2884 // that is still likely better for analysis and codegen.
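// For example (illustrative): maxnum(%x, fneg %x) --> fabs(%x), and
// minnum(%x, fneg %x) --> fneg(fabs(%x)).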
2885 auto IsMinMaxOrXNegX = [IID, &X](Value *Op0, Value *Op1) {
2886 if (match(V: Op0, P: m_FNeg(X: m_Value(V&: X))) && match(V: Op1, P: m_Specific(V: X)))
2887 return Op0->hasOneUse() ||
2888 (IID != Intrinsic::minimum && IID != Intrinsic::minnum);
2889 return false;
2890 };
2891
2892 if (IsMinMaxOrXNegX(Arg0, Arg1) || IsMinMaxOrXNegX(Arg1, Arg0)) {
2893 Value *R = Builder.CreateUnaryIntrinsic(ID: Intrinsic::fabs, V: X, FMFSource: II);
2894 if (IID == Intrinsic::minimum || IID == Intrinsic::minnum)
2895 R = Builder.CreateFNegFMF(V: R, FMFSource: II);
2896 return replaceInstUsesWith(I&: *II, V: R);
2897 }
2898
2899 break;
2900 }
2901 case Intrinsic::matrix_multiply: {
2902 // Optimize negation in matrix multiplication.
2903
2904 // -A * -B -> A * B
2905 Value *A, *B;
2906 if (match(V: II->getArgOperand(i: 0), P: m_FNeg(X: m_Value(V&: A))) &&
2907 match(V: II->getArgOperand(i: 1), P: m_FNeg(X: m_Value(V&: B)))) {
2908 replaceOperand(I&: *II, OpNum: 0, V: A);
2909 replaceOperand(I&: *II, OpNum: 1, V: B);
2910 return II;
2911 }
2912
2913 Value *Op0 = II->getOperand(i_nocapture: 0);
2914 Value *Op1 = II->getOperand(i_nocapture: 1);
2915 Value *OpNotNeg, *NegatedOp;
2916 unsigned NegatedOpArg, OtherOpArg;
2917 if (match(V: Op0, P: m_FNeg(X: m_Value(V&: OpNotNeg)))) {
2918 NegatedOp = Op0;
2919 NegatedOpArg = 0;
2920 OtherOpArg = 1;
2921 } else if (match(V: Op1, P: m_FNeg(X: m_Value(V&: OpNotNeg)))) {
2922 NegatedOp = Op1;
2923 NegatedOpArg = 1;
2924 OtherOpArg = 0;
2925 } else
2926 // Multiplication doesn't have a negated operand.
2927 break;
2928
2929 // Only optimize if the negated operand has only one use.
2930 if (!NegatedOp->hasOneUse())
2931 break;
2932
2933 Value *OtherOp = II->getOperand(i_nocapture: OtherOpArg);
2934 VectorType *RetTy = cast<VectorType>(Val: II->getType());
2935 VectorType *NegatedOpTy = cast<VectorType>(Val: NegatedOp->getType());
2936 VectorType *OtherOpTy = cast<VectorType>(Val: OtherOp->getType());
2937 ElementCount NegatedCount = NegatedOpTy->getElementCount();
2938 ElementCount OtherCount = OtherOpTy->getElementCount();
2939 ElementCount RetCount = RetTy->getElementCount();
2940 // (-A) * B -> A * (-B), if it is cheaper to negate B and vice versa.
2941 if (ElementCount::isKnownGT(LHS: NegatedCount, RHS: OtherCount) &&
2942 ElementCount::isKnownLT(LHS: OtherCount, RHS: RetCount)) {
2943 Value *InverseOtherOp = Builder.CreateFNeg(V: OtherOp);
2944 replaceOperand(I&: *II, OpNum: NegatedOpArg, V: OpNotNeg);
2945 replaceOperand(I&: *II, OpNum: OtherOpArg, V: InverseOtherOp);
2946 return II;
2947 }
2948 // (-A) * B -> -(A * B), if it is cheaper to negate the result
2949 if (ElementCount::isKnownGT(LHS: NegatedCount, RHS: RetCount)) {
2950 SmallVector<Value *, 5> NewArgs(II->args());
2951 NewArgs[NegatedOpArg] = OpNotNeg;
2952 Instruction *NewMul =
2953 Builder.CreateIntrinsic(RetTy: II->getType(), ID: IID, Args: NewArgs, FMFSource: II);
2954 return replaceInstUsesWith(I&: *II, V: Builder.CreateFNegFMF(V: NewMul, FMFSource: II));
2955 }
2956 break;
2957 }
2958 case Intrinsic::fmuladd: {
2959 // Try to simplify the underlying FMul.
2960 if (Value *V =
2961 simplifyFMulInst(LHS: II->getArgOperand(i: 0), RHS: II->getArgOperand(i: 1),
2962 FMF: II->getFastMathFlags(), Q: SQ.getWithInstruction(I: II)))
2963 return BinaryOperator::CreateFAddFMF(V1: V, V2: II->getArgOperand(i: 2),
2964 FMF: II->getFastMathFlags());
2965
2966 [[fallthrough]];
2967 }
2968 case Intrinsic::fma: {
2969 // fma fneg(x), fneg(y), z -> fma x, y, z
2970 Value *Src0 = II->getArgOperand(i: 0);
2971 Value *Src1 = II->getArgOperand(i: 1);
2972 Value *Src2 = II->getArgOperand(i: 2);
2973 Value *X, *Y;
2974 if (match(V: Src0, P: m_FNeg(X: m_Value(V&: X))) && match(V: Src1, P: m_FNeg(X: m_Value(V&: Y)))) {
2975 replaceOperand(I&: *II, OpNum: 0, V: X);
2976 replaceOperand(I&: *II, OpNum: 1, V: Y);
2977 return II;
2978 }
2979
2980 // fma fabs(x), fabs(x), z -> fma x, x, z
2981 if (match(V: Src0, P: m_FAbs(Op0: m_Value(V&: X))) &&
2982 match(V: Src1, P: m_FAbs(Op0: m_Specific(V: X)))) {
2983 replaceOperand(I&: *II, OpNum: 0, V: X);
2984 replaceOperand(I&: *II, OpNum: 1, V: X);
2985 return II;
2986 }
2987
2988 // Try to simplify the underlying FMul. We can only apply simplifications
2989 // that do not require rounding.
2990 if (Value *V = simplifyFMAFMul(LHS: Src0, RHS: Src1, FMF: II->getFastMathFlags(),
2991 Q: SQ.getWithInstruction(I: II)))
2992 return BinaryOperator::CreateFAddFMF(V1: V, V2: Src2, FMF: II->getFastMathFlags());
2993
2994 // fma x, y, 0 -> fmul x, y
2995 // This is always valid for -0.0, but requires nsz for +0.0 as
2996 // -0.0 + 0.0 = 0.0, which would not be the same as the fmul on its own.
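// For example (illustrative): fma(%x, %y, -0.0) --> fmul %x, %y; folding the
// +0.0 form the same way additionally requires the nsz flag on the call.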
2997 if (match(V: Src2, P: m_NegZeroFP()) ||
2998 (match(V: Src2, P: m_PosZeroFP()) && II->getFastMathFlags().noSignedZeros()))
2999 return BinaryOperator::CreateFMulFMF(V1: Src0, V2: Src1, FMFSource: II);
3000
3001 // fma x, -1.0, y -> fsub y, x
3002 if (match(V: Src1, P: m_SpecificFP(V: -1.0)))
3003 return BinaryOperator::CreateFSubFMF(V1: Src2, V2: Src0, FMFSource: II);
3004
3005 break;
3006 }
3007 case Intrinsic::copysign: {
3008 Value *Mag = II->getArgOperand(i: 0), *Sign = II->getArgOperand(i: 1);
3009 if (std::optional<bool> KnownSignBit = computeKnownFPSignBit(
3010 V: Sign, SQ: getSimplifyQuery().getWithInstruction(I: II))) {
3011 if (*KnownSignBit) {
3012 // If we know that the sign argument is negative, reduce to FNABS:
3013 // copysign Mag, -Sign --> fneg (fabs Mag)
3014 Value *Fabs = Builder.CreateUnaryIntrinsic(ID: Intrinsic::fabs, V: Mag, FMFSource: II);
3015 return replaceInstUsesWith(I&: *II, V: Builder.CreateFNegFMF(V: Fabs, FMFSource: II));
3016 }
3017
3018 // If we know that the sign argument is positive, reduce to FABS:
3019 // copysign Mag, +Sign --> fabs Mag
3020 Value *Fabs = Builder.CreateUnaryIntrinsic(ID: Intrinsic::fabs, V: Mag, FMFSource: II);
3021 return replaceInstUsesWith(I&: *II, V: Fabs);
3022 }
3023
3024 // Propagate sign argument through nested calls:
3025 // copysign Mag, (copysign ?, X) --> copysign Mag, X
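// For example (illustrative IR; %mag, %a and %x are hypothetical values):
//   %inner = call float @llvm.copysign.f32(float %a, float %x)
//   %r = call float @llvm.copysign.f32(float %mag, float %inner)
// becomes
//   %r = call float @llvm.copysign.f32(float %mag, float %x)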
3026 Value *X;
3027 if (match(V: Sign, P: m_Intrinsic<Intrinsic::copysign>(Op0: m_Value(), Op1: m_Value(V&: X)))) {
3028 Value *CopySign =
3029 Builder.CreateCopySign(LHS: Mag, RHS: X, FMFSource: FMFSource::intersect(A: II, B: Sign));
3030 return replaceInstUsesWith(I&: *II, V: CopySign);
3031 }
3032
3033 // Clear sign-bit of constant magnitude:
3034 // copysign -MagC, X --> copysign MagC, X
3035 // TODO: Support constant folding for fabs
3036 const APFloat *MagC;
3037 if (match(V: Mag, P: m_APFloat(Res&: MagC)) && MagC->isNegative()) {
3038 APFloat PosMagC = *MagC;
3039 PosMagC.clearSign();
3040 return replaceOperand(I&: *II, OpNum: 0, V: ConstantFP::get(Ty: Mag->getType(), V: PosMagC));
3041 }
3042
3043 // Peek through changes of magnitude's sign-bit. This call rewrites those:
3044 // copysign (fabs X), Sign --> copysign X, Sign
3045 // copysign (fneg X), Sign --> copysign X, Sign
3046 if (match(V: Mag, P: m_FAbs(Op0: m_Value(V&: X))) || match(V: Mag, P: m_FNeg(X: m_Value(V&: X))))
3047 return replaceOperand(I&: *II, OpNum: 0, V: X);
3048
3049 Type *SignEltTy = Sign->getType()->getScalarType();
3050
3051 Value *CastSrc;
3052 if (match(V: Sign,
3053 P: m_OneUse(SubPattern: m_ElementWiseBitCast(Op: m_OneUse(SubPattern: m_Value(V&: CastSrc))))) &&
3054 CastSrc->getType()->isIntOrIntVectorTy() &&
3055 APFloat::hasSignBitInMSB(SignEltTy->getFltSemantics())) {
3056 KnownBits Known(SignEltTy->getPrimitiveSizeInBits());
3057 if (SimplifyDemandedBits(I: cast<Instruction>(Val: Sign), Op: 0,
3058 DemandedMask: APInt::getSignMask(BitWidth: Known.getBitWidth()), Known,
3059 Q: SQ))
3060 return II;
3061 }
3062
3063 break;
3064 }
3065 case Intrinsic::fabs: {
3066 Value *Cond, *TVal, *FVal;
3067 Value *Arg = II->getArgOperand(i: 0);
3068 Value *X;
3069 // fabs (-X) --> fabs (X)
3070 if (match(V: Arg, P: m_FNeg(X: m_Value(V&: X)))) {
3071 CallInst *Fabs = Builder.CreateUnaryIntrinsic(ID: Intrinsic::fabs, V: X, FMFSource: II);
3072 return replaceInstUsesWith(I&: CI, V: Fabs);
3073 }
3074
3075 if (match(V: Arg, P: m_Select(C: m_Value(V&: Cond), L: m_Value(V&: TVal), R: m_Value(V&: FVal)))) {
3076 // fabs (select Cond, TrueC, FalseC) --> select Cond, AbsT, AbsF
3077 if (Arg->hasOneUse() ? (isa<Constant>(Val: TVal) || isa<Constant>(Val: FVal))
3078 : (isa<Constant>(Val: TVal) && isa<Constant>(Val: FVal))) {
3079 CallInst *AbsT = Builder.CreateCall(Callee: II->getCalledFunction(), Args: {TVal});
3080 CallInst *AbsF = Builder.CreateCall(Callee: II->getCalledFunction(), Args: {FVal});
3081 SelectInst *SI = SelectInst::Create(C: Cond, S1: AbsT, S2: AbsF);
3082 FastMathFlags FMF1 = II->getFastMathFlags();
3083 FastMathFlags FMF2 = cast<SelectInst>(Val: Arg)->getFastMathFlags();
3084 FMF2.setNoSignedZeros(false);
3085 SI->setFastMathFlags(FMF1 | FMF2);
3086 return SI;
3087 }
3088 // fabs (select Cond, -FVal, FVal) --> fabs FVal
3089 if (match(V: TVal, P: m_FNeg(X: m_Specific(V: FVal))))
3090 return replaceOperand(I&: *II, OpNum: 0, V: FVal);
3091 // fabs (select Cond, TVal, -TVal) --> fabs TVal
3092 if (match(V: FVal, P: m_FNeg(X: m_Specific(V: TVal))))
3093 return replaceOperand(I&: *II, OpNum: 0, V: TVal);
3094 }
3095
3096 Value *Magnitude, *Sign;
3097 if (match(V: II->getArgOperand(i: 0),
3098 P: m_CopySign(Op0: m_Value(V&: Magnitude), Op1: m_Value(V&: Sign)))) {
3099 // fabs (copysign x, y) -> (fabs x)
3100 CallInst *AbsSign =
3101 Builder.CreateUnaryIntrinsic(ID: Intrinsic::fabs, V: Magnitude, FMFSource: II);
3102 return replaceInstUsesWith(I&: *II, V: AbsSign);
3103 }
3104
3105 [[fallthrough]];
3106 }
3107 case Intrinsic::ceil:
3108 case Intrinsic::floor:
3109 case Intrinsic::round:
3110 case Intrinsic::roundeven:
3111 case Intrinsic::nearbyint:
3112 case Intrinsic::rint:
3113 case Intrinsic::trunc: {
3114 Value *ExtSrc;
3115 if (match(V: II->getArgOperand(i: 0), P: m_OneUse(SubPattern: m_FPExt(Op: m_Value(V&: ExtSrc))))) {
3116 // Narrow the call: intrinsic (fpext x) -> fpext (intrinsic x)
3117 Value *NarrowII = Builder.CreateUnaryIntrinsic(ID: IID, V: ExtSrc, FMFSource: II);
3118 return new FPExtInst(NarrowII, II->getType());
3119 }
3120 break;
3121 }
3122 case Intrinsic::cos:
3123 case Intrinsic::amdgcn_cos:
3124 case Intrinsic::cosh: {
3125 Value *X, *Sign;
3126 Value *Src = II->getArgOperand(i: 0);
3127 if (match(V: Src, P: m_FNeg(X: m_Value(V&: X))) || match(V: Src, P: m_FAbs(Op0: m_Value(V&: X))) ||
3128 match(V: Src, P: m_CopySign(Op0: m_Value(V&: X), Op1: m_Value(V&: Sign)))) {
3129 // f(-x) --> f(x)
3130 // f(fabs(x)) --> f(x)
3131 // f(copysign(x, y)) --> f(x)
3132 // for f in {cos, cosh}
3133 return replaceOperand(I&: *II, OpNum: 0, V: X);
3134 }
3135 break;
3136 }
3137 case Intrinsic::sin:
3138 case Intrinsic::amdgcn_sin:
3139 case Intrinsic::sinh:
3140 case Intrinsic::tan:
3141 case Intrinsic::tanh: {
3142 Value *X;
3143 if (match(V: II->getArgOperand(i: 0), P: m_OneUse(SubPattern: m_FNeg(X: m_Value(V&: X))))) {
3144 // f(-x) --> -f(x)
3145 // for f in {sin, sinh, tan, tanh}
3146 Value *NewFunc = Builder.CreateUnaryIntrinsic(ID: IID, V: X, FMFSource: II);
3147 return UnaryOperator::CreateFNegFMF(Op: NewFunc, FMFSource: II);
3148 }
3149 break;
3150 }
3151 case Intrinsic::ldexp: {
3152 // ldexp(ldexp(x, a), b) -> ldexp(x, a + b)
3153 //
3154 // The danger is if the first ldexp would overflow to infinity or underflow
3155 // to zero, but the combined exponent avoids it. We ignore this with
3156 // reassoc.
3157 //
3158 // It's also safe to fold if we know both exponents are >= 0 or <= 0 since
3159 // it would just double down on the overflow/underflow which would occur
3160 // anyway.
3161 //
3162 // TODO: Could do better if we had range tracking for the input value
3163 // exponent. Also could broaden sign check to cover == 0 case.
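// For example (illustrative IR; %x is a hypothetical value):
//   %a = call float @llvm.ldexp.f32.i32(float %x, i32 3)
//   %r = call float @llvm.ldexp.f32.i32(float %a, i32 4)
// becomes, with reassoc or when both exponents are known to share a sign,
//   %r = call float @llvm.ldexp.f32.i32(float %x, i32 7)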
3164 Value *Src = II->getArgOperand(i: 0);
3165 Value *Exp = II->getArgOperand(i: 1);
3166
3167 uint64_t ConstExp;
3168 if (match(V: Exp, P: m_ConstantInt(V&: ConstExp))) {
3169 // ldexp(x, K) -> fmul x, 2^K
3170 const fltSemantics &FPTy =
3171 Src->getType()->getScalarType()->getFltSemantics();
3172
3173 APFloat Scaled = scalbn(X: APFloat::getOne(Sem: FPTy), Exp: static_cast<int>(ConstExp),
3174 RM: APFloat::rmNearestTiesToEven);
3175 if (!Scaled.isZero() && !Scaled.isInfinity()) {
3176 // Skip overflow and underflow cases.
3177 Constant *FPConst = ConstantFP::get(Ty: Src->getType(), V: Scaled);
3178 return BinaryOperator::CreateFMulFMF(V1: Src, V2: FPConst, FMFSource: II);
3179 }
3180 }
3181
3182 Value *InnerSrc;
3183 Value *InnerExp;
3184 if (match(V: Src, P: m_OneUse(SubPattern: m_Intrinsic<Intrinsic::ldexp>(
3185 Op0: m_Value(V&: InnerSrc), Op1: m_Value(V&: InnerExp)))) &&
3186 Exp->getType() == InnerExp->getType()) {
3187 FastMathFlags FMF = II->getFastMathFlags();
3188 FastMathFlags InnerFlags = cast<FPMathOperator>(Val: Src)->getFastMathFlags();
3189
3190 if ((FMF.allowReassoc() && InnerFlags.allowReassoc()) ||
3191 signBitMustBeTheSame(Op0: Exp, Op1: InnerExp, SQ: SQ.getWithInstruction(I: II))) {
3192 // TODO: Add nsw/nuw probably safe if integer type exceeds exponent
3193 // width.
3194 Value *NewExp = Builder.CreateAdd(LHS: InnerExp, RHS: Exp);
3195 II->setArgOperand(i: 1, v: NewExp);
3196 II->setFastMathFlags(InnerFlags); // Or the inner flags.
3197 return replaceOperand(I&: *II, OpNum: 0, V: InnerSrc);
3198 }
3199 }
3200
3201 // ldexp(x, zext(i1 y)) -> fmul x, (select y, 2.0, 1.0)
3202 // ldexp(x, sext(i1 y)) -> fmul x, (select y, 0.5, 1.0)
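// For example (illustrative IR; %b is a hypothetical i1, %x a float):
//   %e = zext i1 %b to i32
//   %r = call float @llvm.ldexp.f32.i32(float %x, i32 %e)
// becomes
//   %s = select i1 %b, float 2.0, float 1.0
//   %r = fmul float %x, %s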
3203 Value *ExtSrc;
3204 if (match(V: Exp, P: m_ZExt(Op: m_Value(V&: ExtSrc))) &&
3205 ExtSrc->getType()->getScalarSizeInBits() == 1) {
3206 Value *Select =
3207 Builder.CreateSelect(C: ExtSrc, True: ConstantFP::get(Ty: II->getType(), V: 2.0),
3208 False: ConstantFP::get(Ty: II->getType(), V: 1.0));
3209 return BinaryOperator::CreateFMulFMF(V1: Src, V2: Select, FMFSource: II);
3210 }
3211 if (match(V: Exp, P: m_SExt(Op: m_Value(V&: ExtSrc))) &&
3212 ExtSrc->getType()->getScalarSizeInBits() == 1) {
3213 Value *Select =
3214 Builder.CreateSelect(C: ExtSrc, True: ConstantFP::get(Ty: II->getType(), V: 0.5),
3215 False: ConstantFP::get(Ty: II->getType(), V: 1.0));
3216 return BinaryOperator::CreateFMulFMF(V1: Src, V2: Select, FMFSource: II);
3217 }
3218
3219 // ldexp(x, c ? exp : 0) -> c ? ldexp(x, exp) : x
3220 // ldexp(x, c ? 0 : exp) -> c ? x : ldexp(x, exp)
3221 //
3222 // TODO: If we cared, should insert a canonicalize for x
3223 Value *SelectCond, *SelectLHS, *SelectRHS;
3224 if (match(V: II->getArgOperand(i: 1),
3225 P: m_OneUse(SubPattern: m_Select(C: m_Value(V&: SelectCond), L: m_Value(V&: SelectLHS),
3226 R: m_Value(V&: SelectRHS))))) {
3227 Value *NewLdexp = nullptr;
3228 Value *Select = nullptr;
3229 if (match(V: SelectRHS, P: m_ZeroInt())) {
3230 NewLdexp = Builder.CreateLdexp(Src, Exp: SelectLHS, FMFSource: II);
3231 Select = Builder.CreateSelect(C: SelectCond, True: NewLdexp, False: Src);
3232 } else if (match(V: SelectLHS, P: m_ZeroInt())) {
3233 NewLdexp = Builder.CreateLdexp(Src, Exp: SelectRHS, FMFSource: II);
3234 Select = Builder.CreateSelect(C: SelectCond, True: Src, False: NewLdexp);
3235 }
3236
3237 if (NewLdexp) {
3238 Select->takeName(V: II);
3239 return replaceInstUsesWith(I&: *II, V: Select);
3240 }
3241 }
3242
3243 break;
3244 }
3245 case Intrinsic::ptrauth_auth:
3246 case Intrinsic::ptrauth_resign: {
3247 // We don't support this optimization on intrinsic calls with deactivation
3248 // symbols, which are represented using operand bundles.
3249 if (II->hasOperandBundles())
3250 break;
3251
3252 // (sign|resign) + (auth|resign) can be folded by omitting the middle
3253 // sign+auth component if the key and discriminator match.
3254 bool NeedSign = II->getIntrinsicID() == Intrinsic::ptrauth_resign;
3255 Value *Ptr = II->getArgOperand(i: 0);
3256 Value *Key = II->getArgOperand(i: 1);
3257 Value *Disc = II->getArgOperand(i: 2);
3258
3259 // AuthKey will be the key we need to end up authenticating against in
3260 // whatever we replace this sequence with.
3261 Value *AuthKey = nullptr, *AuthDisc = nullptr, *BasePtr;
3262 if (const auto *CI = dyn_cast<CallBase>(Val: Ptr)) {
3263 // We don't support this optimization on intrinsic calls with deactivation
3264 // symbols, which are represented using operand bundles.
3265 if (CI->hasOperandBundles())
3266 break;
3267
3268 BasePtr = CI->getArgOperand(i: 0);
3269 if (CI->getIntrinsicID() == Intrinsic::ptrauth_sign) {
3270 if (CI->getArgOperand(i: 1) != Key || CI->getArgOperand(i: 2) != Disc)
3271 break;
3272 } else if (CI->getIntrinsicID() == Intrinsic::ptrauth_resign) {
3273 if (CI->getArgOperand(i: 3) != Key || CI->getArgOperand(i: 4) != Disc)
3274 break;
3275 AuthKey = CI->getArgOperand(i: 1);
3276 AuthDisc = CI->getArgOperand(i: 2);
3277 } else
3278 break;
3279 } else if (const auto *PtrToInt = dyn_cast<PtrToIntOperator>(Val: Ptr)) {
3280 // ptrauth constants are equivalent to a call to @llvm.ptrauth.sign for
3281 // our purposes, so check for that too.
3282 const auto *CPA = dyn_cast<ConstantPtrAuth>(Val: PtrToInt->getOperand(i_nocapture: 0));
3283 if (!CPA || !CPA->isKnownCompatibleWith(Key, Discriminator: Disc, DL))
3284 break;
3285
3286 // resign(ptrauth(p,ks,ds),ks,ds,kr,dr) -> ptrauth(p,kr,dr)
3287 if (NeedSign && isa<ConstantInt>(Val: II->getArgOperand(i: 4))) {
3288 auto *SignKey = cast<ConstantInt>(Val: II->getArgOperand(i: 3));
3289 auto *SignDisc = cast<ConstantInt>(Val: II->getArgOperand(i: 4));
3290 auto *Null = ConstantPointerNull::get(T: Builder.getPtrTy());
3291 auto *NewCPA = ConstantPtrAuth::get(Ptr: CPA->getPointer(), Key: SignKey,
3292 Disc: SignDisc, /*AddrDisc=*/Null,
3293 /*DeactivationSymbol=*/Null);
3294 replaceInstUsesWith(
3295 I&: *II, V: ConstantExpr::getPointerCast(C: NewCPA, Ty: II->getType()));
3296 return eraseInstFromFunction(I&: *II);
3297 }
3298
3299 // auth(ptrauth(p,k,d),k,d) -> p
3300 BasePtr = Builder.CreatePtrToInt(V: CPA->getPointer(), DestTy: II->getType());
3301 } else
3302 break;
3303
3304 unsigned NewIntrin;
3305 if (AuthKey && NeedSign) {
3306 // resign(0,1) + resign(1,2) = resign(0, 2)
3307 NewIntrin = Intrinsic::ptrauth_resign;
3308 } else if (AuthKey) {
3309 // resign(0,1) + auth(1) = auth(0)
3310 NewIntrin = Intrinsic::ptrauth_auth;
3311 } else if (NeedSign) {
3312 // sign(0) + resign(0, 1) = sign(1)
3313 NewIntrin = Intrinsic::ptrauth_sign;
3314 } else {
3315 // sign(0) + auth(0) = nop
3316 replaceInstUsesWith(I&: *II, V: BasePtr);
3317 return eraseInstFromFunction(I&: *II);
3318 }
3319
3320 SmallVector<Value *, 4> CallArgs;
3321 CallArgs.push_back(Elt: BasePtr);
3322 if (AuthKey) {
3323 CallArgs.push_back(Elt: AuthKey);
3324 CallArgs.push_back(Elt: AuthDisc);
3325 }
3326
3327 if (NeedSign) {
3328 CallArgs.push_back(Elt: II->getArgOperand(i: 3));
3329 CallArgs.push_back(Elt: II->getArgOperand(i: 4));
3330 }
3331
3332 Function *NewFn =
3333 Intrinsic::getOrInsertDeclaration(M: II->getModule(), id: NewIntrin);
3334 return CallInst::Create(Func: NewFn, Args: CallArgs);
3335 }
3336 case Intrinsic::arm_neon_vtbl1:
3337 case Intrinsic::arm_neon_vtbl2:
3338 case Intrinsic::arm_neon_vtbl3:
3339 case Intrinsic::arm_neon_vtbl4:
3340 case Intrinsic::aarch64_neon_tbl1:
3341 case Intrinsic::aarch64_neon_tbl2:
3342 case Intrinsic::aarch64_neon_tbl3:
3343 case Intrinsic::aarch64_neon_tbl4:
3344 return simplifyNeonTbl(II&: *II, IC&: *this, /*IsExtension=*/false);
3345 case Intrinsic::arm_neon_vtbx1:
3346 case Intrinsic::arm_neon_vtbx2:
3347 case Intrinsic::arm_neon_vtbx3:
3348 case Intrinsic::arm_neon_vtbx4:
3349 case Intrinsic::aarch64_neon_tbx1:
3350 case Intrinsic::aarch64_neon_tbx2:
3351 case Intrinsic::aarch64_neon_tbx3:
3352 case Intrinsic::aarch64_neon_tbx4:
3353 return simplifyNeonTbl(II&: *II, IC&: *this, /*IsExtension=*/true);
3354
3355 case Intrinsic::arm_neon_vmulls:
3356 case Intrinsic::arm_neon_vmullu:
3357 case Intrinsic::aarch64_neon_smull:
3358 case Intrinsic::aarch64_neon_umull: {
3359 Value *Arg0 = II->getArgOperand(i: 0);
3360 Value *Arg1 = II->getArgOperand(i: 1);
3361
3362 // Handle mul by zero first:
3363 if (isa<ConstantAggregateZero>(Val: Arg0) || isa<ConstantAggregateZero>(Val: Arg1)) {
3364 return replaceInstUsesWith(I&: CI, V: ConstantAggregateZero::get(Ty: II->getType()));
3365 }
3366
3367 // Check for constant LHS & RHS - in this case we just simplify.
3368 bool Zext = (IID == Intrinsic::arm_neon_vmullu ||
3369 IID == Intrinsic::aarch64_neon_umull);
3370 VectorType *NewVT = cast<VectorType>(Val: II->getType());
3371 if (Constant *CV0 = dyn_cast<Constant>(Val: Arg0)) {
3372 if (Constant *CV1 = dyn_cast<Constant>(Val: Arg1)) {
3373 Value *V0 = Builder.CreateIntCast(V: CV0, DestTy: NewVT, /*isSigned=*/!Zext);
3374 Value *V1 = Builder.CreateIntCast(V: CV1, DestTy: NewVT, /*isSigned=*/!Zext);
3375 return replaceInstUsesWith(I&: CI, V: Builder.CreateMul(LHS: V0, RHS: V1));
3376 }
3377
3378 // Couldn't simplify - canonicalize constant to the RHS.
3379 std::swap(a&: Arg0, b&: Arg1);
3380 }
3381
3382 // Handle mul by one:
3383 if (Constant *CV1 = dyn_cast<Constant>(Val: Arg1))
3384 if (ConstantInt *Splat =
3385 dyn_cast_or_null<ConstantInt>(Val: CV1->getSplatValue()))
3386 if (Splat->isOne())
3387 return CastInst::CreateIntegerCast(S: Arg0, Ty: II->getType(),
3388 /*isSigned=*/!Zext);
3389
3390 break;
3391 }
3392 case Intrinsic::arm_neon_aesd:
3393 case Intrinsic::arm_neon_aese:
3394 case Intrinsic::aarch64_crypto_aesd:
3395 case Intrinsic::aarch64_crypto_aese:
3396 case Intrinsic::aarch64_sve_aesd:
3397 case Intrinsic::aarch64_sve_aese: {
3398 Value *DataArg = II->getArgOperand(i: 0);
3399 Value *KeyArg = II->getArgOperand(i: 1);
3400
3401 // Accept zero on either operand.
3402 if (!match(V: KeyArg, P: m_ZeroInt()))
3403 std::swap(a&: KeyArg, b&: DataArg);
3404
3405 // Try to use the builtin XOR in AESE and AESD to eliminate a prior XOR
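// For example (illustrative): aese(xor(%data, %key), zeroinitializer)
// --> aese(%data, %key), since AESE/AESD begin by XORing data with key.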
3406 Value *Data, *Key;
3407 if (match(V: KeyArg, P: m_ZeroInt()) &&
3408 match(V: DataArg, P: m_Xor(L: m_Value(V&: Data), R: m_Value(V&: Key)))) {
3409 replaceOperand(I&: *II, OpNum: 0, V: Data);
3410 replaceOperand(I&: *II, OpNum: 1, V: Key);
3411 return II;
3412 }
3413 break;
3414 }
3415 case Intrinsic::arm_neon_vshifts:
3416 case Intrinsic::arm_neon_vshiftu:
3417 case Intrinsic::aarch64_neon_sshl:
3418 case Intrinsic::aarch64_neon_ushl:
3419 return foldNeonShift(II, IC&: *this);
3420 case Intrinsic::hexagon_V6_vandvrt:
3421 case Intrinsic::hexagon_V6_vandvrt_128B: {
3422 // Simplify Q -> V -> Q conversion.
3423 if (auto Op0 = dyn_cast<IntrinsicInst>(Val: II->getArgOperand(i: 0))) {
3424 Intrinsic::ID ID0 = Op0->getIntrinsicID();
3425 if (ID0 != Intrinsic::hexagon_V6_vandqrt &&
3426 ID0 != Intrinsic::hexagon_V6_vandqrt_128B)
3427 break;
3428 Value *Bytes = Op0->getArgOperand(i: 1), *Mask = II->getArgOperand(i: 1);
3429 uint64_t Bytes1 = computeKnownBits(V: Bytes, CxtI: Op0).One.getZExtValue();
3430 uint64_t Mask1 = computeKnownBits(V: Mask, CxtI: II).One.getZExtValue();
3431 // Check if every byte has common bits in Bytes and Mask.
3432 uint64_t C = Bytes1 & Mask1;
3433 if ((C & 0xFF) && (C & 0xFF00) && (C & 0xFF0000) && (C & 0xFF000000))
3434 return replaceInstUsesWith(I&: *II, V: Op0->getArgOperand(i: 0));
3435 }
3436 break;
3437 }
3438 case Intrinsic::stackrestore: {
3439 enum class ClassifyResult {
3440 None,
3441 Alloca,
3442 StackRestore,
3443 CallWithSideEffects,
3444 };
3445 auto Classify = [](const Instruction *I) {
3446 if (isa<AllocaInst>(Val: I))
3447 return ClassifyResult::Alloca;
3448
3449 if (auto *CI = dyn_cast<CallInst>(Val: I)) {
3450 if (auto *II = dyn_cast<IntrinsicInst>(Val: CI)) {
3451 if (II->getIntrinsicID() == Intrinsic::stackrestore)
3452 return ClassifyResult::StackRestore;
3453
3454 if (II->mayHaveSideEffects())
3455 return ClassifyResult::CallWithSideEffects;
3456 } else {
3457 // Consider all non-intrinsic calls to be side effects
3458 return ClassifyResult::CallWithSideEffects;
3459 }
3460 }
3461
3462 return ClassifyResult::None;
3463 };
3464
3465 // If the stacksave and the stackrestore are in the same BB, and there is
3466 // no intervening call, alloca, or stackrestore of a different stacksave,
3467 // remove the restore. This can happen when variable allocas are DCE'd.
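// Illustrative shape of a removable restore (hypothetical IR, body elided):
//   %sp = call ptr @llvm.stacksave()
//   ; ... no allocas, other stackrestores, or calls with side effects ...
//   call void @llvm.stackrestore(ptr %sp)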
3468 if (IntrinsicInst *SS = dyn_cast<IntrinsicInst>(Val: II->getArgOperand(i: 0))) {
3469 if (SS->getIntrinsicID() == Intrinsic::stacksave &&
3470 SS->getParent() == II->getParent()) {
3471 BasicBlock::iterator BI(SS);
3472 bool CannotRemove = false;
3473 for (++BI; &*BI != II; ++BI) {
3474 switch (Classify(&*BI)) {
3475 case ClassifyResult::None:
3476 // So far so good, look at next instructions.
3477 break;
3478
3479 case ClassifyResult::StackRestore:
3480 // If we found an intervening stackrestore for a different
3481 // stacksave, we can't remove the stackrestore. Otherwise, continue.
3482 if (cast<IntrinsicInst>(Val&: *BI).getArgOperand(i: 0) != SS)
3483 CannotRemove = true;
3484 break;
3485
3486 case ClassifyResult::Alloca:
3487 case ClassifyResult::CallWithSideEffects:
3488 // If we found an alloca, a non-intrinsic call, or an intrinsic
3489 // call with side effects, we can't remove the stackrestore.
3490 CannotRemove = true;
3491 break;
3492 }
3493 if (CannotRemove)
3494 break;
3495 }
3496
3497 if (!CannotRemove)
3498 return eraseInstFromFunction(I&: CI);
3499 }
3500 }
3501
3502 // Scan down this block to see if there is another stack restore in the
3503 // same block without an intervening call/alloca.
3504 BasicBlock::iterator BI(II);
3505 Instruction *TI = II->getParent()->getTerminator();
3506 bool CannotRemove = false;
3507 for (++BI; &*BI != TI; ++BI) {
3508 switch (Classify(&*BI)) {
3509 case ClassifyResult::None:
3510 // So far so good, look at next instructions.
3511 break;
3512
3513 case ClassifyResult::StackRestore:
3514 // If there is a stackrestore below this one, remove this one.
3515 return eraseInstFromFunction(I&: CI);
3516
3517 case ClassifyResult::Alloca:
3518 case ClassifyResult::CallWithSideEffects:
3519 // If we found an alloca, a non-intrinsic call, or an intrinsic call
3520 // with side effects (such as llvm.stacksave and llvm.read_register),
3521 // we can't remove the stack restore.
3522 CannotRemove = true;
3523 break;
3524 }
3525 if (CannotRemove)
3526 break;
3527 }
3528
3529 // If the stack restore is in a return, resume, or unwind block and if there
3530 // are no allocas or calls between the restore and the return, nuke the
3531 // restore.
3532 if (!CannotRemove && (isa<ReturnInst>(Val: TI) || isa<ResumeInst>(Val: TI)))
3533 return eraseInstFromFunction(I&: CI);
3534 break;
3535 }
3536 case Intrinsic::lifetime_end:
3537 // Asan needs to poison memory to detect invalid accesses, which are
3538 // possible even for an empty lifetime range.
3539 if (II->getFunction()->hasFnAttribute(Kind: Attribute::SanitizeAddress) ||
3540 II->getFunction()->hasFnAttribute(Kind: Attribute::SanitizeMemory) ||
3541 II->getFunction()->hasFnAttribute(Kind: Attribute::SanitizeHWAddress) ||
3542 II->getFunction()->hasFnAttribute(Kind: Attribute::SanitizeMemTag))
3543 break;
3544
3545 if (removeTriviallyEmptyRange(EndI&: *II, IC&: *this, IsStart: [](const IntrinsicInst &I) {
3546 return I.getIntrinsicID() == Intrinsic::lifetime_start;
3547 }))
3548 return nullptr;
3549 break;
3550 case Intrinsic::assume: {
3551 Value *IIOperand = II->getArgOperand(i: 0);
3552 SmallVector<OperandBundleDef, 4> OpBundles;
3553 II->getOperandBundlesAsDefs(Defs&: OpBundles);
3554
3555 /// This will remove the boolean Condition from the assume given as
3556 /// argument and remove the assume if it becomes useless.
3557 /// It always returns nullptr so it can be used as a return value.
3558 auto RemoveConditionFromAssume = [&](Instruction *Assume) -> Instruction * {
3559 assert(isa<AssumeInst>(Assume));
3560 if (isAssumeWithEmptyBundle(Assume: *cast<AssumeInst>(Val: II)))
3561 return eraseInstFromFunction(I&: CI);
3562 replaceUse(U&: II->getOperandUse(i: 0), NewValue: ConstantInt::getTrue(Context&: II->getContext()));
3563 return nullptr;
3564 };
3565 // Remove an assume if it is followed by an identical assume.
3566 // TODO: Do we need this? Unless there are conflicting assumptions, the
3567 // computeKnownBits(IIOperand) below here eliminates redundant assumes.
3568 Instruction *Next = II->getNextNode();
3569 if (match(V: Next, P: m_Intrinsic<Intrinsic::assume>(Op0: m_Specific(V: IIOperand))))
3570 return RemoveConditionFromAssume(Next);
3571
3572 // Canonicalize assume(a && b) -> assume(a); assume(b);
3573 // Note: New assumption intrinsics created here are registered by
3574 // the InstCombineIRInserter object.
3575 FunctionType *AssumeIntrinsicTy = II->getFunctionType();
3576 Value *AssumeIntrinsic = II->getCalledOperand();
3577 Value *A, *B;
3578 if (match(V: IIOperand, P: m_LogicalAnd(L: m_Value(V&: A), R: m_Value(V&: B)))) {
3579 Builder.CreateCall(FTy: AssumeIntrinsicTy, Callee: AssumeIntrinsic, Args: A, OpBundles,
3580 Name: II->getName());
3581 Builder.CreateCall(FTy: AssumeIntrinsicTy, Callee: AssumeIntrinsic, Args: B, Name: II->getName());
3582 return eraseInstFromFunction(I&: *II);
3583 }
3584 // assume(!(a || b)) -> assume(!a); assume(!b);
3585 if (match(V: IIOperand, P: m_Not(V: m_LogicalOr(L: m_Value(V&: A), R: m_Value(V&: B))))) {
3586 Builder.CreateCall(FTy: AssumeIntrinsicTy, Callee: AssumeIntrinsic,
3587 Args: Builder.CreateNot(V: A), OpBundles, Name: II->getName());
3588 Builder.CreateCall(FTy: AssumeIntrinsicTy, Callee: AssumeIntrinsic,
3589 Args: Builder.CreateNot(V: B), Name: II->getName());
3590 return eraseInstFromFunction(I&: *II);
3591 }
3592
3593 // assume( (load addr) != null ) -> add 'nonnull' metadata to load
3594 // (if assume is valid at the load)
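// For example (illustrative IR; %addr is a hypothetical pointer):
//   %p = load ptr, ptr %addr
//   %c = icmp ne ptr %p, null
//   call void @llvm.assume(i1 %c)
// lets the load be tagged with !nonnull and !noundef metadata, after which
// the assume condition can be dropped.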
3595 Instruction *LHS;
3596 if (match(V: IIOperand, P: m_SpecificICmp(MatchPred: ICmpInst::ICMP_NE, L: m_Instruction(I&: LHS),
3597 R: m_Zero())) &&
3598 LHS->getOpcode() == Instruction::Load &&
3599 LHS->getType()->isPointerTy() &&
3600 isValidAssumeForContext(I: II, CxtI: LHS, DT: &DT)) {
3601 MDNode *MD = MDNode::get(Context&: II->getContext(), MDs: {});
3602 LHS->setMetadata(KindID: LLVMContext::MD_nonnull, Node: MD);
3603 LHS->setMetadata(KindID: LLVMContext::MD_noundef, Node: MD);
3604 return RemoveConditionFromAssume(II);
3605
3606 // TODO: apply nonnull return attributes to calls and invokes
3607 // TODO: apply range metadata for range check patterns?
3608 }
3609
3610 for (unsigned Idx = 0; Idx < II->getNumOperandBundles(); Idx++) {
3611 OperandBundleUse OBU = II->getOperandBundleAt(Index: Idx);
3612
3613 // Separate storage assumptions apply to the underlying allocations, not
3614 // any particular pointer within them. When evaluating the hints for AA
3615 // purposes we getUnderlyingObject them; by precomputing the answers here
3616 // we can avoid having to do so repeatedly there.
3617 if (OBU.getTagName() == "separate_storage") {
3618 assert(OBU.Inputs.size() == 2);
3619 auto MaybeSimplifyHint = [&](const Use &U) {
3620 Value *Hint = U.get();
3621 // Not having a limit is safe because InstCombine removes unreachable
3622 // code.
3623 Value *UnderlyingObject = getUnderlyingObject(V: Hint, /*MaxLookup*/ 0);
3624 if (Hint != UnderlyingObject)
3625 replaceUse(U&: const_cast<Use &>(U), NewValue: UnderlyingObject);
3626 };
3627 MaybeSimplifyHint(OBU.Inputs[0]);
3628 MaybeSimplifyHint(OBU.Inputs[1]);
3629 }
3630
3631 // Try to remove redundant alignment assumptions.
3632 if (OBU.getTagName() == "align" && OBU.Inputs.size() == 2) {
3633 RetainedKnowledge RK = getKnowledgeFromOperandInAssume(
3634 Assume&: *cast<AssumeInst>(Val: II), Idx: II->arg_size() + Idx);
3635 if (!RK || RK.AttrKind != Attribute::Alignment ||
3636 !isPowerOf2_64(Value: RK.ArgValue) || !isa<ConstantInt>(Val: RK.IRArgValue))
3637 continue;
3638
3639 // Remove align 1 bundles; they don't add any useful information.
3640 if (RK.ArgValue == 1)
3641 return CallBase::removeOperandBundle(CB: II, ID: OBU.getTagID());
3642
3643 // Don't try to remove align assumptions for pointers derived from
3644 // arguments. We might lose information if the function gets inline and
3645 // the align argument attribute disappears.
3646 Value *UO = getUnderlyingObject(V: RK.WasOn);
3647 if (!UO || isa<Argument>(Val: UO))
3648 continue;
3649
3650 // Compute known bits for the pointer, passing nullptr as context to
3651 // avoid computeKnownBits using the assumption we are about to remove
3652 // for reasoning.
3653 KnownBits Known = computeKnownBits(V: RK.WasOn, /*CtxI=*/CxtI: nullptr);
3654 unsigned TZ = std::min(a: Known.countMinTrailingZeros(),
3655 b: Value::MaxAlignmentExponent);
3656 if ((1ULL << TZ) < RK.ArgValue)
3657 continue;
3658 return CallBase::removeOperandBundle(CB: II, ID: OBU.getTagID());
3659 }
3660 }
3661
3662 // Convert nonnull assume like:
3663 // %A = icmp ne i32* %PTR, null
3664 // call void @llvm.assume(i1 %A)
3665 // into
3666 // call void @llvm.assume(i1 true) [ "nonnull"(i32* %PTR) ]
3667 if (EnableKnowledgeRetention &&
3668 match(V: IIOperand,
3669 P: m_SpecificICmp(MatchPred: ICmpInst::ICMP_NE, L: m_Value(V&: A), R: m_Zero())) &&
3670 A->getType()->isPointerTy()) {
3671 if (auto *Replacement = buildAssumeFromKnowledge(
3672 Knowledge: {RetainedKnowledge{Attribute::NonNull, 0, A}}, CtxI: Next, AC: &AC, DT: &DT)) {
3673
3674 Replacement->insertBefore(InsertPos: Next->getIterator());
3675 AC.registerAssumption(CI: Replacement);
3676 return RemoveConditionFromAssume(II);
3677 }
3678 }
3679
3680 // Convert alignment assume like:
3681 // %B = ptrtoint i32* %A to i64
3682 // %C = and i64 %B, Constant
3683 // %D = icmp eq i64 %C, 0
3684 // call void @llvm.assume(i1 %D)
3685 // into
3686 // call void @llvm.assume(i1 true) [ "align"(i32* [[A]], i64 Constant + 1)]
3687 uint64_t AlignMask = 1;
3688 if (EnableKnowledgeRetention &&
3689 (match(V: IIOperand, P: m_Not(V: m_Trunc(Op: m_Value(V&: A)))) ||
3690 match(V: IIOperand,
3691 P: m_SpecificICmp(MatchPred: ICmpInst::ICMP_EQ,
3692 L: m_And(L: m_Value(V&: A), R: m_ConstantInt(V&: AlignMask)),
3693 R: m_Zero())))) {
3694 if (isPowerOf2_64(Value: AlignMask + 1)) {
3695 uint64_t Offset = 0;
3696 match(V: A, P: m_Add(L: m_Value(V&: A), R: m_ConstantInt(V&: Offset)));
3697 if (match(V: A, P: m_PtrToIntOrAddr(Op: m_Value(V&: A)))) {
3698 /// Note: this doesn't preserve the offset information but merges
3699 /// offset and alignment.
3700 /// TODO: we can generate a GEP instead of merging the alignment with
3701 /// the offset.
3702 RetainedKnowledge RK{Attribute::Alignment,
3703 (unsigned)MinAlign(A: Offset, B: AlignMask + 1), A};
3704 if (auto *Replacement =
3705 buildAssumeFromKnowledge(Knowledge: RK, CtxI: Next, AC: &AC, DT: &DT)) {
3706
3707 Replacement->insertAfter(InsertPos: II->getIterator());
3708 AC.registerAssumption(CI: Replacement);
3709 }
3710 return RemoveConditionFromAssume(II);
3711 }
3712 }
3713 }
3714
3715 /// Canonicalize Knowledge in operand bundles.
3716 if (EnableKnowledgeRetention && II->hasOperandBundles()) {
3717 for (unsigned Idx = 0; Idx < II->getNumOperandBundles(); Idx++) {
3718 auto &BOI = II->bundle_op_info_begin()[Idx];
3719 RetainedKnowledge RK =
3720 llvm::getKnowledgeFromBundle(Assume&: cast<AssumeInst>(Val&: *II), BOI);
3721 if (BOI.End - BOI.Begin > 2)
3722 continue; // Prevent reducing knowledge in an align bundle with offset,
3723 // since extracting a RetainedKnowledge from it loses the offset
3724 // information.
3725 RetainedKnowledge CanonRK =
3726 llvm::simplifyRetainedKnowledge(Assume: cast<AssumeInst>(Val: II), RK,
3727 AC: &getAssumptionCache(),
3728 DT: &getDominatorTree());
3729 if (CanonRK == RK)
3730 continue;
3731 if (!CanonRK) {
3732 if (BOI.End - BOI.Begin > 0) {
3733 Worklist.pushValue(V: II->op_begin()[BOI.Begin]);
3734 Value::dropDroppableUse(U&: II->op_begin()[BOI.Begin]);
3735 }
3736 continue;
3737 }
3738 assert(RK.AttrKind == CanonRK.AttrKind);
3739 if (BOI.End - BOI.Begin > 0)
3740 II->op_begin()[BOI.Begin].set(CanonRK.WasOn);
3741 if (BOI.End - BOI.Begin > 1)
3742 II->op_begin()[BOI.Begin + 1].set(ConstantInt::get(
3743 Ty: Type::getInt64Ty(C&: II->getContext()), V: CanonRK.ArgValue));
3744 if (RK.WasOn)
3745 Worklist.pushValue(V: RK.WasOn);
3746 return II;
3747 }
3748 }
3749
3750 // If there is a dominating assume with the same condition as this one,
3751 // then this one is redundant, and should be removed.
3752 KnownBits Known(1);
3753 computeKnownBits(V: IIOperand, Known, CxtI: II);
3754 if (Known.isAllOnes() && isAssumeWithEmptyBundle(Assume: cast<AssumeInst>(Val&: *II)))
3755 return eraseInstFromFunction(I&: *II);
3756
3757 // assume(false) is unreachable.
3758 if (match(V: IIOperand, P: m_CombineOr(L: m_Zero(), R: m_Undef()))) {
3759 CreateNonTerminatorUnreachable(InsertAt: II);
3760 return eraseInstFromFunction(I&: *II);
3761 }
3762
3763 // Update the cache of affected values for this assumption (we might be
3764 // here because we just simplified the condition).
3765 AC.updateAffectedValues(CI: cast<AssumeInst>(Val: II));
3766 break;
3767 }
3768 case Intrinsic::experimental_guard: {
3769 // Is this guard followed by another guard? We scan forward over a small
3770 // fixed window of instructions to handle common cases with conditions
3771 // computed between guards.
3772 Instruction *NextInst = II->getNextNode();
3773 for (unsigned i = 0; i < GuardWideningWindow; i++) {
3774 // Note: Using context-free form to avoid compile time blow up
3775 if (!isSafeToSpeculativelyExecute(I: NextInst))
3776 break;
3777 NextInst = NextInst->getNextNode();
3778 }
3779 Value *NextCond = nullptr;
3780 if (match(V: NextInst,
3781 P: m_Intrinsic<Intrinsic::experimental_guard>(Op0: m_Value(V&: NextCond)))) {
3782 Value *CurrCond = II->getArgOperand(i: 0);
3783
3784 // Remove a guard that is immediately preceded by an identical guard.
3785 // Otherwise canonicalize guard(a); guard(b) -> guard(a & b).
3786 if (CurrCond != NextCond) {
3787 Instruction *MoveI = II->getNextNode();
3788 while (MoveI != NextInst) {
3789 auto *Temp = MoveI;
3790 MoveI = MoveI->getNextNode();
3791 Temp->moveBefore(InsertPos: II->getIterator());
3792 }
3793 replaceOperand(I&: *II, OpNum: 0, V: Builder.CreateAnd(LHS: CurrCond, RHS: NextCond));
3794 }
3795 eraseInstFromFunction(I&: *NextInst);
3796 return II;
3797 }
3798 break;
3799 }
3800 case Intrinsic::vector_insert: {
3801 Value *Vec = II->getArgOperand(i: 0);
3802 Value *SubVec = II->getArgOperand(i: 1);
3803 Value *Idx = II->getArgOperand(i: 2);
3804 auto *DstTy = dyn_cast<FixedVectorType>(Val: II->getType());
3805 auto *VecTy = dyn_cast<FixedVectorType>(Val: Vec->getType());
3806 auto *SubVecTy = dyn_cast<FixedVectorType>(Val: SubVec->getType());
3807
3808 // Only canonicalize if the destination vector, Vec, and SubVec are all
3809 // fixed vectors.
3810 if (DstTy && VecTy && SubVecTy) {
3811 unsigned DstNumElts = DstTy->getNumElements();
3812 unsigned VecNumElts = VecTy->getNumElements();
3813 unsigned SubVecNumElts = SubVecTy->getNumElements();
3814 unsigned IdxN = cast<ConstantInt>(Val: Idx)->getZExtValue();
3815
3816 // An insert that entirely overwrites Vec with SubVec is a nop.
3817 if (VecNumElts == SubVecNumElts)
3818 return replaceInstUsesWith(I&: CI, V: SubVec);
3819
3820 // Widen SubVec into a vector of the same width as Vec, since
3821 // shufflevector requires the two input vectors to be the same width.
3822 // Elements beyond the bounds of SubVec within the widened vector are
3823 // undefined.
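// For example (illustrative): inserting <2 x i32> %sub at index 2 into
// <4 x i32> %vec becomes
//   %widen = shufflevector <2 x i32> %sub, <2 x i32> poison,
//                          <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
//   %r = shufflevector <4 x i32> %vec, <4 x i32> %widen,
//                      <4 x i32> <i32 0, i32 1, i32 4, i32 5>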
3824 SmallVector<int, 8> WidenMask;
3825 unsigned i;
3826 for (i = 0; i != SubVecNumElts; ++i)
3827 WidenMask.push_back(Elt: i);
3828 for (; i != VecNumElts; ++i)
3829 WidenMask.push_back(Elt: PoisonMaskElem);
3830
3831 Value *WidenShuffle = Builder.CreateShuffleVector(V: SubVec, Mask: WidenMask);
3832
3833 SmallVector<int, 8> Mask;
3834 for (unsigned i = 0; i != IdxN; ++i)
3835 Mask.push_back(Elt: i);
3836 for (unsigned i = DstNumElts; i != DstNumElts + SubVecNumElts; ++i)
3837 Mask.push_back(Elt: i);
3838 for (unsigned i = IdxN + SubVecNumElts; i != DstNumElts; ++i)
3839 Mask.push_back(Elt: i);
3840
3841 Value *Shuffle = Builder.CreateShuffleVector(V1: Vec, V2: WidenShuffle, Mask);
3842 return replaceInstUsesWith(I&: CI, V: Shuffle);
3843 }
3844 break;
3845 }
3846 case Intrinsic::vector_extract: {
3847 Value *Vec = II->getArgOperand(i: 0);
3848 Value *Idx = II->getArgOperand(i: 1);
3849
3850 Type *ReturnType = II->getType();
3851 // (extract_vector (insert_vector InsertTuple, InsertValue, InsertIdx),
3852 // ExtractIdx)
3853 unsigned ExtractIdx = cast<ConstantInt>(Val: Idx)->getZExtValue();
3854 Value *InsertTuple, *InsertIdx, *InsertValue;
3855 if (match(V: Vec, P: m_Intrinsic<Intrinsic::vector_insert>(Op0: m_Value(V&: InsertTuple),
3856 Op1: m_Value(V&: InsertValue),
3857 Op2: m_Value(V&: InsertIdx))) &&
3858 InsertValue->getType() == ReturnType) {
3859 unsigned Index = cast<ConstantInt>(Val: InsertIdx)->getZExtValue();
3860 // Case where we get the same index right after setting it.
3861 // extract.vector(insert.vector(InsertTuple, InsertValue, Idx), Idx) -->
3862 // InsertValue
3863 if (ExtractIdx == Index)
3864 return replaceInstUsesWith(I&: CI, V: InsertValue);
3865 // If we are getting a different index than what was set in the
3866 // insert.vector intrinsic, we can just set the input tuple to the one up
3867 // in the chain. extract.vector(insert.vector(InsertTuple, InsertValue,
3868 // InsertIndex), ExtractIndex)
3869 // --> extract.vector(InsertTuple, ExtractIndex)
3870 else
3871 return replaceOperand(I&: CI, OpNum: 0, V: InsertTuple);
3872 }
3873
3874 auto *DstTy = dyn_cast<VectorType>(Val: ReturnType);
3875 auto *VecTy = dyn_cast<VectorType>(Val: Vec->getType());
3876
3877 if (DstTy && VecTy) {
3878 auto DstEltCnt = DstTy->getElementCount();
3879 auto VecEltCnt = VecTy->getElementCount();
3880 unsigned IdxN = cast<ConstantInt>(Val: Idx)->getZExtValue();
3881
3882 // Extracting the entirety of Vec is a nop.
3883 if (DstEltCnt == VecTy->getElementCount()) {
3884 replaceInstUsesWith(I&: CI, V: Vec);
3885 return eraseInstFromFunction(I&: CI);
3886 }
3887
3888 // Only canonicalize to shufflevector if the destination vector and
3889 // Vec are fixed vectors.
3890 if (VecEltCnt.isScalable() || DstEltCnt.isScalable())
3891 break;
3892
3893 SmallVector<int, 8> Mask;
3894 for (unsigned i = 0; i != DstEltCnt.getKnownMinValue(); ++i)
3895 Mask.push_back(Elt: IdxN + i);
3896
3897 Value *Shuffle = Builder.CreateShuffleVector(V: Vec, Mask);
3898 return replaceInstUsesWith(I&: CI, V: Shuffle);
3899 }
3900 break;
3901 }
3902 case Intrinsic::experimental_vp_reverse: {
3903 Value *X;
3904 Value *Vec = II->getArgOperand(i: 0);
3905 Value *Mask = II->getArgOperand(i: 1);
3906 if (!match(V: Mask, P: m_AllOnes()))
3907 break;
3908 Value *EVL = II->getArgOperand(i: 2);
3909 // TODO: Canonicalize experimental.vp.reverse after unop/binops?
3910 // rev(unop rev(X)) --> unop X
3911 if (match(V: Vec,
3912 P: m_OneUse(SubPattern: m_UnOp(X: m_Intrinsic<Intrinsic::experimental_vp_reverse>(
3913 Op0: m_Value(V&: X), Op1: m_AllOnes(), Op2: m_Specific(V: EVL)))))) {
3914 auto *OldUnOp = cast<UnaryOperator>(Val: Vec);
3915 auto *NewUnOp = UnaryOperator::CreateWithCopiedFlags(
3916 Opc: OldUnOp->getOpcode(), V: X, CopyO: OldUnOp, Name: OldUnOp->getName(),
3917 InsertBefore: II->getIterator());
3918 return replaceInstUsesWith(I&: CI, V: NewUnOp);
3919 }
3920 break;
3921 }
3922 case Intrinsic::vector_reduce_or:
3923 case Intrinsic::vector_reduce_and: {
3924 // Canonicalize logical or/and reductions:
3925 // Or reduction for i1 is represented as:
3926 // %val = bitcast <ReduxWidth x i1> to iReduxWidth
3927 // %res = cmp ne iReduxWidth %val, 0
3928 // And reduction for i1 is represented as:
3929 // %val = bitcast <ReduxWidth x i1> to iReduxWidth
3930 // %res = cmp eq iReduxWidth %val, -1 (all ones)
3931 Value *Arg = II->getArgOperand(i: 0);
3932 Value *Vect;
3933
3934 if (Value *NewOp =
3935 simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) {
3936 replaceUse(U&: II->getOperandUse(i: 0), NewValue: NewOp);
3937 return II;
3938 }
3939
3940 if (match(V: Arg, P: m_ZExtOrSExtOrSelf(Op: m_Value(V&: Vect)))) {
3941 if (auto *FTy = dyn_cast<FixedVectorType>(Val: Vect->getType()))
3942 if (FTy->getElementType() == Builder.getInt1Ty()) {
3943 Value *Res = Builder.CreateBitCast(
3944 V: Vect, DestTy: Builder.getIntNTy(N: FTy->getNumElements()));
3945 if (IID == Intrinsic::vector_reduce_and) {
3946 Res = Builder.CreateICmpEQ(
3947 LHS: Res, RHS: ConstantInt::getAllOnesValue(Ty: Res->getType()));
3948 } else {
3949 assert(IID == Intrinsic::vector_reduce_or &&
3950 "Expected or reduction.");
3951 Res = Builder.CreateIsNotNull(Arg: Res);
3952 }
3953 if (Arg != Vect)
3954 Res = Builder.CreateCast(Op: cast<CastInst>(Val: Arg)->getOpcode(), V: Res,
3955 DestTy: II->getType());
3956 return replaceInstUsesWith(I&: CI, V: Res);
3957 }
3958 }
3959 [[fallthrough]];
3960 }
3961 case Intrinsic::vector_reduce_add: {
3962 if (IID == Intrinsic::vector_reduce_add) {
3963 // Convert vector_reduce_add(ZExt(<n x i1>)) to
3964 // ZExtOrTrunc(ctpop(bitcast <n x i1> to in)).
3965 // Convert vector_reduce_add(SExt(<n x i1>)) to
3966 // -ZExtOrTrunc(ctpop(bitcast <n x i1> to in)).
3967 // Convert vector_reduce_add(<n x i1>) to
3968 // Trunc(ctpop(bitcast <n x i1> to in)).
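// For example (illustrative IR; %v is a hypothetical <4 x i1>):
//   %z = zext <4 x i1> %v to <4 x i32>
//   %r = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %z)
// becomes
//   %b = bitcast <4 x i1> %v to i4
//   %c = call i4 @llvm.ctpop.i4(i4 %b)
//   %r = zext i4 %c to i32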
3969 Value *Arg = II->getArgOperand(i: 0);
3970 Value *Vect;
3971
3972 if (Value *NewOp =
3973 simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) {
3974 replaceUse(U&: II->getOperandUse(i: 0), NewValue: NewOp);
3975 return II;
3976 }
3977
3978 if (match(V: Arg, P: m_ZExtOrSExtOrSelf(Op: m_Value(V&: Vect)))) {
3979 if (auto *FTy = dyn_cast<FixedVectorType>(Val: Vect->getType()))
3980 if (FTy->getElementType() == Builder.getInt1Ty()) {
3981 Value *V = Builder.CreateBitCast(
3982 V: Vect, DestTy: Builder.getIntNTy(N: FTy->getNumElements()));
3983 Value *Res = Builder.CreateUnaryIntrinsic(ID: Intrinsic::ctpop, V);
3984 Res = Builder.CreateZExtOrTrunc(V: Res, DestTy: II->getType());
3985 if (Arg != Vect &&
3986 cast<Instruction>(Val: Arg)->getOpcode() == Instruction::SExt)
3987 Res = Builder.CreateNeg(V: Res);
3988 return replaceInstUsesWith(I&: CI, V: Res);
3989 }
3990 }
3991
3992 // vector.reduce.add.vNiM(splat(%x)) -> mul(%x, N)
3993 if (Value *Splat = getSplatValue(V: Arg)) {
3994 ElementCount VecToReduceCount =
3995 cast<VectorType>(Val: Arg->getType())->getElementCount();
3996 if (VecToReduceCount.isFixed()) {
3997 unsigned VectorSize = VecToReduceCount.getFixedValue();
3998 return BinaryOperator::CreateMul(
3999 V1: Splat,
4000 V2: ConstantInt::get(Ty: Splat->getType(), V: VectorSize, /*IsSigned=*/false,
4001 /*ImplicitTrunc=*/true));
4002 }
4003 }
4004 }
4005 [[fallthrough]];
4006 }
4007 case Intrinsic::vector_reduce_xor: {
4008 if (IID == Intrinsic::vector_reduce_xor) {
4009 // Exclusive disjunction reduction over the vector with
4010 // (potentially-extended) i1 element type is actually a
4011 // (potentially-extended) arithmetic `add` reduction over the original
4012 // non-extended value:
4013 // vector_reduce_xor(?ext(<n x i1>))
4014 // -->
4015 // ?ext(vector_reduce_add(<n x i1>))
4016 Value *Arg = II->getArgOperand(i: 0);
4017 Value *Vect;
4018
4019 if (Value *NewOp =
4020 simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) {
4021 replaceUse(U&: II->getOperandUse(i: 0), NewValue: NewOp);
4022 return II;
4023 }
4024
4025 if (match(V: Arg, P: m_ZExtOrSExtOrSelf(Op: m_Value(V&: Vect)))) {
4026 if (auto *VTy = dyn_cast<VectorType>(Val: Vect->getType()))
4027 if (VTy->getElementType() == Builder.getInt1Ty()) {
4028 Value *Res = Builder.CreateAddReduce(Src: Vect);
4029 if (Arg != Vect)
4030 Res = Builder.CreateCast(Op: cast<CastInst>(Val: Arg)->getOpcode(), V: Res,
4031 DestTy: II->getType());
4032 return replaceInstUsesWith(I&: CI, V: Res);
4033 }
4034 }
4035 }
4036 [[fallthrough]];
4037 }
4038 case Intrinsic::vector_reduce_mul: {
4039 if (IID == Intrinsic::vector_reduce_mul) {
4040 // Multiplicative reduction over the vector with (potentially-extended)
4041 // i1 element type is actually a (potentially zero-extended)
4042 // logical `and` reduction over the original non-extended value:
4043 // vector_reduce_mul(?ext(<n x i1>))
4044 // -->
4045 // zext(vector_reduce_and(<n x i1>))
4046 Value *Arg = II->getArgOperand(i: 0);
4047 Value *Vect;
4048
4049 if (Value *NewOp =
4050 simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) {
4051 replaceUse(U&: II->getOperandUse(i: 0), NewValue: NewOp);
4052 return II;
4053 }
4054
4055 if (match(V: Arg, P: m_ZExtOrSExtOrSelf(Op: m_Value(V&: Vect)))) {
4056 if (auto *VTy = dyn_cast<VectorType>(Val: Vect->getType()))
4057 if (VTy->getElementType() == Builder.getInt1Ty()) {
4058 Value *Res = Builder.CreateAndReduce(Src: Vect);
4059 Res = Builder.CreateZExt(V: Res, DestTy: II->getType());
4060 return replaceInstUsesWith(I&: CI, V: Res);
4061 }
4062 }
4063 }
4064 [[fallthrough]];
4065 }
4066 case Intrinsic::vector_reduce_umin:
4067 case Intrinsic::vector_reduce_umax: {
4068 if (IID == Intrinsic::vector_reduce_umin ||
4069 IID == Intrinsic::vector_reduce_umax) {
4070 // UMin/UMax reduction over the vector with (potentially-extended)
4071 // i1 element type is actually a (potentially-extended)
4072 // logical `and`/`or` reduction over the original non-extended value:
4073 // vector_reduce_u{min,max}(?ext(<n x i1>))
4074 // -->
4075 // ?ext(vector_reduce_{and,or}(<n x i1>))
4076 Value *Arg = II->getArgOperand(i: 0);
4077 Value *Vect;
4078
4079 if (Value *NewOp =
4080 simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) {
4081 replaceUse(U&: II->getOperandUse(i: 0), NewValue: NewOp);
4082 return II;
4083 }
4084
4085 if (match(V: Arg, P: m_ZExtOrSExtOrSelf(Op: m_Value(V&: Vect)))) {
4086 if (auto *VTy = dyn_cast<VectorType>(Val: Vect->getType()))
4087 if (VTy->getElementType() == Builder.getInt1Ty()) {
4088 Value *Res = IID == Intrinsic::vector_reduce_umin
4089 ? Builder.CreateAndReduce(Src: Vect)
4090 : Builder.CreateOrReduce(Src: Vect);
4091 if (Arg != Vect)
4092 Res = Builder.CreateCast(Op: cast<CastInst>(Val: Arg)->getOpcode(), V: Res,
4093 DestTy: II->getType());
4094 return replaceInstUsesWith(I&: CI, V: Res);
4095 }
4096 }
4097 }
4098 [[fallthrough]];
4099 }
4100 case Intrinsic::vector_reduce_smin:
4101 case Intrinsic::vector_reduce_smax: {
4102 if (IID == Intrinsic::vector_reduce_smin ||
4103 IID == Intrinsic::vector_reduce_smax) {
4104 // SMin/SMax reduction over the vector with (potentially-extended)
4105 // i1 element type is actually a (potentially-extended)
4106 // logical `and`/`or` reduction over the original non-extended value:
4107 // vector_reduce_s{min,max}(<n x i1>)
4108 // -->
4109 // vector_reduce_{or,and}(<n x i1>)
4110 // and
4111 // vector_reduce_s{min,max}(sext(<n x i1>))
4112 // -->
4113 // sext(vector_reduce_{or,and}(<n x i1>))
4114 // and
4115 // vector_reduce_s{min,max}(zext(<n x i1>))
4116 // -->
4117 // zext(vector_reduce_{and,or}(<n x i1>))
4118 Value *Arg = II->getArgOperand(i: 0);
4119 Value *Vect;
4120
4121 if (Value *NewOp =
4122 simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) {
4123 replaceUse(U&: II->getOperandUse(i: 0), NewValue: NewOp);
4124 return II;
4125 }
4126
4127 if (match(V: Arg, P: m_ZExtOrSExtOrSelf(Op: m_Value(V&: Vect)))) {
4128 if (auto *VTy = dyn_cast<VectorType>(Val: Vect->getType()))
4129 if (VTy->getElementType() == Builder.getInt1Ty()) {
4130 Instruction::CastOps ExtOpc = Instruction::CastOps::CastOpsEnd;
4131 if (Arg != Vect)
4132 ExtOpc = cast<CastInst>(Val: Arg)->getOpcode();
4133 Value *Res = ((IID == Intrinsic::vector_reduce_smin) ==
4134 (ExtOpc == Instruction::CastOps::ZExt))
4135 ? Builder.CreateAndReduce(Src: Vect)
4136 : Builder.CreateOrReduce(Src: Vect);
4137 if (Arg != Vect)
4138 Res = Builder.CreateCast(Op: ExtOpc, V: Res, DestTy: II->getType());
4139 return replaceInstUsesWith(I&: CI, V: Res);
4140 }
4141 }
4142 }
4143 [[fallthrough]];
4144 }
4145 case Intrinsic::vector_reduce_fmax:
4146 case Intrinsic::vector_reduce_fmin:
4147 case Intrinsic::vector_reduce_fadd:
4148 case Intrinsic::vector_reduce_fmul: {
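    // Lane reordering in the reduction operand is only legal for the ordered
    // fadd/fmul reductions when the call carries the 'reassoc' flag; fmin/fmax
    // reductions are insensitive to lane order.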
4149 bool CanReorderLanes = (IID != Intrinsic::vector_reduce_fadd &&
4150 IID != Intrinsic::vector_reduce_fmul) ||
4151 II->hasAllowReassoc();
4152 const unsigned ArgIdx = (IID == Intrinsic::vector_reduce_fadd ||
4153 IID == Intrinsic::vector_reduce_fmul)
4154 ? 1
4155 : 0;
4156 Value *Arg = II->getArgOperand(i: ArgIdx);
4157 if (Value *NewOp = simplifyReductionOperand(Arg, CanReorderLanes)) {
4158 replaceUse(U&: II->getOperandUse(i: ArgIdx), NewValue: NewOp);
4159 return nullptr;
4160 }
4161 break;
4162 }
4163 case Intrinsic::is_fpclass: {
4164 if (Instruction *I = foldIntrinsicIsFPClass(II&: *II))
4165 return I;
4166 break;
4167 }
4168 case Intrinsic::threadlocal_address: {
4169 Align MinAlign = getKnownAlignment(V: II->getArgOperand(i: 0), DL, CxtI: II, AC: &AC, DT: &DT);
4170 MaybeAlign Align = II->getRetAlign();
4171 if (MinAlign > Align.valueOrOne()) {
4172 II->addRetAttr(Attr: Attribute::getWithAlignment(Context&: II->getContext(), Alignment: MinAlign));
4173 return II;
4174 }
4175 break;
4176 }
4177 case Intrinsic::frexp: {
4178 Value *X;
4179 // frexp is idempotent on its first result (modulo the struct return), and
4180 // the second result of the outer call is zero because that value is
4181 // already normalized.
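    // For example (illustrative IR):
    //   frexp(extractvalue(frexp(%x), 0)) --> { extractvalue(frexp(%x), 0), 0 }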
4182 if (match(V: II->getArgOperand(i: 0), P: m_ExtractValue<0>(V: m_Value(V&: X)))) {
4183 if (match(V: X, P: m_Intrinsic<Intrinsic::frexp>(Op0: m_Value()))) {
4184 X = Builder.CreateInsertValue(
4185 Agg: X, Val: Constant::getNullValue(Ty: II->getType()->getStructElementType(N: 1)),
4186 Idxs: 1);
4187 return replaceInstUsesWith(I&: *II, V: X);
4188 }
4189 }
4190 break;
4191 }
4192 case Intrinsic::get_active_lane_mask: {
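    // A strictly positive constant base can be folded to a zero base, since
    // lane i is active iff Base + i < N, i.e. iff i < usub_sat(N, Base).
    // For example (illustrative):
    //   get.active.lane.mask(i64 2, i64 10) --> get.active.lane.mask(i64 0, i64 8)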
4193 const APInt *Op0, *Op1;
4194 if (match(V: II->getOperand(i_nocapture: 0), P: m_StrictlyPositive(V&: Op0)) &&
4195 match(V: II->getOperand(i_nocapture: 1), P: m_APInt(Res&: Op1))) {
4196 Type *OpTy = II->getOperand(i_nocapture: 0)->getType();
4197 return replaceInstUsesWith(
4198 I&: *II, V: Builder.CreateIntrinsic(
4199 RetTy: II->getType(), ID: Intrinsic::get_active_lane_mask,
4200 Args: {Constant::getNullValue(Ty: OpTy),
4201 ConstantInt::get(Ty: OpTy, V: Op1->usub_sat(RHS: *Op0))}));
4202 }
4203 break;
4204 }
4205 case Intrinsic::experimental_get_vector_length: {
4206 // get.vector.length(Cnt, MaxLanes) --> Cnt when Cnt <= MaxLanes
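    // For example (illustrative): get.vector.length(i32 3, i32 4, i1 false) --> 3,
    // since Cnt=3 is provably <= the 4 fixed lanes.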
4207 unsigned BitWidth =
4208 std::max(a: II->getArgOperand(i: 0)->getType()->getScalarSizeInBits(),
4209 b: II->getType()->getScalarSizeInBits());
4210 ConstantRange Cnt =
4211 computeConstantRangeIncludingKnownBits(V: II->getArgOperand(i: 0), ForSigned: false,
4212 SQ: SQ.getWithInstruction(I: II))
4213 .zextOrTrunc(BitWidth);
4214 ConstantRange MaxLanes = cast<ConstantInt>(Val: II->getArgOperand(i: 1))
4215 ->getValue()
4216 .zextOrTrunc(width: Cnt.getBitWidth());
4217 if (cast<ConstantInt>(Val: II->getArgOperand(i: 2))->isOne())
4218 MaxLanes = MaxLanes.multiply(
4219 Other: getVScaleRange(F: II->getFunction(), BitWidth: Cnt.getBitWidth()));
4220
4221 if (Cnt.icmp(Pred: CmpInst::ICMP_ULE, Other: MaxLanes))
4222 return replaceInstUsesWith(
4223 I&: *II, V: Builder.CreateZExtOrTrunc(V: II->getArgOperand(i: 0), DestTy: II->getType()));
4224 return nullptr;
4225 }
4226 default: {
4227 // Handle target-specific intrinsics.
4228 std::optional<Instruction *> V = targetInstCombineIntrinsic(II&: *II);
4229 if (V)
4230 return *V;
4231 break;
4232 }
4233 }
4234
4235 // Try to fold intrinsic into select/phi operands. This is legal if:
4236 // * The intrinsic is speculatable.
4237 // * The operand is one of the following:
4238 // - a phi.
4239 // - a select with a scalar condition.
4240 // - a select with a vector condition and II is not a cross lane operation.
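  // For example (illustrative): ctpop(select %c, i32 3, i32 8) can become
  // select %c, i32 2, i32 1 once both arms constant-fold.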
4241 if (isSafeToSpeculativelyExecuteWithVariableReplaced(I: &CI)) {
4242 for (Value *Op : II->args()) {
4243 if (auto *Sel = dyn_cast<SelectInst>(Val: Op)) {
4244 bool IsVectorCond = Sel->getCondition()->getType()->isVectorTy();
4245 if (IsVectorCond &&
4246 (!isNotCrossLaneOperation(I: II) || !II->getType()->isVectorTy()))
4247 continue;
4248 // Don't replace a scalar select with a more expensive vector select if
4249 // we can't simplify both arms of the select.
4250 bool SimplifyBothArms =
4251 !Op->getType()->isVectorTy() && II->getType()->isVectorTy();
4252 if (Instruction *R = FoldOpIntoSelect(
4253 Op&: *II, SI: Sel, /*FoldWithMultiUse=*/false, SimplifyBothArms))
4254 return R;
4255 }
4256 if (auto *Phi = dyn_cast<PHINode>(Val: Op))
4257 if (Instruction *R = foldOpIntoPhi(I&: *II, PN: Phi))
4258 return R;
4259 }
4260 }
4261
4262 if (Instruction *Shuf = foldShuffledIntrinsicOperands(II))
4263 return Shuf;
4264
4265 if (Value *Reverse = foldReversedIntrinsicOperands(II))
4266 return replaceInstUsesWith(I&: *II, V: Reverse);
4267
4268 if (Value *Res = foldIdempotentBinaryIntrinsicRecurrence(IC&: *this, II))
4269 return replaceInstUsesWith(I&: *II, V: Res);
4270
4271 // Some intrinsics (like experimental_gc_statepoint) can be used in an invoke
4272 // context, so they are handled in visitCallBase, which we call here.
4273 return visitCallBase(Call&: *II);
4274}
4275
4276// Fence instruction simplification
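// For example (illustrative): a `fence acquire` that is immediately preceded
// or followed by a `fence seq_cst` in the same system or single-thread
// syncscope is redundant and is removed here.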
4277Instruction *InstCombinerImpl::visitFenceInst(FenceInst &FI) {
4278 auto *NFI = dyn_cast<FenceInst>(Val: FI.getNextNode());
4279 // This check is solely here to handle arbitrary target-dependent syncscopes.
4280 // TODO: Can be removed if this does not matter in practice.
4281 if (NFI && FI.isIdenticalTo(I: NFI))
4282 return eraseInstFromFunction(I&: FI);
4283
4284 // Returns true if FI1 is a fence identical to or stronger than FI2.
4285 auto isIdenticalOrStrongerFence = [](FenceInst *FI1, FenceInst *FI2) {
4286 auto FI1SyncScope = FI1->getSyncScopeID();
4287 // Only consider fences in the same scope, and only system or single-thread scopes.
4288 if (FI1SyncScope != FI2->getSyncScopeID() ||
4289 (FI1SyncScope != SyncScope::System &&
4290 FI1SyncScope != SyncScope::SingleThread))
4291 return false;
4292
4293 return isAtLeastOrStrongerThan(AO: FI1->getOrdering(), Other: FI2->getOrdering());
4294 };
4295 if (NFI && isIdenticalOrStrongerFence(NFI, &FI))
4296 return eraseInstFromFunction(I&: FI);
4297
4298 if (auto *PFI = dyn_cast_or_null<FenceInst>(Val: FI.getPrevNode()))
4299 if (isIdenticalOrStrongerFence(PFI, &FI))
4300 return eraseInstFromFunction(I&: FI);
4301 return nullptr;
4302}
4303
4304// InvokeInst simplification
4305Instruction *InstCombinerImpl::visitInvokeInst(InvokeInst &II) {
4306 return visitCallBase(Call&: II);
4307}
4308
4309// CallBrInst simplification
4310Instruction *InstCombinerImpl::visitCallBrInst(CallBrInst &CBI) {
4311 return visitCallBase(Call&: CBI);
4312}
4313
4314static Value *optimizeModularFormat(CallInst *CI, IRBuilderBase &B) {
4315 if (!CI->hasFnAttr(Kind: "modular-format"))
4316 return nullptr;
4317
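  // The attribute value is a comma-separated list; only fields 2..4 and the
  // trailing list are used here: field 2 is the 1-based index of the first
  // format argument, field 3 the name of the modular entry point, field 4 the
  // implementation-name prefix, and fields 5+ the format "aspects" (e.g.
  // "float") that the call might require.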
4318 SmallVector<StringRef> Args(
4319 llvm::split(Str: CI->getFnAttr(Kind: "modular-format").getValueAsString(), Separator: ','));
4320 // TODO: Make use of the first two arguments
4321 unsigned FirstArgIdx;
4322 [[maybe_unused]] bool Error;
4323 Error = Args[2].getAsInteger(Radix: 10, Result&: FirstArgIdx);
4324 assert(!Error && "invalid first arg index");
4325 --FirstArgIdx;
4326 StringRef FnName = Args[3];
4327 StringRef ImplName = Args[4];
4328 ArrayRef<StringRef> AllAspects = ArrayRef<StringRef>(Args).drop_front(N: 5);
4329
4330 if (AllAspects.empty())
4331 return nullptr;
4332
4333 SmallVector<StringRef> NeededAspects;
4334 for (StringRef Aspect : AllAspects) {
4335 if (Aspect == "float") {
4336 if (llvm::any_of(
4337 Range: llvm::make_range(x: std::next(x: CI->arg_begin(), n: FirstArgIdx),
4338 y: CI->arg_end()),
4339 P: [](Value *V) { return V->getType()->isFloatingPointTy(); }))
4340 NeededAspects.push_back(Elt: "float");
4341 } else {
4342 // Unknown aspects are always considered to be needed.
4343 NeededAspects.push_back(Elt: Aspect);
4344 }
4345 }
4346
4347 if (NeededAspects.size() == AllAspects.size())
4348 return nullptr;
4349
4350 Module *M = CI->getModule();
4351 LLVMContext &Ctx = M->getContext();
4352 Function *Callee = CI->getCalledFunction();
4353 FunctionCallee ModularFn = M->getOrInsertFunction(
4354 Name: FnName, T: Callee->getFunctionType(),
4355 AttributeList: Callee->getAttributes().removeFnAttribute(C&: Ctx, Kind: "modular-format"));
4356 CallInst *New = cast<CallInst>(Val: CI->clone());
4357 New->setCalledFunction(ModularFn);
4358 New->removeFnAttr(Kind: "modular-format");
4359 B.Insert(I: New);
4360
4361 const auto ReferenceAspect = [&](StringRef Aspect) {
4362 SmallString<20> Name = ImplName;
4363 Name += '_';
4364 Name += Aspect;
4365 Function *RelocNoneFn =
4366 Intrinsic::getOrInsertDeclaration(M, id: Intrinsic::reloc_none);
4367 B.CreateCall(Callee: RelocNoneFn,
4368 Args: {MetadataAsValue::get(Context&: Ctx, MD: MDString::get(Context&: Ctx, Str: Name))});
4369 };
4370
4371 llvm::sort(C&: NeededAspects);
4372 for (StringRef Request : NeededAspects)
4373 ReferenceAspect(Request);
4374
4375 return New;
4376}
4377
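/// Try to simplify a call to a recognized library function via
/// LibCallSimplifier (for example, some printf calls can be turned into puts
/// calls) and, failing that, try the "modular-format" rewrite above.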
4378Instruction *InstCombinerImpl::tryOptimizeCall(CallInst *CI) {
4379 if (!CI->getCalledFunction()) return nullptr;
4380
4381 // Skip optimizing notail and musttail calls so
4382 // LibCallSimplifier::optimizeCall doesn't have to preserve those invariants.
4383 // LibCallSimplifier::optimizeCall should try to preserve tail calls though.
4384 if (CI->isMustTailCall() || CI->isNoTailCall())
4385 return nullptr;
4386
4387 auto InstCombineRAUW = [this](Instruction *From, Value *With) {
4388 replaceInstUsesWith(I&: *From, V: With);
4389 };
4390 auto InstCombineErase = [this](Instruction *I) {
4391 eraseInstFromFunction(I&: *I);
4392 };
4393 LibCallSimplifier Simplifier(DL, &TLI, &DT, &DC, &AC, ORE, BFI, PSI,
4394 InstCombineRAUW, InstCombineErase);
4395 if (Value *With = Simplifier.optimizeCall(CI, B&: Builder)) {
4396 ++NumSimplified;
4397 return CI->use_empty() ? CI : replaceInstUsesWith(I&: *CI, V: With);
4398 }
4399 if (Value *With = optimizeModularFormat(CI, B&: Builder)) {
4400 ++NumSimplified;
4401 return CI->use_empty() ? CI : replaceInstUsesWith(I&: *CI, V: With);
4402 }
4403
4404 return nullptr;
4405}
4406
4407static IntrinsicInst *findInitTrampolineFromAlloca(Value *TrampMem) {
4408 // Strip off at most one level of pointer casts, looking for an alloca. This
4409 // is good enough in practice and simpler than handling any number of casts.
4410 Value *Underlying = TrampMem->stripPointerCasts();
4411 if (Underlying != TrampMem &&
4412 (!Underlying->hasOneUse() || Underlying->user_back() != TrampMem))
4413 return nullptr;
4414 if (!isa<AllocaInst>(Val: Underlying))
4415 return nullptr;
4416
4417 IntrinsicInst *InitTrampoline = nullptr;
4418 for (User *U : TrampMem->users()) {
4419 IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: U);
4420 if (!II)
4421 return nullptr;
4422 if (II->getIntrinsicID() == Intrinsic::init_trampoline) {
4423 if (InitTrampoline)
4424 // More than one init_trampoline writes to this value. Give up.
4425 return nullptr;
4426 InitTrampoline = II;
4427 continue;
4428 }
4429 if (II->getIntrinsicID() == Intrinsic::adjust_trampoline)
4430 // Allow any number of calls to adjust.trampoline.
4431 continue;
4432 return nullptr;
4433 }
4434
4435 // No call to init.trampoline found.
4436 if (!InitTrampoline)
4437 return nullptr;
4438
4439 // Check that the alloca is being used in the expected way.
4440 if (InitTrampoline->getOperand(i_nocapture: 0) != TrampMem)
4441 return nullptr;
4442
4443 return InitTrampoline;
4444}
4445
4446static IntrinsicInst *findInitTrampolineFromBB(IntrinsicInst *AdjustTramp,
4447 Value *TrampMem) {
4448 // Visit all the previous instructions in the basic block, and try to find an
4449 // init.trampoline which has a direct path to the adjust.trampoline.
4450 for (BasicBlock::iterator I = AdjustTramp->getIterator(),
4451 E = AdjustTramp->getParent()->begin();
4452 I != E;) {
4453 Instruction *Inst = &*--I;
4454 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val&: I))
4455 if (II->getIntrinsicID() == Intrinsic::init_trampoline &&
4456 II->getOperand(i_nocapture: 0) == TrampMem)
4457 return II;
4458 if (Inst->mayWriteToMemory())
4459 return nullptr;
4460 }
4461 return nullptr;
4462}
4463
4464// Given a call to llvm.adjust.trampoline, find and return the corresponding
4465// call to llvm.init.trampoline if the call to the trampoline can be optimized
4466// to a direct call to a function. Otherwise return NULL.
4467static IntrinsicInst *findInitTrampoline(Value *Callee) {
4468 Callee = Callee->stripPointerCasts();
4469 IntrinsicInst *AdjustTramp = dyn_cast<IntrinsicInst>(Val: Callee);
4470 if (!AdjustTramp ||
4471 AdjustTramp->getIntrinsicID() != Intrinsic::adjust_trampoline)
4472 return nullptr;
4473
4474 Value *TrampMem = AdjustTramp->getOperand(i_nocapture: 0);
4475
4476 if (IntrinsicInst *IT = findInitTrampolineFromAlloca(TrampMem))
4477 return IT;
4478 if (IntrinsicInst *IT = findInitTrampolineFromBB(AdjustTramp, TrampMem))
4479 return IT;
4480 return nullptr;
4481}
4482
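/// If the called operand is (a no-op int-to-ptr cast of) a ptrauth.sign or
/// ptrauth.resign intrinsic whose result schema matches the call's "ptrauth"
/// operand bundle, call the underlying pointer instead, keeping an adjusted
/// "ptrauth" bundle in the resign case.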
4483Instruction *InstCombinerImpl::foldPtrAuthIntrinsicCallee(CallBase &Call) {
4484 const Value *Callee = Call.getCalledOperand();
4485 const auto *IPC = dyn_cast<IntToPtrInst>(Val: Callee);
4486 if (!IPC || !IPC->isNoopCast(DL))
4487 return nullptr;
4488
4489 const auto *II = dyn_cast<IntrinsicInst>(Val: IPC->getOperand(i_nocapture: 0));
4490 if (!II)
4491 return nullptr;
4492
4493 Intrinsic::ID IIID = II->getIntrinsicID();
4494 if (IIID != Intrinsic::ptrauth_resign && IIID != Intrinsic::ptrauth_sign)
4495 return nullptr;
4496
4497 // Isolate the ptrauth bundle from the others.
4498 std::optional<OperandBundleUse> PtrAuthBundleOrNone;
4499 SmallVector<OperandBundleDef, 2> NewBundles;
4500 for (unsigned BI = 0, BE = Call.getNumOperandBundles(); BI != BE; ++BI) {
4501 OperandBundleUse Bundle = Call.getOperandBundleAt(Index: BI);
4502 if (Bundle.getTagID() == LLVMContext::OB_ptrauth)
4503 PtrAuthBundleOrNone = Bundle;
4504 else
4505 NewBundles.emplace_back(Args&: Bundle);
4506 }
4507
4508 if (!PtrAuthBundleOrNone)
4509 return nullptr;
4510
4511 Value *NewCallee = nullptr;
4512 switch (IIID) {
4513 // call(ptrauth.resign(p)), ["ptrauth"()] -> call p, ["ptrauth"()]
4514 // assuming the call bundle and the sign operands match.
4515 case Intrinsic::ptrauth_resign: {
4516 // Resign result key should match bundle.
4517 if (II->getOperand(i_nocapture: 3) != PtrAuthBundleOrNone->Inputs[0])
4518 return nullptr;
4519 // Resign result discriminator should match bundle.
4520 if (II->getOperand(i_nocapture: 4) != PtrAuthBundleOrNone->Inputs[1])
4521 return nullptr;
4522
4523 // Resign input (auth) key should also match: we can't change the key on
4524 // the new call we're generating, because we don't know what keys are valid.
4525 if (II->getOperand(i_nocapture: 1) != PtrAuthBundleOrNone->Inputs[0])
4526 return nullptr;
4527
4528 Value *NewBundleOps[] = {II->getOperand(i_nocapture: 1), II->getOperand(i_nocapture: 2)};
4529 NewBundles.emplace_back(Args: "ptrauth", Args&: NewBundleOps);
4530 NewCallee = II->getOperand(i_nocapture: 0);
4531 break;
4532 }
4533
4534 // call(ptrauth.sign(p)), ["ptrauth"()] -> call p
4535 // assuming the call bundle and the sign operands match.
4536 // Non-ptrauth indirect calls are undesirable, but so is ptrauth.sign.
4537 case Intrinsic::ptrauth_sign: {
4538 // Sign key should match bundle.
4539 if (II->getOperand(i_nocapture: 1) != PtrAuthBundleOrNone->Inputs[0])
4540 return nullptr;
4541 // Sign discriminator should match bundle.
4542 if (II->getOperand(i_nocapture: 2) != PtrAuthBundleOrNone->Inputs[1])
4543 return nullptr;
4544 NewCallee = II->getOperand(i_nocapture: 0);
4545 break;
4546 }
4547 default:
4548 llvm_unreachable("unexpected intrinsic ID");
4549 }
4550
4551 if (!NewCallee)
4552 return nullptr;
4553
4554 NewCallee = Builder.CreateBitOrPointerCast(V: NewCallee, DestTy: Callee->getType());
4555 CallBase *NewCall = CallBase::Create(CB: &Call, Bundles: NewBundles);
4556 NewCall->setCalledOperand(NewCallee);
4557 return NewCall;
4558}
4559
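/// If the callee is a ptrauth constant wrapping a function and the call's
/// "ptrauth" bundle matches the constant's key and discriminator, drop the
/// bundle and turn this into a direct call to that function.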
4560Instruction *InstCombinerImpl::foldPtrAuthConstantCallee(CallBase &Call) {
4561 auto *CPA = dyn_cast<ConstantPtrAuth>(Val: Call.getCalledOperand());
4562 if (!CPA)
4563 return nullptr;
4564
4565 auto *CalleeF = dyn_cast<Function>(Val: CPA->getPointer());
4566 // If the ptrauth constant isn't based on a function pointer, bail out.
4567 if (!CalleeF)
4568 return nullptr;
4569
4570 // Inspect the call ptrauth bundle to check it matches the ptrauth constant.
4571 auto PAB = Call.getOperandBundle(ID: LLVMContext::OB_ptrauth);
4572 if (!PAB)
4573 return nullptr;
4574
4575 auto *Key = cast<ConstantInt>(Val: PAB->Inputs[0]);
4576 Value *Discriminator = PAB->Inputs[1];
4577
4578 // If the bundle doesn't match, this is probably going to fail to auth.
4579 if (!CPA->isKnownCompatibleWith(Key, Discriminator, DL))
4580 return nullptr;
4581
4582 // If the bundle matches the constant, proceed in making this a direct call.
4583 auto *NewCall = CallBase::removeOperandBundle(CB: &Call, ID: LLVMContext::OB_ptrauth);
4584 NewCall->setCalledOperand(CalleeF);
4585 return NewCall;
4586}
4587
4588bool InstCombinerImpl::annotateAnyAllocSite(CallBase &Call,
4589 const TargetLibraryInfo *TLI) {
4590 // Note: We only handle cases which can't be driven from generic attributes
4591 // here. So, for example, nonnull and noalias (which are common properties
4592 // of some allocation functions) are expected to be handled via annotation
4593 // of the respective allocator declaration with generic attributes.
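  // For example (illustrative): an allocation call with a known constant size
  // of 64 bytes gets dereferenceable(64) if its return is already known
  // nonnull, and dereferenceable_or_null(64) otherwise; a constant
  // power-of-two allocation alignment is propagated as an align return
  // attribute when it improves on the existing one.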
4594 bool Changed = false;
4595
4596 if (!Call.getType()->isPointerTy())
4597 return Changed;
4598
4599 std::optional<APInt> Size = getAllocSize(CB: &Call, TLI);
4600 if (Size && *Size != 0) {
4601 // TODO: We really should just emit deref_or_null here and then
4602 // let the generic inference code combine that with nonnull.
4603 if (Call.hasRetAttr(Kind: Attribute::NonNull)) {
4604 Changed = !Call.hasRetAttr(Kind: Attribute::Dereferenceable);
4605 Call.addRetAttr(Attr: Attribute::getWithDereferenceableBytes(
4606 Context&: Call.getContext(), Bytes: Size->getLimitedValue()));
4607 } else {
4608 Changed = !Call.hasRetAttr(Kind: Attribute::DereferenceableOrNull);
4609 Call.addRetAttr(Attr: Attribute::getWithDereferenceableOrNullBytes(
4610 Context&: Call.getContext(), Bytes: Size->getLimitedValue()));
4611 }
4612 }
4613
4614 // Add alignment attribute if alignment is a power of two constant.
4615 Value *Alignment = getAllocAlignment(V: &Call, TLI);
4616 if (!Alignment)
4617 return Changed;
4618
4619 ConstantInt *AlignOpC = dyn_cast<ConstantInt>(Val: Alignment);
4620 if (AlignOpC && AlignOpC->getValue().ult(RHS: llvm::Value::MaximumAlignment)) {
4621 uint64_t AlignmentVal = AlignOpC->getZExtValue();
4622 if (llvm::isPowerOf2_64(Value: AlignmentVal)) {
4623 Align ExistingAlign = Call.getRetAlign().valueOrOne();
4624 Align NewAlign = Align(AlignmentVal);
4625 if (NewAlign > ExistingAlign) {
4626 Call.addRetAttr(
4627 Attr: Attribute::getWithAlignment(Context&: Call.getContext(), Alignment: NewAlign));
4628 Changed = true;
4629 }
4630 }
4631 }
4632 return Changed;
4633}
4634
4635/// Improvements for call, callbr and invoke instructions.
4636Instruction *InstCombinerImpl::visitCallBase(CallBase &Call) {
4637 bool Changed = annotateAnyAllocSite(Call, TLI: &TLI);
4638
4639 // Mark any parameters that are known to be non-null with the nonnull
4640 // attribute. This is helpful for inlining calls to functions with null
4641 // checks on their arguments.
4642 SmallVector<unsigned, 4> ArgNos;
4643 unsigned ArgNo = 0;
4644
4645 for (Value *V : Call.args()) {
4646 if (V->getType()->isPointerTy()) {
4647 // Simplify the nonnull operand if the parameter is known to be nonnull.
4648 // Otherwise, try to infer nonnull for it.
4649 bool HasDereferenceable = Call.getParamDereferenceableBytes(i: ArgNo) > 0;
4650 if (Call.paramHasAttr(ArgNo, Kind: Attribute::NonNull) ||
4651 (HasDereferenceable &&
4652 !NullPointerIsDefined(F: Call.getFunction(),
4653 AS: V->getType()->getPointerAddressSpace()))) {
4654 if (Value *Res = simplifyNonNullOperand(V, HasDereferenceable)) {
4655 replaceOperand(I&: Call, OpNum: ArgNo, V: Res);
4656 Changed = true;
4657 }
4658 } else if (isKnownNonZero(V,
4659 Q: getSimplifyQuery().getWithInstruction(I: &Call))) {
4660 ArgNos.push_back(Elt: ArgNo);
4661 }
4662 }
4663 ArgNo++;
4664 }
4665
4666 assert(ArgNo == Call.arg_size() && "Call arguments not processed correctly.");
4667
4668 if (!ArgNos.empty()) {
4669 AttributeList AS = Call.getAttributes();
4670 LLVMContext &Ctx = Call.getContext();
4671 AS = AS.addParamAttribute(C&: Ctx, ArgNos,
4672 A: Attribute::get(Context&: Ctx, Kind: Attribute::NonNull));
4673 Call.setAttributes(AS);
4674 Changed = true;
4675 }
4676
4677 // If the callee is a pointer to a function, attempt to move any casts to the
4678 // arguments of the call/callbr/invoke.
4679 Value *Callee = Call.getCalledOperand();
4680 Function *CalleeF = dyn_cast<Function>(Val: Callee);
4681 if ((!CalleeF || CalleeF->getFunctionType() != Call.getFunctionType()) &&
4682 transformConstExprCastCall(Call))
4683 return nullptr;
4684
4685 if (CalleeF) {
4686 // Remove the convergent attr on calls when the callee is not convergent.
4687 if (Call.isConvergent() && !CalleeF->isConvergent() &&
4688 !CalleeF->isIntrinsic()) {
4689 LLVM_DEBUG(dbgs() << "Removing convergent attr from instr " << Call
4690 << "\n");
4691 Call.setNotConvergent();
4692 return &Call;
4693 }
4694
4695 // If the call and callee calling conventions don't match, and neither one
4696 // of the calling conventions is compatible with the C calling convention,
4697 // this call must be unreachable, as the call is undefined.
4698 if ((CalleeF->getCallingConv() != Call.getCallingConv() &&
4699 !(CalleeF->getCallingConv() == llvm::CallingConv::C &&
4700 TargetLibraryInfoImpl::isCallingConvCCompatible(CI: &Call)) &&
4701 !(Call.getCallingConv() == llvm::CallingConv::C &&
4702 TargetLibraryInfoImpl::isCallingConvCCompatible(Callee: CalleeF))) &&
4703 // Only do this for calls to a function with a body. A prototype may
4704 // not actually end up matching the implementation's calling conv for a
4705 // variety of reasons (e.g. it may be written in assembly).
4706 !CalleeF->isDeclaration()) {
4707 Instruction *OldCall = &Call;
4708 CreateNonTerminatorUnreachable(InsertAt: OldCall);
4709 // If OldCall does not return void, replace its uses with poison.
4710 // This allows ValueHandlers and custom metadata to adjust themselves.
4711 if (!OldCall->getType()->isVoidTy())
4712 replaceInstUsesWith(I&: *OldCall, V: PoisonValue::get(T: OldCall->getType()));
4713 if (isa<CallInst>(Val: OldCall))
4714 return eraseInstFromFunction(I&: *OldCall);
4715
4716 // We cannot remove an invoke or a callbr, because it would change the
4717 // CFG; just change the callee to a null pointer.
4718 cast<CallBase>(Val: OldCall)->setCalledFunction(
4719 FTy: CalleeF->getFunctionType(),
4720 Fn: Constant::getNullValue(Ty: CalleeF->getType()));
4721 return nullptr;
4722 }
4723 }
4724
4725 // Calling a null function pointer is undefined if a null address isn't
4726 // dereferenceable.
4727 if ((isa<ConstantPointerNull>(Val: Callee) &&
4728 !NullPointerIsDefined(F: Call.getFunction())) ||
4729 isa<UndefValue>(Val: Callee)) {
4730 // If Call does not return void, replace its uses with poison.
4731 // This allows ValueHandlers and custom metadata to adjust themselves.
4732 if (!Call.getType()->isVoidTy())
4733 replaceInstUsesWith(I&: Call, V: PoisonValue::get(T: Call.getType()));
4734
4735 if (Call.isTerminator()) {
4736 // Can't remove an invoke or callbr because we cannot change the CFG.
4737 return nullptr;
4738 }
4739
4740 // This instruction is not reachable, just remove it.
4741 CreateNonTerminatorUnreachable(InsertAt: &Call);
4742 return eraseInstFromFunction(I&: Call);
4743 }
4744
4745 if (IntrinsicInst *II = findInitTrampoline(Callee))
4746 return transformCallThroughTrampoline(Call, Tramp&: *II);
4747
4748 // Combine calls involving pointer authentication intrinsics.
4749 if (Instruction *NewCall = foldPtrAuthIntrinsicCallee(Call))
4750 return NewCall;
4751
4752 // Combine calls to ptrauth constants.
4753 if (Instruction *NewCall = foldPtrAuthConstantCallee(Call))
4754 return NewCall;
4755
4756 if (isa<InlineAsm>(Val: Callee) && !Call.doesNotThrow()) {
4757 InlineAsm *IA = cast<InlineAsm>(Val: Callee);
4758 if (!IA->canThrow()) {
4759 // Normal inline asm calls cannot throw - mark them
4760 // 'nounwind'.
4761 Call.setDoesNotThrow();
4762 Changed = true;
4763 }
4764 }
4765
4766 // Try to optimize the call if possible; we require DataLayout for most of
4767 // this. None of these calls are seen as possibly dead, so go ahead and
4768 // delete the instruction now.
4769 if (CallInst *CI = dyn_cast<CallInst>(Val: &Call)) {
4770 Instruction *I = tryOptimizeCall(CI);
4771 // If we changed something, return the result; otherwise, fall through
4772 // to the remaining checks.
4773 if (I) return eraseInstFromFunction(I&: *I);
4774 }
4775
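  // If the callee marks one of its arguments 'returned' and this is not a
  // musttail call, uses of the call can be replaced with that argument
  // (bit-or-pointer cast to the call's type if necessary).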
4776 if (!Call.use_empty() && !Call.isMustTailCall())
4777 if (Value *ReturnedArg = Call.getReturnedArgOperand()) {
4778 Type *CallTy = Call.getType();
4779 Type *RetArgTy = ReturnedArg->getType();
4780 if (RetArgTy->canLosslesslyBitCastTo(Ty: CallTy))
4781 return replaceInstUsesWith(
4782 I&: Call, V: Builder.CreateBitOrPointerCast(V: ReturnedArg, DestTy: CallTy));
4783 }
4784
4785 // Drop unnecessary callee_type metadata from calls that were converted
4786 // into direct calls.
4787 if (Call.getMetadata(KindID: LLVMContext::MD_callee_type) && !Call.isIndirectCall()) {
4788 Call.setMetadata(KindID: LLVMContext::MD_callee_type, Node: nullptr);
4789 Changed = true;
4790 }
4791
4792 // Drop unnecessary kcfi operand bundles from calls that were converted
4793 // into direct calls.
4794 auto Bundle = Call.getOperandBundle(ID: LLVMContext::OB_kcfi);
4795 if (Bundle && !Call.isIndirectCall()) {
4796 DEBUG_WITH_TYPE(DEBUG_TYPE "-kcfi", {
4797 if (CalleeF) {
4798 ConstantInt *FunctionType = nullptr;
4799 ConstantInt *ExpectedType = cast<ConstantInt>(Bundle->Inputs[0]);
4800
4801 if (MDNode *MD = CalleeF->getMetadata(LLVMContext::MD_kcfi_type))
4802 FunctionType = mdconst::extract<ConstantInt>(MD->getOperand(0));
4803
4804 if (FunctionType &&
4805 FunctionType->getZExtValue() != ExpectedType->getZExtValue())
4806 dbgs() << Call.getModule()->getName()
4807 << ": warning: kcfi: " << Call.getCaller()->getName()
4808 << ": call to " << CalleeF->getName()
4809 << " using a mismatching function pointer type\n";
4810 }
4811 });
4812
4813 return CallBase::removeOperandBundle(CB: &Call, ID: LLVMContext::OB_kcfi);
4814 }
4815
4816 if (isRemovableAlloc(V: &Call, TLI: &TLI))
4817 return visitAllocSite(FI&: Call);
4818
4819 // Handle intrinsics which can be used in both call and invoke context.
4820 switch (Call.getIntrinsicID()) {
4821 case Intrinsic::experimental_gc_statepoint: {
4822 GCStatepointInst &GCSP = *cast<GCStatepointInst>(Val: &Call);
4823 SmallPtrSet<Value *, 32> LiveGcValues;
4824 for (const GCRelocateInst *Reloc : GCSP.getGCRelocates()) {
4825 GCRelocateInst &GCR = *const_cast<GCRelocateInst *>(Reloc);
4826
4827 // Remove the relocation if unused.
4828 if (GCR.use_empty()) {
4829 eraseInstFromFunction(I&: GCR);
4830 continue;
4831 }
4832
4833 Value *DerivedPtr = GCR.getDerivedPtr();
4834 Value *BasePtr = GCR.getBasePtr();
4835
4836 // Undef is undef, even after relocation.
4837 if (isa<UndefValue>(Val: DerivedPtr) || isa<UndefValue>(Val: BasePtr)) {
4838 replaceInstUsesWith(I&: GCR, V: UndefValue::get(T: GCR.getType()));
4839 eraseInstFromFunction(I&: GCR);
4840 continue;
4841 }
4842
4843 if (auto *PT = dyn_cast<PointerType>(Val: GCR.getType())) {
4844 // The relocation of null will be null for almost any collector.
4845 // TODO: provide a hook for this in GCStrategy; there might be some
4846 // unusual collector for which this property does not hold.
4847 if (isa<ConstantPointerNull>(Val: DerivedPtr)) {
4848 // Use null-pointer of gc_relocate's type to replace it.
4849 replaceInstUsesWith(I&: GCR, V: ConstantPointerNull::get(T: PT));
4850 eraseInstFromFunction(I&: GCR);
4851 continue;
4852 }
4853
4854 // isKnownNonNull -> nonnull attribute
4855 if (!GCR.hasRetAttr(Kind: Attribute::NonNull) &&
4856 isKnownNonZero(V: DerivedPtr,
4857 Q: getSimplifyQuery().getWithInstruction(I: &Call))) {
4858 GCR.addRetAttr(Kind: Attribute::NonNull);
4859 // We discovered a new fact, so re-check the users.
4860 Worklist.pushUsersToWorkList(I&: GCR);
4861 }
4862 }
4863
4864 // If we have two copies of the same pointer in the statepoint argument
4865 // list, canonicalize to one. This may let us common gc.relocates.
4866 if (GCR.getBasePtr() == GCR.getDerivedPtr() &&
4867 GCR.getBasePtrIndex() != GCR.getDerivedPtrIndex()) {
4868 auto *OpIntTy = GCR.getOperand(i_nocapture: 2)->getType();
4869 GCR.setOperand(i_nocapture: 2, Val_nocapture: ConstantInt::get(Ty: OpIntTy, V: GCR.getBasePtrIndex()));
4870 }
4871
4872 // TODO: bitcast(relocate(p)) -> relocate(bitcast(p))
4873 // Canonicalize on the type from the uses to the defs
4874
4875 // TODO: relocate((gep p, C, C2, ...)) -> gep(relocate(p), C, C2, ...)
4876 LiveGcValues.insert(Ptr: BasePtr);
4877 LiveGcValues.insert(Ptr: DerivedPtr);
4878 }
4879 std::optional<OperandBundleUse> Bundle =
4880 GCSP.getOperandBundle(ID: LLVMContext::OB_gc_live);
4881 unsigned NumOfGCLives = LiveGcValues.size();
4882 if (!Bundle || NumOfGCLives == Bundle->Inputs.size())
4883 break;
4884 // We can reduce the size of the gc-live bundle.
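    // For example (illustrative): if the "gc-live" bundle lists (p, q, p) but
    // only p is still referenced by a gc.relocate, rebuild the statepoint with
    // a "gc-live" bundle of just (p) and remap the indices in the remaining
    // relocates.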
4885 DenseMap<Value *, unsigned> Val2Idx;
4886 std::vector<Value *> NewLiveGc;
4887 for (Value *V : Bundle->Inputs) {
4888 auto [It, Inserted] = Val2Idx.try_emplace(Key: V);
4889 if (!Inserted)
4890 continue;
4891 if (LiveGcValues.count(Ptr: V)) {
4892 It->second = NewLiveGc.size();
4893 NewLiveGc.push_back(x: V);
4894 } else
4895 It->second = NumOfGCLives;
4896 }
4897 // Update all gc.relocates
4898 for (const GCRelocateInst *Reloc : GCSP.getGCRelocates()) {
4899 GCRelocateInst &GCR = *const_cast<GCRelocateInst *>(Reloc);
4900 Value *BasePtr = GCR.getBasePtr();
4901 assert(Val2Idx.count(BasePtr) && Val2Idx[BasePtr] != NumOfGCLives &&
4902 "Missed live gc for base pointer");
4903 auto *OpIntTy1 = GCR.getOperand(i_nocapture: 1)->getType();
4904 GCR.setOperand(i_nocapture: 1, Val_nocapture: ConstantInt::get(Ty: OpIntTy1, V: Val2Idx[BasePtr]));
4905 Value *DerivedPtr = GCR.getDerivedPtr();
4906 assert(Val2Idx.count(DerivedPtr) && Val2Idx[DerivedPtr] != NumOfGCLives &&
4907 "Missed live gc for derived pointer");
4908 auto *OpIntTy2 = GCR.getOperand(i_nocapture: 2)->getType();
4909 GCR.setOperand(i_nocapture: 2, Val_nocapture: ConstantInt::get(Ty: OpIntTy2, V: Val2Idx[DerivedPtr]));
4910 }
4911 // Create new statepoint instruction.
4912 OperandBundleDef NewBundle("gc-live", std::move(NewLiveGc));
4913 return CallBase::Create(CB: &Call, Bundle: NewBundle);
4914 }
4915 default: { break; }
4916 }
4917
4918 return Changed ? &Call : nullptr;
4919}
4920
4921/// If the callee is a constexpr cast of a function, attempt to move the cast to
4922/// the arguments of the call/invoke.
4923/// CallBrInst is not supported.
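/// For example (illustrative): a call whose call-site type is `i32 (i32)` but
/// whose stripped callee is defined as `float (float)` becomes a direct call
/// with bitcasts inserted for the argument and the returned value.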
4924bool InstCombinerImpl::transformConstExprCastCall(CallBase &Call) {
4925 auto *Callee =
4926 dyn_cast<Function>(Val: Call.getCalledOperand()->stripPointerCasts());
4927 if (!Callee)
4928 return false;
4929
4930 assert(!isa<CallBrInst>(Call) &&
4931 "CallBr's don't have a single point after a def to insert at");
4932
4933 // Don't perform the transform for declarations, which may not be fully
4934 // accurate. For example, void @foo() is commonly used as a placeholder for
4935 // unknown prototypes.
4936 if (Callee->isDeclaration())
4937 return false;
4938
4939 // If this is a call to a thunk function, don't remove the cast. Thunks are
4940 // used to transparently forward all incoming parameters and outgoing return
4941 // values, so it's important to leave the cast in place.
4942 if (Callee->hasFnAttribute(Kind: "thunk"))
4943 return false;
4944
4945 // If this is a call to a naked function, the assembly might be
4946 // using an argument or otherwise relying on the frame layout, so
4947 // changing the function prototype would break it.
4948 if (Callee->hasFnAttribute(Kind: Attribute::Naked))
4949 return false;
4950
4951 // If this is a musttail call, the callee's prototype must match the caller's
4952 // prototype with the exception of pointee types. The code below doesn't
4953 // implement that, so we can't do this transform.
4954 // TODO: Do the transform if it only requires adding pointer casts.
4955 if (Call.isMustTailCall())
4956 return false;
4957
4958 Instruction *Caller = &Call;
4959 const AttributeList &CallerPAL = Call.getAttributes();
4960
4961 // Okay, this is a cast from a function to a different type. Unless doing so
4962 // would cause a type conversion of one of our arguments, change this call to
4963 // be a direct call with the arguments cast to the appropriate types.
4964 FunctionType *FT = Callee->getFunctionType();
4965 Type *OldRetTy = Caller->getType();
4966 Type *NewRetTy = FT->getReturnType();
4967
4968 // Check to see if we are changing the return type...
4969 if (OldRetTy != NewRetTy) {
4970
4971 if (NewRetTy->isStructTy())
4972 return false; // TODO: Handle multiple return values.
4973
4974 if (!CastInst::isBitOrNoopPointerCastable(SrcTy: NewRetTy, DestTy: OldRetTy, DL)) {
4975 if (!Caller->use_empty())
4976 return false; // Cannot transform this return value.
4977 }
4978
4979 if (!CallerPAL.isEmpty() && !Caller->use_empty()) {
4980 AttrBuilder RAttrs(FT->getContext(), CallerPAL.getRetAttrs());
4981 if (RAttrs.overlaps(AM: AttributeFuncs::typeIncompatible(
4982 Ty: NewRetTy, AS: CallerPAL.getRetAttrs())))
4983 return false; // Attribute not compatible with transformed value.
4984 }
4985
4986 // If the callbase is an invoke instruction, and the return value is
4987 // used by a PHI node in a successor, we cannot change the return type of
4988 // the call because there is no place to put the cast instruction (without
4989 // breaking the critical edge). Bail out in this case.
4990 if (!Caller->use_empty()) {
4991 BasicBlock *PhisNotSupportedBlock = nullptr;
4992 if (auto *II = dyn_cast<InvokeInst>(Val: Caller))
4993 PhisNotSupportedBlock = II->getNormalDest();
4994 if (PhisNotSupportedBlock)
4995 for (User *U : Caller->users())
4996 if (PHINode *PN = dyn_cast<PHINode>(Val: U))
4997 if (PN->getParent() == PhisNotSupportedBlock)
4998 return false;
4999 }
5000 }
5001
5002 unsigned NumActualArgs = Call.arg_size();
5003 unsigned NumCommonArgs = std::min(a: FT->getNumParams(), b: NumActualArgs);
5004
5005 // Prevent us turning:
5006 // declare void @takes_i32_inalloca(i32* inalloca)
5007 // call void bitcast (void (i32*)* @takes_i32_inalloca to void (i32)*)(i32 0)
5008 //
5009 // into:
5010 // call void @takes_i32_inalloca(i32* null)
5011 //
5012 // Similarly, avoid folding away bitcasts of byval calls.
5013 if (Callee->getAttributes().hasAttrSomewhere(Kind: Attribute::InAlloca) ||
5014 Callee->getAttributes().hasAttrSomewhere(Kind: Attribute::Preallocated))
5015 return false;
5016
5017 auto AI = Call.arg_begin();
5018 for (unsigned i = 0, e = NumCommonArgs; i != e; ++i, ++AI) {
5019 Type *ParamTy = FT->getParamType(i);
5020 Type *ActTy = (*AI)->getType();
5021
5022 if (!CastInst::isBitOrNoopPointerCastable(SrcTy: ActTy, DestTy: ParamTy, DL))
5023 return false; // Cannot transform this parameter value.
5024
5025 // Check if there are any incompatible attributes we cannot drop safely.
5026 if (AttrBuilder(FT->getContext(), CallerPAL.getParamAttrs(ArgNo: i))
5027 .overlaps(AM: AttributeFuncs::typeIncompatible(
5028 Ty: ParamTy, AS: CallerPAL.getParamAttrs(ArgNo: i),
5029 ASK: AttributeFuncs::ASK_UNSAFE_TO_DROP)))
5030 return false; // Attribute not compatible with transformed value.
5031
5032 if (Call.isInAllocaArgument(ArgNo: i) ||
5033 CallerPAL.hasParamAttr(ArgNo: i, Kind: Attribute::Preallocated))
5034 return false; // Cannot transform to and from inalloca/preallocated.
5035
5036 if (CallerPAL.hasParamAttr(ArgNo: i, Kind: Attribute::SwiftError))
5037 return false;
5038
5039 if (CallerPAL.hasParamAttr(ArgNo: i, Kind: Attribute::ByVal) !=
5040 Callee->getAttributes().hasParamAttr(ArgNo: i, Kind: Attribute::ByVal))
5041 return false; // Cannot transform to or from byval.
5042 }
5043
5044 if (FT->getNumParams() < NumActualArgs && FT->isVarArg() &&
5045 !CallerPAL.isEmpty()) {
5046 // In this case we have more arguments than the new function type, but we
5047 // won't be dropping them. Check that these extra arguments have attributes
5048 // that are compatible with being a vararg call argument.
5049 unsigned SRetIdx;
5050 if (CallerPAL.hasAttrSomewhere(Kind: Attribute::StructRet, Index: &SRetIdx) &&
5051 SRetIdx - AttributeList::FirstArgIndex >= FT->getNumParams())
5052 return false;
5053 }
5054
5055 // Okay, we decided that this is a safe thing to do: go ahead and start
5056 // inserting cast instructions as necessary.
5057 SmallVector<Value *, 8> Args;
5058 SmallVector<AttributeSet, 8> ArgAttrs;
5059 Args.reserve(N: NumActualArgs);
5060 ArgAttrs.reserve(N: NumActualArgs);
5061
5062 // Get any return attributes.
5063 AttrBuilder RAttrs(FT->getContext(), CallerPAL.getRetAttrs());
5064
5065 // If the return value is not being used, the type may not be compatible
5066 // with the existing attributes. Wipe out any problematic attributes.
5067 RAttrs.remove(
5068 AM: AttributeFuncs::typeIncompatible(Ty: NewRetTy, AS: CallerPAL.getRetAttrs()));
5069
5070 LLVMContext &Ctx = Call.getContext();
5071 AI = Call.arg_begin();
5072 for (unsigned i = 0; i != NumCommonArgs; ++i, ++AI) {
5073 Type *ParamTy = FT->getParamType(i);
5074
5075 Value *NewArg = *AI;
5076 if ((*AI)->getType() != ParamTy)
5077 NewArg = Builder.CreateBitOrPointerCast(V: *AI, DestTy: ParamTy);
5078 Args.push_back(Elt: NewArg);
5079
5080 // Add any parameter attributes except the ones incompatible with the new
5081 // type. Note that we made sure all incompatible ones are safe to drop.
5082 AttributeMask IncompatibleAttrs = AttributeFuncs::typeIncompatible(
5083 Ty: ParamTy, AS: CallerPAL.getParamAttrs(ArgNo: i), ASK: AttributeFuncs::ASK_SAFE_TO_DROP);
5084 ArgAttrs.push_back(
5085 Elt: CallerPAL.getParamAttrs(ArgNo: i).removeAttributes(C&: Ctx, AttrsToRemove: IncompatibleAttrs));
5086 }
5087
5088 // If the function takes more arguments than the call was taking, add them
5089 // now.
5090 for (unsigned i = NumCommonArgs; i != FT->getNumParams(); ++i) {
5091 Args.push_back(Elt: Constant::getNullValue(Ty: FT->getParamType(i)));
5092 ArgAttrs.push_back(Elt: AttributeSet());
5093 }
5094
5095 // If the call passes more arguments than the function takes, handle the extras.
5096 if (FT->getNumParams() < NumActualArgs) {
5097 // TODO: if (!FT->isVarArg()) this call may be unreachable. PR14722
5098 if (FT->isVarArg()) {
5099 // Add all of the arguments in their promoted form to the arg list.
5100 for (unsigned i = FT->getNumParams(); i != NumActualArgs; ++i, ++AI) {
5101 Type *PTy = getPromotedType(Ty: (*AI)->getType());
5102 Value *NewArg = *AI;
5103 if (PTy != (*AI)->getType()) {
5104 // Must promote to pass through va_arg area!
5105 Instruction::CastOps opcode =
5106 CastInst::getCastOpcode(Val: *AI, SrcIsSigned: false, Ty: PTy, DstIsSigned: false);
5107 NewArg = Builder.CreateCast(Op: opcode, V: *AI, DestTy: PTy);
5108 }
5109 Args.push_back(Elt: NewArg);
5110
5111 // Add any parameter attributes.
5112 ArgAttrs.push_back(Elt: CallerPAL.getParamAttrs(ArgNo: i));
5113 }
5114 }
5115 }
5116
5117 AttributeSet FnAttrs = CallerPAL.getFnAttrs();
5118
5119 if (NewRetTy->isVoidTy())
5120 Caller->setName(""); // Void type should not have a name.
5121
5122 assert((ArgAttrs.size() == FT->getNumParams() || FT->isVarArg()) &&
5123 "missing argument attributes");
5124 AttributeList NewCallerPAL = AttributeList::get(
5125 C&: Ctx, FnAttrs, RetAttrs: AttributeSet::get(C&: Ctx, B: RAttrs), ArgAttrs);
5126
5127 SmallVector<OperandBundleDef, 1> OpBundles;
5128 Call.getOperandBundlesAsDefs(Defs&: OpBundles);
5129
5130 CallBase *NewCall;
5131 if (InvokeInst *II = dyn_cast<InvokeInst>(Val: Caller)) {
5132 NewCall = Builder.CreateInvoke(Callee, NormalDest: II->getNormalDest(),
5133 UnwindDest: II->getUnwindDest(), Args, OpBundles);
5134 } else {
5135 NewCall = Builder.CreateCall(Callee, Args, OpBundles);
5136 cast<CallInst>(Val: NewCall)->setTailCallKind(
5137 cast<CallInst>(Val: Caller)->getTailCallKind());
5138 }
5139 NewCall->takeName(V: Caller);
5140 NewCall->setCallingConv(Call.getCallingConv());
5141 NewCall->setAttributes(NewCallerPAL);
5142
5143 // Preserve prof metadata if any.
5144 NewCall->copyMetadata(SrcInst: *Caller, WL: {LLVMContext::MD_prof});
5145
5146 // Insert a cast of the return type as necessary.
5147 Instruction *NC = NewCall;
5148 Value *NV = NC;
5149 if (OldRetTy != NV->getType() && !Caller->use_empty()) {
5150 assert(!NV->getType()->isVoidTy());
5151 NV = NC = CastInst::CreateBitOrPointerCast(S: NC, Ty: OldRetTy);
5152 NC->setDebugLoc(Caller->getDebugLoc());
5153
5154 auto OptInsertPt = NewCall->getInsertionPointAfterDef();
5155 assert(OptInsertPt && "No place to insert cast");
5156 InsertNewInstBefore(New: NC, Old: *OptInsertPt);
5157 Worklist.pushUsersToWorkList(I&: *Caller);
5158 }
5159
5160 if (!Caller->use_empty())
5161 replaceInstUsesWith(I&: *Caller, V: NV);
5162 else if (Caller->hasValueHandle()) {
5163 if (OldRetTy == NV->getType())
5164 ValueHandleBase::ValueIsRAUWd(Old: Caller, New: NV);
5165 else
5166 // We cannot call ValueIsRAUWd with a different type, and the
5167 // actual tracked value will disappear.
5168 ValueHandleBase::ValueIsDeleted(V: Caller);
5169 }
5170
5171 eraseInstFromFunction(I&: *Caller);
5172 return true;
5173}
5174
5175/// Turn a call to a function created by init_trampoline / adjust_trampoline
5176/// intrinsic pair into a direct call to the underlying function.
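/// The static chain value recorded by init.trampoline (its third operand) is
/// spliced into the argument list at the callee's 'nest' parameter, so the
/// indirection through the trampoline memory is no longer needed.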
5177Instruction *
5178InstCombinerImpl::transformCallThroughTrampoline(CallBase &Call,
5179 IntrinsicInst &Tramp) {
5180 FunctionType *FTy = Call.getFunctionType();
5181 AttributeList Attrs = Call.getAttributes();
5182
5183 // If the call already has the 'nest' attribute somewhere then give up -
5184 // otherwise 'nest' would occur twice after splicing in the chain.
5185 if (Attrs.hasAttrSomewhere(Kind: Attribute::Nest))
5186 return nullptr;
5187
5188 Function *NestF = cast<Function>(Val: Tramp.getArgOperand(i: 1)->stripPointerCasts());
5189 FunctionType *NestFTy = NestF->getFunctionType();
5190
5191 AttributeList NestAttrs = NestF->getAttributes();
5192 if (!NestAttrs.isEmpty()) {
5193 unsigned NestArgNo = 0;
5194 Type *NestTy = nullptr;
5195 AttributeSet NestAttr;
5196
5197 // Look for a parameter marked with the 'nest' attribute.
5198 for (FunctionType::param_iterator I = NestFTy->param_begin(),
5199 E = NestFTy->param_end();
5200 I != E; ++NestArgNo, ++I) {
5201 AttributeSet AS = NestAttrs.getParamAttrs(ArgNo: NestArgNo);
5202 if (AS.hasAttribute(Kind: Attribute::Nest)) {
5203 // Record the parameter type and any other attributes.
5204 NestTy = *I;
5205 NestAttr = AS;
5206 break;
5207 }
5208 }
5209
5210 if (NestTy) {
5211 std::vector<Value*> NewArgs;
5212 std::vector<AttributeSet> NewArgAttrs;
5213 NewArgs.reserve(n: Call.arg_size() + 1);
5214 NewArgAttrs.reserve(n: Call.arg_size());
5215
5216 // Insert the nest argument into the call argument list, which may
5217 // mean appending it. Likewise for attributes.
5218
5219 {
5220 unsigned ArgNo = 0;
5221 auto I = Call.arg_begin(), E = Call.arg_end();
5222 do {
5223 if (ArgNo == NestArgNo) {
5224 // Add the chain argument and attributes.
5225 Value *NestVal = Tramp.getArgOperand(i: 2);
5226 if (NestVal->getType() != NestTy)
5227 NestVal = Builder.CreateBitCast(V: NestVal, DestTy: NestTy, Name: "nest");
5228 NewArgs.push_back(x: NestVal);
5229 NewArgAttrs.push_back(x: NestAttr);
5230 }
5231
5232 if (I == E)
5233 break;
5234
5235 // Add the original argument and attributes.
5236 NewArgs.push_back(x: *I);
5237 NewArgAttrs.push_back(x: Attrs.getParamAttrs(ArgNo));
5238
5239 ++ArgNo;
5240 ++I;
5241 } while (true);
5242 }
5243
5244 // The trampoline may have been bitcast to a bogus type (FTy).
5245 // Handle this by synthesizing a new function type, equal to FTy
5246 // with the chain parameter inserted.
5247
5248 std::vector<Type*> NewTypes;
5249 NewTypes.reserve(n: FTy->getNumParams()+1);
5250
5251 // Insert the chain's type into the list of parameter types, which may
5252 // mean appending it.
5253 {
5254 unsigned ArgNo = 0;
5255 FunctionType::param_iterator I = FTy->param_begin(),
5256 E = FTy->param_end();
5257
5258 do {
5259 if (ArgNo == NestArgNo)
5260 // Add the chain's type.
5261 NewTypes.push_back(x: NestTy);
5262
5263 if (I == E)
5264 break;
5265
5266 // Add the original type.
5267 NewTypes.push_back(x: *I);
5268
5269 ++ArgNo;
5270 ++I;
5271 } while (true);
5272 }
5273
5274 // Replace the trampoline call with a direct call. Let the generic
5275 // code sort out any function type mismatches.
5276 FunctionType *NewFTy =
5277 FunctionType::get(Result: FTy->getReturnType(), Params: NewTypes, isVarArg: FTy->isVarArg());
5278 AttributeList NewPAL =
5279 AttributeList::get(C&: FTy->getContext(), FnAttrs: Attrs.getFnAttrs(),
5280 RetAttrs: Attrs.getRetAttrs(), ArgAttrs: NewArgAttrs);
5281
5282 SmallVector<OperandBundleDef, 1> OpBundles;
5283 Call.getOperandBundlesAsDefs(Defs&: OpBundles);
5284
5285 Instruction *NewCaller;
5286 if (InvokeInst *II = dyn_cast<InvokeInst>(Val: &Call)) {
5287 NewCaller = InvokeInst::Create(Ty: NewFTy, Func: NestF, IfNormal: II->getNormalDest(),
5288 IfException: II->getUnwindDest(), Args: NewArgs, Bundles: OpBundles);
5289 cast<InvokeInst>(Val: NewCaller)->setCallingConv(II->getCallingConv());
5290 cast<InvokeInst>(Val: NewCaller)->setAttributes(NewPAL);
5291 } else if (CallBrInst *CBI = dyn_cast<CallBrInst>(Val: &Call)) {
5292 NewCaller =
5293 CallBrInst::Create(Ty: NewFTy, Func: NestF, DefaultDest: CBI->getDefaultDest(),
5294 IndirectDests: CBI->getIndirectDests(), Args: NewArgs, Bundles: OpBundles);
5295 cast<CallBrInst>(Val: NewCaller)->setCallingConv(CBI->getCallingConv());
5296 cast<CallBrInst>(Val: NewCaller)->setAttributes(NewPAL);
5297 } else {
5298 NewCaller = CallInst::Create(Ty: NewFTy, Func: NestF, Args: NewArgs, Bundles: OpBundles);
5299 cast<CallInst>(Val: NewCaller)->setTailCallKind(
5300 cast<CallInst>(Val&: Call).getTailCallKind());
5301 cast<CallInst>(Val: NewCaller)->setCallingConv(
5302 cast<CallInst>(Val&: Call).getCallingConv());
5303 cast<CallInst>(Val: NewCaller)->setAttributes(NewPAL);
5304 }
5305 NewCaller->setDebugLoc(Call.getDebugLoc());
5306
5307 return NewCaller;
5308 }
5309 }
5310
5311 // Replace the trampoline call with a direct call. Since there is no 'nest'
5312 // parameter, there is no need to adjust the argument list. Let the generic
5313 // code sort out any function type mismatches.
5314 Call.setCalledFunction(FTy, Fn: NestF);
5315 return &Call;
5316}
5317